├── priv
    └── categories
    │   ├── no_person_or_person.json
    │   ├── coco_with_voc_labels_categories.json
    │   ├── coco_categories.json
    │   └── imagenet_v2_categories.json
├── config
    ├── prod.exs
    ├── test.exs
    ├── dev.exs
    ├── config.exs
    └── runtime.exs
├── test
    ├── test_helper.exs
    ├── assets
    │   ├── cat.jpg
    │   ├── results
    │   │   └── style_transfer
    │   │   │   ├── cat_candy.gt
    │   │   │   ├── cat_mosaic.gt
    │   │   │   ├── cat_udnie.gt
    │   │   │   ├── cat_princess.gt
    │   │   │   ├── cat_candy_fast.gt
    │   │   │   ├── cat_mosaic_fast.gt
    │   │   │   ├── cat_udnie_fast.gt
    │   │   │   └── cat_princess_fast.gt
    │   └── categories.json
    ├── ex_vision
    │   ├── object_detection
    │   │   ├── fasterrcnn_resnet50_fpn_test.exs
    │   │   └── ssdlite320_mobilenetv3_test.exs
    │   ├── instance_segmentation
    │   │   └── maskrcnn_resnet50_fpn_v2_test.exs
    │   ├── semantic_segmentation
    │   │   └── deep_lab_v3_mobilenet_v3_test.exs
    │   ├── classification
    │   │   ├── mobilenet_v3_small_test.exs
    │   │   ├── squeezenet1_1_test.exs
    │   │   ├── efficientnet_v2_l_test.exs
    │   │   ├── efficientnet_v2_m_test.exs
    │   │   └── efficientnet_v2_s_test.exs
    │   ├── keypoint_detection
    │   │   └── keypointrcnn_resnet50_fpn_test.exs
    │   ├── style_transfer
    │   │   └── style_transfer_test.exs
    │   ├── cache_test.exs
    │   └── utils_test.exs
    └── support
    │   └── exvision
    │       ├── test_utils.ex
    │       └── model
    │           └── case.ex
├── .formatter.exs
├── .editorconfig
├── lib
    └── ex_vision
    │   ├── ex_vision.ex
    │   ├── types.ex
    │   ├── types
    │       ├── metadata.ex
    │       ├── bbox.ex
    │       ├── bboxwithmask.ex
    │       └── bboxwithkeypoints.ex
    │   ├── utils
    │       └── macros.ex
    │   ├── classification
    │       ├── squeezenet1_1.ex
    │       ├── efficientnet_v2_l.ex
    │       ├── mobilenet_v3_small.ex
    │       ├── efficientnet_v2_m.ex
    │       ├── efficientnet_v2_s.ex
    │       └── generic_classifier.ex
    │   ├── object_detection
    │       ├── ssdlite320_mobilenetv3.ex
    │       ├── fasterrcnn_resnet50_fpn.ex
    │       └── generic_detector.ex
    │   ├── model
    │       ├── definition
    │       │   ├── parts
    │       │   │   └── with_categories.ex
    │       │   └── ortex.ex
    │       └── definition.ex
    │   ├── semantic_segmentation
    │       └── deep_lab_v3_mobilenet_v3.ex
    │   ├── instance_segmentation
    │       └── maskrcnn_resnet50_fpn_v2.ex
    │   ├── style_transfer
    │       └── style_transfer.ex
    │   ├── keypoint_detection
    │       └── keypointrcnn_resnet50_fpn.ex
    │   ├── model.ex
    │   ├── utils.ex
    │   └── cache.ex
├── .gitattributes
├── devcontainer.json
├── .github
    └── workflows
    │   └── elixir.yml
├── python
    └── exports
    │   ├── deep_lab_v3.py
    │   ├── classification.py
    │   ├── instance_segmentation.py
    │   ├── object_detection.py
    │   └── keypoint_detection.py
├── .gitignore
├── README.md
├── mix.exs
├── examples
    ├── 2-usage-as-nx-serving.livemd
    ├── 3-membrane.livemd
    └── 1-basic-tutorial.livemd
├── .credo.exs
├── LICENSE
└── mix.lock


/priv/categories/no_person_or_person.json:
--------------------------------------------------------------------------------
1 | ["no person", "person"]


--------------------------------------------------------------------------------
/config/prod.exs:
--------------------------------------------------------------------------------
1 | import Config
2 | 
3 | config :logger, level: :info
4 | 


--------------------------------------------------------------------------------
/config/test.exs:
--------------------------------------------------------------------------------
1 | import Config
2 | 
3 | config :ex_vision, cache_path: "models"
4 | 


--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | Mimic.copy(Req)
2 | 
3 | ExUnit.start(capture_log: true)
4 | 


--------------------------------------------------------------------------------
/test/assets/cat.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/cat.jpg


--------------------------------------------------------------------------------
/.formatter.exs:
--------------------------------------------------------------------------------
1 | [
2 |   inputs: [
3 |     "{lib,test,config}/**/*.{ex,exs}",
4 |     ".formatter.exs",
5 |     "*.exs"
6 |   ]
7 | ]
8 | 


--------------------------------------------------------------------------------
/config/dev.exs:
--------------------------------------------------------------------------------
1 | import Config
2 | 
3 | config :ortex, Ortex.Native, features: ["coreml"]
4 | 
5 | config :ex_vision, cache_path: "models"
6 | 


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_candy.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_candy.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_mosaic.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_mosaic.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_udnie.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_udnie.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_princess.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_princess.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_candy_fast.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_candy_fast.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_mosaic_fast.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_mosaic_fast.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_udnie_fast.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_udnie_fast.gt


--------------------------------------------------------------------------------
/test/assets/results/style_transfer/cat_princess_fast.gt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/software-mansion-labs/ex_vision/HEAD/test/assets/results/style_transfer/cat_princess_fast.gt


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | end_of_line = lf
 6 | indent_size = 2
 7 | indent_style = space
 8 | insert_final_newline = true
 9 | max_line_length = 100
10 | tab_width = 2
11 | trim_trailing_whitespace = true
12 | 


--------------------------------------------------------------------------------
/config/config.exs:
--------------------------------------------------------------------------------
 1 | import Config
 2 | 
 3 | config :nx, default_backend: EXLA.Backend
 4 | config :logger, level: :debug
 5 | 
 6 | config :ex_vision,
 7 |   server_url: URI.new!("https://ai.swmansion.com/exvision/files")
 8 | 
 9 | import_config "#{config_env()}.exs"
10 | 


--------------------------------------------------------------------------------
/config/runtime.exs:
--------------------------------------------------------------------------------
1 | import Config
2 | 
3 | config :ex_vision,
4 |   server_url:
5 |     "EX_VISION_HOSTING_URI"
6 |     |> System.get_env("https://ai.swmansion.com/exvision/files")
7 |     |> URI.new!(),
8 |   cache_path: System.get_env("EX_VISION_CACHE_DIR", "/tmp/ex_vision/cache")
9 | 


--------------------------------------------------------------------------------
/lib/ex_vision/ex_vision.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision do
 2 |   @moduledoc false
 3 |   use Application
 4 | 
 5 |   @impl true
 6 |   def start(_type, _args) do
 7 |     children = [{ExVision.Cache, name: ExVision.Cache}]
 8 |     Supervisor.start_link(children, strategy: :one_for_one)
 9 |   end
10 | end
11 | 


--------------------------------------------------------------------------------
/lib/ex_vision/types.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Types do
 2 |   @moduledoc """
 3 |   A collection of commonly used types in ExVision
 4 |   """
 5 | 
 6 |   @typedoc """
 7 |   Type describing image size as a two element tuple `{width, height}`
 8 |   """
 9 |   @type image_size_t() :: {width :: number(), height :: number()}
10 | end
11 | 


--------------------------------------------------------------------------------
/priv/categories/coco_with_voc_labels_categories.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   "__background__",
 3 |   "aeroplane",
 4 |   "bicycle",
 5 |   "bird",
 6 |   "boat",
 7 |   "bottle",
 8 |   "bus",
 9 |   "car",
10 |   "cat",
11 |   "chair",
12 |   "cow",
13 |   "diningtable",
14 |   "dog",
15 |   "horse",
16 |   "motorbike",
17 |   "person",
18 |   "pottedplant",
19 |   "sheep",
20 |   "sofa",
21 |   "train",
22 |   "tvmonitor"
23 | ]
24 | 


--------------------------------------------------------------------------------
/test/ex_vision/object_detection/fasterrcnn_resnet50_fpn_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN_Test do
 2 |   use ExVision.Model.Case, module: ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN
 3 |   use ExVision.TestUtils
 4 |   alias ExVision.Types.BBox
 5 | 
 6 |   @impl true
 7 |   def test_inference_result(result) do
 8 |     assert [%BBox{x1: 135, y1: 22, label: :cat, score: score}] = result
 9 |     assert_floats_equal(score, 1.0)
10 |   end
11 | end
12 | 


--------------------------------------------------------------------------------
/test/ex_vision/object_detection/ssdlite320_mobilenetv3_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.ObjectDetection.Ssdlite320_MobileNetv3Test do
 2 |   use ExVision.Model.Case, module: ExVision.ObjectDetection.Ssdlite320_MobileNetv3
 3 |   use ExVision.TestUtils
 4 | 
 5 |   alias ExVision.Types.BBox
 6 | 
 7 |   @impl true
 8 |   def test_inference_result(result) do
 9 |     assert [%BBox{x1: 132, y1: 12, label: :cat, score: score}] = result
10 |     assert_floats_equal(score, 1.0)
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/lib/ex_vision/types/metadata.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Types.ImageMetadata do
 2 |   @moduledoc """
 3 |   Type describing image metadata that is being passed to `ExVision.Model.Implementation` callbacks.
 4 |   """
 5 | 
 6 |   @enforce_keys [:original_size]
 7 |   defstruct @enforce_keys
 8 | 
 9 |   @typedoc """
10 |   Type describing image metadata that is being passed to `ExVision.Model.Implementation` callbacks.
11 | 
12 |   - `original_size` - the size of the image as it was originally loaded, as a `{width, height}` tuple
13 |   """
14 |   @type t() :: %__MODULE__{
15 |           original_size: ExVision.Types.image_size_t()
16 |         }
17 | end
18 | 


--------------------------------------------------------------------------------
/test/ex_vision/instance_segmentation/maskrcnn_resnet50_fpn_v2_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2_Test do
 2 |   use ExVision.Model.Case, module: ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2
 3 |   use ExVision.TestUtils
 4 |   alias ExVision.Types.BBoxWithMask
 5 | 
 6 |   @impl true
 7 |   def test_inference_result(result) do
 8 |     assert [%BBoxWithMask{x1: 129, y1: 15, label: :cat, score: score, mask: mask}] = result
 9 |     assert_floats_equal(score, 1.0)
10 | 
11 |     assert_floats_equal(nx_mean(mask), 0.37)
12 |   end
13 | 
14 |   defp nx_mean(t), do: t |> Nx.mean() |> Nx.to_number()
15 | end
16 | 


--------------------------------------------------------------------------------
/test/ex_vision/semantic_segmentation/deep_lab_v3_mobilenet_v3_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3Test do
 2 |   use ExVision.Model.Case, module: ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3
 3 |   use ExVision.TestUtils
 4 | 
 5 |   @impl true
 6 |   def test_inference_result(result) do
 7 |     assert %{cat: cat, __background__: background} = result,
 8 |            "The result doesn't contain required classes"
 9 | 
10 |     assert_floats_equal(nx_mean(cat) + nx_mean(background), 1.0)
11 |     assert_floats_equal(nx_mean(cat), 0.36)
12 |   end
13 | 
14 |   defp nx_mean(t), do: t |> Nx.mean() |> Nx.to_number()
15 | end
16 | 


--------------------------------------------------------------------------------
/lib/ex_vision/utils/macros.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Utils.Macros do
 2 |   @moduledoc false
 3 |   defmacro defunimplemented(function, options \\ []) do
 4 |     options =
 5 |       Keyword.validate!(options,
 6 |         with_impl: false,
 7 |         message: "This function is not implemented"
 8 |       )
 9 | 
10 |     quote do
11 |       if unquote(options[:with_impl]) do
12 |         @impl true
13 |       end
14 | 
15 |       # credo:disable-for-next-line
16 |       def unquote(function) do
17 |         raise RuntimeError, message: unquote(options[:message])
18 |       end
19 |     end
20 |   end
21 | 
22 |   defmacro __using__(_opts) do
23 |     quote do
24 |       import ExVision.Utils.Macros, only: :macros
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/squeezenet1_1.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.SqueezeNet1_1 do
 2 |   @moduledoc """
 3 |   An object classifier based on SqueezeNet1_1.
 4 |   Exported from `torchvision`.
 5 |   Weights from Imagenet 1k.
 6 |   """
 7 |   use ExVision.Model.Definition.Ortex,
 8 |     model: "squeezenet1_1_classifier.onnx",
 9 |     categories: "priv/categories/imagenet_v2_categories.json"
10 | 
11 |   use ExVision.Classification.GenericClassifier
12 | 
13 |   @impl true
14 |   def preprocessing(image, _metadata) do
15 |     image
16 |     |> ExVision.Utils.resize({224, 224})
17 |     |> NxImage.normalize(
18 |       Nx.f32([0.485, 0.456, 0.406]),
19 |       Nx.f32([0.229, 0.224, 0.225]),
20 |       channels: :first
21 |     )
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/test/ex_vision/classification/mobilenet_v3_small_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.MobileNetV3Test do
 2 |   @moduledoc false
 3 |   use ExVision.Model.Case, module: ExVision.Classification.MobileNetV3Small
 4 |   use ExVision.TestUtils
 5 | 
 6 |   @expected_result "test/assets/results/classification/mobilenet_v3_small.json"
 7 |                    |> File.read!()
 8 |                    |> Jason.decode!()
 9 |                    |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end)
10 | 
11 |   @impl true
12 |   def test_inference_result(result) do
13 |     assert_float_dicts_equal(@expected_result, result)
14 | 
15 |     top_result = Enum.max_by(result, &elem(&1, 1))
16 |     assert {:tabby, _pred} = top_result
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/test/ex_vision/classification/squeezenet1_1_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.SqueezeNet1_1_Test do
 2 |   @moduledoc false
 3 |   use ExVision.Model.Case, module: ExVision.Classification.SqueezeNet1_1
 4 |   use ExVision.TestUtils
 5 | 
 6 |   @expected_result "test/assets/results/classification/squeezenet1_1.json"
 7 |                    |> File.read!()
 8 |                    |> Jason.decode!()
 9 |                    |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end)
10 | 
11 |   @impl true
12 |   def test_inference_result(result) do
13 |     assert_float_dicts_equal(@expected_result, result, 0.21)
14 | 
15 |     top_result = Enum.max_by(result, &elem(&1, 1))
16 |     assert {:egyptian_cat, _pred} = top_result
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/efficientnet_v2_l.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_L do
 2 |   @moduledoc """
 3 |   An object classifier based on EfficientNet_V2_L.
 4 |   Exported from `torchvision`.
 5 |   Weights from Imagenet 1k.
 6 |   """
 7 |   use ExVision.Model.Definition.Ortex,
 8 |     model: "efficientnet_v2_l_classifier.onnx",
 9 |     categories: "priv/categories/imagenet_v2_categories.json"
10 | 
11 |   use ExVision.Classification.GenericClassifier
12 | 
13 |   @impl true
14 |   def preprocessing(image, _metadata) do
15 |     image
16 |     |> ExVision.Utils.resize({480, 480})
17 |     |> NxImage.normalize(
18 |       Nx.f32([0.5, 0.5, 0.5]),
19 |       Nx.f32([0.5, 0.5, 0.5]),
20 |       channels: :first
21 |     )
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/mobilenet_v3_small.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.MobileNetV3Small do
 2 |   @moduledoc """
 3 |   An object classifier based on MobileNetV3 Small.
 4 |   Exported from `torchvision`.
 5 |   Weights from Imagenet 1k.
 6 |   """
 7 |   use ExVision.Model.Definition.Ortex,
 8 |     model: "mobilenetv3small-classifier.onnx",
 9 |     categories: "priv/categories/imagenet_v2_categories.json"
10 | 
11 |   use ExVision.Classification.GenericClassifier
12 | 
13 |   @impl true
14 |   def preprocessing(image, _metadata) do
15 |     image
16 |     |> ExVision.Utils.resize({224, 224})
17 |     |> NxImage.normalize(
18 |       Nx.f32([0.485, 0.456, 0.406]),
19 |       Nx.f32([0.229, 0.224, 0.225]),
20 |       channels: :first
21 |     )
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/test/ex_vision/classification/efficientnet_v2_l_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_L_Test do
 2 |   @moduledoc false
 3 |   use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_L
 4 |   use ExVision.TestUtils
 5 | 
 6 |   @expected_result "test/assets/results/classification/efficientnet_v2_l.json"
 7 |                    |> File.read!()
 8 |                    |> Jason.decode!()
 9 |                    |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end)
10 | 
11 |   @impl true
12 |   def test_inference_result(result) do
13 |     assert_float_dicts_equal(@expected_result, result)
14 | 
15 |     top_result = Enum.max_by(result, &elem(&1, 1))
16 |     assert {:egyptian_cat, _pred} = top_result
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/test/ex_vision/classification/efficientnet_v2_m_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_M_Test do
 2 |   @moduledoc false
 3 |   use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_M
 4 |   use ExVision.TestUtils
 5 | 
 6 |   @expected_result "test/assets/results/classification/efficientnet_v2_m.json"
 7 |                    |> File.read!()
 8 |                    |> Jason.decode!()
 9 |                    |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end)
10 | 
11 |   @impl true
12 |   def test_inference_result(result) do
13 |     assert_float_dicts_equal(@expected_result, result)
14 | 
15 |     top_result = Enum.max_by(result, &elem(&1, 1))
16 |     assert {:egyptian_cat, _pred} = top_result
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/test/ex_vision/classification/efficientnet_v2_s_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_S_Test do
 2 |   @moduledoc false
 3 |   use ExVision.Model.Case, module: ExVision.Classification.EfficientNet_V2_S
 4 |   use ExVision.TestUtils
 5 | 
 6 |   @expected_result "test/assets/results/classification/efficientnet_v2_s.json"
 7 |                    |> File.read!()
 8 |                    |> Jason.decode!()
 9 |                    |> Map.new(fn {k, v} -> {ExVision.Utils.normalize_category_name(k), v} end)
10 | 
11 |   @impl true
12 |   def test_inference_result(result) do
13 |     assert_float_dicts_equal(@expected_result, result)
14 | 
15 |     top_result = Enum.max_by(result, &elem(&1, 1))
16 |     assert {:egyptian_cat, _pred} = top_result
17 |   end
18 | end
19 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/efficientnet_v2_m.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_M do
 2 |   @moduledoc """
 3 |   An object classifier based on EfficientNet_V2_M.
 4 |   Exported from `torchvision`.
 5 |   Weights from Imagenet 1k.
 6 |   """
 7 |   use ExVision.Model.Definition.Ortex,
 8 |     model: "efficientnet_v2_m_classifier.onnx",
 9 |     categories: "priv/categories/imagenet_v2_categories.json"
10 | 
11 |   use ExVision.Classification.GenericClassifier
12 | 
13 |   @impl true
14 |   def preprocessing(image, _metadata) do
15 |     image
16 |     |> ExVision.Utils.resize({480, 480})
17 |     |> NxImage.normalize(
18 |       Nx.f32([0.485, 0.456, 0.406]),
19 |       Nx.f32([0.229, 0.224, 0.225]),
20 |       channels: :first
21 |     )
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/efficientnet_v2_s.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.EfficientNet_V2_S do
 2 |   @moduledoc """
 3 |   An object classifier based on EfficientNet_V2_S.
 4 |   Exported from `torchvision`.
 5 |   Weights from Imagenet 1k.
 6 |   """
 7 |   use ExVision.Model.Definition.Ortex,
 8 |     model: "efficientnet_v2_s_classifier.onnx",
 9 |     categories: "priv/categories/imagenet_v2_categories.json"
10 | 
11 |   use ExVision.Classification.GenericClassifier
12 | 
13 |   @impl true
14 |   def preprocessing(image, _metadata) do
15 |     image
16 |     |> ExVision.Utils.resize({384, 384})
17 |     |> NxImage.normalize(
18 |       Nx.f32([0.485, 0.456, 0.406]),
19 |       Nx.f32([0.229, 0.224, 0.225]),
20 |       channels: :first
21 |     )
22 |   end
23 | end
24 | 


--------------------------------------------------------------------------------
/lib/ex_vision/object_detection/ssdlite320_mobilenetv3.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.ObjectDetection.Ssdlite320_MobileNetv3 do
 2 |   @moduledoc """
 3 |   SSDLite320 object detector with MobileNetV3 Large architecture, exported from torchvision.
 4 | 
 5 |   `load/1` always loads the model with `batch_size: 1`; a caller-supplied `:batch_size`
 6 |   is overridden and a warning is logged.
 7 |   """
 8 |   use ExVision.Model.Definition.Ortex,
 9 |     model: "ssdlite320_mobilenet_v3_large_object_detector.onnx",
10 |     categories: "priv/categories/coco_categories.json"
11 | 
12 |   use ExVision.ObjectDetection.GenericDetector
13 | 
14 |   require Logger
15 | 
16 |   # Loads the model, forcing `batch_size: 1`. If the caller passed `:batch_size`,
17 |   # a warning is logged before the value is replaced. The warning names the option
18 |   # that is actually checked and overridden (`:batch_size`).
19 |   @impl true
20 |   def load(options \\ []) do
21 |     if Keyword.has_key?(options, :batch_size) do
22 |       Logger.warning(
23 |         "`:batch_size` was given, but this model can only process batch of size 1. Overriding"
24 |       )
25 |     end
26 | 
27 |     # `Keyword.put/3` replaces any existing `:batch_size` entry.
28 |     options
29 |     |> Keyword.put(:batch_size, 1)
30 |     |> default_model_load()
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/lib/ex_vision/object_detection/fasterrcnn_resnet50_fpn.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN do
 2 |   @moduledoc """
 3 |   FasterRCNN object detector with ResNet50 backbone and FPN detection head, exported from torchvision.
 4 | 
 5 |   `load/1` always loads the model with `batch_size: 1`; a caller-supplied `:batch_size`
 6 |   is overridden and a warning is logged.
 7 |   """
 8 |   use ExVision.Model.Definition.Ortex,
 9 |     model: "fasterrcnn_resnet50_fpn_object_detector.onnx",
10 |     categories: "priv/categories/coco_categories.json"
11 | 
12 |   use ExVision.ObjectDetection.GenericDetector
13 | 
14 |   require Logger
15 | 
16 |   # Loads the model, forcing `batch_size: 1`. If the caller passed `:batch_size`,
17 |   # a warning is logged before the value is replaced. The warning names the option
18 |   # that is actually checked and overridden (`:batch_size`).
19 |   @impl true
20 |   def load(options \\ []) do
21 |     if Keyword.has_key?(options, :batch_size) do
22 |       Logger.warning(
23 |         "`:batch_size` was given, but this model can only process batch of size 1. Overriding"
24 |       )
25 |     end
26 | 
27 |     # `Keyword.put/3` replaces any existing `:batch_size` entry.
28 |     options
29 |     |> Keyword.put(:batch_size, 1)
30 |     |> default_model_load()
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | models/**/*.onnx filter=lfs diff=lfs merge=lfs -text
 2 | models/deeplab_v3_mobilenetv3_segmentation.onnx filter=lfs diff=lfs merge=lfs -text
 3 | models/maskrcnn_resnet50_fpn_v2_instance_segmentation.onnx filter=lfs diff=lfs merge=lfs -text
 4 | models/keypointrcnn_resnet50_fpn_keypoint_detector.onnx filter=lfs diff=lfs merge=lfs -text
 5 | models/fasterrcnn_resnet50_fpn_object_detector.onnx filter=lfs diff=lfs merge=lfs -text
 6 | models/mobilenetv3small-classifier.onnx filter=lfs diff=lfs merge=lfs -text
 7 | models/efficientnet_v2_s_classifier.onnx filter=lfs diff=lfs merge=lfs -text
 8 | models/efficientnet_v2_m_classifier.onnx filter=lfs diff=lfs merge=lfs -text
 9 | models/efficientnet_v2_l_classifier.onnx filter=lfs diff=lfs merge=lfs -text
10 | models/squeezenet1_1_classifier.onnx filter=lfs diff=lfs merge=lfs -text
11 | models/ssdlite320_mobilenet_v3_large_object_detector.onnx filter=lfs diff=lfs merge=lfs -text
12 | 


--------------------------------------------------------------------------------
/test/assets/categories.json:
--------------------------------------------------------------------------------
1 | ["__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "N/A", "backpack", "umbrella", "N/A", "N/A", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "N/A", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "N/A", "dining table", "N/A", "N/A", "toilet", "N/A", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "N/A", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]


--------------------------------------------------------------------------------
/lib/ex_vision/model/definition/parts/with_categories.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Model.Definition.Parts.WithCategories do
 2 |   @moduledoc false
 3 |   require Logger
 4 |   alias ExVision.Utils
 5 | 
 6 |   defmacro __using__(options) do
 7 |     options = Keyword.validate!(options, [:name, :categories])
 8 | 
 9 |     unless is_nil(options |> Keyword.fetch!(:categories)) do
10 |       categories = options |> Keyword.fetch!(:categories) |> Utils.load_categories()
11 |       spec = categories |> Enum.uniq() |> Bunch.Typespec.enum_to_alternative()
12 | 
13 |       quote do
14 |         require Bunch.Typespec
15 | 
16 |         @typedoc """
17 |         Type describing all categories recognised by #{unquote(options[:name])}
18 |         """
19 |         @type category_t() :: unquote(spec)
20 | 
21 |         @doc """
22 |         Returns a list of all categories recognised by #{unquote(options[:name])}
23 |         """
24 |         @spec categories() :: [category_t()]
25 |         def categories(), do: unquote(categories)
26 |       end
27 |     end
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/lib/ex_vision/semantic_segmentation/deep_lab_v3_mobilenet_v3.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3 do
 2 |   @moduledoc """
 3 |   A DeepLabV3 semantic segmentation model with a MobileNetV3 backbone. Exported from torchvision.
 4 |   """
 5 |   use ExVision.Model.Definition.Ortex,
 6 |     model: "deeplab_v3_mobilenetv3_segmentation.onnx",
 7 |     categories: "priv/categories/coco_with_voc_labels_categories.json"
 8 | 
 9 |   @typedoc """
10 |   Inference result: a map from each category to a mask tensor for that category.
11 |   """
12 |   @type output_t() :: %{category_t() => Nx.Tensor.t()}
13 | 
14 |   # Resizes the input image to `{224, 224}`; the metadata argument is not used.
15 |   @impl true
16 |   def preprocessing(img, _metadata) do
17 |     ExVision.Utils.resize(img, {224, 224})
18 |   end
19 | 
20 |   # Converts the raw "output" tensor into one mask per category.
21 |   @impl true
22 |   def postprocessing(%{"output" => out}, metadata) do
23 |     # Scale the prediction back to the original image size, then take the index of
24 |     # the highest value along axis 0 for every position.
25 |     # NOTE(review): axis 0 is presumably the class axis after `Nx.squeeze/1` - confirm.
26 |     cls_per_pixel =
27 |       out
28 |       |> Nx.backend_transfer()
29 |       |> NxImage.resize(metadata.original_size, channels: :first)
30 |       |> Nx.squeeze()
31 |       |> Axon.Activations.softmax(axis: [0])
32 |       |> Nx.argmax(axis: 0)
33 | 
34 |     # For the i-th category, the mask marks the positions whose winning index equals i.
35 |     categories()
36 |     |> Enum.with_index()
37 |     |> Map.new(fn {category, i} ->
38 |       {category, cls_per_pixel |> Nx.equal(i)}
39 |     end)
40 |   end
41 | end
42 | 


--------------------------------------------------------------------------------
/test/ex_vision/keypoint_detection/keypointrcnn_resnet50_fpn_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPNTest do
 2 |   use ExVision.Model.Case, module: ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN
 3 |   use ExVision.TestUtils
 4 |   alias ExVision.Types.BBoxWithKeypoints
 5 | 
 6 |   @impl true
 7 |   def test_inference_result(result) do
 8 |     assert [
 9 |              %BBoxWithKeypoints{
10 |                x1: 113,
11 |                y1: 15,
12 |                label: :person,
13 |                score: score1,
14 |                keypoints: keypoints
15 |              },
16 |              %BBoxWithKeypoints{
17 |                x1: 141,
18 |                y1: 167,
19 |                label: :person,
20 |                score: score2
21 |              }
22 |            ] = result
23 | 
24 |     assert_floats_equal(score1, 0.46)
25 |     assert_floats_equal(score2, 0.29)
26 | 
27 |     assert max_keypoint_score(keypoints) < 5
28 |   end
29 | 
30 |   defp max_keypoint_score(keypoints) do
31 |     keypoints |> Enum.map(fn {_name, %{score: score}} -> score end) |> Enum.max()
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu
 3 | {
 4 | 	"name": "Ubuntu",
 5 | 	// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 6 | 	"image": "mcr.microsoft.com/devcontainers/base:jammy",
 7 | 	"features": {
 8 | 		"ghcr.io/devcontainers/features/python:1": {
 9 | 			"installTools": true,
10 | 			"version": "3.11"
11 | 		},
12 | 		"ghcr.io/devcontainers/features/rust:1": {
13 | 			"version": "1.76",
14 | 			"profile": "minimal"
15 | 		},
16 | 		"ghcr.io/devcontainers-contrib/features/elixir-asdf:2": {
17 | 			"elixirVersion": "1.16.2",
18 | 			"erlangVersion": "26.0"
19 | 		}
20 | 	}
21 | 
22 | 	// Features to add to the dev container. More info: https://containers.dev/features.
23 | 	// "features": {},
24 | 
25 | 	// Use 'forwardPorts' to make a list of ports inside the container available locally.
26 | 	// "forwardPorts": [],
27 | 
28 | 	// Use 'postCreateCommand' to run commands after the container is created.
29 | 	// "postCreateCommand": "uname -a",
30 | 
31 | 	// Configure tool-specific properties.
32 | 	// "customizations": {},
33 | 
34 | 	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
35 | 	// "remoteUser": "root"
36 | }
37 | 


--------------------------------------------------------------------------------
/lib/ex_vision/classification/generic_classifier.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Classification.GenericClassifier do
 2 |   @moduledoc false
 3 | 
 4 |   # Contains a default implementation of post processing for TorchVision classifiers
 5 |   # To use: `use ExVision.Classification.GenericClassifier`
 6 | 
 7 |   alias ExVision.Types.ImageMetadata
 8 | 
 9 |   @typep output_t() :: %{atom() => number()}
10 | 
11 |   @spec postprocessing(map(), ImageMetadata.t(), [atom()]) :: output_t()
12 |   def postprocessing(%{"output" => scores}, _metadata, categories) do
13 |     scores
14 |     |> Nx.backend_transfer()
15 |     |> Nx.flatten()
16 |     |> Axon.Activations.softmax(axis: [0])
17 |     |> Nx.to_flat_list()
18 |     |> then(&Enum.zip(categories, &1))
19 |     |> Map.new()
20 |   end
21 | 
22 |   defmacro __using__(_opts) do
23 |     quote do
24 |       @typedoc """
25 |       A type describing the output of a classification model as a mapping of category to probability.
26 |       """
27 |       @type output_t() :: %{category_t() => number()}
28 | 
29 |       @impl true
30 |       @spec postprocessing(map(), ExVision.Types.ImageMetadata.t()) :: output_t()
31 |       def postprocessing(output, metadata) do
32 |         ExVision.Classification.GenericClassifier.postprocessing(output, metadata, categories())
33 |       end
34 | 
35 |       defoverridable postprocessing: 2
36 |     end
37 |   end
38 | end
39 | 


--------------------------------------------------------------------------------
/.github/workflows/elixir.yml:
--------------------------------------------------------------------------------
 1 | name: Elixir CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: ["main"]
 6 |   pull_request:
 7 |     branches: ["main"]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | env:
13 |   MIX_ENV: test
14 | 
15 | jobs:
16 |   build:
17 |     name: Build and test
18 |     runs-on: ubuntu-latest
19 | 
20 |     steps:
21 |       - uses: actions/checkout@v4
22 |       - name: Set up Elixir
23 |         uses: erlef/setup-beam@61e01a43a562a89bfc54c7f9a378ff67b03e4a21 # v1.16.0
24 |         with:
25 |           elixir-version: "1.16.2" # [Required] Define the Elixir version
26 |           otp-version: "26.0" # [Required] Define the Erlang/OTP version
27 |       - name: brndnmtthws/rust-action-rustup
28 |         uses: brndnmtthws/rust-action-rustup@v1.0.0
29 |       - name: Restore dependencies cache
30 |         uses: actions/cache@v3
31 |         with:
32 |           path: deps
33 |           key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }}
34 |           restore-keys: ${{ runner.os }}-mix-
35 |       - name: Install dependencies
36 |         run: mix deps.get && mix deps.compile
37 |       - name: Checks if compiles without warning
38 |         run: mix compile --warnings-as-errors
39 |       - name: Run tests
40 |         run: mix test
41 |       - name: Run Credo
42 |         run: mix credo
43 |       - name: Test formatting
44 |         run: mix format --check-formatted
45 | 


--------------------------------------------------------------------------------
/python/exports/deep_lab_v3.py:
--------------------------------------------------------------------------------
 1 | from torchvision.models.segmentation import (
 2 |     deeplabv3_mobilenet_v3_large,
 3 |     DeepLabV3_MobileNet_V3_Large_Weights,
 4 | )
 5 | import torch
 6 | import json
 7 | from pathlib import Path
 8 | 
 9 | base_dir = Path("models/segmentation/deeplab_v3_mobilenetv3")
10 | base_dir.mkdir(parents=True, exist_ok=True)
11 | 
12 | model_file = base_dir / "model.onnx"
13 | categories_file = base_dir / "categories.json"
14 | 
15 | weights = DeepLabV3_MobileNet_V3_Large_Weights.DEFAULT
16 | model = deeplabv3_mobilenet_v3_large(weights=weights)
17 | model.eval()
18 | 
19 | categories = weights.meta["categories"]
20 | transforms = weights.transforms()
21 | 
22 | with open(categories_file, "w") as f:
23 |     json.dump(categories, f)
24 | 
25 | onnx_input = torch.rand(1, 3, 224, 224)
26 | 
27 | 
28 | from torchvision.io.image import read_image
29 | 
30 | cat = read_image("examples/files/cat.jpg")
31 | batch = transforms(cat).unsqueeze(0)
32 | outputs = model(batch)
33 | 
34 | torch.onnx.export(
35 |     model,
36 |     batch,
37 |     str(model_file),
38 |     verbose=False,
39 |     input_names=["input"],
40 |     output_names=["output", "aux"],
41 |     dynamic_axes={
42 |         "input": {0: "batch_size", 2: "width", 3: "height"},
43 |         "output": {0: "batch_size", 2: "width", 3: "height"},
44 |     },
45 |     export_params=True,
46 | )
47 | 
48 | print(transforms)
49 | print(batch.shape)
50 | print(outputs)
51 | 


--------------------------------------------------------------------------------
/lib/ex_vision/types/bbox.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Types.BBox do
 2 |   @moduledoc """
 3 |   A struct describing the bounding box returned by the object detection model.
 4 |   """
 5 | 
 6 |   @enforce_keys [:x1, :y1, :x2, :y2, :label, :score]
 7 |   defstruct @enforce_keys
 8 | 
 9 |   @typedoc """
10 |   A type describing the Bounding Box object.
11 | 
12 |   Bounding box is a rectangle encompassing the region.
13 |   When used in object detectors, this box will describe the location of the object in the image.
14 | 
15 |   - `x1` - x component of the upper left corner
16 |   - `y1` - y component of the upper left corner
17 |   - `x2` - x component of the lower right corner
18 |   - `y2` - y component of the lower right corner
19 |   - `score` - confidence of the prediction
20 |   - `label` - label assigned to this bounding box
21 |   """
22 |   @type t(label_t) :: %__MODULE__{
23 |           x1: number(),
24 |           y1: number(),
25 |           y2: number(),
26 |           x2: number(),
27 |           label: label_t,
28 |           score: number()
29 |         }
30 | 
31 |   @typedoc """
32 |   Exactly like `t:t/1`, but doesn't put any constraints on the `label` field:
33 |   """
34 |   @type t() :: t(term())
35 | 
36 |   @doc """
37 |   Return the width of the bounding box
38 |   """
39 |   @spec width(t()) :: number()
40 |   def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1)
41 | 
42 |   @doc """
43 |   Return the height of the bounding box
44 |   """
45 |   @spec height(t()) :: number()
46 |   def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1)
47 | end
48 | 


--------------------------------------------------------------------------------
/priv/categories/coco_categories.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   "__background__",
 3 |   "person",
 4 |   "bicycle",
 5 |   "car",
 6 |   "motorcycle",
 7 |   "airplane",
 8 |   "bus",
 9 |   "train",
10 |   "truck",
11 |   "boat",
12 |   "traffic light",
13 |   "fire hydrant",
14 |   "N/A",
15 |   "stop sign",
16 |   "parking meter",
17 |   "bench",
18 |   "bird",
19 |   "cat",
20 |   "dog",
21 |   "horse",
22 |   "sheep",
23 |   "cow",
24 |   "elephant",
25 |   "bear",
26 |   "zebra",
27 |   "giraffe",
28 |   "N/A",
29 |   "backpack",
30 |   "umbrella",
31 |   "N/A",
32 |   "N/A",
33 |   "handbag",
34 |   "tie",
35 |   "suitcase",
36 |   "frisbee",
37 |   "skis",
38 |   "snowboard",
39 |   "sports ball",
40 |   "kite",
41 |   "baseball bat",
42 |   "baseball glove",
43 |   "skateboard",
44 |   "surfboard",
45 |   "tennis racket",
46 |   "bottle",
47 |   "N/A",
48 |   "wine glass",
49 |   "cup",
50 |   "fork",
51 |   "knife",
52 |   "spoon",
53 |   "bowl",
54 |   "banana",
55 |   "apple",
56 |   "sandwich",
57 |   "orange",
58 |   "broccoli",
59 |   "carrot",
60 |   "hot dog",
61 |   "pizza",
62 |   "donut",
63 |   "cake",
64 |   "chair",
65 |   "couch",
66 |   "potted plant",
67 |   "bed",
68 |   "N/A",
69 |   "dining table",
70 |   "N/A",
71 |   "N/A",
72 |   "toilet",
73 |   "N/A",
74 |   "tv",
75 |   "laptop",
76 |   "mouse",
77 |   "remote",
78 |   "keyboard",
79 |   "cell phone",
80 |   "microwave",
81 |   "oven",
82 |   "toaster",
83 |   "sink",
84 |   "refrigerator",
85 |   "N/A",
86 |   "book",
87 |   "clock",
88 |   "vase",
89 |   "scissors",
90 |   "teddy bear",
91 |   "hair drier",
92 |   "toothbrush"
93 | ]
94 | 


--------------------------------------------------------------------------------
/lib/ex_vision/types/bboxwithmask.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Types.BBoxWithMask do
 2 |   @moduledoc """
 3 |   A struct describing the bounding box with mask returned by the instance segmentation model.
 4 |   """
 5 | 
 6 |   @enforce_keys [
 7 |     :x1,
 8 |     :y1,
 9 |     :x2,
10 |     :y2,
11 |     :label,
12 |     :score,
13 |     :mask
14 |   ]
15 |   defstruct @enforce_keys
16 | 
17 |   @typedoc """
18 |   A type describing the Bounding Box with Mask object.
19 | 
20 |   Bounding box is a rectangle encompassing the region.
21 |   When used in instance segmentation, this box will describe the location of the object in the image.
22 |   Additionally, a binary mask represents the instance segmentation of the object.
23 | 
24 |   - `x1` - x component of the upper left corner
25 |   - `y1` - y component of the upper left corner
26 |   - `x2` - x component of the lower right corner
27 |   - `y2` - y component of the lower right corner
28 |   - `score` - confidence of the prediction
29 |   - `label` - label assigned to this bounding box
30 |   - `mask` - binary mask
31 |   """
32 |   @type t(label_t) :: %__MODULE__{
33 |           x1: number(),
34 |           y1: number(),
35 |           y2: number(),
36 |           x2: number(),
37 |           label: label_t,
38 |           score: number(),
39 |           mask: Nx.Tensor.t()
40 |         }
41 | 
42 |   @typedoc """
43 |   Exactly like `t:t/1`, but doesn't put any constraints on the `label` field:
44 |   """
45 |   @type t() :: t(term())
46 | 
47 |   @doc """
48 |   Return the width of the bounding box
49 |   """
50 |   @spec width(t()) :: number()
51 |   def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1)
52 | 
53 |   @doc """
54 |   Return the height of the bounding box
55 |   """
56 |   @spec height(t()) :: number()
57 |   def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1)
58 | end
59 | 


--------------------------------------------------------------------------------
/test/ex_vision/style_transfer/style_transfer_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule TestConfiguration do
 2 |   @spec configuration() :: %{module() => keyword()}
 3 |   def configuration do
 4 |     %{
 5 |       ExVision.StyleTransfer.CandyTest => [
 6 |         module: ExVision.StyleTransfer.Candy,
 7 |         gt_file: "cat_candy.gt"
 8 |       ],
 9 |       ExVision.StyleTransfer.CandyFastTest => [
10 |         module: ExVision.StyleTransfer.CandyFast,
11 |         gt_file: "cat_candy_fast.gt"
12 |       ],
13 |       ExVision.StyleTransfer.PrincessTest => [
14 |         module: ExVision.StyleTransfer.Princess,
15 |         gt_file: "cat_princess.gt"
16 |       ],
17 |       ExVision.StyleTransfer.PrincessFastTest => [
18 |         module: ExVision.StyleTransfer.PrincessFast,
19 |         gt_file: "cat_princess_fast.gt"
20 |       ],
21 |       ExVision.StyleTransfer.UdnieTest => [
22 |         module: ExVision.StyleTransfer.Udnie,
23 |         gt_file: "cat_udnie.gt"
24 |       ],
25 |       ExVision.StyleTransfer.UdnieFastTest => [
26 |         module: ExVision.StyleTransfer.UdnieFast,
27 |         gt_file: "cat_udnie_fast.gt"
28 |       ],
29 |       ExVision.StyleTransfer.MosaicTest => [
30 |         module: ExVision.StyleTransfer.Mosaic,
31 |         gt_file: "cat_mosaic.gt"
32 |       ],
33 |       ExVision.StyleTransfer.MosaicFastTest => [
34 |         module: ExVision.StyleTransfer.MosaicFast,
35 |         gt_file: "cat_mosaic_fast.gt"
36 |       ]
37 |     }
38 |   end
39 | end
40 | 
41 | for {module, opts} <- TestConfiguration.configuration() do
42 |   defmodule module do
43 |     use ExVision.Model.Case, module: unquote(opts[:module])
44 |     use ExVision.TestUtils
45 | 
46 |     @impl true
47 |     def test_inference_result(result) do
48 |       expected_result =
49 |         "test/assets/results/style_transfer/#{unquote(opts[:gt_file])}"
50 |         |> File.read!()
51 |         |> Nx.deserialize()
52 | 
53 |       assert_tensors_equal(result, expected_result, 5, 0.05)
54 |     end
55 |   end
56 | end
57 | 


--------------------------------------------------------------------------------
/test/support/exvision/test_utils.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.TestUtils do
 2 |   @moduledoc false
 3 | 
 4 |   import ExUnit.Assertions, only: :macros
 5 | 
 6 |   @default_delta 0.05
 7 | 
 8 |   @doc """
 9 |   Compares two numbers by ensuring that the distance between them is smaller than the specified delta
10 |   """
11 |   @spec float_eq(number(), number(), number()) :: boolean()
12 |   def float_eq(a, b, delta \\ @default_delta) do
13 |     abs(a - b) < delta
14 |   end
15 | 
16 |   @typedoc """
17 |   Type describing a dictionary whose values are floats
18 |   """
19 |   @type float_dict_t() :: %{any() => float()}
20 | 
21 |   @spec float_dict_eq(float_dict_t(), float_dict_t(), number()) :: boolean()
22 |   def float_dict_eq(a, b, delta \\ @default_delta) do
23 |     keys = MapSet.new(Map.keys(a) ++ Map.keys(b))
24 | 
25 |     Enum.reduce(keys, true, fn key, acc ->
26 |       a = a[key]
27 |       b = b[key]
28 | 
29 |       acc and not is_nil(a) and not is_nil(b) and float_eq(a, b, delta)
30 |     end)
31 |   end
32 | 
33 |   defmacro assert_floats_equal(a, b, delta \\ @default_delta) do
34 |     quote do
35 |       assert ExVision.TestUtils.float_eq(unquote(a), unquote(b), unquote(delta))
36 |     end
37 |   end
38 | 
39 |   defmacro assert_float_dicts_equal(a, b, delta \\ @default_delta) do
40 |     quote do
41 |       assert ExVision.TestUtils.float_dict_eq(unquote(a), unquote(b), unquote(delta))
42 |     end
43 |   end
44 | 
45 |   defmacro assert_tensors_equal(a, b, delta \\ @default_delta, relative_delta \\ 0.0) do
46 |     quote bind_quoted: [a: a, b: b, delta: delta, relative_delta: relative_delta] do
47 |       value_condition =
48 |         a
49 |         |> Nx.all_close(b, atol: delta, rtol: relative_delta)
50 |         |> Nx.reduce_min()
51 |         |> Nx.to_number() == 1
52 | 
53 |       equal_on_count =
54 |         a
55 |         |> Nx.equal(b)
56 |         |> Nx.as_type(:u64)
57 |         |> Nx.reduce(0, fn x, y -> Nx.add(x, y) end)
58 |         |> Nx.to_number()
59 | 
60 |       number_count = a |> Nx.shape() |> Tuple.product()
61 |       proportional_condition = equal_on_count / number_count > 0.99
62 | 
63 |       assert value_condition or proportional_condition
64 |     end
65 |   end
66 | 
67 |   defmacro __using__(_opts) do
68 |     quote do
69 |       import ExVision.TestUtils, only: :macros
70 |     end
71 |   end
72 | end
73 | 


--------------------------------------------------------------------------------
/lib/ex_vision/instance_segmentation/maskrcnn_resnet50_fpn_v2.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2 do
 2 |   @moduledoc """
 3 |   An instance segmentation model with a ResNet-50-FPN backbone. Exported from torchvision.
 4 |   """
 5 |   use ExVision.Model.Definition.Ortex,
 6 |     model: "maskrcnn_resnet50_fpn_v2_instance_segmentation.onnx",
 7 |     categories: "priv/categories/coco_categories.json"
 8 | 
 9 |   import ExVision.Utils
10 | 
11 |   require Logger
12 | 
13 |   alias ExVision.Types.BBoxWithMask
14 | 
15 |   @type output_t() :: [BBoxWithMask.t()]
16 | 
17 |   @impl true
18 |   def load(options \\ []) do
19 |     if Keyword.has_key?(options, :batch_size) do
20 |       Logger.warning(
21 |         "`:batch_size` was given, but this model can only process batch of size 1. Overriding"
22 |       )
23 |     end
24 | 
25 |     options
26 |     |> Keyword.put(:batch_size, 1)
27 |     |> default_model_load()
28 |   end
29 | 
30 |   @impl true
31 |   def preprocessing(img, _metadata) do
32 |     ExVision.Utils.resize(img, {224, 224})
33 |   end
34 | 
35 |   @impl true
36 |   def postprocessing(
37 |         %{
38 |           "boxes_unsqueezed" => bboxes,
39 |           "labels_unsqueezed" => labels,
40 |           "masks_unsqueezed" => masks,
41 |           "scores_unsqueezed" => scores
42 |         },
43 |         metadata
44 |       ) do
45 |     categories = categories()
46 | 
47 |     {h, w} = metadata.original_size
48 |     scale_x = w / 224
49 |     scale_y = h / 224
50 | 
51 |     bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y]))
52 | 
53 |     scores = squeeze_and_listify(scores)
54 |     labels = squeeze_and_listify(labels)
55 | 
56 |     masks =
57 |       masks
58 |       |> Nx.backend_transfer()
59 |       |> Nx.squeeze(axes: [0, 2])
60 |       |> NxImage.resize(metadata.original_size, channels: :first)
61 |       |> Nx.round()
62 |       |> Nx.as_type(:s64)
63 |       |> Nx.to_list()
64 | 
65 |     [bboxes, labels, scores, masks]
66 |     |> Enum.zip()
67 |     |> Enum.filter(fn {_bbox, _label, score, _mask} -> score > 0.1 end)
68 |     |> Enum.map(fn {[x1, y1, x2, y2], label, score, mask} ->
69 |       %BBoxWithMask{
70 |         x1: x1,
71 |         y1: y1,
72 |         x2: x2,
73 |         y2: y2,
74 |         label: Enum.at(categories, label),
75 |         score: score,
76 |         mask: Nx.tensor(mask)
77 |       }
78 |     end)
79 |   end
80 | end
81 | 


--------------------------------------------------------------------------------
/test/ex_vision/cache_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.CacheTest do
 2 |   use ExUnit.Case, async: false
 3 |   use Mimic
 4 | 
 5 |   alias ExVision.Cache
 6 | 
 7 |   @moduletag :tmp_dir
 8 | 
 9 |   setup ctx do
10 |     files =
11 |       Map.get(ctx, :files, %{
12 |         "/test" => rand_string(256)
13 |       })
14 | 
15 |     set_mimic_global()
16 | 
17 |     stub(Req, :get, fn
18 |       %URI{host: "mock_server", port: 8000, path: path}, options ->
19 |         options = Keyword.validate!(options, [:raw, :into])
20 | 
21 |         case Map.fetch(files, path) do
22 |           {:ok, content} ->
23 |             body = Enum.into([content], options[:into])
24 |             {:ok, %Req.Response{status: 200, body: body}}
25 | 
26 |           :error ->
27 |             # Req seems to be saving the file anyway
28 |             body = Enum.into([""], options[:into])
29 |             {:ok, %Req.Response{status: 404, body: body}}
30 |         end
31 | 
32 |       _uri, _options ->
33 |         {:error, %Mint.TransportError{reason: :connection_failed}}
34 |     end)
35 | 
36 |     [files: files]
37 |   end
38 | 
39 |   setup %{tmp_dir: tmp_dir} do
40 |     {:ok, _cache} =
41 |       Cache.start_link(
42 |         name: MyCache,
43 |         server_url: URI.new!("http://mock_server:8000"),
44 |         cache_path: tmp_dir
45 |       )
46 | 
47 |     :ok
48 |   end
49 | 
50 |   test "Can download the file", ctx do
51 |     [{path, expected_contents}] = Enum.to_list(ctx.files)
52 |     expected_path = Path.join(ctx.tmp_dir, path)
53 |     assert {:ok, ^expected_path} = Cache.lazy_get(MyCache, path)
54 |     verify_download(expected_path, expected_contents)
55 |   end
56 | 
57 |   test "will fail if server is unreachable" do
58 |     url = "http://localhost:9999"
59 |     {:ok, c} = Cache.start_link(server_url: url, name: nil)
60 | 
61 |     assert {:error, :connection_failed} = Cache.lazy_get(c, "/test")
62 |     assert {:error, :connection_failed} = Cache.lazy_get(c, "/test")
63 |   end
64 | 
65 |   test "will fail if we request file that doesn't exist" do
66 |     assert {:error, :doesnt_exist} = Cache.lazy_get(MyCache, "/idk")
67 |     assert {:error, :doesnt_exist} = Cache.lazy_get(MyCache, "/idk")
68 |   end
69 | 
70 |   defp verify_download(path, expected_contents) do
71 |     assert File.exists?(path)
72 |     assert not File.dir?(path)
73 |     assert File.read!(path) == expected_contents
74 |   end
75 | 
76 |   defp rand_string(length), do: :crypto.strong_rand_bytes(length)
77 | end
78 | 


--------------------------------------------------------------------------------
/lib/ex_vision/object_detection/generic_detector.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.ObjectDetection.GenericDetector do
 2 |   @moduledoc false
 3 | 
 4 |   # Contains a default implementation of pre and post processing for TorchVision detectors
 5 |   # To use: `use ExVision.ObjectDetection.GenericDetector`
 6 | 
 7 |   import ExVision.Utils
 8 | 
 9 |   require Logger
10 | 
11 |   alias ExVision.Types.{BBox, ImageMetadata}
12 | 
13 |   @typep output_t() :: [BBox.t()]
14 | 
15 |   @spec preprocessing(Nx.Tensor.t(), ImageMetadata.t()) :: Nx.Tensor.t()
16 |   def preprocessing(img, _metadata) do
17 |     ExVision.Utils.resize(img, {224, 224})
18 |   end
19 | 
20 |   @spec postprocessing(map(), ImageMetadata.t(), [atom()]) :: output_t()
21 |   def postprocessing(
22 |         %{
23 |           "boxes_unsqueezed" => bboxes,
24 |           "scores_unsqueezed" => scores,
25 |           "labels_unsqueezed" => labels
26 |         },
27 |         metadata,
28 |         categories
29 |       ) do
30 |     {h, w} = metadata.original_size
31 |     scale_x = w / 224
32 |     scale_y = h / 224
33 | 
34 |     bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y]))
35 | 
36 |     scores = squeeze_and_listify(scores)
37 |     labels = squeeze_and_listify(labels)
38 | 
39 |     [bboxes, scores, labels]
40 |     |> Enum.zip()
41 |     |> Enum.filter(fn {_bbox, score, _label} -> score > 0.1 end)
42 |     |> Enum.map(fn {[x1, y1, x2, y2], score, label} ->
43 |       %BBox{
44 |         x1: x1,
45 |         x2: x2,
46 |         y1: y1,
47 |         y2: y2,
48 |         score: score,
49 |         label: Enum.at(categories, label)
50 |       }
51 |     end)
52 |   end
53 | 
54 |   defmacro __using__(_opts) do
55 |     quote do
56 |       @typedoc """
57 |       A type describing output of `run/2` as a list of a bounding boxes.
58 | 
59 |       Each bounding box describes the location of the object indicated by the `label`.
60 |       It also provides the `score` field marking the probability of the prediction.
61 |       Bounding boxes with very low scores should most likely be ignored.
62 |       """
63 |       @type output_t() :: [BBox.t()]
64 | 
65 |       @impl true
66 |       defdelegate preprocessing(image, metadata), to: ExVision.ObjectDetection.GenericDetector
67 | 
68 |       @impl true
69 |       @spec postprocessing(map(), ExVision.Types.ImageMetadata.t()) :: output_t()
70 |       def postprocessing(output, metadata) do
71 |         ExVision.ObjectDetection.GenericDetector.postprocessing(output, metadata, categories())
72 |       end
73 | 
74 |       defoverridable preprocessing: 2, postprocessing: 2
75 |     end
76 |   end
77 | end
78 | 


--------------------------------------------------------------------------------
/lib/ex_vision/types/bboxwithkeypoints.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Types.BBoxWithKeypoints do
 2 |   @moduledoc """
 3 |   A struct describing the bounding box with keypoints returned by the keypoint detection model.
 4 |   """
 5 | 
 6 |   @enforce_keys [
 7 |     :x1,
 8 |     :y1,
 9 |     :x2,
10 |     :y2,
11 |     :label,
12 |     :score,
13 |     :keypoints
14 |   ]
15 |   defstruct @enforce_keys
16 | 
17 |   @typedoc """
18 |   A type describing the Bounding Box object.
19 | 
20 |   Bounding box is a rectangle encompassing the region.
21 |   When used in object detectors, this box will describe the location of the object in the image.
22 |   It also includes keypoints. Each keypoint has a predefined atom as its name.
23 | 
24 |   - `x1` - x component of the upper left corner
25 |   - `y1` - y component of the upper left corner
26 |   - `x2` - x component of the lower right corner
27 |   - `y2` - y component of the lower right corner
28 |   - `label` - label assigned to this bounding box
29 |   - `score` - confidence of the prediction
30 |   - `keypoints` - a map where keys are predefined names (represented as atoms) denoting the specific keypoints (body parts). The values associated with each key are another map, which contains the following:
31 |     - `:x`: The x-coordinate of the keypoint
32 |     - `:y`: The y-coordinate of the keypoint
33 |     - `:score`: The confidence score of the predicted keypoint
34 | 
35 |   Keypoint atom names include:
36 |   - `:nose`
37 |   - `:left_eye`
38 |   - `:right_eye`
39 |   - `:left_ear`
40 |   - `:right_ear`
41 |   - `:left_shoulder`
42 |   - `:right_shoulder`
43 |   - `:left_elbow`
44 |   - `:right_elbow`
45 |   - `:left_wrist`
46 |   - `:right_wrist`
47 |   - `:left_hip`
48 |   - `:right_hip`
49 |   - `:left_knee`
50 |   - `:right_knee`
51 |   - `:left_ankle`
52 |   - `:right_ankle`
53 |   """
54 |   @type t(label_t) :: %__MODULE__{
55 |           x1: number(),
56 |           y1: number(),
57 |           y2: number(),
58 |           x2: number(),
59 |           label: label_t,
60 |           score: number(),
61 |           keypoints: %{
62 |             atom() => %{
63 |               x: number(),
64 |               y: number(),
65 |               score: number()
66 |             }
67 |           }
68 |         }
69 | 
70 |   @typedoc """
71 |   Exactly like `t:t/1`, but doesn't put any constraints on the `label` field:
72 |   """
73 |   @type t() :: t(term())
74 | 
75 |   @doc """
76 |   Return the width of the bounding box
77 |   """
78 |   @spec width(t()) :: number()
79 |   def width(%__MODULE__{x1: x1, x2: x2}), do: abs(x2 - x1)
80 | 
81 |   @doc """
82 |   Return the height of the bounding box
83 |   """
84 |   @spec height(t()) :: number()
85 |   def height(%__MODULE__{y1: y1, y2: y2}), do: abs(y2 - y1)
86 | end
87 | 


--------------------------------------------------------------------------------
/test/support/exvision/model/case.ex:
--------------------------------------------------------------------------------
 1 | defmodule ExVision.Model.Case do
 2 |   @moduledoc false
 3 |   @img_path "test/assets/cat.jpg"
 4 | 
 5 |   @callback test_inference_result(result :: any()) :: any()
 6 | 
 7 |   defmacro __using__(opts) do
 8 |     opts = Keyword.validate!(opts, [:module])
 9 | 
10 |     quote do
11 |       use ExUnit.Case, async: true
12 |       # use ExVision.TestUtils.MockCacheServer
13 |       @behaviour ExVision.Model.Case
14 | 
15 |       setup_all do
16 |         {:ok, model} = unquote(opts[:module]).load()
17 |         [model: model]
18 |       end
19 | 
20 |       test "load/0", %{model: model} do
21 |         assert model
22 |       end
23 | 
24 |       test "inference", %{model: model} do
25 |         model
26 |         |> unquote(opts[:module]).run(unquote(@img_path))
27 |         |> test_inference_result()
28 |       end
29 | 
30 |       test "inference for batch", %{model: model} do
31 |         model
32 |         |> unquote(opts[:module]).run([unquote(@img_path), unquote(@img_path)])
33 |         |> Enum.each(&test_inference_result/1)
34 |       end
35 | 
36 |       test "child_spec/1" do
37 |         assert spec = unquote(opts[:module]).child_spec()
38 |       end
39 | 
40 |       describe "stateful/process workflow" do
41 |         setup ctx do
42 |           name = String.to_atom("#{__MODULE__}#{ctx[:test]}")
43 |           model = ctx[:model]
44 | 
45 |           {:ok, _supervisor} =
46 |             Supervisor.start_link(
47 |               [unquote(opts[:module]).child_spec(name: name)],
48 |               strategy: :one_for_one
49 |             )
50 | 
51 |           [name: name]
52 |         end
53 | 
54 |         test "inference", %{name: name} do
55 |           name
56 |           |> unquote(opts[:module]).batched_run(unquote(@img_path))
57 |           |> test_inference_result()
58 |         end
59 | 
60 |         test "inference for batch", %{name: name} do
61 |           name
62 |           |> unquote(opts[:module]).batched_run([unquote(@img_path), unquote(@img_path)])
63 |           |> Enum.each(&test_inference_result/1)
64 |         end
65 |       end
66 | 
67 |       test "stateful/process workflow accepts options" do
68 |         options = [
69 |           name: __MODULE__.TestProcess1,
70 |           batch_size: 8,
71 |           batch_timeout: 10,
72 |           partitions: true
73 |         ]
74 | 
75 |         child_spec = {unquote(opts[:module]), options}
76 | 
77 |         assert {:ok, _supervisor} =
78 |                  Supervisor.start_link([child_spec], strategy: :one_for_one, restarts: :none)
79 | 
80 |         assert unquote(opts[:module]).batched_run(
81 |                  __MODULE__.TestProcess1,
82 |                  unquote(@img_path)
83 |                )
84 |       end
85 |     end
86 |   end
87 | end
88 | 


--------------------------------------------------------------------------------
/lib/ex_vision/style_transfer/style_transfer.ex:
--------------------------------------------------------------------------------
 1 | defmodule Configuration do
 2 |   @moduledoc false
 3 | 
 4 |   @low_resolution {400, 300}
 5 |   @high_resolution {640, 480}
 6 | 
 7 |   @spec configuration() :: %{}
 8 |   def configuration do
 9 |     %{
10 |       ExVision.StyleTransfer.Candy => [model: "candy.onnx", resolution: @high_resolution],
11 |       ExVision.StyleTransfer.CandyFast => [model: "candy_fast.onnx", resolution: @low_resolution],
12 |       ExVision.StyleTransfer.Princess => [model: "princess.onnx", resolution: @high_resolution],
13 |       ExVision.StyleTransfer.PrincessFast => [
14 |         model: "princess_fast.onnx",
15 |         resolution: @low_resolution
16 |       ],
17 |       ExVision.StyleTransfer.Udnie => [model: "udnie.onnx", resolution: @high_resolution],
18 |       ExVision.StyleTransfer.UdnieFast => [model: "udnie_fast.onnx", resolution: @low_resolution],
19 |       ExVision.StyleTransfer.Mosaic => [model: "mosaic.onnx", resolution: @high_resolution],
20 |       ExVision.StyleTransfer.MosaicFast => [
21 |         model: "mosaic_fast.onnx",
22 |         resolution: @low_resolution
23 |       ]
24 |     }
25 |   end
26 | end
27 | 
28 | for {module, opts} <- Configuration.configuration() do
29 |   defmodule module do
30 |     @moduledoc """
31 |     #{module} is a custom style transfer model optimised for devices with low computational capabilities and CPU inference.
32 |     """
33 |     use ExVision.Model.Definition.Ortex, model: unquote(opts[:model])
34 | 
35 |     require Logger
36 | 
37 |     @typedoc """
38 |     A type consisting of the output tensor (stylized image tensor) from style transfer models of shape {#{Enum.join(Tuple.to_list(opts[:resolution]) ++ [3], ", ")}}.
39 |     """
40 |     @type output_t() :: Nx.Tensor.t()
41 | 
42 |     @impl true
43 |     def load(options \\ []) do
44 |       if Keyword.has_key?(options, :batch_size) do # warn: the caller's value is about to be replaced
45 |         Logger.warning(
46 |           "`:batch_size` was given, but this model can only process batch of size 1. Overriding"
47 |         )
48 |       end
49 | 
50 |       options
51 |       |> Keyword.put(:batch_size, 1) # always force a batch of one, whatever was passed in
52 |       |> default_model_load()
53 |     end
54 | 
55 |     @impl true
56 |     def preprocessing(img, _metdata) do
57 |       img |> ExVision.Utils.resize(unquote(opts[:resolution])) |> Nx.divide(255.0)
58 |     end
59 | 
60 |     @impl true
61 |     def postprocessing(
62 |           stylized_frame,
63 |           metadata
64 |         ) do
65 |       {h, w} = unquote(opts[:resolution])
66 | 
67 |       stylized_frame["55"]
68 |       |> Nx.reshape({3, h, w}, names: [:channel, :height, :width])
69 |       |> NxImage.resize(metadata.original_size, channels: :first, method: :bilinear)
70 |       |> Nx.clip(0.0, 255.0)
71 |       |> Nx.as_type(:u8)
72 |       |> Nx.transpose(axes: [1, 2, 0])
73 |     end
74 |   end
75 | end
76 | 


--------------------------------------------------------------------------------
/python/exports/classification.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from torchvision.transforms.functional import to_tensor, resize
 3 | import torch
 4 | import json
 5 | from pathlib import Path
 6 | from PIL import Image
 7 | 
 8 | 
 9 | def export(model_builder, Model_Weights, input_shape):
10 |     base_dir = Path(f"models/classification/{model_builder.__name__}")
11 |     base_dir.mkdir(parents=True, exist_ok=True)
12 | 
13 |     model_file = base_dir / "model.onnx"
14 |     categories_file = base_dir / "categories.json"
15 | 
16 |     weights = Model_Weights.DEFAULT
17 |     model = model_builder(weights=weights)
18 |     model.eval()
19 | 
20 |     categories = [x.lower().replace(" ", "_")
21 |                   for x in weights.meta["categories"]]
22 |     transforms = weights.transforms()
23 | 
24 |     with open(categories_file, "w") as f:
25 |         json.dump(categories, f)
26 | 
27 |     onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0)
28 |     onnx_input = resize(onnx_input, input_shape)
29 |     onnx_input = transforms(onnx_input)
30 | 
31 |     torch.onnx.export(
32 |         model,
33 |         onnx_input,
34 |         str(model_file),
35 |         verbose=False,
36 |         input_names=["input"],
37 |         output_names=["output"],
38 |         dynamic_axes={
39 |             "input": {0: "batch_size"},
40 |             "output": {0: "batch_size"}
41 |         },
42 |         export_params=True,
43 |     )
44 | 
45 |     expected_output: torch.Tensor = model(onnx_input)
46 |     expected_output = expected_output.softmax(dim=1)
47 | 
48 |     result = dict(zip(categories, expected_output[0].tolist()))
49 | 
50 |     file = Path(
51 |         f"test/assets/results/classification/{model_builder.__name__}.json"
52 |     )
53 |     file.parent.mkdir(exist_ok=True, parents=True)
54 | 
55 |     with file.open("w") as f:
56 |         json.dump(result, f)
57 | 
58 | 
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument("model")
61 | args = parser.parse_args()
62 | 
63 | match(args.model):
64 |     case "mobilenet_v3_small":
65 |         from torchvision.models import mobilenet_v3_small, MobileNet_V3_Small_Weights
66 |         export(mobilenet_v3_small, MobileNet_V3_Small_Weights, [224, 224])
67 |     case "efficientnet_v2_s":
68 |         from torchvision.models import efficientnet_v2_s, EfficientNet_V2_S_Weights
69 |         export(efficientnet_v2_s, EfficientNet_V2_S_Weights, [384, 384])
70 |     case "efficientnet_v2_m":
71 |         from torchvision.models import efficientnet_v2_m, EfficientNet_V2_M_Weights
72 |         export(efficientnet_v2_m, EfficientNet_V2_M_Weights, [480, 480])
73 |     case "efficientnet_v2_l":
74 |         from torchvision.models import efficientnet_v2_l, EfficientNet_V2_L_Weights
75 |         export(efficientnet_v2_l, EfficientNet_V2_L_Weights, [480, 480])
76 |     case "squeezenet1_1":
77 |         from torchvision.models import squeezenet1_1, SqueezeNet1_1_Weights
78 |         export(squeezenet1_1, SqueezeNet1_1_Weights, [224, 224])
79 |     case _:
80 |         print("Model not found")
81 | 


--------------------------------------------------------------------------------
/lib/ex_vision/keypoint_detection/keypointrcnn_resnet50_fpn.ex:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN do
  2 |   @moduledoc """
  3 |   Keypoint R-CNN model with a ResNet-50-FPN backbone, exported from torchvision.
  4 |   """
  5 |   use ExVision.Model.Definition.Ortex,
  6 |     model: "keypointrcnn_resnet50_fpn_keypoint_detector.onnx",
  7 |     categories: "priv/categories/no_person_or_person.json"
  8 | 
  9 |   import ExVision.Utils
 10 | 
 11 |   require Logger
 12 | 
 13 |   alias ExVision.Types.BBoxWithKeypoints
 14 | 
 15 |   @typep output_t() :: [BBoxWithKeypoints.t()]
 16 | 
 17 |   @keypoints_names [
 18 |     :nose,
 19 |     :left_eye,
 20 |     :right_eye,
 21 |     :left_ear,
 22 |     :right_ear,
 23 |     :left_shoulder,
 24 |     :right_shoulder,
 25 |     :left_elbow,
 26 |     :right_elbow,
 27 |     :left_wrist,
 28 |     :right_wrist,
 29 |     :left_hip,
 30 |     :right_hip,
 31 |     :left_knee,
 32 |     :right_knee,
 33 |     :left_ankle,
 34 |     :right_ankle
 35 |   ]
 36 | 
 37 |   @impl true
 38 |   def load(options \\ []) do
 39 |     if Keyword.has_key?(options, :batch_size) do # warn: the caller's value is about to be replaced
 40 |       Logger.warning(
 41 |         "`:batch_size` was given, but this model can only process batch of size 1. Overriding"
 42 |       )
 43 |     end
 44 | 
 45 |     options
 46 |     |> Keyword.put(:batch_size, 1) # always force a batch of one, whatever was passed in
 47 |     |> default_model_load()
 48 |   end
 49 | 
 50 |   @impl true
 51 |   def preprocessing(img, _metadata) do
 52 |     ExVision.Utils.resize(img, {224, 224})
 53 |   end
 54 | 
 55 |   @impl true
 56 |   def postprocessing(
 57 |         %{
 58 |           "boxes_unsqueezed" => bboxes,
 59 |           "scores_unsqueezed" => scores,
 60 |           "labels_unsqueezed" => labels,
 61 |           "keypoints_unsqueezed" => keypoints_list,
 62 |           "keypoints_scores_unsqueezed" => keypoints_scores_list
 63 |         },
 64 |         metadata
 65 |       ) do
 66 |     categories = categories()
 67 | 
 68 |     {h, w} = metadata.original_size
 69 |     scale_x = w / 224
 70 |     scale_y = h / 224
 71 | 
 72 |     bboxes = scale_and_listify_bbox(bboxes, Nx.f32([scale_x, scale_y, scale_x, scale_y]))
 73 | 
 74 |     scores = squeeze_and_listify(scores)
 75 |     labels = squeeze_and_listify(labels)
 76 | 
 77 |     keypoints_list = scale_and_listify_bbox(keypoints_list, Nx.tensor([scale_x, scale_y, 1]))
 78 | 
 79 |     keypoints_scores_list = squeeze_and_listify(keypoints_scores_list)
 80 | 
 81 |     [bboxes, scores, labels, keypoints_list, keypoints_scores_list]
 82 |     |> Enum.zip()
 83 |     |> Enum.filter(fn {_bbox, score, _label, _keypoints, _keypoints_scores} -> score > 0.1 end)
 84 |     |> Enum.map(fn {[x1, y1, x2, y2], score, label, keypoints, keypoints_scores} ->
 85 |       keypoints =
 86 |         [keypoints, keypoints_scores]
 87 |         |> Enum.zip()
 88 |         |> Enum.map(fn {[x, y, _w], keypoint_score} -> %{x: x, y: y, score: keypoint_score} end)
 89 | 
 90 |       %BBoxWithKeypoints{
 91 |         x1: x1,
 92 |         x2: x2,
 93 |         y1: y1,
 94 |         y2: y2,
 95 |         score: score,
 96 |         label: Enum.at(categories, label),
 97 |         keypoints: [@keypoints_names, keypoints] |> Enum.zip() |> Map.new()
 98 |       }
 99 |     end)
100 |   end
101 | end
102 | 


--------------------------------------------------------------------------------
/lib/ex_vision/model.ex:
--------------------------------------------------------------------------------
  1 | defprotocol ExVision.Model do
  2 |   @moduledoc """
  3 |   A protocol describing a generic ExVision model.
  4 |   """
  5 | 
  6 |   @typedoc """
  7 |   A type describing a single element that can be processed by ExVision's models
  8 |   """
  9 |   @type model_input_t() :: Path.t() | Nx.Tensor.t() | Vix.Vips.Image.t()
 10 | 
 11 |   @typedoc """
 12 |   A typespec defining ExVision's model input, either as a single `t:model_input_t/0` or a list of them.
 13 |   """
 14 |   @type input_t() :: model_input_t() | [model_input_t()]
 15 | 
 16 |   @typedoc """
 17 |   A generic type indicating a model output. For details on each model, refer to its own `output_t()` definition.
 18 |   """
 19 |   @type output_t() :: any()
 20 | 
 21 |   @doc """
 22 |   Starts and links the module in process workflow
 23 |   """
 24 |   @spec start_link(t(), keyword()) :: GenServer.on_start()
 25 |   def start_link(model, options \\ [])
 26 | 
 27 |   @doc """
 28 |   A function used to submit input for inference (inline variant).
 29 |   """
 30 |   @spec run(t(), input_t()) :: output_t() | [output_t()]
 31 |   def run(model, input)
 32 | 
 33 |   @doc """
 34 |   Function used to submit the input for inference in a process setting when the model is served as a process.
 35 |   """
 36 |   @spec batched_run(t(), input_t()) :: output_t()
 37 |   def batched_run(model, input)
 38 | 
 39 |   @spec as_serving(t()) :: Nx.Serving.t()
 40 |   def as_serving(model)
 41 | end
 42 | 
 43 | defimpl ExVision.Model, for: Any do
 44 |   require Logger
 45 | 
 46 |   def run(model, input) when is_list(input) do
 47 |     model |> as_serving() |> Nx.Serving.run(input)
 48 |   end
 49 | 
 50 |   def run(model, input) do
 51 |     model
 52 |     |> run([input])
 53 |     |> hd()
 54 |   end
 55 | 
 56 |   def start_link(model, options \\ []) do
 57 |     options
 58 |     |> validate_start_link_options!(name: process_name(model))
 59 |     |> Keyword.put(:serving, as_serving(model))
 60 |     |> Nx.Serving.start_link()
 61 |   end
 62 | 
 63 |   def batched_run(model, input) do
 64 |     Logger.warning("""
 65 |     Calling batched_run/2 at the ExVision.Model struct can lead to undefined behaviour.
 66 |     Referencing the already running process by name is preffered.
 67 |     """)
 68 | 
 69 |     model
 70 |     |> process_name()
 71 |     |> ExVision.Utils.batched_run(input)
 72 |   end
 73 | 
 74 |   def as_serving(%{serving: serving}), do: serving
 75 | 
 76 |   defp process_name(%module{}), do: module
 77 | 
 78 |   defp validate_start_link_options!(options, extras) do
 79 |     spec =
 80 |       [
 81 |         :partitions,
 82 |         :batch_timeout,
 83 |         :distribution_weight,
 84 |         :shutdown,
 85 |         :hibernate_after,
 86 |         :spawn_opt,
 87 |         :name
 88 |       ] -- Keyword.keys(extras)
 89 | 
 90 |     Keyword.validate!(
 91 |       options,
 92 |       spec ++ extras
 93 |     )
 94 |   end
 95 | end
 96 | 
 97 | defimpl ExVision.Model, for: Atom do
 98 |   use ExVision.Utils.Macros
 99 |   defunimplemented(run(_model, _input), with_impl: true)
100 |   defunimplemented(start_link(_model, _opts), with_impl: true)
101 |   defunimplemented(as_serving(_model), with_impl: true)
102 | 
103 |   @impl true
104 |   def batched_run(module, input) do
105 |     ExVision.Utils.batched_run(module, input)
106 |   end
107 | end
108 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | compile_commands.json
  2 | .gdb_history
  3 | bundlex.sh
  4 | bundlex.bat
  5 | 
  6 | # Dir generated by tmp_dir ExUnit tag
  7 | /tmp/
  8 | 
  9 | # Created by https://www.gitignore.io/api/c,vim,linux,macos,elixir,windows,visualstudiocode
 10 | # Edit at https://www.gitignore.io/?templates=c,vim,linux,macos,elixir,windows,visualstudiocode
 11 | 
 12 | ### C ###
 13 | # Prerequisites
 14 | *.d
 15 | 
 16 | # Object files
 17 | *.o
 18 | *.ko
 19 | *.obj
 20 | *.elf
 21 | 
 22 | # Linker output
 23 | *.ilk
 24 | *.map
 25 | *.exp
 26 | 
 27 | # Precompiled Headers
 28 | *.gch
 29 | *.pch
 30 | 
 31 | # Libraries
 32 | *.lib
 33 | *.a
 34 | *.la
 35 | *.lo
 36 | 
 37 | # Shared objects (inc. Windows DLLs)
 38 | *.dll
 39 | *.so
 40 | *.so.*
 41 | *.dylib
 42 | 
 43 | # Executables
 44 | *.exe
 45 | *.out
 46 | *.app
 47 | *.i*86
 48 | *.x86_64
 49 | *.hex
 50 | 
 51 | # Debug files
 52 | *.dSYM/
 53 | *.su
 54 | *.idb
 55 | *.pdb
 56 | 
 57 | # Kernel Module Compile Results
 58 | *.mod*
 59 | *.cmd
 60 | .tmp_versions/
 61 | modules.order
 62 | Module.symvers
 63 | Mkfile.old
 64 | dkms.conf
 65 | 
 66 | ### Elixir ###
 67 | /_build
 68 | /cover
 69 | /deps
 70 | /doc
 71 | /.fetch
 72 | erl_crash.dump
 73 | *.ez
 74 | *.beam
 75 | /config/*.secret.exs
 76 | .elixir_ls/
 77 | 
 78 | ### Elixir Patch ###
 79 | 
 80 | ### Linux ###
 81 | *~
 82 | 
 83 | # temporary files which can be created if a process still has a handle open of a deleted file
 84 | .fuse_hidden*
 85 | 
 86 | # KDE directory preferences
 87 | .directory
 88 | 
 89 | # Linux trash folder which might appear on any partition or disk
 90 | .Trash-*
 91 | 
 92 | # .nfs files are created when an open file is removed but is still being accessed
 93 | .nfs*
 94 | 
 95 | ### macOS ###
 96 | # General
 97 | .DS_Store
 98 | .AppleDouble
 99 | .LSOverride
100 | 
101 | # Icon must end with two \r
102 | Icon
103 | 
104 | # Thumbnails
105 | ._*
106 | 
107 | # Files that might appear in the root of a volume
108 | .DocumentRevisions-V100
109 | .fseventsd
110 | .Spotlight-V100
111 | .TemporaryItems
112 | .Trashes
113 | .VolumeIcon.icns
114 | .com.apple.timemachine.donotpresent
115 | 
116 | # Directories potentially created on remote AFP share
117 | .AppleDB
118 | .AppleDesktop
119 | Network Trash Folder
120 | Temporary Items
121 | .apdisk
122 | 
123 | ### Vim ###
124 | # Swap
125 | [._]*.s[a-v][a-z]
126 | [._]*.sw[a-p]
127 | [._]s[a-rt-v][a-z]
128 | [._]ss[a-gi-z]
129 | [._]sw[a-p]
130 | 
131 | # Session
132 | Session.vim
133 | Sessionx.vim
134 | 
135 | # Temporary
136 | .netrwhist
137 | # Auto-generated tag files
138 | tags
139 | # Persistent undo
140 | [._]*.un~
141 | 
142 | ### VisualStudioCode ###
143 | .vscode/*
144 | !.vscode/settings.json
145 | !.vscode/tasks.json
146 | !.vscode/launch.json
147 | !.vscode/extensions.json
148 | 
149 | ### VisualStudioCode Patch ###
150 | # Ignore all local history of files
151 | .history
152 | 
153 | ### Windows ###
154 | # Windows thumbnail cache files
155 | Thumbs.db
156 | Thumbs.db:encryptable
157 | ehthumbs.db
158 | ehthumbs_vista.db
159 | 
160 | # Dump file
161 | *.stackdump
162 | 
163 | # Folder config file
164 | [Dd]esktop.ini
165 | 
166 | # Recycle Bin used on file shares
167 | $RECYCLE.BIN/
168 | 
169 | # Windows Installer files
170 | *.cab
171 | *.msi
172 | *.msix
173 | *.msm
174 | *.msp
175 | 
176 | # Windows shortcuts
177 | *.lnk
178 | 
179 | # End of https://www.gitignore.io/api/c,vim,linux,macos,elixir,windows,visualstudiocode
180 | models/
181 | 


--------------------------------------------------------------------------------
/python/exports/instance_segmentation.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | from torchvision.transforms.functional import to_tensor, resize
  3 | import torch
  4 | import json
  5 | from pathlib import Path
  6 | import onnx
  7 | from onnx import helper, TensorProto
  8 | from PIL import Image
  9 | 
 10 | 
 11 | def export(model_builder, Model_Weights):
 12 |     base_dir = Path(f"models/instance_segmentation/{model_builder.__name__}")
 13 |     base_dir.mkdir(parents=True, exist_ok=True)
 14 | 
 15 |     model_file = base_dir / "model.onnx"
 16 |     categories_file = base_dir / "categories.json"
 17 | 
 18 |     weights = Model_Weights.DEFAULT
 19 |     model = model_builder(weights=weights)
 20 |     model.eval()
 21 | 
 22 |     categories = weights.meta["categories"]
 23 |     transforms = weights.transforms()
 24 | 
 25 |     with open(categories_file, "w") as f:
 26 |         json.dump(categories, f)
 27 | 
 28 |     onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0)
 29 |     onnx_input = resize(onnx_input, [224, 224])
 30 |     onnx_input = transforms(onnx_input)
 31 | 
 32 |     torch.onnx.export(
 33 |         model,
 34 |         onnx_input,
 35 |         str(model_file),
 36 |         verbose=False,
 37 |         input_names=["input"],
 38 |         output_names=["boxes", "labels", "scores", "masks"],
 39 |         dynamic_axes={
 40 |             "boxes": {0: "detections"},
 41 |             "labels": {0: "detections"},
 42 |             "scores": {0: "detections"},
 43 |             "masks": {0: "detections"},
 44 |         },
 45 |         export_params=True,
 46 |     )
 47 | 
 48 |     model = onnx.load(str(model_file))
 49 | 
 50 |     prev_names = ["boxes", "labels", "scores", "masks"]
 51 | 
 52 |     nodes = []
 53 |     for data in prev_names:
 54 |         axes_init = helper.make_tensor(
 55 |             name=data+"_axes",
 56 |             data_type=TensorProto.INT64,
 57 |             dims=[1],
 58 |             vals=[0]
 59 |         )
 60 |         model.graph.initializer.append(axes_init)
 61 | 
 62 |         node = helper.make_node(
 63 |             op_type="Unsqueeze",
 64 |             inputs=[data, data+"_axes"],
 65 |             outputs=[data+"_unsqueezed"]
 66 |         )
 67 |         nodes.append(node)
 68 | 
 69 |     model.graph.node.extend(nodes)
 70 | 
 71 |     new_outputs = []
 72 |     for data in prev_names:
 73 |         match data:
 74 |             case "boxes":
 75 |                 shape = [1, None, 4]
 76 |             case "masks":
 77 |                 shape = [1, None, 1, 224, 224]
 78 |             case _:
 79 |                 shape = [1, None]
 80 | 
 81 |         new_output = helper.make_tensor_value_info(
 82 |             name=data+"_unsqueezed",
 83 |             elem_type=TensorProto.INT64 if data == "labels" else TensorProto.FLOAT,
 84 |             shape=shape
 85 |         )
 86 |         new_outputs.append(new_output)
 87 | 
 88 |     model.graph.output.extend(new_outputs)
 89 | 
 90 |     for data in prev_names:
 91 |         old_output = next(i for i in model.graph.output if i.name == data)
 92 |         model.graph.output.remove(old_output)
 93 | 
 94 |     onnx.save(model, str(model_file))
 95 | 
 96 | 
 97 | parser = argparse.ArgumentParser()
 98 | parser.add_argument("model")
 99 | args = parser.parse_args()
100 | 
101 | match(args.model):
102 |     case "maskrcnn_resnet50_fpn_v2":
103 |         from torchvision.models.detection import maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights
104 |         export(maskrcnn_resnet50_fpn_v2, MaskRCNN_ResNet50_FPN_V2_Weights)
105 |     case _:
106 |         print("Model not found")
107 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ExVision
  2 | 
  3 | [![Hex.pm](https://img.shields.io/hexpm/v/ex_vision.svg)](https://hex.pm/packages/ex_vision)
  4 | [![API Docs](https://img.shields.io/badge/api-docs-yellow.svg?style=flat)](https://hexdocs.pm/ex_vision)
  5 | 
  6 | ExVision is a collection of vision-related AI models delivered as a ready-to-use package with an easy-to-understand API.
  7 | ExVision will take care of all necessary input transformations internally and return the result in a sensible format.
  8 | 
  9 | ExVision models are powered by [Ortex](https://www.github.com/elixir-nx/ortex).
 10 | 
 11 | ## Usage
 12 | 
 13 | In order to use the model, you need to first load it
 14 | 
 15 | ```elixir
 16 | alias ExVision.Classification.MobileNetV3Small
 17 | 
 18 | {:ok, model} = MobileNetV3Small.load() #=> {:ok, %MobileNetV3Small{}}
 19 | ```
 20 | 
 21 | After that, the model is available for inference.
 22 | ExVision will take care of all necessary input transformations and convert the output to a format that makes sense.
 23 | 
 24 | ```elixir
 25 | MobileNetV3Small.run(model, "example/files/cat.jpg") #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...}
 26 | ```
 27 | 
 28 | ExVision is also capable of accepting tensors and images on input:
 29 | 
 30 | ```elixir
 31 | cat = Image.open!("example/files/cat.jpg")
 32 | {:ok, cat_tensor} = Image.to_nx(cat)
 33 | MobileNetV3Small.run(model, cat) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...}
 34 | MobileNetV3Small.run(model, cat_tensor) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...}
 35 | ```
 36 | 
 37 | ### Usage in process workflow
 38 | 
 39 | All ExVision models are implemented using `Nx.Serving`.
 40 | They are therefore compatible with process workflow.
 41 | 
 42 | You can start a model's process:
 43 | 
 44 | ```elixir
 45 | {:ok, pid} = MobileNetV3Small.start_link(name: MyModel)
 46 | ```
 47 | 
 48 | or start it under the supervision tree
 49 | 
 50 | ```elixir
 51 | {:ok, _supervisor_pid} = Supervisor.start_link([
 52 |   {MobileNetV3Small, name: MyModel}
 53 | ], strategy: :one_for_one)
 54 | ```
 55 | 
 56 | After starting, it's immediately available for inference using the `batched_run/2` function.
 57 | 
 58 | ```elixir
 59 | MobileNetV3Small.batched_run(MyModel, cat) #=> %{cat: 0.98, dog: 0.01, car: 0.00, ...}
 60 | ```
 61 | 
 62 | ## Installation
 63 | 
 64 | The package can be installed by adding `ex_vision` to your list of dependencies in `mix.exs`:
 65 | 
 66 | ```elixir
 67 | def deps do
 68 |   [
 69 |     {:ex_vision, "~> 0.4.0"}
 70 |   ]
 71 | end
 72 | ```
 73 | 
 74 | In order to compile, ExVision **requires Rust and Cargo** to be installed on your system.
 75 | 
 76 | ## Current Timeline
 77 | 
 78 | We have identified a set of models that we would like to support.
 79 | If the model that you would like to use is missing, feel free to open an issue, express interest in an existing one, or contribute the model directly.
 80 | 
 81 | - [x] Classification
 82 |   - [x] MobileNetV3 Small
 83 |   - [x] EfficientNetV2
 84 |   - [x] SqueezeNet
 85 | - [x] Object detection
 86 |   - [x] SSDLite320 - MobileNetV3 Large backbone
 87 |   - [x] FasterRCNN ResNet50 FPN
 88 | - [x] Semantic segmentation
 89 |   - [x] DeepLabV3 - MobileNetV3
 90 | - [x] Instance segmentation
 91 |   - [x] Mask R-CNN
 92 | - [x] Keypoint Detection
 93 |   - [x] Keypoint R-CNN
 94 | 
 95 | ## Copyright and License
 96 | 
 97 | Copyright 2024, [Software Mansion](https://swmansion.com/?utm_source=git&utm_medium=readme&utm_campaign=ex_vision)
 98 | 
 99 | [![Software Mansion](https://logo.swmansion.com/logo?color=white&variant=desktop&width=200&tag=membrane-github)](https://swmansion.com/?utm_source=git&utm_medium=readme&utm_campaign=ex_vision)
100 | 
101 | Licensed under the [Apache License, Version 2.0](LICENSE)
102 | 


--------------------------------------------------------------------------------
/python/exports/object_detection.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | from torchvision.transforms.functional import to_tensor, resize
  3 | import torch
  4 | import json
  5 | from pathlib import Path
  6 | import onnx
  7 | from onnx import helper, TensorProto
  8 | from PIL import Image
  9 | 
 10 | 
 11 | def export(model_builder, Model_Weights, output_names):
 12 |     base_dir = Path(f"models/object_detection/{model_builder.__name__}")
 13 |     base_dir.mkdir(parents=True, exist_ok=True)
 14 | 
 15 |     model_file = base_dir / "model.onnx"
 16 |     categories_file = base_dir / "categories.json"
 17 | 
 18 |     weights = Model_Weights.DEFAULT
 19 |     model = model_builder(weights=weights)
 20 |     model.eval()
 21 | 
 22 |     categories = weights.meta["categories"]
 23 |     transforms = weights.transforms()
 24 | 
 25 |     with open(categories_file, "w") as f:
 26 |         json.dump(categories, f)
 27 | 
 28 |     onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0)
 29 |     onnx_input = resize(onnx_input, [224, 224])
 30 |     onnx_input = transforms(onnx_input)
 31 | 
 32 |     torch.onnx.export(
 33 |         model,
 34 |         onnx_input,
 35 |         str(model_file),
 36 |         verbose=False,
 37 |         input_names=["input"],
 38 |         output_names=output_names,
 39 |         dynamic_axes={
 40 |             "boxes": {0: "detections"},
 41 |             "labels": {0: "detections"},
 42 |             "scores": {0: "detections"},
 43 |         },
 44 |         export_params=True,
 45 |     )
 46 | 
 47 |     model = onnx.load(str(model_file))
 48 | 
 49 |     nodes = []
 50 |     for output_name in output_names:
 51 |         axes_init = helper.make_tensor(
 52 |             name=output_name+"_axes",
 53 |             data_type=TensorProto.INT64,
 54 |             dims=[1],
 55 |             vals=[0]
 56 |         )
 57 |         model.graph.initializer.append(axes_init)
 58 | 
 59 |         node = helper.make_node(
 60 |             op_type="Unsqueeze",
 61 |             inputs=[output_name, output_name+"_axes"],
 62 |             outputs=[output_name+"_unsqueezed"]
 63 |         )
 64 |         nodes.append(node)
 65 | 
 66 |     model.graph.node.extend(nodes)
 67 | 
 68 |     new_outputs = []
 69 |     for output_name in output_names:
 70 |         new_output = helper.make_tensor_value_info(
 71 |             name=output_name+"_unsqueezed",
 72 |             elem_type=TensorProto.INT64 if output_name == "labels" else TensorProto.FLOAT,
 73 |             shape=[1, None, 4] if output_name == "boxes" else [1, None]
 74 |         )
 75 |         new_outputs.append(new_output)
 76 | 
 77 |     model.graph.output.extend(new_outputs)
 78 | 
 79 |     for output_name in output_names:
 80 |         old_output = next(
 81 |             i for i in model.graph.output if i.name == output_name)
 82 |         model.graph.output.remove(old_output)
 83 | 
 84 |     onnx.save(model, str(model_file))
 85 | 
 86 | 
 87 | parser = argparse.ArgumentParser()
 88 | parser.add_argument("model")
 89 | args = parser.parse_args()
 90 | 
 91 | match(args.model):
 92 |     case "fasterrcnn_resnet50_fpn":
 93 |         from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
 94 |         export(
 95 |             fasterrcnn_resnet50_fpn,
 96 |             FasterRCNN_ResNet50_FPN_Weights,
 97 |             ["boxes", "labels", "scores"]
 98 |         )
 99 |     case "ssdlite320_mobilenet_v3_large":
100 |         from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights
101 |         export(
102 |             ssdlite320_mobilenet_v3_large,
103 |             SSDLite320_MobileNet_V3_Large_Weights,
104 |             ["boxes", "scores", "labels"]
105 |         )
106 |     case _:
107 |         print("Model not found")
108 | 


--------------------------------------------------------------------------------
/python/exports/keypoint_detection.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | from torchvision.transforms.functional import to_tensor, resize
  3 | import torch
  4 | import json
  5 | from pathlib import Path
  6 | import onnx
  7 | from onnx import helper, TensorProto
  8 | from PIL import Image
  9 | 
 10 | 
 11 | def export(model_builder, Model_Weights):
 12 |     base_dir = Path(f"models/keypoint_detection/{model_builder.__name__}")
 13 |     base_dir.mkdir(parents=True, exist_ok=True)
 14 | 
 15 |     model_file = base_dir / "model.onnx"
 16 |     categories_file = base_dir / "categories.json"
 17 | 
 18 |     weights = Model_Weights.DEFAULT
 19 |     model = model_builder(weights=weights)
 20 |     model.eval()
 21 | 
 22 |     categories = weights.meta["categories"]
 23 |     transforms = weights.transforms()
 24 | 
 25 |     with open(categories_file, "w") as f:
 26 |         json.dump(categories, f)
 27 | 
 28 |     onnx_input = to_tensor(Image.open("test/assets/cat.jpg")).unsqueeze(0)
 29 |     onnx_input = resize(onnx_input, [224, 224])
 30 |     onnx_input = transforms(onnx_input)
 31 | 
 32 |     torch.onnx.export(
 33 |         model,
 34 |         onnx_input,
 35 |         str(model_file),
 36 |         verbose=False,
 37 |         input_names=["input"],
 38 |         output_names=["boxes", "labels", "scores",
 39 |                       "keypoints", "keypoints_scores"],
 40 |         dynamic_axes={
 41 |             "boxes": {0: "detections"},
 42 |             "labels": {0: "detections"},
 43 |             "scores": {0: "detections"},
 44 |             "keypoints": {0: "detections"},
 45 |             "keypoints_scores": {0: "detections"}
 46 |         },
 47 |         export_params=True,
 48 |     )
 49 | 
 50 |     output_names = ["boxes", "labels", "scores",
 51 |                     "keypoints", "keypoints_scores"]
 52 | 
 53 |     model = onnx.load(str(model_file))
 54 | 
 55 |     nodes = []
 56 |     for output_name in output_names:
 57 |         axes_init = helper.make_tensor(
 58 |             name=output_name+"_axes",
 59 |             data_type=TensorProto.INT64,
 60 |             dims=[1],
 61 |             vals=[0]
 62 |         )
 63 |         model.graph.initializer.append(axes_init)
 64 | 
 65 |         node = helper.make_node(
 66 |             op_type="Unsqueeze",
 67 |             inputs=[output_name, output_name+"_axes"],
 68 |             outputs=[output_name+"_unsqueezed"]
 69 |         )
 70 |         nodes.append(node)
 71 | 
 72 |     model.graph.node.extend(nodes)
 73 | 
 74 |     new_outputs = []
 75 |     for output_name in output_names:
 76 |         match output_name:
 77 |             case "boxes":
 78 |                 shape = [1, None, 4]
 79 |             case "keypoints":
 80 |                 shape = [1, None, 17, 3]
 81 |             case "keypoints_scores":
 82 |                 shape = [1, None, 17]
 83 |             case _:
 84 |                 shape = [1, None]
 85 | 
 86 |         new_output = helper.make_tensor_value_info(
 87 |             name=output_name+"_unsqueezed",
 88 |             elem_type=TensorProto.INT64 if output_name == "labels" else TensorProto.FLOAT,
 89 |             shape=shape
 90 |         )
 91 |         new_outputs.append(new_output)
 92 | 
 93 |     model.graph.output.extend(new_outputs)
 94 | 
 95 |     for output_name in output_names:
 96 |         old_output = next(
 97 |             i for i in model.graph.output if i.name == output_name)
 98 |         model.graph.output.remove(old_output)
 99 | 
100 |     onnx.save(model, str(model_file))
101 | 
102 | 
 103 | parser = argparse.ArgumentParser()  # command line interface: one positional argument selecting the model
 104 | parser.add_argument("model")  # name of the model to export
 105 | args = parser.parse_args()
 106 | 
 107 | match(args.model):  # pick the torchvision builder and weights for the requested model
 108 |     case "keypointrcnn_resnet50_fpn":
 109 |         from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
 110 |         export(keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights)
 111 |     case _:
 112 |         print("Model not found")  # NOTE(review): an unknown name only prints this message; no non-zero exit status is set here
113 | 


--------------------------------------------------------------------------------
/lib/ex_vision/model/definition.ex:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.Model.Definition do
  2 |   @moduledoc """
  3 |   A module describing the behaviour that MUST be implemented by all ExVision models.
  4 |   """
  5 | 
  6 |   require Bunch.Typespec
  7 | 
  8 |   @callback load(keyword()) :: {:ok, ExVision.Model.t()} | {:error, reason :: atom()}
  9 |   @callback run(ExVision.Model.t(), ExVision.Model.input_t()) :: any()
 10 |   @callback batched_run(atom(), ExVision.Model.input_t()) :: any()
 11 |   @callback child_spec(keyword()) :: Supervisor.child_spec()
 12 |   @callback start_link(keyword()) :: GenServer.on_start()
 13 | 
 14 |   defp module_to_name(module),
 15 |     do:
 16 |       module
 17 |       |> Module.split()
 18 |       |> List.last()
 19 |       |> String.split("_")
 20 |       |> Enum.map_join(" ", fn <<first::binary-size(1), rest::binary>> ->
 21 |         String.upcase(first) <> rest
 22 |       end)
 23 | 
 24 |   defmacro __using__(options) do
 25 |     Application.ensure_all_started(:req)
 26 | 
 27 |     options =
 28 |       Keyword.validate!(options,
 29 |         categories: nil,
 30 |         name: module_to_name(__CALLER__.module)
 31 |       )
 32 | 
 33 |     quote do
  34 |       # conditional definition based on whether the `categories` option is present has to be moved inside the __using__ macro
  35 |       # here is an explanation: https://cocoa-research.works/2022/10/conditional-compliation-with-if-and-use-in-elixir/
 36 |       use ExVision.Model.Definition.Parts.WithCategories, unquote(options)
 37 | 
 38 |       @behaviour ExVision.Model.Definition
 39 | 
 40 |       @derive [ExVision.Model]
 41 |       @enforce_keys [:serving]
 42 |       defstruct [:serving]
 43 | 
 44 |       @typedoc """
 45 |       An instance of the #{__MODULE__}
 46 |       """
 47 |       @opaque t() :: %__MODULE__{serving: Nx.Serving.t()}
 48 | 
 49 |       @impl true
 50 |       def start_link(options \\ []) do
 51 |         {start_link_options, load_options} =
 52 |           Keyword.split(options, [
 53 |             :partitions,
 54 |             :batch_timeout,
 55 |             :distribution_weight,
 56 |             :shutdown,
 57 |             :hibernate_after,
 58 |             :spawn_opt,
 59 |             :name
 60 |           ])
 61 | 
 62 |         with {:ok, model} <- load(load_options),
 63 |              do: ExVision.Model.start_link(model, start_link_options)
 64 |       end
 65 | 
  66 |       @doc """
  67 |       Same as `load/1`, but raises an error on failure (the failure reason is logged first).
  68 |       """
  69 |       @spec load!(keyword()) :: t()
  70 |       def load!(opts \\ []) do
  71 |         case load(opts) do
  72 |           {:ok, model} ->
  73 |             model
  74 | 
  75 |           {:error, reason} ->
  76 |             require Logger
  77 | 
  78 |             Logger.error(
  79 |               "Failed to load model #{unquote(options[:name])} due to #{inspect(reason)}"
  80 |             )
  81 | 
  82 |             raise "Failed to load model"
  83 |         end
  84 |       end
 85 | 
  86 |       @impl true
  87 |       @doc """
  88 |       Immediately applies the model to the given input, in the scope of the current process.
  89 |       """
  90 |       @spec run(t(), ExVision.Model.input_t()) :: output_t() | [output_t()]
  91 |       defdelegate run(model, input), to: ExVision.Model
 92 | 
 93 |       @doc """
 94 |       Submits the input for inference to the process running the Nx.Serving for this model.
 95 |       """
 96 |       @impl true
 97 |       @spec batched_run(atom(), ExVision.Model.input_t()) :: output_t() | [output_t()]
 98 |       def batched_run(name \\ __MODULE__, input), do: ExVision.Model.batched_run(name, input)
 99 | 
100 |       @impl true
101 |       @spec child_spec(keyword()) :: Supervisor.child_spec()
102 |       def child_spec(options \\ []) do
103 |         {child_spec_opts, start_link_options} = Keyword.split(options, [:id])
104 |         child_spec_opts = Keyword.validate!(child_spec_opts, id: __MODULE__)
105 | 
106 |         %{
107 |           id: child_spec_opts[:id],
108 |           start: {__MODULE__, :start_link, [start_link_options]}
109 |         }
110 |       end
111 | 
112 |       defoverridable run: 2,
113 |                      batched_run: 2,
114 |                      child_spec: 1,
115 |                      child_spec: 0,
116 |                      start_link: 0,
117 |                      start_link: 1
118 |     end
119 |   end
120 | end
121 | 


--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.Mixfile do
  2 |   use Mix.Project
  3 | 
  4 |   @version "0.4.0"
  5 |   @github_url "https://github.com/software-mansion-labs/ex_vision/"
  6 | 
  7 |   def project do
  8 |     [
  9 |       app: :ex_vision,
 10 |       version: @version,
 11 |       elixir: "~> 1.16",
 12 |       elixirc_paths: elixirc_paths(Mix.env()),
 13 |       start_permanent: Mix.env() == :prod,
 14 |       deps: deps(),
 15 |       dialyzer: dialyzer(),
 16 | 
 17 |       # hex
 18 |       description: "A collection of ONNX vision AI models with wrappers based on Ortex",
 19 |       package: package(),
 20 | 
 21 |       # docs
 22 |       name: "Ex Vision",
 23 |       source_url: @github_url,
 24 |       docs: docs(),
 25 |       homepage_url: "https://hexdocs.pm/ex_vision"
 26 |     ]
 27 |   end
 28 | 
  29 |   def application do
  30 |     [
  31 |       included_applications: [:ex_vision], # NOTE(review): this is the project's own app (`app: :ex_vision`) - confirm it is intended
  32 |       mod: {ExVision, []}, # application callback module and its start argument
  33 |       extra_applications: []
  34 |     ]
  35 |   end
 36 | 
 37 |   defp elixirc_paths(:test), do: ["lib", "test/support"]
 38 |   defp elixirc_paths(_env), do: ["lib"]
 39 | 
 40 |   defp deps do
 41 |     [
 42 |       # TODO: change the `>= 0.0.0` dependencies to concrete versions
 43 |       {:nx, ">= 0.0.0"},
 44 |       {:ortex, ">= 0.0.0"},
 45 |       {:nx_image, "~> 0.1.2"},
 46 |       {:bunch, "~> 1.6", runtime: false},
 47 |       {:axon, "~> 0.6.1"},
 48 |       {:exla, ">= 0.0.0"},
 49 |       {:image, ">= 0.0.0"},
 50 |       {:req, ">= 0.0.0"},
 51 |       {:mimic, "~> 1.7", only: :test},
 52 |       {:ex_doc, ">= 0.0.0", only: :dev, runtime: false},
 53 |       {:dialyxir, ">= 0.0.0", only: :dev, runtime: false},
 54 |       {:credo, ">= 0.0.0", only: [:dev, :test], runtime: false}
 55 |     ]
 56 |   end
 57 | 
 58 |   defp dialyzer() do
 59 |     opts = [
 60 |       flags: [:error_handling]
 61 |     ]
 62 | 
 63 |     if System.get_env("CI") == "true" do
 64 |       # Store PLTs in cacheable directory for CI
 65 |       [plt_local_path: "priv/plts", plt_core_path: "priv/plts"] ++ opts
 66 |     else
 67 |       opts
 68 |     end
 69 |   end
 70 | 
 71 |   defp package do
 72 |     [
 73 |       maintainers: ["Software Mansion"],
 74 |       licenses: ["Apache-2.0"],
 75 |       links: %{
 76 |         "GitHub" => @github_url,
 77 |         "Software Mansion" => "https://www.swmansion.com"
 78 |       }
 79 |     ]
 80 |   end
 81 | 
 82 |   @tutorials Path.wildcard("examples/*.livemd")
 83 |   defp docs do
 84 |     [
 85 |       main: "readme",
 86 |       extras: [
 87 |         "README.md",
 88 |         "LICENSE"
 89 |         | @tutorials
 90 |       ],
 91 |       groups_for_extras: [
 92 |         Tutorials: @tutorials
 93 |       ],
 94 |       groups_for_modules: [
 95 |         Models: [
 96 |           ExVision.Classification.MobileNetV3Small,
 97 |           ExVision.Classification.EfficientNet_V2_S,
 98 |           ExVision.Classification.EfficientNet_V2_M,
 99 |           ExVision.Classification.EfficientNet_V2_L,
100 |           ExVision.Classification.SqueezeNet1_1,
101 |           ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3,
102 |           ExVision.StyleTransfer.Candy,
103 |           ExVision.StyleTransfer.CandyFast,
104 |           ExVision.StyleTransfer.Udnie,
105 |           ExVision.StyleTransfer.UdnieFast,
106 |           ExVision.StyleTransfer.Mosaic,
107 |           ExVision.StyleTransfer.MosaicFast,
108 |           ExVision.StyleTransfer.Princess,
109 |           ExVision.StyleTransfer.PrincessFast,
110 |           ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2,
111 |           ExVision.ObjectDetection.Ssdlite320_MobileNetv3,
112 |           ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN,
113 |           ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN
114 |         ],
115 |         Types: [
116 |           ExVision.Types,
117 |           ExVision.Types.BBox,
118 |           ExVision.Types.BBoxWithKeypoints,
119 |           ExVision.Types.BBoxWithMask,
120 |           ExVision.Types.ImageMetadata
121 |         ],
122 |         "Protocols and Behaviours": [
123 |           ExVision.Model,
124 |           ExVision.Model.Definition,
125 |           ExVision.Model.Definition.Ortex
126 |         ]
127 |       ],
128 |       nest_modules_by_prefix: [
129 |         ExVision.Model,
130 |         ExVision.Model.Definition,
131 |         ExVision.Types,
132 |         ExVision.Classification,
133 |         ExVision.SemanticSegmentation,
134 |         ExVision.StyleTransfer,
135 |         ExVision.InstanceSegmentation,
136 |         ExVision.ObjectDetection,
137 |         ExVision.KeypointDetection
138 |       ],
139 |       formatters: ["html"],
140 |       source_ref: "v#{@version}"
141 |     ]
142 |   end
143 | end
144 | 


--------------------------------------------------------------------------------
/examples/2-usage-as-nx-serving.livemd:
--------------------------------------------------------------------------------
  1 | <!-- livebook:{"app_settings":{"access_type":"public","output_type":"rich","show_source":true,"slug":"nx-serving-example"}} -->
  2 | 
  3 | # Usage in production - process workflow
  4 | 
  5 | ```elixir
  6 | Mix.install(
  7 |   [
  8 |     :ex_vision,
  9 |     :exla,
 10 |     :kino,
 11 |     :nx,
 12 |     :kino_bumblebee
 13 |   ],
 14 |   config: [nx: [default_backend: EXLA.Backend]]
 15 | )
 16 | ```
 17 | 
 18 | ## A word of introduction - what problem are we solving?
 19 | 
  20 | Deploying an AI model in a production environment can be quite difficult to get right. In order to ensure efficient resource usage and high throughput, one needs to consider the following:
 21 | 
 22 | * creating a cluster of GPU enabled machines, effectively creating an AI-microservice. That comes with all of the associated challenges of service discovery and API implementation
 23 | * Even if the cluster is not necessary, most of the time running one model instance per user is not a viable option, as loading the model takes a long time and that approach wastes a lot of potential of your hardware
 24 | * intelligently batching requests from different sources, to get the most out of your GPU's concurrency potential, while also preventing the delay from mounting up while waiting for other requests to complete the batch
 25 | * Critical error handling
 26 | 
 27 | ### The solution
 28 | 
 29 | Fortunately, Elixir ecosystem features an amazing, prebuilt solution to most of these problems in form of [`Nx.Serving`](https://hexdocs.pm/nx/Nx.Serving.html). ExVision's models are all implemented using `Nx.Serving` underneath. In fact, our `ExVision.Model.run/2` and `ExVision.Model.batched_run/2` all make use of the matching `Nx.Serving.run/2` and `Nx.Serving.batched_run/2` respectively.
 30 | 
 31 | This approach allows us to make use of the built in intelligent batching and ability to be run as a standalone process provided by `Nx.Serving` out of the box.
 32 | 
  33 | In fact, we even expose `ExVision.Model.as_serving/1` that will extract the ExVision internal struct and expose the underlying `Nx.Serving`.
 34 | 
 35 | ## Basic usage example
 36 | 
  37 | In this section, we will showcase running the ExVision's models in the process workflow, but we will not attempt to explain every single detail of the `Nx.Serving`, as this part of the ExVision's API is just a thin convenience wrapper on top of it.
 38 | 
  39 | If you want to dig deeper, we would encourage consulting the [`Nx.Serving` stateful/process workflow documentation](https://hexdocs.pm/nx/Nx.Serving.html#module-stateful-process-workflow).
 40 | 
 41 | ### What we're building
 42 | 
 43 | In this example we will build a simple interactive app performing the classification of the uploaded image
 44 | 
 45 | ### Starting the model using the process workflow
 46 | 
 47 | In order to start the model process, just add it to your supervision tree. It is recommended that this process is started somewhere at the top of the tree. For all available options, please refer to the Nx documentation on [`Nx.Serving.start_link/1`](https://hexdocs.pm/nx/Nx.Serving.html#start_link/1).
 48 | 
  49 | If not explicitly provided, ExVision models will by default take their module name as the process name.
 50 | 
 51 | ```elixir
 52 | alias ExVision.Classification.MobileNetV3Small, as: Model
 53 | 
 54 | children = [
 55 |   {Model, batch_size: 8, batch_timeout: 500}
 56 | ]
 57 | 
 58 | {:ok, _pid} = Supervisor.start_link(children, strategy: :one_for_one)
 59 | Kino.nothing()
 60 | ```
 61 | 
  62 | And just like that, our model is now ready and available for inference for our entire cluster. We can call on it like this:
 63 | 
 64 | <!-- livebook:{"force_markdown":true} -->
 65 | 
 66 | ```elixir
 67 | input = Nx.iota({3, 1920, 1280})
 68 | Model.batched_run(input)
 69 | ```
 70 | 
 71 | The only difference when compared to an inline workflow, from the perspective of the user is the necessity to use the `batched_run/2` instead of `run/2`.
 72 | 
 73 | This time we didn't need to provide the model argument for `batched_run/2`.
 74 | That is because we didn't specify the `:name` option when adding our model to the supervision tree and we relied on the default name assigned by ExVision, which by default is the name of their module.
 75 | If you assigned a custom name to the model, you can give it as a first argument to `batched_run/2`
 76 | 
 77 | <!-- livebook:{"force_markdown":true} -->
 78 | 
 79 | ```elixir
 80 | Model.batched_run(MyModel, input)
 81 | ```
 82 | 
 83 | ## Creating an example app
 84 | 
  85 | Now that we have a model instantiated and we know how to call on it, let's create an example app performing image classification.
 86 | 
 87 | We will make use of the [`Kino`](https://github.com/livebook-dev/kino) library to read the input image and in order to display the classification results.
 88 | 
 89 | ```elixir
 90 | form = Kino.Control.form([image: Kino.Input.image("Image", format: :jpeg)], submit: "Submit")
 91 | frame = Kino.Frame.new()
 92 | 
 93 | Kino.listen(form, fn %{data: %{image: %{file_ref: ref}}, origin: origin} ->
 94 |   input = Kino.Input.file_path(ref)
 95 |   result = Model.batched_run(input)
 96 | 
 97 |   result
 98 |   |> Enum.sort_by(fn {_label, score} -> score end, :desc)
 99 |   |> Enum.take(10)
100 |   |> Kino.Bumblebee.ScoredList.new()
101 |   |> then(&Kino.Frame.render(frame, &1, to: origin))
102 |   |> dbg()
103 | end)
104 | 
105 | Kino.Layout.grid([form, frame], columns: 2)
106 | ```
107 | 
108 | ## Next steps
109 | 
110 | After completing this tutorial you can check out our [Using ExVision with Membrane](3-membrane.livemd) tutorial.
111 | 


--------------------------------------------------------------------------------
/lib/ex_vision/model/definition/ortex.ex:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.Model.Definition.Ortex do
  2 |   @moduledoc """
  3 |   A generic implementation of the `ExVision.Model.Definition` for Ortex based models.
  4 |   """
  5 | 
  6 |   # TODO: improve the documentation here
  7 | 
  8 |   require Logger
  9 | 
 10 |   alias ExVision.Types.ImageMetadata
 11 | 
 12 |   @doc """
 13 |   A callback used to apply preprocessing for your model.
 14 | 
 15 |   The requirements for that will differ depending on the model used.
 16 |   """
 17 |   @callback preprocessing(Nx.Tensor.t(), ImageMetadata.t()) :: Nx.Tensor.t()
 18 | 
 19 |   @doc """
 20 |   A callback used to apply postprocessing to the output of the ONNX model.
 21 | 
 22 |   In this callback, you should transform the output to match your desired format.
 23 |   """
 24 |   @callback postprocessing(map(), ImageMetadata.t()) :: ExVision.Model.output_t()
 25 | 
 26 |   @typedoc """
 27 |   A type describing ONNX provider that can be used with ExVision.
 28 | 
 29 |   For some providers, it may be necessary to use the local version of `libonnxruntime` and provide some configuration option.
 30 |   For details, please consult [Ortex documentaiton](https://hexdocs.pm/ortex/Ortex.html#load/3)
 31 |   """
 32 |   @type provider_t() :: :cpu | :coreml | :cpu
 33 | 
 34 |   @typedoc """
 35 |   A type describing all options possible to use with the default implementation of the `load/0` function.
 36 | 
 37 |   - `:cache_path` - specifies a caching directory for this model.
 38 |   - `:providers` - a list of desired providers, sorted by preference. Onnx will attempt to use the first available provider. If none of the provided is available, onnx will fallback to `:cpu`. Default: `[:cpu]`
 39 |   - `:batch_size` - specifies a default batch size for this instance. Default: `1`.
 40 |   """
 41 |   @type load_option_t() ::
 42 |           {:cache_path, Path.t()}
 43 |           | {:providers, [provider_t()]}
 44 |           | {:batch_size, pos_integer()}
 45 | 
 46 |   defmacrop get_client_preprocessing(module) do
 47 |     quote do
 48 |       fn input ->
 49 |         images = ExVision.Utils.load_image(input)
 50 | 
 51 |         metadata =
 52 |           Enum.map(
 53 |             images,
 54 |             &%ExVision.Types.ImageMetadata{
 55 |               original_size: ExVision.Utils.image_size(&1)
 56 |             }
 57 |           )
 58 | 
 59 |         batch =
 60 |           images
 61 |           |> Enum.zip(metadata)
 62 |           |> Enum.map(fn {image, metadata} -> unquote(module).preprocessing(image, metadata) end)
 63 |           |> Nx.Batch.stack()
 64 | 
 65 |         {batch, metadata}
 66 |       end
 67 |     end
 68 |   end
 69 | 
 70 |   defmacrop get_client_postprocessing(module, output_names) do
 71 |     quote do
 72 |       fn {result, _server_metadata}, metadata ->
 73 |         result
 74 |         |> split_onnx_result(unquote(output_names))
 75 |         |> Enum.zip(metadata)
 76 |         |> Enum.map(fn {result, metadata} -> unquote(module).postprocessing(result, metadata) end)
 77 |       end
 78 |     end
 79 |   end
 80 | 
 81 |   @doc """
 82 |   Loads the ONNX model and attaches the `Nx.Serving` to callbacks defined in the module
 83 |   """
 84 |   @spec load_ortex_model(module(), Path.t(), [load_option_t()]) ::
 85 |           {:ok, ExVision.Model.t()} | {:error, atom()}
 86 |   def load_ortex_model(module, model_path, options) do
 87 |     with {:ok, options} <-
 88 |            Keyword.validate(options,
 89 |              batch_size: 1,
 90 |              providers: [:cpu]
 91 |            ),
 92 |          {:ok, path} <- ExVision.Cache.lazy_get(ExVision.Cache, model_path),
 93 |          {:ok, model} <- do_load_model(path, options[:providers]) do
 94 |       output_names = ExVision.Utils.onnx_output_names(model)
 95 | 
 96 |       model
 97 |       |> then(&Nx.Serving.new(Ortex.Serving, &1))
 98 |       |> Nx.Serving.batch_size(options[:batch_size])
 99 |       |> Nx.Serving.client_preprocessing(get_client_preprocessing(module))
100 |       |> Nx.Serving.client_postprocessing(get_client_postprocessing(module, output_names))
101 |       |> then(&{:ok, struct!(module, serving: &1)})
102 |     end
103 |   end
104 | 
105 |   defp do_load_model(path, providers) do
106 |     try do
107 |       {:ok, Ortex.load(path, providers)}
108 |     rescue
109 |       e in RuntimeError ->
110 |         require Logger
111 |         Logger.error("Failed to load model from `#{inspect(path)}` due to #{inspect(e)}")
112 |         {:error, :onnx_load_failure}
113 |     end
114 |   end
115 | 
116 |   defp split_onnx_result(tuple, outputs) do
117 |     tuple
118 |     |> Tuple.to_list()
119 |     |> Enum.map(fn x ->
120 |       # Do a backend transfer and also return a list of batches here
121 |       x |> Nx.backend_transfer() |> Nx.to_batched(1)
122 |     end)
123 |     |> Enum.zip()
124 |     |> Enum.map(fn parts ->
125 |       parts |> Tuple.to_list() |> then(&Enum.zip(outputs, &1)) |> Enum.into(%{})
126 |     end)
127 |   end
128 | 
129 |   @type using_option_t() :: {:base_dir, Path.t()} | {:name, String.t()}
130 |   @spec __using__([using_option_t()]) :: Macro.t()
131 |   defmacro __using__(opts) do
132 |     {opts, generic_opts} = Keyword.split(opts, [:model])
133 |     opts = Keyword.validate!(opts, [:model])
134 |     model_path = Keyword.fetch!(opts, :model)
135 | 
136 |     quote do
137 |       use ExVision.Model.Definition, unquote(generic_opts)
138 |       @behaviour ExVision.Model.Definition.Ortex
139 | 
140 |       @doc """
141 |       Creates the model instance
142 |       """
143 |       @impl true
144 |       @spec load([ExVision.Model.Definition.Ortex.load_option_t()]) ::
145 |               {:ok, t()} | {:error, reason :: atom()}
146 |       def load(options \\ []) do
147 |         default_model_load(options)
148 |       end
149 | 
150 |       defp default_model_load(options) do
151 |         ExVision.Model.Definition.Ortex.load_ortex_model(__MODULE__, unquote(model_path), options)
152 |       end
153 | 
154 |       @impl true
155 |       def postprocessing(result, _metdata), do: result
156 | 
157 |       @impl true
158 |       def preprocessing(image, _metadata), do: image
159 | 
160 |       defoverridable load: 0, load: 1, preprocessing: 2, postprocessing: 2
161 |     end
162 |   end
163 | end
164 | 


--------------------------------------------------------------------------------
/lib/ex_vision/utils.ex:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.Utils do
  2 |   @moduledoc false
  3 | 
  4 |   require Nx
  5 |   require Image
  6 |   alias ExVision.Types
  7 | 
  8 |   @type channel_spec_t() :: :first | :last
  9 |   @type pixel_size_t() :: 8 | 16 | 32 | 64
 10 |   @type pixel_type_t() :: {:u | :f, pixel_size_t()}
 11 |   @type load_image_option_t() ::
 12 |           {:pixel_type, pixel_type_t()}
 13 |           | {:channel_spec, channel_spec_t()}
 14 | 
 15 |   @spec load_image(ExVision.Model.input_t(), [load_image_option_t()]) :: [Nx.Tensor.t()]
 16 |   def load_image(image, options \\ []) do
 17 |     options = Keyword.validate!(options, pixel_type: {:f, 32}, channel_spec: :first)
 18 | 
 19 |     image
 20 |     |> read_image()
 21 |     |> List.flatten()
 22 |     |> Stream.map(&convert_pixel_type(&1, options[:pixel_type]))
 23 |     |> Stream.map(&convert_channel_spec(&1, options[:channel_spec]))
 24 |     |> Enum.to_list()
 25 |   end
 26 | 
 27 |   @spec convert_channel_spec(Nx.Tensor.t(), channel_spec_t()) :: Nx.Tensor.t()
 28 |   def convert_channel_spec(tensor, target) do
 29 |     current_spec = guess_channel_spec(tensor)
 30 | 
 31 |     cond do
 32 |       current_spec == target -> tensor
 33 |       target == :first -> Nx.transpose(tensor, axes: [2, 0, 1])
 34 |       target == :last -> Nx.transpose(tensor, axes: [1, 2, 0])
 35 |     end
 36 |   end
 37 | 
 38 |   @spec guess_channel_spec(Nx.Tensor.t()) :: channel_spec_t()
 39 |   defp guess_channel_spec(tensor) do
 40 |     case Nx.shape(tensor) do
 41 |       {3, _w, _h} -> :first
 42 |       {_batch, 3, _w, _h} -> :first
 43 |       {_w, _h, 3} -> :last
 44 |       {_batch, _w, _h, 3} -> :last
 45 |       shape -> raise "Failed to infer channel spec for shape #{inspect(shape)}"
 46 |     end
 47 |   end
 48 | 
 49 |   @spec convert_pixel_type(Nx.Tensor.t(), pixel_type_t()) :: Nx.Tensor.t()
 50 |   def convert_pixel_type(tensor, {:f, _size} = target) do
 51 |     case Nx.type(tensor) do
 52 |       {:f, _} -> Nx.as_type(tensor, target)
 53 |       {:u, _} -> tensor |> Nx.divide(255) |> convert_pixel_type(target)
 54 |     end
 55 |   end
 56 | 
 57 |   def convert_pixel_type(tensor, {:u, _size} = target) do
 58 |     case Nx.type(tensor) do
 59 |       ^target -> tensor
 60 |       {:u, _size} -> Nx.as_type(tensor, target)
 61 |       {:f, _size} -> tensor |> Nx.multiply(255) |> convert_pixel_type(target)
 62 |     end
 63 |   end
 64 | 
 65 |   def convert_pixel_type(tensor, nil), do: tensor
 66 | 
 67 |   @spec read_image(ExVision.Model.input_t()) :: [Nx.Tensor.t()]
 68 |   defp read_image(%Nx.Batch{} = batch), do: read_image(batch.stack)
 69 | 
 70 |   defp read_image(list) when is_list(list) do
 71 |     list |> Enum.map(&read_image/1)
 72 |   end
 73 | 
 74 |   defp read_image(%Vix.Vips.Image{} = image) do
 75 |     image |> Image.to_nx!() |> read_image()
 76 |   end
 77 | 
 78 |   defp read_image(x) when Nx.is_tensor(x) do
 79 |     ensure_grad_3(x)
 80 |   end
 81 | 
 82 |   defp read_image(x) when is_binary(x) do
 83 |     x |> Image.open!() |> read_image()
 84 |   end
 85 | 
 86 |   defp ensure_grad_3(tensor) do
 87 |     tensor
 88 |     |> Nx.rank()
 89 |     |> case do
 90 |       3 -> [tensor]
 91 |       4 -> tensor |> Nx.to_batched(1) |> Stream.map(&Nx.squeeze(&1, axes: [0])) |> Enum.to_list()
 92 |       other -> raise "Received unexpected tensor of grad #{other}"
 93 |     end
 94 |   end
 95 | 
 96 |   @type resize_spec_t() :: number() | Types.image_size_t()
 97 |   @spec resize(Nx.Tensor.t(), resize_spec_t()) :: Nx.Tensor.t()
 98 |   def resize(tensor, size) when is_number(size) do
 99 |     NxImage.resize_short(tensor, size, channels: guess_channel_spec(tensor))
100 |   end
101 | 
102 |   def resize(tensor, size) when is_tuple(size) do
103 |     NxImage.resize(tensor, size, channels: guess_channel_spec(tensor))
104 |   end
105 | 
106 |   @spec image_size(Vix.Vips.Image.t() | Nx.Tensor.t()) :: Types.image_size_t()
107 |   def image_size(%Vix.Vips.Image{} = image), do: {Image.height(image), Image.width(image)}
108 | 
109 |   def image_size(t) when Nx.is_tensor(t) do
110 |     case t |> Nx.squeeze() |> Nx.shape() do
111 |       {3, w, h} -> {w, h}
112 |       {w, h, 3} -> {w, h}
113 |     end
114 |   end
115 | 
116 |   @spec load_categories(Path.t()) :: [atom()]
117 |   def load_categories(path) do
118 |     path
119 |     |> File.read!()
120 |     |> Jason.decode!()
121 |     |> Enum.map(&normalize_category_name/1)
122 |   end
123 | 
124 |   @spec normalize_category_name(String.t()) :: atom()
125 |   def normalize_category_name(name),
126 |     do: name |> String.downcase() |> String.replace(~r(\ |\'|\-), "_") |> String.to_atom()
127 | 
128 |   @spec onnx_result_backend_transfer(tuple()) :: tuple()
129 |   def onnx_result_backend_transfer(tuple),
130 |     do: tuple |> Tuple.to_list() |> Enum.map(&Nx.backend_transfer/1) |> List.to_tuple()
131 | 
132 |   @spec onnx_input_shape(Ortex.Model.t()) :: tuple()
133 |   def onnx_input_shape(%Ortex.Model{reference: r}) do
134 |     ["input", "Float32", shape] =
135 |       r
136 |       |> Ortex.Native.show_session()
137 |       |> Enum.find(fn [name, _type, _shape] -> name == "input" end)
138 |       |> hd()
139 | 
140 |     List.to_tuple(shape)
141 |   end
142 | 
143 |   @spec onnx_output_names(Ortex.Model.t()) :: [String.t()]
144 |   def onnx_output_names(%Ortex.Model{reference: r}) do
145 |     {_inputs, outputs} = Ortex.Native.show_session(r)
146 | 
147 |     Enum.map(outputs, fn {name, _type, _shape} -> name end)
148 |   end
149 | 
150 |   @spec batched_run(atom(), ExVision.Model.input_t()) :: ExVision.Model.output_t()
151 |   def batched_run(process_name, input) when is_list(input) do
152 |     Nx.Serving.batched_run(process_name, input)
153 |   end
154 | 
155 |   def batched_run(process_name, input) do
156 |     process_name |> batched_run([input]) |> hd()
157 |   end
158 | 
159 |   @spec scale_and_listify_bbox(Nx.Tensor.t(), Nx.Tensor.t()) :: [integer()]
160 |   def scale_and_listify_bbox(bbox, scales) do
161 |     bbox
162 |     |> Nx.squeeze(axes: [0])
163 |     |> Nx.multiply(scales)
164 |     |> Nx.round()
165 |     |> Nx.as_type(:s64)
166 |     |> Nx.to_list()
167 |   end
168 | 
169 |   @spec squeeze_and_listify(Nx.Tensor.t()) :: [number()]
170 |   def squeeze_and_listify(batched_value) do
171 |     batched_value |> Nx.squeeze(axes: [0]) |> Nx.to_list()
172 |   end
173 | end
174 | 


--------------------------------------------------------------------------------
/lib/ex_vision/cache.ex:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.Cache do
  2 |   @moduledoc false
  3 |   # Module responsible for handling model file caching
  4 | 
  5 |   use GenServer
  6 |   require Logger
  7 | 
  8 |   @type lazy_get_option_t() :: {:force, boolean()}
  9 | 
 10 |   @doc """
 11 |   Lazily evaluate the path from the cache directory.
 12 |   It will only download the file if it's missing or the `force: true` option is given.
 13 |   """
 14 |   @spec lazy_get(term() | pid(), Path.t(), options :: [lazy_get_option_t()]) ::
 15 |           {:ok, Path.t()} | {:error, reason :: atom()}
 16 |   def lazy_get(server, path, options \\ []) do
 17 |     with {:ok, options} <- Keyword.validate(options, force: false),
 18 |          do: GenServer.call(server, {:download, path, options}, :infinity)
 19 |   end
 20 | 
 21 |   @spec start_link(keyword()) :: GenServer.on_start()
 22 |   def start_link(opts) do
 23 |     {init_args, opts} = Keyword.split(opts, [:server_url, :cache_path])
 24 |     GenServer.start_link(__MODULE__, init_args, opts)
 25 |   end
 26 | 
 27 |   @impl true
 28 |   def init(opts) do
 29 |     opts = Keyword.validate!(opts, cache_path: get_cache_path(), server_url: get_server_url())
 30 | 
 31 |     with {:ok, server_url} <- URI.new(opts[:server_url]),
 32 |          :ok <- File.mkdir_p(opts[:cache_path]) do
 33 |       {:ok,
 34 |        %{
 35 |          downloads: %{},
 36 |          server_url: server_url,
 37 |          cache_path: opts[:cache_path],
 38 |          refs: %{}
 39 |        }}
 40 |     end
 41 |   end
 42 | 
 43 |   @impl true
 44 |   def handle_call({:download, cache_path, options}, from, state) do
 45 |     file_path = Path.join(state.cache_path, cache_path)
 46 |     # Register the caller as a waiter for this path (only stored when a download is pending)
 47 |     updated_downloads =
 48 |       Map.update(state.downloads, cache_path, MapSet.new([from]), &MapSet.put(&1, from))
 49 |     # Join an in-flight download, serve the cached file, or start a new download job
 50 |     cond do
 51 |       Map.has_key?(state.downloads, cache_path) ->
 52 |         {:noreply, %{state | downloads: updated_downloads}}
 53 |       # The cached file is only served when the caller did not request `force: true`
 54 |       File.exists?(file_path) and !options[:force] ->
 55 |         {:reply, {:ok, file_path}, state}
 56 | 
 57 |       true ->
 58 |         ref = do_create_download_job(cache_path, state)
 59 | 
 60 |         {:noreply,
 61 |          %{state | downloads: updated_downloads, refs: Map.put(state.refs, ref, cache_path)}}
 62 |     end
 63 |   end
 64 | 
 65 |   @impl true
 66 |   def handle_info({ref, result}, state) do
 67 |     state = emit(result, ref, state)
 68 |     {:noreply, state}
 69 |   end
 70 | 
 71 |   @impl true
 72 |   def handle_info({:DOWN, ref, :process, _pid, reason}, state) do
 73 |     state =
 74 |       if reason != :normal do
 75 |         Logger.error("Task #{inspect(ref)} has crashed due to #{inspect(reason)}")
 76 |         emit({:error, reason}, ref, state)
 77 |       else
 78 |         state
 79 |       end
 80 | 
 81 |     {:noreply, state}
 82 |   end
 83 | 
 84 |   @impl true
 85 |   def handle_info(msg, state) do
 86 |     Logger.warning("Received an unknown message #{inspect(msg)}. Ignoring")
 87 |     {:noreply, state}
 88 |   end
 89 | 
 90 |   defp emit(message, ref, state) do
 91 |     path = state.refs[ref]
 92 | 
 93 |     state.downloads
 94 |     |> Map.get(path, [])
 95 |     |> Enum.each(fn from ->
 96 |       GenServer.reply(from, message)
 97 |     end)
 98 | 
 99 |     %{state | refs: Map.delete(state.refs, ref), downloads: Map.delete(state.downloads, path)}
100 |   end
101 | 
102 |   defp do_create_download_job(path, %{server_url: server_url, cache_path: cache_path}) do
103 |     target_file_path = Path.join(cache_path, path)
104 |     download_url = URI.append_path(server_url, ensure_backslash(path))
105 | 
106 |     %Task{ref: ref} =
107 |       Task.async(fn ->
108 |         download_file(download_url, target_file_path)
109 |       end)
110 | 
111 |     ref
112 |   end
113 | 
114 |   @default_cache_path Application.compile_env(:ex_vision, :cache_path, "/tmp/ex_vision/cache")
115 |   defp get_cache_path() do
116 |     Application.get_env(:ex_vision, :cache_path, @default_cache_path)
117 |   end
118 | 
119 |   @default_server_url Application.compile_env(
120 |                         :ex_vision,
121 |                         :server_url,
122 |                         URI.new!("https://ai.swmansion.com/exvision/files")
123 |                       )
124 |   defp get_server_url() do
125 |     Application.get_env(:ex_vision, :server_url, @default_server_url)
126 |   end
127 | 
128 |   @spec download_file(URI.t(), Path.t()) ::
129 |           {:ok, Path.t()} | {:error, reason :: any()}
130 |   defp download_file(url, cache_path) do
131 |     with :ok <- cache_path |> Path.dirname() |> File.mkdir_p(),
132 |          tmp_file_path = cache_path <> ".unconfirmed",
133 |          tmp_file = File.stream!(tmp_file_path),
134 |          :ok <- do_download_file(url, tmp_file),
135 |          :ok <- validate_download(tmp_file_path),
136 |          :ok <- File.rename(tmp_file_path, cache_path) do
137 |       {:ok, cache_path}
138 |     end
139 |   end
140 | 
141 |   defp ensure_backslash("/" <> _rest = i), do: i
142 |   defp ensure_backslash(i), do: "/" <> i
143 | 
144 |   defp validate_download(path) do
145 |     if File.exists?(path),
146 |       do: :ok,
147 |       else: {:error, :download_failed}
148 |   end
149 | 
150 |   @spec do_download_file(URI.t(), File.Stream.t()) :: :ok | {:error, reason :: any()}
151 |   defp do_download_file(%URI{} = url, %File.Stream{path: target_file_path} = target_file) do
152 |     Logger.debug("Downloading file from `#{url}` and saving to `#{target_file_path}`")
153 | 
154 |     case make_get_request(url, raw: true, into: target_file) do
155 |       {:ok, _resp} ->
156 |         :ok
157 | 
158 |       {:error, reason} = error ->
159 |         Logger.error("Failed to download the file due to #{inspect(reason)}")
160 |         File.rm(target_file_path)
161 |         error
162 |     end
163 |   end
164 | 
165 |   defp make_get_request(url, options) do
166 |     url
167 |     |> Req.get(options)
168 |     |> case do
169 |       {:ok, %Req.Response{status: 200}} = resp ->
170 |         resp
171 | 
172 |       {:ok, %Req.Response{status: 404}} ->
173 |         {:error, :doesnt_exist}
174 | 
175 |       {:ok, %Req.Response{status: status}} ->
176 |         Logger.warning("Request has failed with status #{status}")
177 |         {:error, :server_error}
178 | 
179 |       {:error, %Mint.TransportError{reason: reason}} ->
180 |         {:error, reason}
181 | 
182 |       {:error, _error} ->
183 |         {:error, :connection_failed}
184 |     end
185 |   end
186 | end
187 | 


--------------------------------------------------------------------------------
/test/ex_vision/utils_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule ExVision.UtilsTest do
  2 |   use ExUnit.Case, async: true
  3 |   alias ExVision.Utils
  4 | 
  5 |   @img_path Path.join(__DIR__, "../assets/cat.jpg")
  6 |   @categories_path Path.join(__DIR__, "../assets/categories.json")
  7 | 
  8 |   describe "load_image/1 loads from" do
  9 |     test "path" do
 10 |       assert [img] = Utils.load_image(@img_path)
 11 |       assert Nx.shape(img) == {3, 360, 543}
 12 |       assert Nx.type(img) == {:f, 32}
 13 |     end
 14 | 
 15 |     test ":image library image" do
 16 |       img = Image.open!(@img_path)
 17 |       assert [img_from_image] = Utils.load_image(img)
 18 |       assert [img_from_path] = Utils.load_image(@img_path)
 19 |       assert Nx.to_number(Nx.all(Nx.equal(img_from_image, img_from_path))) == 1
 20 |     end
 21 | 
 22 |     test "Nx.Tensor" do
 23 |       tensor = @img_path |> Image.open!() |> Image.to_nx!(shape: :hwc)
 24 |       assert [img_from_tensor] = Utils.load_image(tensor)
 25 |       assert [img_from_path] = Utils.load_image(@img_path)
 26 |       assert Nx.to_number(Nx.all(Nx.equal(img_from_tensor, img_from_path))) == 1
 27 |     end
 28 | 
 29 |     test "batched Nx.Tensor" do
 30 |       tensor =
 31 |         @img_path
 32 |         |> Image.open!()
 33 |         |> Image.to_nx!(shape: :hwc)
 34 |         |> Stream.duplicate(2)
 35 |         |> Enum.to_list()
 36 |         |> Nx.stack()
 37 | 
 38 |       tensor
 39 |       |> Utils.load_image()
 40 |       |> Enum.each(&assert Nx.shape(&1) == {3, 360, 543})
 41 |     end
 42 | 
 43 |     test "list of differently sized tensors" do
 44 |       input = [
 45 |         Nx.iota({3, 10, 20}, type: :f32),
 46 |         Nx.iota({3, 20, 10}, type: :f32)
 47 |       ]
 48 | 
 49 |       input
 50 |       |> Utils.load_image()
 51 |       |> Enum.zip(input)
 52 |       |> Enum.each(fn {a, b} -> assert Nx.to_number(Nx.all(Nx.equal(a, b))) == 1 end)
 53 |     end
 54 | 
 55 |     test "list of tensors" do
 56 |       tensor =
 57 |         @img_path
 58 |         |> Image.open!()
 59 |         |> Image.to_nx!(shape: :hwc)
 60 |         |> Stream.duplicate(2)
 61 |         |> Enum.to_list()
 62 | 
 63 |       tensor
 64 |       |> Utils.load_image()
 65 |       |> Enum.each(&assert Nx.shape(&1) == {3, 360, 543})
 66 |     end
 67 | 
 68 |     test "list of paths" do
 69 |       assert [a, b] = img = Utils.load_image([@img_path, @img_path])
 70 | 
 71 |       Enum.each(img, fn img ->
 72 |         assert Nx.shape(img) == {3, 360, 543}
 73 |         assert Nx.type(img) == {:f, 32}
 74 |       end)
 75 | 
 76 |       assert Nx.to_number(Nx.all(Nx.equal(a, b))) == 1
 77 |     end
 78 | 
 79 |     test "Nx.Batch" do
 80 |       tensor =
 81 |         @img_path
 82 |         |> Image.open!()
 83 |         |> Image.to_nx!(shape: :hwc)
 84 | 
 85 |       serving = Nx.Serving.new(fn opts -> Nx.Defn.jit(fn a -> a end, opts) end)
 86 |       batch = Nx.Batch.stack([tensor, tensor])
 87 |       batch = Nx.Serving.run(serving, batch)
 88 |       output = Utils.load_image(batch)
 89 | 
 90 |       Enum.each(output, fn img ->
 91 |         assert Nx.shape(img) == {3, 360, 543}
 92 |       end)
 93 |     end
 94 |   end
 95 | 
 96 |   describe "load_image/2 handles option to" do
 97 |     test "channel spec change from to :last" do
 98 |       tensor_first = Nx.iota({3, 128, 256}, type: :f32)
 99 |       assert [tensor_last] = Utils.load_image(tensor_first, channel_spec: :last)
100 |       assert Nx.shape(tensor_last) == {128, 256, 3}
101 | 
102 |       assert [tensor_new_last] = Utils.load_image(tensor_last, channel_spec: :last)
103 |       assert Nx.to_number(Nx.all(Nx.equal(tensor_last, tensor_new_last))) == 1
104 |     end
105 | 
106 |     test "channel spec change to :first" do
107 |       assert [img] = Utils.load_image(@img_path, channel_spec: :last)
108 |       assert Nx.shape(img) == {360, 543, 3}
109 |     end
110 | 
111 |     test "pixel format change" do
112 |       for t <- [{:u, 8}, {:f, 16}] do
113 |         assert [img] = Utils.load_image(@img_path, pixel_type: t)
114 |         assert Nx.type(img) == t, "assertion failed for #{inspect(t)}"
115 |       end
116 |     end
117 |   end
118 | 
119 |   test "load_categories/1" do
120 |     expected_categories =
121 |       [
122 |         "__background__",
123 |         "person",
124 |         "bicycle",
125 |         "car",
126 |         "motorcycle",
127 |         "airplane",
128 |         "bus",
129 |         "train",
130 |         "truck",
131 |         "boat",
132 |         "traffic_light",
133 |         "fire_hydrant",
134 |         "n/a",
135 |         "stop_sign",
136 |         "parking_meter",
137 |         "bench",
138 |         "bird",
139 |         "cat",
140 |         "dog",
141 |         "horse",
142 |         "sheep",
143 |         "cow",
144 |         "elephant",
145 |         "bear",
146 |         "zebra",
147 |         "giraffe",
148 |         "n/a",
149 |         "backpack",
150 |         "umbrella",
151 |         "n/a",
152 |         "n/a",
153 |         "handbag",
154 |         "tie",
155 |         "suitcase",
156 |         "frisbee",
157 |         "skis",
158 |         "snowboard",
159 |         "sports_ball",
160 |         "kite",
161 |         "baseball_bat",
162 |         "baseball_glove",
163 |         "skateboard",
164 |         "surfboard",
165 |         "tennis_racket",
166 |         "bottle",
167 |         "n/a",
168 |         "wine_glass",
169 |         "cup",
170 |         "fork",
171 |         "knife",
172 |         "spoon",
173 |         "bowl",
174 |         "banana",
175 |         "apple",
176 |         "sandwich",
177 |         "orange",
178 |         "broccoli",
179 |         "carrot",
180 |         "hot_dog",
181 |         "pizza",
182 |         "donut",
183 |         "cake",
184 |         "chair",
185 |         "couch",
186 |         "potted_plant",
187 |         "bed",
188 |         "n/a",
189 |         "dining_table",
190 |         "n/a",
191 |         "n/a",
192 |         "toilet",
193 |         "n/a",
194 |         "tv",
195 |         "laptop",
196 |         "mouse",
197 |         "remote",
198 |         "keyboard",
199 |         "cell_phone",
200 |         "microwave",
201 |         "oven",
202 |         "toaster",
203 |         "sink",
204 |         "refrigerator",
205 |         "n/a",
206 |         "book",
207 |         "clock",
208 |         "vase",
209 |         "scissors",
210 |         "teddy_bear",
211 |         "hair_drier",
212 |         "toothbrush"
213 |       ]
214 |       |> Enum.map(&String.to_atom/1)
215 | 
216 |     assert Utils.load_categories(@categories_path) == expected_categories
217 |   end
218 | 
219 |   describe "convert_channel_spec/2" do
220 |     test "converts :last to :first" do
221 |       input = Nx.iota({1, 2, 3})
222 |       assert input |> Utils.convert_channel_spec(:first) |> Nx.shape() == {3, 1, 2}
223 |     end
224 | 
225 |     test "converts :first to :last" do
226 |       input = Nx.iota({3, 1, 2})
227 |       assert input |> Utils.convert_channel_spec(:last) |> Nx.shape() == {1, 2, 3}
228 |     end
229 |   end
230 | end
231 | 


--------------------------------------------------------------------------------
/.credo.exs:
--------------------------------------------------------------------------------
  1 | # This file contains the configuration for Credo and you are probably reading
  2 | # this after creating it with `mix credo.gen.config`.
  3 | #
  4 | # If you find anything wrong or unclear in this file, please report an
  5 | # issue on GitHub: https://github.com/rrrene/credo/issues
  6 | #
  7 | %{
  8 |   #
  9 |   # You can have as many configs as you like in the `configs:` field.
 10 |   configs: [
 11 |     %{
 12 |       #
 13 |       # Run any config using `mix credo -C <name>`. If no config name is given
 14 |       # "default" is used.
 15 |       #
 16 |       name: "default",
 17 |       #
 18 |       # These are the files included in the analysis:
 19 |       files: %{
 20 |         #
 21 |         # You can give explicit globs or simply directories.
 22 |         # In the latter case `**/*.{ex,exs}` will be used.
 23 |         #
 24 |         included: [
 25 |           "lib/",
 26 |           "src/",
 27 |           "test/",
 28 |           "web/",
 29 |           "apps/*/lib/",
 30 |           "apps/*/src/",
 31 |           "apps/*/test/",
 32 |           "apps/*/web/"
 33 |         ],
 34 |         excluded: [~r"/_build/", ~r"/deps/", ~r"/node_modules/"]
 35 |       },
 36 |       #
 37 |       # Load and configure plugins here:
 38 |       #
 39 |       plugins: [],
 40 |       #
 41 |       # If you create your own checks, you must specify the source files for
 42 |       # them here, so they can be loaded by Credo before running the analysis.
 43 |       #
 44 |       requires: [],
 45 |       #
 46 |       # If you want to enforce a style guide and need a more traditional linting
 47 |       # experience, you can change `strict` to `true` below:
 48 |       #
 49 |       strict: false,
 50 |       #
 51 |       # To modify the timeout for parsing files, change this value:
 52 |       #
 53 |       parse_timeout: 5000,
 54 |       #
 55 |       # If you want to use uncolored output by default, you can change `color`
 56 |       # to `false` below:
 57 |       #
 58 |       color: true,
 59 |       #
 60 |       # You can customize the parameters of any check by adding a second element
 61 |       # to the tuple.
 62 |       #
 63 |       # To disable a check put `false` as second element:
 64 |       #
 65 |       #     {Credo.Check.Design.DuplicatedCode, false}
 66 |       #
 67 |       checks: [
 68 |         #
 69 |         ## Consistency Checks
 70 |         #
 71 |         {Credo.Check.Consistency.ExceptionNames, []},
 72 |         {Credo.Check.Consistency.LineEndings, []},
 73 |         {Credo.Check.Consistency.ParameterPatternMatching, []},
 74 |         {Credo.Check.Consistency.SpaceAroundOperators, []},
 75 |         {Credo.Check.Consistency.SpaceInParentheses, []},
 76 |         {Credo.Check.Consistency.TabsOrSpaces, []},
 77 | 
 78 |         #
 79 |         ## Design Checks
 80 |         #
 81 |         # You can customize the priority of any check
 82 |         # Priority values are: `low, normal, high, higher`
 83 |         #
 84 |         {Credo.Check.Design.AliasUsage,
 85 |          [priority: :low, if_nested_deeper_than: 2, if_called_more_often_than: 0]},
 86 |         # You can also customize the exit_status of each check.
 87 |         # If you don't want TODO comments to cause `mix credo` to fail, just
 88 |         # set this value to 0 (zero).
 89 |         #
 90 |         {Credo.Check.Design.TagTODO, [exit_status: 0]},
 91 |         {Credo.Check.Design.TagFIXME, []},
 92 | 
 93 |         #
 94 |         ## Readability Checks
 95 |         #
 96 |         {Credo.Check.Readability.AliasOrder, [priority: :normal]},
 97 |         {Credo.Check.Readability.FunctionNames, []},
 98 |         {Credo.Check.Readability.LargeNumbers, []},
 99 |         {Credo.Check.Readability.MaxLineLength, [priority: :low, max_length: 120]},
100 |         {Credo.Check.Readability.ModuleAttributeNames, []},
101 |         {Credo.Check.Readability.ModuleDoc, []},
102 |         {Credo.Check.Readability.ModuleNames, false},
103 |         {Credo.Check.Readability.ParenthesesInCondition, []},
104 |         {Credo.Check.Readability.ParenthesesOnZeroArityDefs, parens: true},
105 |         {Credo.Check.Readability.PredicateFunctionNames, []},
106 |         {Credo.Check.Readability.PreferImplicitTry, []},
107 |         {Credo.Check.Readability.RedundantBlankLines, []},
108 |         {Credo.Check.Readability.Semicolons, []},
109 |         {Credo.Check.Readability.SpaceAfterCommas, []},
110 |         {Credo.Check.Readability.StringSigils, []},
111 |         {Credo.Check.Readability.TrailingBlankLine, []},
112 |         {Credo.Check.Readability.TrailingWhiteSpace, []},
113 |         {Credo.Check.Readability.UnnecessaryAliasExpansion, []},
114 |         {Credo.Check.Readability.VariableNames, []},
115 |         {Credo.Check.Readability.WithSingleClause, false},
116 | 
117 |         #
118 |         ## Refactoring Opportunities
119 |         #
120 |         {Credo.Check.Refactor.CondStatements, []},
121 |         {Credo.Check.Refactor.CyclomaticComplexity, []},
122 |         {Credo.Check.Refactor.FunctionArity, []},
123 |         {Credo.Check.Refactor.LongQuoteBlocks, []},
124 |         {Credo.Check.Refactor.MapInto, false},
125 |         {Credo.Check.Refactor.MatchInCondition, []},
126 |         {Credo.Check.Refactor.NegatedConditionsInUnless, []},
127 |         {Credo.Check.Refactor.NegatedConditionsWithElse, []},
128 |         {Credo.Check.Refactor.Nesting, [max_nesting: 3]},
129 |         {Credo.Check.Refactor.UnlessWithElse, []},
130 |         {Credo.Check.Refactor.WithClauses, []},
131 | 
132 |         #
133 |         ## Warnings
134 |         #
135 |         {Credo.Check.Warning.BoolOperationOnSameValues, []},
136 |         {Credo.Check.Warning.ExpensiveEmptyEnumCheck, []},
137 |         {Credo.Check.Warning.IExPry, []},
138 |         {Credo.Check.Warning.IoInspect, []},
139 |         {Credo.Check.Warning.LazyLogging, false},
140 |         {Credo.Check.Warning.MixEnv, []},
141 |         {Credo.Check.Warning.OperationOnSameValues, []},
142 |         {Credo.Check.Warning.OperationWithConstantResult, []},
143 |         {Credo.Check.Warning.RaiseInsideRescue, []},
144 |         {Credo.Check.Warning.UnusedEnumOperation, []},
145 |         {Credo.Check.Warning.UnusedFileOperation, []},
146 |         {Credo.Check.Warning.UnusedKeywordOperation, []},
147 |         {Credo.Check.Warning.UnusedListOperation, []},
148 |         {Credo.Check.Warning.UnusedPathOperation, []},
149 |         {Credo.Check.Warning.UnusedRegexOperation, []},
150 |         {Credo.Check.Warning.UnusedStringOperation, []},
151 |         {Credo.Check.Warning.UnusedTupleOperation, []},
152 |         {Credo.Check.Warning.UnsafeExec, []},
153 | 
154 |         #
155 |         # Checks scheduled for next check update (opt-in for now, just replace `false` with `[]`)
156 | 
157 |         #
158 |         # Controversial and experimental checks (opt-in, just replace `false` with `[]`)
159 |         #
160 |         {Credo.Check.Readability.StrictModuleLayout,
161 |          priority: :normal, order: ~w/shortdoc moduledoc behaviour use import require alias/a},
162 |         {Credo.Check.Consistency.MultiAliasImportRequireUse, false},
163 |         {Credo.Check.Consistency.UnusedVariableNames, force: :meaningful},
164 |         {Credo.Check.Design.DuplicatedCode, []},
165 |         {Credo.Check.Readability.AliasAs, false},
166 |         {Credo.Check.Readability.MultiAlias, []},
167 |         {Credo.Check.Readability.Specs, []},
168 |         {Credo.Check.Readability.SinglePipe, false},
169 |         {Credo.Check.Readability.WithCustomTaggedTuple, false},
170 |         {Credo.Check.Refactor.ABCSize, false},
171 |         {Credo.Check.Refactor.AppendSingleItem, false},
172 |         {Credo.Check.Refactor.DoubleBooleanNegation, false},
173 |         {Credo.Check.Refactor.ModuleDependencies, false},
174 |         {Credo.Check.Refactor.NegatedIsNil, false},
175 |         {Credo.Check.Refactor.PipeChainStart, []},
176 |         {Credo.Check.Refactor.VariableRebinding, false},
177 |         {Credo.Check.Warning.LeakyEnvironment, []},
178 |         {Credo.Check.Warning.MapGetUnsafePass, []},
179 |         {Credo.Check.Warning.UnsafeToAtom, false}
180 | 
181 |         #
182 |         # Custom checks can be created using `mix credo.gen.check`.
183 |         #
184 |       ]
185 |     }
186 |   ]
187 | }
188 | 


--------------------------------------------------------------------------------
/examples/3-membrane.livemd:
--------------------------------------------------------------------------------
  1 | <!-- livebook:{"file_entries":[{"name":"big-buck-bunny.h264","type":"attachment"}]} -->
  2 | 
  3 | # Using ExVision with Membrane
  4 | 
  5 | ```elixir
  6 | Mix.install(
  7 |   [
  8 |     :ex_vision,
  9 |     :image,
 10 |     :membrane_core,
 11 |     :membrane_file_plugin,
 12 |     :membrane_flv_plugin,
 13 |     :membrane_h26x_plugin,
 14 |     :membrane_h264_ffmpeg_plugin,
 15 |     :membrane_ffmpeg_swscale_plugin,
 16 |     {:membrane_mp4_plugin, "~> 0.34.2"},
 17 |     :kino,
 18 |     :kino_membrane
 19 |   ],
 20 |   config: [
 21 |     nx: [default_backend: EXLA.Backend]
 22 |   ]
 23 | )
 24 | ```
 25 | 
 26 | ## Introduction
 27 | 
 28 | In this example we will showcase ExVision by integrating it into a media processing pipeline using [Membrane Framework](https://membrane.stream). This livebook can be treated as a tutorial on this process.
 29 | 
 30 | ### Prerequisites
 31 | 
 32 | * We will be using [Membrane Framework](https://membrane.stream), so basic familiarity with this framework is highly recommended
 33 | * Basic familiarity with ExVision
 34 | 
 35 | ### Contents of this tutorial
 36 | 
 37 | You will learn how to write a [Membrane Filter element](https://membrane.stream/learn/get_started_with_membrane/3) that makes use of one of ExVision's models, using an example of object detection.
 38 | 
 39 | In particular, we will implement a bird detector.
 40 | 
 41 | ## Integrate with Membrane
 42 | 
 43 | The main part of integrating with Membrane is implementing a Filter - an element which is responsible for applying a transformation on each frame in the stream.
 44 | 
 45 | But before we dive into the code, here are a few tips that will make it both easier to understand and easier to modify for your own use case:
 46 | 
 47 | * It's useful to constrain an accepted format on input and output pads to `%Membrane.RawVideo{pixel_format: :RGB}`.
 48 | 
 49 |   This format is equivalent to a stream of raw frames in RGB format, which is what most models are trained to accept. By setting this constraint, Membrane will be able to perform a sanity check to highlight some obvious errors in the processing pipeline.
 50 | 
 51 | * Model should be loaded in the `handle_setup/2` callback and stored in the element state.
 52 | 
 53 |   It may be tempting to initialize the model in `handle_init/2`, but it will delay the initialization of the pipeline, as it runs in the pipeline process, not the element process.
 54 | 
 55 | ### Writing the Membrane Element
 56 | 
 57 | With that knowledge, let's implement the Membrane Filter that will be responsible for:
 58 | 
 59 | 1. initialization of the detection model
 60 | 2. feeding the frames through the detector
 61 | 3. Drawing the boxes indicating the detected birds in the resulting image, using the `:image` library
 62 | 
 63 | ```elixir
 64 | defmodule Membrane.ExVision.Detector do
 65 |   use Membrane.Filter
 66 | 
 67 |   alias ExVision.ObjectDetection.Ssdlite320_MobileNetv3, as: Model
 68 |   alias ExVision.Types.BBox
 69 | 
 70 |   # Define both input and output pads
 71 |   # On both, we want to have raw image in RGB
 72 |   def_input_pad(:input,
 73 |     accepted_format: %Membrane.RawVideo{pixel_format: :RGB},
 74 |     flow_control: :auto
 75 |   )
 76 | 
 77 |   def_output_pad(:output,
 78 |     accepted_format: %Membrane.RawVideo{pixel_format: :RGB},
 79 |     flow_control: :auto
 80 |   )
 81 | 
 82 |   defmodule State do
 83 |     @moduledoc """
 84 |     A struct describing the state of the detector element
 85 |     """
 86 |     defstruct [:detector]
 87 | 
 88 |     @type t() :: %__MODULE__{
 89 |             detector: Model.t() | nil
 90 |           }
 91 |   end
 92 | 
 93 |   @impl true
 94 |   def handle_init(_ctx, _opts) do
 95 |     {[], %State{}}
 96 |   end
 97 | 
 98 |   # Model initialization should be performed in this callback
 99 |   @impl true
100 |   def handle_setup(_ctx, state) do
101 |     # Due to a quirk in Nx.Serving, all servings need to be registered under a name,
102 |     # as it's impossible to make a call to Nx.Serving using a PID.
103 |     # Generate a random process name
104 |     name =
105 |       10
106 |       |> :crypto.strong_rand_bytes()
107 |       |> then(&"#{&1}")
108 |       |> :base64.encode()
109 |       |> String.to_atom()
110 | 
111 |     {:ok, _pid} = Model.start_link(name: name)
112 | 
113 |     {[], %State{state | detector: name}}
114 |   end
115 | 
116 |   # The frames will be received in this callback
117 |   @impl true
118 |   def handle_buffer(:input, buffer, ctx, %State{detector: detector} = state) do
119 |     tensor = buffer_to_tensor(buffer, ctx.pads.input.stream_format)
120 |     {:ok, image} = Image.from_nx(tensor)
121 | 
122 |     # Run inference and filter out unlikely bounding boxes
123 |     predictions =
124 |       detector
125 |       |> Model.batched_run(tensor)
126 |       # keep only the confident bird bounding boxes
127 |       |> Enum.filter(fn %BBox{score: score, label: label} -> score > 0.3 and label == :bird end)
128 | 
129 |     # For each bounding box, represent it as a rectangle in the image
130 |     image =
131 |       Enum.reduce(predictions, image, fn %BBox{} = prediction, image ->
132 |         image
133 |         |> Image.Draw.rect!(
134 |           prediction.x1,
135 |           prediction.y1,
136 |           BBox.width(prediction),
137 |           BBox.height(prediction),
138 |           fill: false,
139 |           color: :red,
140 |           stroke_width: 5
141 |         )
142 |       end)
143 | 
144 |     # Emit the resulting buffer
145 |     {[buffer: {:output, fill_buffer_with_image(image, buffer)}], state}
146 |   end
147 | 
148 |   # Utility function that converts the buffer's raw payload into a `{height, width, 3}` u8 tensor
149 |   defp buffer_to_tensor(%Membrane.Buffer{payload: payload}, %Membrane.RawVideo{
150 |          width: w,
151 |          height: h
152 |        }) do
153 |     payload
154 |     |> Nx.from_binary(:u8)
155 |     |> Nx.reshape({h, w, 3}, names: [:height, :width, :colors])
156 |   end
157 | 
158 |   # Replaces the payload of the Membrane Buffer with the image contents
159 |   # This way, we're maintaining the buffer metadata, e.g. the timestamps
160 |   defp fill_buffer_with_image(image, buffer) do
161 |     image |> Image.to_nx!(shape: :hwc) |> Nx.to_binary() |> then(&%{buffer | payload: &1})
162 |   end
163 | end
164 | ```
165 | 
166 | ### Create the processing pipeline
167 | 
168 | <!-- livebook:{"break_markdown":true} -->
169 | 
170 | Now that we have a Membrane Filter implemented, the next step is to define a processing pipeline.
171 | 
172 | In this case, we will read the video from the file, feed it through our `Detector` element and then transform it back into a video in `.mp4` format.
173 | 
174 | The details of this process can be a little complicated. That said, in simple terms, we're going to:
175 | 
176 | 1. read the file
177 | 2. Parse the MP4 structure and extract the video from it
178 | 3. Decode the video to obtain raw frames (images) and convert them to RGB
179 | 4. **Apply our `Detector` module**
180 | 5. Encode our images to H264
181 | 6. Put our resulting video into the MP4 container
182 | 7. Save the result to the file
183 | 
184 | ```elixir
185 | defmodule Pipeline do
186 |   use Membrane.Pipeline
187 | 
188 |   @impl true
189 |   def handle_init(_ctx, {input_file, output_file}) do
190 |     structure =
191 |       child(%Membrane.File.Source{
192 |         chunk_size: 1024,
193 |         location: input_file,
194 |         seekable?: true
195 |       })
196 |       |> child(:demuxer, %Membrane.MP4.Demuxer.ISOM{optimize_for_non_fast_start?: true})
197 |       |> via_out(Pad.ref(:output, 1))
198 |       |> child(%Membrane.H264.Parser{
199 |         output_stream_structure: :annexb
200 |       })
201 |       |> child(Membrane.H264.FFmpeg.Decoder)
202 |       |> child(%Membrane.FFmpeg.SWScale.PixelFormatConverter{format: :RGB})
203 |       |> child(Membrane.ExVision.Detector)
204 |       |> child(%Membrane.FFmpeg.SWScale.PixelFormatConverter{format: :I420})
205 |       |> child(%Membrane.H264.FFmpeg.Encoder{profile: :baseline})
206 |       |> child(%Membrane.H264.Parser{
207 |         output_stream_structure: :avc1
208 |       })
209 |       |> child(Membrane.MP4.Muxer.ISOM)
210 |       |> child(:sink, %Membrane.File.Sink{
211 |         location: output_file
212 |       })
213 | 
214 |     {[spec: structure], %{}}
215 |   end
216 | 
217 |   # Terminate the process after the processing is finished
218 |   @impl true
219 |   def handle_element_end_of_stream(:sink, :input, _ctx, state) do
220 |     Membrane.Pipeline.terminate(self(), asynchronous?: true)
221 |     {[], state}
222 |   end
223 | 
224 |   @impl true
225 |   def handle_element_end_of_stream(_element, _pad, _ctx, state), do: {[], state}
226 | end
227 | ```
228 | 
229 | You're welcome to run the inference on your own file, but please keep in mind that this pipeline is specific to MP4 files containing H264 video and no audio stream. It will not work on other types of files.
230 | 
231 | ## Run inference
232 | 
233 | We have written the Filter responsible for applying our model and the full processing pipeline! It's time to make use of it. Let's download our input file first:
234 | 
235 | ```elixir
236 | {:ok, input_file} = ExVision.Cache.lazy_get(ExVision.Cache, "assets/example.mp4")
237 | ```
238 | 
239 | Define the location of our output file:
240 | 
241 | ```elixir
242 | output_file = Path.join("/tmp", "#{DateTime.utc_now()}.mp4")
243 | ```
244 | 
245 | And finally, execute our pipeline
246 | 
247 | ```elixir
248 | {:ok, _supervisor_pid, pipeline_pid} =
249 |   Membrane.Pipeline.start(Pipeline, {input_file, output_file})
250 | ```
251 | 
252 | ## Download the results
253 | 
254 | The pipeline is running in a separate process, therefore the previous call wasn't blocking. Our output file is not ready until the pipeline finishes and therefore terminates.
255 | 
256 | In order to get notified about the pipeline terminating, we will make use of `Process.monitor/1`
257 | 
258 | <!-- livebook:{"reevaluate_automatically":true} -->
259 | 
260 | ```elixir
261 | monitor = Process.monitor(pipeline_pid)
262 | 
263 | {time, _result} =
264 |   :timer.tc(fn ->
265 |     receive do
266 |       {:DOWN, ^monitor, :process, _pid, _reson} -> :ok
267 |     end
268 |   end)
269 | 
270 | Kino.Text.new("Operation took #{time / 1_000_000} seconds")
271 | ```
272 | 
273 | After the cell above has finished evaluating, our output file should be ready.
274 | 
275 | Let's write some code to fetch it from the notebook.
276 | 
277 | <!-- livebook:{"reevaluate_automatically":true} -->
278 | 
279 | ```elixir
280 | content_btn =
281 |   Kino.Download.new(fn -> File.read!(output_file) end,
282 |     label: "Download the video",
283 |     filename: "video.mp4"
284 |   )
285 | # Controls for removing the rendered file, plus the message shown once it is gone
286 | delete_btn = Kino.Control.button("Delete the file permanently")
287 | no_file_msg = Kino.Text.new("The file doesn't exist")
288 | # The buttons stay rendered after deletion, so a second click must not crash on a missing file
289 | Kino.listen(delete_btn, fn _data ->
290 |   if File.exists?(output_file), do: File.rm!(output_file)
291 |   Kino.render(no_file_msg)
292 | end)
293 | # Only offer the buttons when there is a file to act on
294 | if File.exists?(output_file),
295 |   do: Kino.Layout.grid([content_btn, delete_btn], gap: 10),
296 |   else: no_file_msg
297 | ```
298 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2020 Software Mansion
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/examples/1-basic-tutorial.livemd:
--------------------------------------------------------------------------------
  1 | <!-- livebook:{"app_settings":{"access_type":"public","show_source":true,"slug":"ex-vision-master-demo","zero_downtime":true},"file_entries":[{"name":"cat.jpg","type":"attachment"}]} -->
  2 | 
  3 | # ExVision walkthrough
  4 | 
  5 | ```elixir
  6 | Mix.install(
  7 |   [
  8 |     :ex_vision,
  9 |     :kino,
 10 |     :kino_bumblebee,
 11 |     :stb_image,
 12 |     :exla,
 13 |     :image
 14 |   ],
 15 |   config: [
 16 |     nx: [default_backend: EXLA.Backend]
 17 |   ]
 18 | )
 19 | ```
 20 | 
 21 | ## ExVision introduction
 22 | 
 23 | **This Livebook will only work when the repository is cloned locally**
 24 | 
 25 | <!-- livebook:{"break_markdown":true} -->
 26 | 
 27 | ExVision is a collection of models with easy to use API and descriptive output formats.
 28 | It uses [Ortex](https://www.github.com/elixir-nx/ortex) under the hood to run its predefined models.
 29 | 
 30 | The main objective of ExVision is ease of use. This sacrifices some control over the model but allows you to get started using predefined models in seconds. That approach should allow an average Elixir Developer to quickly introduce some AI into their app, just like that.
 31 | 
 32 | <!-- livebook:{"reevaluate_automatically":true} -->
 33 | 
 34 | ```elixir
 35 | alias ExVision.Classification.MobileNetV3Small, as: Classifier
 36 | alias ExVision.ObjectDetection.FasterRCNN_ResNet50_FPN, as: ObjectDetector
 37 | alias ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3, as: SemanticSegmentation
 38 | alias ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2, as: InstanceSegmentation
 39 | alias ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN, as: KeypointDetector
 40 | # Load every model once; each match on {:ok, model} raises MatchError if load/0 returns anything else
 41 | {:ok, classifier} = Classifier.load()
 42 | {:ok, object_detector} = ObjectDetector.load()
 43 | {:ok, semantic_segmentation} = SemanticSegmentation.load()
 44 | {:ok, instance_segmentation} = InstanceSegmentation.load()
 45 | {:ok, keypoint_detector} = KeypointDetector.load()
 46 | # Finish with Kino.nothing() so the cell does not render the last loaded model
 47 | Kino.nothing()
 48 | ```
 49 | 
 50 | At this point the models are loaded and ready for inference.
 51 | 
 52 | ExVision handles multiple types of input:
 53 | 
 54 | * file path
 55 | * pre-loaded Nx tensors, in both interleaved and planar formats
 56 | * Evision matrices.
 57 | 
 58 | Under the hood, all of these formats will be converted to Nx's Tensors and normalized for inference by the given model.
 59 | 
 60 | <!-- livebook:{"break_markdown":true} -->
 61 | 
 62 | ### Output formats
 63 | 
 64 | A big advantage of ExVision over using the models directly is its documentation and intuitive outputs. Hence, models return the following types:
 65 | 
 66 | * Classifier - a mapping from each category to its probability: [`%{category_t() => number()}`](http://localhost:55556/ExVision.Classification.MobileNetV3Small.html#t:output_t/0)
 67 | * Object Detector - a list of bounding boxes: [`list(BBox.t())`](http://localhost:55556/ExVision.ObjectDetection.Ssdlite320_MobileNetv3.BBox.html)
 68 | * Semantic Segmentation - a mapping of category to boolean tensor determining if the pixel is part of the mask for the given class: [`%{category_t() => Nx.Tensor.t()}`](http://localhost:55556/ExVision.SemanticSegmentation.DeepLabV3_MobileNetV3.html#t:output_t/0)
 69 | * Instance Segmentation - a list of bounding boxes with mask: [`list(BBoxWithMask.t())`](http://localhost:55556/ExVision.InstanceSegmentation.MaskRCNN_ResNet50_FPN_V2.html#t:output_t/0)
 70 | * Keypoint Detector - a list of bounding boxes with keypoints: [`list(BBoxWithKeypoints.t())`](http://localhost:55556/ExVision.KeypointDetection.KeypointRCNN_ResNet50_FPN.html#t:output_t/0)
 71 | 
 72 | <!-- livebook:{"break_markdown":true} -->
 73 | 
 74 | ### Example inference
 75 | 
 76 | Let's put it into practice and run some predictions on a sample image of the cat.
 77 | This code is intentionally using some calls to `dbg/1` macro in order to aid with the understanding of these formats.
 78 | 
 79 | <!-- livebook:{"break_markdown":true} -->
 80 | 
 81 | Let's start with loading our test suspect. For this purpose, we have defined a helper function that will automatically load some default images if you don't specify any.
 82 | 
 83 | ```elixir
 84 | defmodule ImageHandler do
 85 |   # Opens the image uploaded through `input`; with no upload, opens `default_image` obtained via ExVision.Cache
 86 |   def get(input, default_image) do
 87 |     input
 88 |     |> Kino.Input.read()
 89 |     |> case do
 90 |       %{file_ref: upload_ref} ->
 91 |         Kino.Input.file_path(upload_ref)
 92 |       nil ->
 93 |         {:ok, cached_path} = ExVision.Cache.lazy_get(ExVision.Cache, default_image)
 94 |         cached_path
 95 |     end
 96 |     |> Image.open!()
 97 |   end
 98 | end
 99 | ```
100 | 
101 | In the next cell, you can provide your own image that will be used as an example in this notebook. If you don't have anything handy, we're also providing a default image of a cat.
102 | 
103 | <!-- livebook:{"reevaluate_automatically":true} -->
104 | 
105 | ```elixir
106 | input = Kino.Input.image("Image to evaluate", format: :jpeg)
107 | ```
108 | 
109 | <!-- livebook:{"reevaluate_automatically":true} -->
110 | 
111 | ```elixir
112 | image = ImageHandler.get(input, "cat.jpg")
113 | ```
114 | 
115 | ### Image classification
116 | 
117 | Image classification is the process of assigning the image a category that best describes the contents of that image. For example, when given an image of a cat, an image classifier should predict that the image belongs to the `:cat` class.
118 | 
119 | The output format of a classifier is a dictionary that maps each category that the model knows to its probability. In most cases, that means that you will get a lot of categories with near zero probability and that's on purpose. Where possible, we don't want to make ExVision feel too much like magic. You're still doing AI, we're just handling the input and output format conversions.
120 | 
121 | Usually, the class with the highest probability is the category you should assign. However, if there are multiple classes with comparably high probabilities, this may indicate that the model is uncertain and the result should not be treated as a prediction at all.
122 | 
123 | #### Code example
124 | 
125 | In this example, we will try to find out the most likely class that the provided image could belong to. In order to do this, we will:
126 | 
127 | 1. Use the image classifier to gather predictions
128 | 2. Sort the predictions
129 | 3. Take 10 of the most likely ones
130 | 4. Plot the results
131 | 
132 | <!-- livebook:{"reevaluate_automatically":true} -->
133 | 
134 | ```elixir
135 | predictions =
136 |   image
137 |   # run inference; the result is enumerated below as {label, score} pairs
138 |   |> then(&Classifier.run(classifier, &1))
139 |   # sort by score, highest first (this yields a list of {label, score} tuples)
140 |   |> Enum.sort_by(fn {_label, score} -> score end, :desc)
141 |   # keep only the 10 highest-scoring predictions
142 |   |> Enum.take(10)
143 |   |> dbg()
144 | # The list is sorted, so its head holds the best-scoring label
145 | [{top_prediction, _score} | _rest] = predictions
146 | 
147 | # Kino rendering: show the image next to the scored list in a two-column grid
148 | scored_list = Kino.Bumblebee.ScoredList.new(predictions)
149 | 
150 | Kino.Layout.grid(
151 |   [
152 |     image,
153 |     Kino.Layout.grid([Kino.Text.new("Class probabilities"), scored_list])
154 |   ],
155 |   columns: 2,
156 |   gap: 25
157 | )
158 | ```
159 | 
160 | ### Object detection
161 | 
162 | In object detection, we're trying to locate the objects in the image. The format of the output should provide a lot of clarification: it's a list of bounding boxes, each indicating the area of the image in which, according to the model, an object of the specified class is located. Each bounding box is also assigned a score, which can be interpreted as the certainty of the detection.
163 | 
164 | By default, ExVision will discard extremely low probability bounding boxes (with scores lower than 0.1), as they are just noise.
165 | 
166 | #### Code example
167 | 
168 | In this example, we will draw a rectangle around the biggest object in the image. In order to do this, we will perform the following operations:
169 | 
170 | 1. Use the object detector to get the bounding boxes
171 | 2. Find the bounding box with the biggest total area
172 | 3. Draw a rectangle around the region indicated by that bounding box
173 | 
174 | <!-- livebook:{"reevaluate_automatically":true} -->
175 | 
176 | ```elixir
177 | alias ExVision.Types.BBox
178 | 
179 | # apply the model and keep a single bounding box
180 | prediction =
181 |   image
182 |   |> then(&ObjectDetector.run(object_detector, &1))
183 |   # Find the biggest object by area; NOTE: Enum.max_by/2 raises Enum.EmptyError when nothing was detected
184 |   |> Enum.max_by(&(BBox.width(&1) * BBox.height(&1)))
185 |   |> dbg()
186 | 
187 | # Draw the box outline on the image: top-left corner (x1, y1) plus width and height
188 | Image.Draw.rect!(
189 |   image,
190 |   prediction.x1,
191 |   prediction.y1,
192 |   BBox.width(prediction),
193 |   BBox.height(prediction),
194 |   fill: false,
195 |   color: :red,
196 |   stroke_width: 5
197 | )
198 | ```
199 | 
200 | ## Semantic segmentation
201 | 
202 | The goal of semantic segmentation is to generate per-pixel masks stating if the object of the given class is in the corresponding pixel.
203 | 
204 | In ExVision, the output of semantic segmentation models is a mapping of category to a per-pixel binary mask. In contrast to previous models, we're not getting scores. Each pixel is always assigned the most probable class.
205 | 
206 | ### Code example
207 | 
208 | In this example, we will feed the image to the semantic segmentation model and inspect some of the masks provided by the model.
209 | 
210 | <!-- livebook:{"reevaluate_automatically":true} -->
211 | 
212 | ```elixir
213 | nx_image = Image.to_nx!(image) # tensor copy of the image
214 | uniform_black = 0 |> Nx.broadcast(Nx.shape(nx_image)) |> Nx.as_type(Nx.type(nx_image)) # all zeros, same shape and type
215 | # Run the model; the result is enumerated below as {label, mask} pairs
216 | predictions =
217 |   image
218 |   |> then(&SemanticSegmentation.run(semantic_segmentation, &1))
219 |   # Keep only masks whose mean exceeds 0.05 — for a 0/1 mask that is more than 5% of the image area
220 |   |> Enum.filter(fn {_label, mask} ->
221 |     mask |> Nx.mean() |> Nx.to_number() > 0.05
222 |   end)
223 |   |> dbg()
224 | # Render each remaining mask as a labelled cut-out of the original image, two per row
225 | predictions
226 | |> Enum.map(fn {label, mask} ->
227 |   # repeat the 2-D mask along the remaining (channel) axis so it matches the image shape
228 |   mask = Nx.broadcast(mask, Nx.shape(nx_image), axes: [0, 1])
229 | 
230 |   # Keep image pixels where the mask is set and use black everywhere else
231 |   image = Nx.select(mask, nx_image, uniform_black)
232 |   image = Nx.as_type(image, :u8)
233 | 
234 |   Kino.Layout.grid([
235 |     label |> Atom.to_string() |> Kino.Text.new(),
236 |     Kino.Image.new(image)
237 |   ])
238 | end)
239 | |> Kino.Layout.grid(columns: 2)
240 | ```
241 | 
242 | ## Instance segmentation
243 | 
244 | The objective of instance segmentation is to not only identify objects within an image on a per-pixel basis but also differentiate each specific object of the same class.
245 | 
246 | In ExVision, the output of instance segmentation models includes a bounding box with a label and a score (similar to object detection), and a binary mask for every instance detected in the image.
247 | 
248 | Extremely low probability detections (with scores lower than 0.1) will be discarded by ExVision, as they are just noise.
249 | 
250 | ### Code example
251 | 
252 | In the following example, we will pass an image through the instance segmentation model and examine the individual instance masks recognized by the model.
253 | 
254 | ```elixir
255 | alias ExVision.Types.BBoxWithMask
256 | # Tensor copy of the image, plus an all-zero tensor of the same shape and type used to black out pixels
257 | nx_image = Image.to_nx!(image)
258 | uniform_black = 0 |> Nx.broadcast(Nx.shape(nx_image)) |> Nx.as_type(Nx.type(nx_image))
259 | 
260 | predictions =
261 |   image
262 |   |> then(&InstanceSegmentation.run(instance_segmentation, &1))
263 |   # Keep only detections with a score above 0.8
264 |   |> Enum.filter(fn %BBoxWithMask{score: score} -> score > 0.8 end)
265 |   |> dbg()
266 | # Render each kept instance as a labelled cut-out of the original image, two per row
267 | predictions
268 | |> Enum.map(fn %BBoxWithMask{label: label, mask: mask} ->
269 |   # repeat the 2-D mask along the remaining (channel) axis so it matches the image shape
270 |   mask = Nx.broadcast(mask, Nx.shape(nx_image), axes: [0, 1])
271 | 
272 |   # Keep image pixels where the mask is set and use black everywhere else
273 |   image = Nx.select(mask, nx_image, uniform_black)
274 |   image = Nx.as_type(image, :u8)
275 | 
276 |   Kino.Layout.grid([
277 |     label |> Atom.to_string() |> Kino.Text.new(),
278 |     Kino.Image.new(image)
279 |   ])
280 | end)
281 | |> Kino.Layout.grid(columns: 2)
282 | ```
283 | 
284 | ## Keypoint detection
285 | 
286 | In keypoint detection, we're trying to locate specific keypoints in the image. ExVision returns the output as a list of bounding boxes (similar to object detection) with named keypoints. Each keypoint consists of x, y coordinates and a score which is the model's certainty of that keypoint.
287 | 
288 | ExVision will discard extremely low probability detections (with scores lower than 0.1), as they are just noise.
289 | 
290 | <!-- livebook:{"break_markdown":true} -->
291 | 
292 | The KeypointRCNN_ResNet50_FPN model is commonly used for detecting human body parts in images. To illustrate this, let's begin by importing an image that features people.
293 | 
294 | ```elixir
295 | image = ImageHandler.get(input, "people.jpg")
296 | ```
297 | 
298 | #### Code example
299 | 
300 | In this example, we will draw keypoints for every detection with a high enough score returned by the model. Additionally, we will draw a bounding box around each of them.
301 | 
302 | ```elixir
303 | alias ExVision.Types.BBoxWithKeypoints
304 | 
305 | # define skeleton pose
306 | connections = [
307 |   # face
308 |   {:nose, :left_eye},
309 |   {:nose, :right_eye},
310 |   {:left_eye, :right_eye},
311 |   {:left_eye, :left_ear},
312 |   {:right_eye, :right_ear},
313 | 
314 |   # left arm
315 |   {:left_wrist, :left_elbow},
316 |   {:left_elbow, :left_shoulder},
317 | 
318 |   # right arm
319 |   {:right_wrist, :right_elbow},
320 |   {:right_elbow, :right_shoulder},
321 | 
322 |   # torso
323 |   {:left_shoulder, :right_shoulder},
324 |   {:left_shoulder, :left_hip},
325 |   {:right_shoulder, :right_hip},
326 |   {:left_hip, :right_hip},
327 |   {:left_shoulder, :left_ear},
328 |   {:right_shoulder, :right_ear},
329 | 
330 |   # left leg
331 |   {:left_ankle, :left_knee},
332 |   {:left_knee, :left_hip},
333 | 
334 |   # right leg
335 |   {:right_ankle, :right_knee},
336 |   {:right_knee, :right_hip}
337 | ]
338 | 
339 | # apply the model and keep only confident detections
340 | predictions =
341 |   image
342 |   |> then(&KeypointDetector.run(keypoint_detector, &1))
343 |   # Keep detections whose overall score is above 0.8
344 |   |> Enum.filter(fn %BBoxWithKeypoints{score: score} -> score > 0.8 end)
345 |   |> dbg()
346 | # Draw every kept detection onto the image, threading the drawn-on image through the reduce as the accumulator
347 | predictions
348 | |> Enum.reduce(image, fn prediction, image_acc ->
349 |   # draw a small red circle (radius 2) at every keypoint; per-keypoint scores are not checked here
350 |   image_acc =
351 |     prediction.keypoints
352 |     |> Enum.reduce(image_acc, fn {_key, %{x: x, y: y}}, acc ->
353 |       Image.Draw.circle!(acc, x, y, 2, color: :red)
354 |     end)
355 | 
356 |   # draw skeleton pose: one red line per {from, to} pair in `connections`
357 |   image_acc =
358 |     connections
359 |     |> Enum.reduce(image_acc, fn {from, to}, acc ->
360 |       %{x: x1, y: y1} = prediction.keypoints[from]
361 |       %{x: x2, y: y2} = prediction.keypoints[to]
362 | 
363 |       Image.Draw.line!(acc, x1, y1, x2, y2, color: :red)
364 |     end)
365 | 
366 |   # draw bounding box: top-left corner (x1, y1) plus width and height; the result becomes the next accumulator
367 |   Image.Draw.rect!(
368 |     image_acc,
369 |     prediction.x1,
370 |     prediction.y1,
371 |     BBoxWithKeypoints.width(prediction),
372 |     BBoxWithKeypoints.height(prediction),
373 |     fill: false,
374 |     color: :red,
375 |     stroke_width: 2
376 |   )
377 | end)
378 | ```
379 | 
380 | ## Next steps
381 | 
382 | After completing this tutorial, you can also check out our next tutorial, which focuses on using models in production in a process workflow, [here](2-usage-as-nx-serving.livemd).
383 | 


--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
 1 | %{
 2 |   "axon": {:hex, :axon, "0.6.1", "1d042fdba1c1b4413a3d65800524feebd1bc8ed218f8cdefe7a97510c3f427f3", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.6.0 or ~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "d6b0ae2f0dd284f6bf702edcab71e790d6c01ca502dd06c4070836554f5a48e1"},
 3 |   "bunch": {:hex, :bunch, "1.6.1", "5393d827a64d5f846092703441ea50e65bc09f37fd8e320878f13e63d410aec7", [:mix], [], "hexpm", "286cc3add551628b30605efbe2fca4e38cc1bea89bcd0a1a7226920b3364fe4a"},
 4 |   "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
 5 |   "castore": {:hex, :castore, "1.0.7", "b651241514e5f6956028147fe6637f7ac13802537e895a724f90bf3e36ddd1dd", [:mix], [], "hexpm", "da7785a4b0d2a021cd1292a60875a784b6caef71e76bf4917bdee1f390455cf5"},
 6 |   "cc_precompiler": {:hex, :cc_precompiler, "0.1.10", "47c9c08d8869cf09b41da36538f62bc1abd3e19e41701c2cea2675b53c704258", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "f6e046254e53cd6b41c6bacd70ae728011aa82b2742a80d6e2214855c6e06b22"},
 7 |   "coerce": {:hex, :coerce, "1.0.1", "211c27386315dc2894ac11bc1f413a0e38505d808153367bd5c6e75a4003d096", [:mix], [], "hexpm", "b44a691700f7a1a15b4b7e2ff1fa30bebd669929ac8aa43cffe9e2f8bf051cf1"},
 8 |   "complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"},
 9 |   "credo": {:hex, :credo, "1.7.7", "771445037228f763f9b2afd612b6aa2fd8e28432a95dbbc60d8e03ce71ba4446", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8bc87496c9aaacdc3f90f01b7b0582467b69b4bd2441fe8aae3109d843cc2f2e"},
10 |   "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"},
11 |   "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"},
12 |   "elixir_make": {:hex, :elixir_make, "0.8.4", "4960a03ce79081dee8fe119d80ad372c4e7badb84c493cc75983f9d3bc8bde0f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "6e7f1d619b5f61dfabd0a20aa268e575572b542ac31723293a4c1a567d5ef040"},
13 |   "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
14 |   "evision": {:hex, :evision, "0.1.38", "f8b23ad685c3ebd70969a3457027b5c74b5bc8dc51588661c516098c3240b92d", [:make, :mix, :rebar3], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.11", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}, {:progress_bar, "~> 2.0 or ~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: true]}], "hexpm", "f9302547d76c5e4ad7022ffdc76be13e33c990fdd67ad2af203f24ab5d3aee20"},
15 |   "ex_doc": {:hex, :ex_doc, "0.32.1", "21e40f939515373bcdc9cffe65f3b3543f05015ac6c3d01d991874129d173420", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "5142c9db521f106d61ff33250f779807ed2a88620e472ac95dc7d59c380113da"},
16 |   "exla": {:hex, :exla, "0.7.2", "8ac573093df8e5e6b36845beeb3f5a0ea92b05082bf2fa4678f80170cfc887f6", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.1", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.6.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "d061ea87858415e5585cbd4b7bdae5489000339519a2c6a7f51eb0defd73b588"},
17 |   "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"},
18 |   "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"},
19 |   "hpax": {:hex, :hpax, "0.2.0", "5a58219adcb75977b2edce5eb22051de9362f08236220c9e859a47111c194ff5", [:mix], [], "hexpm", "bea06558cdae85bed075e6c036993d43cd54d447f76d8190a8db0dc5893fa2f1"},
20 |   "image": {:hex, :image, "0.44.0", "e8eea9398abbed12b7784e786f26a5c839a00bcddd8f2f8ba12adf7e227beb9f", [:mix], [{:bumblebee, "~> 0.3", [hex: :bumblebee, repo: "hexpm", optional: true]}, {:evision, "~> 0.1.33", [hex: :evision, repo: "hexpm", optional: true]}, {:exla, "~> 0.5", [hex: :exla, repo: "hexpm", optional: true]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: true]}, {:kino, "~> 0.11", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:nx_image, "~> 0.1", [hex: :nx_image, repo: "hexpm", optional: true]}, {:phoenix_html, "~> 2.1 or ~> 3.2 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:plug, "~> 1.13", [hex: :plug, repo: "hexpm", optional: true]}, {:req, "~> 0.4", [hex: :req, repo: "hexpm", optional: true]}, {:rustler, "> 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: false]}, {:vix, "~> 0.23", [hex: :vix, repo: "hexpm", optional: false]}], "hexpm", "cd00a3de4d7a40a2cb1ca72b9852b0d81701793414af8babf4d33dbeb6de0f6f"},
21 |   "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
22 |   "makeup": {:hex, :makeup, "1.1.1", "fa0bc768698053b2b3869fa8a62616501ff9d11a562f3ce39580d60860c3a55e", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "5dc62fbdd0de44de194898b6710692490be74baa02d9d108bc29f007783b0b48"},
23 |   "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"},
24 |   "makeup_erlang": {:hex, :makeup_erlang, "0.1.5", "e0ff5a7c708dda34311f7522a8758e23bfcd7d8d8068dc312b5eb41c6fd76eba", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "94d2e986428585a21516d7d7149781480013c56e30c6a233534bedf38867a59a"},
25 |   "membrane_core": {:hex, :membrane_core, "1.0.0", "1b543aefd952283be1f2a215a1db213aa4d91222722ba03cd35280622f1905ee", [:mix], [{:bunch, "~> 1.6", [hex: :bunch, repo: "hexpm", optional: false]}, {:qex, "~> 0.3", [hex: :qex, repo: "hexpm", optional: false]}, {:ratio, "~> 3.0", [hex: :ratio, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "352c90fd0a29942143c4bf7a727cc05c632e323f50a1a4e99321b1e8982f1533"},
26 |   "mime": {:hex, :mime, "2.0.5", "dc34c8efd439abe6ae0343edbb8556f4d63f178594894720607772a041b04b02", [:mix], [], "hexpm", "da0d64a365c45bc9935cc5c8a7fc5e49a0e0f9932a761c55d6c52b142780a05c"},
27 |   "mimic": {:hex, :mimic, "1.7.4", "cd2772ffbc9edefe964bc668bfd4059487fa639a5b7f1cbdf4fd22946505aa4f", [:mix], [], "hexpm", "437c61041ecf8a7fae35763ce89859e4973bb0666e6ce76d75efc789204447c3"},
28 |   "mint": {:hex, :mint, "1.6.0", "88a4f91cd690508a04ff1c3e28952f322528934be541844d54e0ceb765f01d5e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "3c5ae85d90a5aca0a49c0d8b67360bbe407f3b54f1030a111047ff988e8fefaa"},
29 |   "nimble_options": {:hex, :nimble_options, "1.1.0", "3b31a57ede9cb1502071fade751ab0c7b8dbe75a9a4c2b5bbb0943a690b63172", [:mix], [], "hexpm", "8bbbb3941af3ca9acc7835f5655ea062111c9c27bcac53e004460dfd19008a99"},
30 |   "nimble_ownership": {:hex, :nimble_ownership, "0.3.1", "99d5244672fafdfac89bfad3d3ab8f0d367603ce1dc4855f86a1c75008bce56f", [:mix], [], "hexpm", "4bf510adedff0449a1d6e200e43e57a814794c8b5b6439071274d248d272a549"},
31 |   "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
32 |   "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
33 |   "numbers": {:hex, :numbers, "5.2.4", "f123d5bb7f6acc366f8f445e10a32bd403c8469bdbce8ce049e1f0972b607080", [:mix], [{:coerce, "~> 1.0", [hex: :coerce, repo: "hexpm", optional: false]}, {:decimal, "~> 1.9 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "eeccf5c61d5f4922198395bf87a465b6f980b8b862dd22d28198c5e6fab38582"},
34 |   "nx": {:hex, :nx, "0.7.2", "7f6f6584585e49ffbf81769e7ccc2d01c5639074e399c1f94adc2b509869673e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e2c0680066eec5af8b8ef00c99e9bf40a0d08d8b2bbba77f59f801ec54a3f90e"},
35 |   "nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"},
36 |   "ortex": {:hex, :ortex, "0.1.9", "a9b14552ef6058961a3e300f973a51887328a13c2ffa6f2cad1b0785f9c7e73c", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}, {:rustler, "~> 0.29.0", [hex: :rustler, repo: "hexpm", optional: false]}], "hexpm", "5201b9aa8e22a86f3a04e819266bfd1c5a8194f0c51f917c1d0cffe8bdbb76d8"},
37 |   "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"},
38 |   "poison": {:hex, :poison, "5.0.0", "d2b54589ab4157bbb82ec2050757779bfed724463a544b6e20d79855a9e43b24", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "11dc6117c501b80c62a7594f941d043982a1bd05a1184280c0d9166eb4d8d3fc"},
39 |   "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"},
40 |   "qex": {:hex, :qex, "0.5.1", "0d82c0f008551d24fffb99d97f8299afcb8ea9cf99582b770bd004ed5af63fd6", [:mix], [], "hexpm", "935a39fdaf2445834b95951456559e9dc2063d0a055742c558a99987b38d6bab"},
41 |   "ratio": {:hex, :ratio, "3.0.2", "60a5976872a4dc3d873ecc57eed1738589e99d1094834b9c935b118231297cfb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:numbers, "~> 5.2.0", [hex: :numbers, repo: "hexpm", optional: false]}], "hexpm", "3a13ed5a30ad0bfd7e4a86bf86d93d2b5a06f5904417d38d3f3ea6406cdfc7bb"},
42 |   "req": {:hex, :req, "0.4.14", "103de133a076a31044e5458e0f850d5681eef23dfabf3ea34af63212e3b902e2", [:mix], [{:aws_signature, "~> 0.3.2", [hex: :aws_signature, repo: "hexpm", optional: true]}, {:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:nimble_ownership, "~> 0.2.0 or ~> 0.3.0", [hex: :nimble_ownership, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "2ddd3d33f9ab714ced8d3c15fd03db40c14dbf129003c4a3eb80fac2cc0b1b08"},
43 |   "rustler": {:hex, :rustler, "0.29.1", "880f20ae3027bd7945def6cea767f5257bc926f33ff50c0d5d5a5315883c084d", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.6", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "109497d701861bfcd26eb8f5801fe327a8eef304f56a5b63ef61151ff44ac9b6"},
44 |   "stb_image": {:hex, :stb_image, "0.6.8", "c68768e85045b2f40afe376cafa7d3fff491404366659bc2a01abe78d2ef9c4a", [:make, :mix], [{:cc_precompiler, "~> 0.1.0", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8.2", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: true]}], "hexpm", "6688b8a8dd3db718bb3fccf267cb7132d2193fd62441603b08539515fedfa8d6"},
45 |   "sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"},
46 |   "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
47 |   "toml": {:hex, :toml, "0.7.0", "fbcd773caa937d0c7a02c301a1feea25612720ac3fa1ccb8bfd9d30d822911de", [:mix], [], "hexpm", "0690246a2478c1defd100b0c9b89b4ea280a22be9a7b313a8a058a2408a2fa70"},
48 |   "vix": {:hex, :vix, "0.27.0", "c9d6be17abe6fd1b3daed52964331c67ff1f980ea188499d8ac5e723cf215576", [:make, :mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:cc_precompiler, "~> 0.1.4 or ~> 0.2", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7.3 or ~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}], "hexpm", "ae4ba5bb9882753396baadfff93b6cab5d4275b13751fd49723591eb116f373a"},
49 |   "xla": {:hex, :xla, "0.6.0", "67bb7695efa4a23b06211dc212de6a72af1ad5a9e17325e05e0a87e4c241feb8", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "dd074daf942312c6da87c7ed61b62fb1a075bced157f1cc4d47af2d7c9f44fb7"},
50 | }
51 | 


--------------------------------------------------------------------------------
/priv/categories/imagenet_v2_categories.json:
--------------------------------------------------------------------------------
   1 | [
   2 |   "tench",
   3 |   "goldfish",
   4 |   "great white shark",
   5 |   "tiger shark",
   6 |   "hammerhead",
   7 |   "electric ray",
   8 |   "stingray",
   9 |   "cock",
  10 |   "hen",
  11 |   "ostrich",
  12 |   "brambling",
  13 |   "goldfinch",
  14 |   "house finch",
  15 |   "junco",
  16 |   "indigo bunting",
  17 |   "robin",
  18 |   "bulbul",
  19 |   "jay",
  20 |   "magpie",
  21 |   "chickadee",
  22 |   "water ouzel",
  23 |   "kite",
  24 |   "bald eagle",
  25 |   "vulture",
  26 |   "great grey owl",
  27 |   "European fire salamander",
  28 |   "common newt",
  29 |   "eft",
  30 |   "spotted salamander",
  31 |   "axolotl",
  32 |   "bullfrog",
  33 |   "tree frog",
  34 |   "tailed frog",
  35 |   "loggerhead",
  36 |   "leatherback turtle",
  37 |   "mud turtle",
  38 |   "terrapin",
  39 |   "box turtle",
  40 |   "banded gecko",
  41 |   "common iguana",
  42 |   "American chameleon",
  43 |   "whiptail",
  44 |   "agama",
  45 |   "frilled lizard",
  46 |   "alligator lizard",
  47 |   "Gila monster",
  48 |   "green lizard",
  49 |   "African chameleon",
  50 |   "Komodo dragon",
  51 |   "African crocodile",
  52 |   "American alligator",
  53 |   "triceratops",
  54 |   "thunder snake",
  55 |   "ringneck snake",
  56 |   "hognose snake",
  57 |   "green snake",
  58 |   "king snake",
  59 |   "garter snake",
  60 |   "water snake",
  61 |   "vine snake",
  62 |   "night snake",
  63 |   "boa constrictor",
  64 |   "rock python",
  65 |   "Indian cobra",
  66 |   "green mamba",
  67 |   "sea snake",
  68 |   "horned viper",
  69 |   "diamondback",
  70 |   "sidewinder",
  71 |   "trilobite",
  72 |   "harvestman",
  73 |   "scorpion",
  74 |   "black and gold garden spider",
  75 |   "barn spider",
  76 |   "garden spider",
  77 |   "black widow",
  78 |   "tarantula",
  79 |   "wolf spider",
  80 |   "tick",
  81 |   "centipede",
  82 |   "black grouse",
  83 |   "ptarmigan",
  84 |   "ruffed grouse",
  85 |   "prairie chicken",
  86 |   "peacock",
  87 |   "quail",
  88 |   "partridge",
  89 |   "African grey",
  90 |   "macaw",
  91 |   "sulphur-crested cockatoo",
  92 |   "lorikeet",
  93 |   "coucal",
  94 |   "bee eater",
  95 |   "hornbill",
  96 |   "hummingbird",
  97 |   "jacamar",
  98 |   "toucan",
  99 |   "drake",
 100 |   "red-breasted merganser",
 101 |   "goose",
 102 |   "black swan",
 103 |   "tusker",
 104 |   "echidna",
 105 |   "platypus",
 106 |   "wallaby",
 107 |   "koala",
 108 |   "wombat",
 109 |   "jellyfish",
 110 |   "sea anemone",
 111 |   "brain coral",
 112 |   "flatworm",
 113 |   "nematode",
 114 |   "conch",
 115 |   "snail",
 116 |   "slug",
 117 |   "sea slug",
 118 |   "chiton",
 119 |   "chambered nautilus",
 120 |   "Dungeness crab",
 121 |   "rock crab",
 122 |   "fiddler crab",
 123 |   "king crab",
 124 |   "American lobster",
 125 |   "spiny lobster",
 126 |   "crayfish",
 127 |   "hermit crab",
 128 |   "isopod",
 129 |   "white stork",
 130 |   "black stork",
 131 |   "spoonbill",
 132 |   "flamingo",
 133 |   "little blue heron",
 134 |   "American egret",
 135 |   "bittern",
 136 |   "crane bird",
 137 |   "limpkin",
 138 |   "European gallinule",
 139 |   "American coot",
 140 |   "bustard",
 141 |   "ruddy turnstone",
 142 |   "red-backed sandpiper",
 143 |   "redshank",
 144 |   "dowitcher",
 145 |   "oystercatcher",
 146 |   "pelican",
 147 |   "king penguin",
 148 |   "albatross",
 149 |   "grey whale",
 150 |   "killer whale",
 151 |   "dugong",
 152 |   "sea lion",
 153 |   "Chihuahua",
 154 |   "Japanese spaniel",
 155 |   "Maltese dog",
 156 |   "Pekinese",
 157 |   "Shih-Tzu",
 158 |   "Blenheim spaniel",
 159 |   "papillon",
 160 |   "toy terrier",
 161 |   "Rhodesian ridgeback",
 162 |   "Afghan hound",
 163 |   "basset",
 164 |   "beagle",
 165 |   "bloodhound",
 166 |   "bluetick",
 167 |   "black-and-tan coonhound",
 168 |   "Walker hound",
 169 |   "English foxhound",
 170 |   "redbone",
 171 |   "borzoi",
 172 |   "Irish wolfhound",
 173 |   "Italian greyhound",
 174 |   "whippet",
 175 |   "Ibizan hound",
 176 |   "Norwegian elkhound",
 177 |   "otterhound",
 178 |   "Saluki",
 179 |   "Scottish deerhound",
 180 |   "Weimaraner",
 181 |   "Staffordshire bullterrier",
 182 |   "American Staffordshire terrier",
 183 |   "Bedlington terrier",
 184 |   "Border terrier",
 185 |   "Kerry blue terrier",
 186 |   "Irish terrier",
 187 |   "Norfolk terrier",
 188 |   "Norwich terrier",
 189 |   "Yorkshire terrier",
 190 |   "wire-haired fox terrier",
 191 |   "Lakeland terrier",
 192 |   "Sealyham terrier",
 193 |   "Airedale",
 194 |   "cairn",
 195 |   "Australian terrier",
 196 |   "Dandie Dinmont",
 197 |   "Boston bull",
 198 |   "miniature schnauzer",
 199 |   "giant schnauzer",
 200 |   "standard schnauzer",
 201 |   "Scotch terrier",
 202 |   "Tibetan terrier",
 203 |   "silky terrier",
 204 |   "soft-coated wheaten terrier",
 205 |   "West Highland white terrier",
 206 |   "Lhasa",
 207 |   "flat-coated retriever",
 208 |   "curly-coated retriever",
 209 |   "golden retriever",
 210 |   "Labrador retriever",
 211 |   "Chesapeake Bay retriever",
 212 |   "German short-haired pointer",
 213 |   "vizsla",
 214 |   "English setter",
 215 |   "Irish setter",
 216 |   "Gordon setter",
 217 |   "Brittany spaniel",
 218 |   "clumber",
 219 |   "English springer",
 220 |   "Welsh springer spaniel",
 221 |   "cocker spaniel",
 222 |   "Sussex spaniel",
 223 |   "Irish water spaniel",
 224 |   "kuvasz",
 225 |   "schipperke",
 226 |   "groenendael",
 227 |   "malinois",
 228 |   "briard",
 229 |   "kelpie",
 230 |   "komondor",
 231 |   "Old English sheepdog",
 232 |   "Shetland sheepdog",
 233 |   "collie",
 234 |   "Border collie",
 235 |   "Bouvier des Flandres",
 236 |   "Rottweiler",
 237 |   "German shepherd",
 238 |   "Doberman",
 239 |   "miniature pinscher",
 240 |   "Greater Swiss Mountain dog",
 241 |   "Bernese mountain dog",
 242 |   "Appenzeller",
 243 |   "EntleBucher",
 244 |   "boxer",
 245 |   "bull mastiff",
 246 |   "Tibetan mastiff",
 247 |   "French bulldog",
 248 |   "Great Dane",
 249 |   "Saint Bernard",
 250 |   "Eskimo dog",
 251 |   "malamute",
 252 |   "Siberian husky",
 253 |   "dalmatian",
 254 |   "affenpinscher",
 255 |   "basenji",
 256 |   "pug",
 257 |   "Leonberg",
 258 |   "Newfoundland",
 259 |   "Great Pyrenees",
 260 |   "Samoyed",
 261 |   "Pomeranian",
 262 |   "chow",
 263 |   "keeshond",
 264 |   "Brabancon griffon",
 265 |   "Pembroke",
 266 |   "Cardigan",
 267 |   "toy poodle",
 268 |   "miniature poodle",
 269 |   "standard poodle",
 270 |   "Mexican hairless",
 271 |   "timber wolf",
 272 |   "white wolf",
 273 |   "red wolf",
 274 |   "coyote",
 275 |   "dingo",
 276 |   "dhole",
 277 |   "African hunting dog",
 278 |   "hyena",
 279 |   "red fox",
 280 |   "kit fox",
 281 |   "Arctic fox",
 282 |   "grey fox",
 283 |   "tabby",
 284 |   "tiger cat",
 285 |   "Persian cat",
 286 |   "Siamese cat",
 287 |   "Egyptian cat",
 288 |   "cougar",
 289 |   "lynx",
 290 |   "leopard",
 291 |   "snow leopard",
 292 |   "jaguar",
 293 |   "lion",
 294 |   "tiger",
 295 |   "cheetah",
 296 |   "brown bear",
 297 |   "American black bear",
 298 |   "ice bear",
 299 |   "sloth bear",
 300 |   "mongoose",
 301 |   "meerkat",
 302 |   "tiger beetle",
 303 |   "ladybug",
 304 |   "ground beetle",
 305 |   "long-horned beetle",
 306 |   "leaf beetle",
 307 |   "dung beetle",
 308 |   "rhinoceros beetle",
 309 |   "weevil",
 310 |   "fly",
 311 |   "bee",
 312 |   "ant",
 313 |   "grasshopper",
 314 |   "cricket",
 315 |   "walking stick",
 316 |   "cockroach",
 317 |   "mantis",
 318 |   "cicada",
 319 |   "leafhopper",
 320 |   "lacewing",
 321 |   "dragonfly",
 322 |   "damselfly",
 323 |   "admiral",
 324 |   "ringlet",
 325 |   "monarch",
 326 |   "cabbage butterfly",
 327 |   "sulphur butterfly",
 328 |   "lycaenid",
 329 |   "starfish",
 330 |   "sea urchin",
 331 |   "sea cucumber",
 332 |   "wood rabbit",
 333 |   "hare",
 334 |   "Angora",
 335 |   "hamster",
 336 |   "porcupine",
 337 |   "fox squirrel",
 338 |   "marmot",
 339 |   "beaver",
 340 |   "guinea pig",
 341 |   "sorrel",
 342 |   "zebra",
 343 |   "hog",
 344 |   "wild boar",
 345 |   "warthog",
 346 |   "hippopotamus",
 347 |   "ox",
 348 |   "water buffalo",
 349 |   "bison",
 350 |   "ram",
 351 |   "bighorn",
 352 |   "ibex",
 353 |   "hartebeest",
 354 |   "impala",
 355 |   "gazelle",
 356 |   "Arabian camel",
 357 |   "llama",
 358 |   "weasel",
 359 |   "mink",
 360 |   "polecat",
 361 |   "black-footed ferret",
 362 |   "otter",
 363 |   "skunk",
 364 |   "badger",
 365 |   "armadillo",
 366 |   "three-toed sloth",
 367 |   "orangutan",
 368 |   "gorilla",
 369 |   "chimpanzee",
 370 |   "gibbon",
 371 |   "siamang",
 372 |   "guenon",
 373 |   "patas",
 374 |   "baboon",
 375 |   "macaque",
 376 |   "langur",
 377 |   "colobus",
 378 |   "proboscis monkey",
 379 |   "marmoset",
 380 |   "capuchin",
 381 |   "howler monkey",
 382 |   "titi",
 383 |   "spider monkey",
 384 |   "squirrel monkey",
 385 |   "Madagascar cat",
 386 |   "indri",
 387 |   "Indian elephant",
 388 |   "African elephant",
 389 |   "lesser panda",
 390 |   "giant panda",
 391 |   "barracouta",
 392 |   "eel",
 393 |   "coho",
 394 |   "rock beauty",
 395 |   "anemone fish",
 396 |   "sturgeon",
 397 |   "gar",
 398 |   "lionfish",
 399 |   "puffer",
 400 |   "abacus",
 401 |   "abaya",
 402 |   "academic gown",
 403 |   "accordion",
 404 |   "acoustic guitar",
 405 |   "aircraft carrier",
 406 |   "airliner",
 407 |   "airship",
 408 |   "altar",
 409 |   "ambulance",
 410 |   "amphibian",
 411 |   "analog clock",
 412 |   "apiary",
 413 |   "apron",
 414 |   "ashcan",
 415 |   "assault rifle",
 416 |   "backpack",
 417 |   "bakery",
 418 |   "balance beam",
 419 |   "balloon",
 420 |   "ballpoint",
 421 |   "Band Aid",
 422 |   "banjo",
 423 |   "bannister",
 424 |   "barbell",
 425 |   "barber chair",
 426 |   "barbershop",
 427 |   "barn",
 428 |   "barometer",
 429 |   "barrel",
 430 |   "barrow",
 431 |   "baseball",
 432 |   "basketball",
 433 |   "bassinet",
 434 |   "bassoon",
 435 |   "bathing cap",
 436 |   "bath towel",
 437 |   "bathtub",
 438 |   "beach wagon",
 439 |   "beacon",
 440 |   "beaker",
 441 |   "bearskin",
 442 |   "beer bottle",
 443 |   "beer glass",
 444 |   "bell cote",
 445 |   "bib",
 446 |   "bicycle-built-for-two",
 447 |   "bikini",
 448 |   "binder",
 449 |   "binoculars",
 450 |   "birdhouse",
 451 |   "boathouse",
 452 |   "bobsled",
 453 |   "bolo tie",
 454 |   "bonnet",
 455 |   "bookcase",
 456 |   "bookshop",
 457 |   "bottlecap",
 458 |   "bow",
 459 |   "bow tie",
 460 |   "brass",
 461 |   "brassiere",
 462 |   "breakwater",
 463 |   "breastplate",
 464 |   "broom",
 465 |   "bucket",
 466 |   "buckle",
 467 |   "bulletproof vest",
 468 |   "bullet train",
 469 |   "butcher shop",
 470 |   "cab",
 471 |   "caldron",
 472 |   "candle",
 473 |   "cannon",
 474 |   "canoe",
 475 |   "can opener",
 476 |   "cardigan",
 477 |   "car mirror",
 478 |   "carousel",
 479 |   "carpenter's kit",
 480 |   "carton",
 481 |   "car wheel",
 482 |   "cash machine",
 483 |   "cassette",
 484 |   "cassette player",
 485 |   "castle",
 486 |   "catamaran",
 487 |   "CD player",
 488 |   "cello",
 489 |   "cellular telephone",
 490 |   "chain",
 491 |   "chainlink fence",
 492 |   "chain mail",
 493 |   "chain saw",
 494 |   "chest",
 495 |   "chiffonier",
 496 |   "chime",
 497 |   "china cabinet",
 498 |   "Christmas stocking",
 499 |   "church",
 500 |   "cinema",
 501 |   "cleaver",
 502 |   "cliff dwelling",
 503 |   "cloak",
 504 |   "clog",
 505 |   "cocktail shaker",
 506 |   "coffee mug",
 507 |   "coffeepot",
 508 |   "coil",
 509 |   "combination lock",
 510 |   "computer keyboard",
 511 |   "confectionery",
 512 |   "container ship",
 513 |   "convertible",
 514 |   "corkscrew",
 515 |   "cornet",
 516 |   "cowboy boot",
 517 |   "cowboy hat",
 518 |   "cradle",
 519 |   "crane",
 520 |   "crash helmet",
 521 |   "crate",
 522 |   "crib",
 523 |   "Crock Pot",
 524 |   "croquet ball",
 525 |   "crutch",
 526 |   "cuirass",
 527 |   "dam",
 528 |   "desk",
 529 |   "desktop computer",
 530 |   "dial telephone",
 531 |   "diaper",
 532 |   "digital clock",
 533 |   "digital watch",
 534 |   "dining table",
 535 |   "dishrag",
 536 |   "dishwasher",
 537 |   "disk brake",
 538 |   "dock",
 539 |   "dogsled",
 540 |   "dome",
 541 |   "doormat",
 542 |   "drilling platform",
 543 |   "drum",
 544 |   "drumstick",
 545 |   "dumbbell",
 546 |   "Dutch oven",
 547 |   "electric fan",
 548 |   "electric guitar",
 549 |   "electric locomotive",
 550 |   "entertainment center",
 551 |   "envelope",
 552 |   "espresso maker",
 553 |   "face powder",
 554 |   "feather boa",
 555 |   "file",
 556 |   "fireboat",
 557 |   "fire engine",
 558 |   "fire screen",
 559 |   "flagpole",
 560 |   "flute",
 561 |   "folding chair",
 562 |   "football helmet",
 563 |   "forklift",
 564 |   "fountain",
 565 |   "fountain pen",
 566 |   "four-poster",
 567 |   "freight car",
 568 |   "French horn",
 569 |   "frying pan",
 570 |   "fur coat",
 571 |   "garbage truck",
 572 |   "gasmask",
 573 |   "gas pump",
 574 |   "goblet",
 575 |   "go-kart",
 576 |   "golf ball",
 577 |   "golfcart",
 578 |   "gondola",
 579 |   "gong",
 580 |   "gown",
 581 |   "grand piano",
 582 |   "greenhouse",
 583 |   "grille",
 584 |   "grocery store",
 585 |   "guillotine",
 586 |   "hair slide",
 587 |   "hair spray",
 588 |   "half track",
 589 |   "hammer",
 590 |   "hamper",
 591 |   "hand blower",
 592 |   "hand-held computer",
 593 |   "handkerchief",
 594 |   "hard disc",
 595 |   "harmonica",
 596 |   "harp",
 597 |   "harvester",
 598 |   "hatchet",
 599 |   "holster",
 600 |   "home theater",
 601 |   "honeycomb",
 602 |   "hook",
 603 |   "hoopskirt",
 604 |   "horizontal bar",
 605 |   "horse cart",
 606 |   "hourglass",
 607 |   "iPod",
 608 |   "iron",
 609 |   "jack-o'-lantern",
 610 |   "jean",
 611 |   "jeep",
 612 |   "jersey",
 613 |   "jigsaw puzzle",
 614 |   "jinrikisha",
 615 |   "joystick",
 616 |   "kimono",
 617 |   "knee pad",
 618 |   "knot",
 619 |   "lab coat",
 620 |   "ladle",
 621 |   "lampshade",
 622 |   "laptop",
 623 |   "lawn mower",
 624 |   "lens cap",
 625 |   "letter opener",
 626 |   "library",
 627 |   "lifeboat",
 628 |   "lighter",
 629 |   "limousine",
 630 |   "liner",
 631 |   "lipstick",
 632 |   "Loafer",
 633 |   "lotion",
 634 |   "loudspeaker",
 635 |   "loupe",
 636 |   "lumbermill",
 637 |   "magnetic compass",
 638 |   "mailbag",
 639 |   "mailbox",
 640 |   "maillot",
 641 |   "maillot tank suit",
 642 |   "manhole cover",
 643 |   "maraca",
 644 |   "marimba",
 645 |   "mask",
 646 |   "matchstick",
 647 |   "maypole",
 648 |   "maze",
 649 |   "measuring cup",
 650 |   "medicine chest",
 651 |   "megalith",
 652 |   "microphone",
 653 |   "microwave",
 654 |   "military uniform",
 655 |   "milk can",
 656 |   "minibus",
 657 |   "miniskirt",
 658 |   "minivan",
 659 |   "missile",
 660 |   "mitten",
 661 |   "mixing bowl",
 662 |   "mobile home",
 663 |   "Model T",
 664 |   "modem",
 665 |   "monastery",
 666 |   "monitor",
 667 |   "moped",
 668 |   "mortar",
 669 |   "mortarboard",
 670 |   "mosque",
 671 |   "mosquito net",
 672 |   "motor scooter",
 673 |   "mountain bike",
 674 |   "mountain tent",
 675 |   "mouse",
 676 |   "mousetrap",
 677 |   "moving van",
 678 |   "muzzle",
 679 |   "nail",
 680 |   "neck brace",
 681 |   "necklace",
 682 |   "nipple",
 683 |   "notebook",
 684 |   "obelisk",
 685 |   "oboe",
 686 |   "ocarina",
 687 |   "odometer",
 688 |   "oil filter",
 689 |   "organ",
 690 |   "oscilloscope",
 691 |   "overskirt",
 692 |   "oxcart",
 693 |   "oxygen mask",
 694 |   "packet",
 695 |   "paddle",
 696 |   "paddlewheel",
 697 |   "padlock",
 698 |   "paintbrush",
 699 |   "pajama",
 700 |   "palace",
 701 |   "panpipe",
 702 |   "paper towel",
 703 |   "parachute",
 704 |   "parallel bars",
 705 |   "park bench",
 706 |   "parking meter",
 707 |   "passenger car",
 708 |   "patio",
 709 |   "pay-phone",
 710 |   "pedestal",
 711 |   "pencil box",
 712 |   "pencil sharpener",
 713 |   "perfume",
 714 |   "Petri dish",
 715 |   "photocopier",
 716 |   "pick",
 717 |   "pickelhaube",
 718 |   "picket fence",
 719 |   "pickup",
 720 |   "pier",
 721 |   "piggy bank",
 722 |   "pill bottle",
 723 |   "pillow",
 724 |   "ping-pong ball",
 725 |   "pinwheel",
 726 |   "pirate",
 727 |   "pitcher",
 728 |   "plane",
 729 |   "planetarium",
 730 |   "plastic bag",
 731 |   "plate rack",
 732 |   "plow",
 733 |   "plunger",
 734 |   "Polaroid camera",
 735 |   "pole",
 736 |   "police van",
 737 |   "poncho",
 738 |   "pool table",
 739 |   "pop bottle",
 740 |   "pot",
 741 |   "potter's wheel",
 742 |   "power drill",
 743 |   "prayer rug",
 744 |   "printer",
 745 |   "prison",
 746 |   "projectile",
 747 |   "projector",
 748 |   "puck",
 749 |   "punching bag",
 750 |   "purse",
 751 |   "quill",
 752 |   "quilt",
 753 |   "racer",
 754 |   "racket",
 755 |   "radiator",
 756 |   "radio",
 757 |   "radio telescope",
 758 |   "rain barrel",
 759 |   "recreational vehicle",
 760 |   "reel",
 761 |   "reflex camera",
 762 |   "refrigerator",
 763 |   "remote control",
 764 |   "restaurant",
 765 |   "revolver",
 766 |   "rifle",
 767 |   "rocking chair",
 768 |   "rotisserie",
 769 |   "rubber eraser",
 770 |   "rugby ball",
 771 |   "rule",
 772 |   "running shoe",
 773 |   "safe",
 774 |   "safety pin",
 775 |   "saltshaker",
 776 |   "sandal",
 777 |   "sarong",
 778 |   "sax",
 779 |   "scabbard",
 780 |   "scale",
 781 |   "school bus",
 782 |   "schooner",
 783 |   "scoreboard",
 784 |   "screen",
 785 |   "screw",
 786 |   "screwdriver",
 787 |   "seat belt",
 788 |   "sewing machine",
 789 |   "shield",
 790 |   "shoe shop",
 791 |   "shoji",
 792 |   "shopping basket",
 793 |   "shopping cart",
 794 |   "shovel",
 795 |   "shower cap",
 796 |   "shower curtain",
 797 |   "ski",
 798 |   "ski mask",
 799 |   "sleeping bag",
 800 |   "slide rule",
 801 |   "sliding door",
 802 |   "slot",
 803 |   "snorkel",
 804 |   "snowmobile",
 805 |   "snowplow",
 806 |   "soap dispenser",
 807 |   "soccer ball",
 808 |   "sock",
 809 |   "solar dish",
 810 |   "sombrero",
 811 |   "soup bowl",
 812 |   "space bar",
 813 |   "space heater",
 814 |   "space shuttle",
 815 |   "spatula",
 816 |   "speedboat",
 817 |   "spider web",
 818 |   "spindle",
 819 |   "sports car",
 820 |   "spotlight",
 821 |   "stage",
 822 |   "steam locomotive",
 823 |   "steel arch bridge",
 824 |   "steel drum",
 825 |   "stethoscope",
 826 |   "stole",
 827 |   "stone wall",
 828 |   "stopwatch",
 829 |   "stove",
 830 |   "strainer",
 831 |   "streetcar",
 832 |   "stretcher",
 833 |   "studio couch",
 834 |   "stupa",
 835 |   "submarine",
 836 |   "suit",
 837 |   "sundial",
 838 |   "sunglass",
 839 |   "sunglasses",
 840 |   "sunscreen",
 841 |   "suspension bridge",
 842 |   "swab",
 843 |   "sweatshirt",
 844 |   "swimming trunks",
 845 |   "swing",
 846 |   "switch",
 847 |   "syringe",
 848 |   "table lamp",
 849 |   "tank",
 850 |   "tape player",
 851 |   "teapot",
 852 |   "teddy",
 853 |   "television",
 854 |   "tennis ball",
 855 |   "thatch",
 856 |   "theater curtain",
 857 |   "thimble",
 858 |   "thresher",
 859 |   "throne",
 860 |   "tile roof",
 861 |   "toaster",
 862 |   "tobacco shop",
 863 |   "toilet seat",
 864 |   "torch",
 865 |   "totem pole",
 866 |   "tow truck",
 867 |   "toyshop",
 868 |   "tractor",
 869 |   "trailer truck",
 870 |   "tray",
 871 |   "trench coat",
 872 |   "tricycle",
 873 |   "trimaran",
 874 |   "tripod",
 875 |   "triumphal arch",
 876 |   "trolleybus",
 877 |   "trombone",
 878 |   "tub",
 879 |   "turnstile",
 880 |   "typewriter keyboard",
 881 |   "umbrella",
 882 |   "unicycle",
 883 |   "upright",
 884 |   "vacuum",
 885 |   "vase",
 886 |   "vault",
 887 |   "velvet",
 888 |   "vending machine",
 889 |   "vestment",
 890 |   "viaduct",
 891 |   "violin",
 892 |   "volleyball",
 893 |   "waffle iron",
 894 |   "wall clock",
 895 |   "wallet",
 896 |   "wardrobe",
 897 |   "warplane",
 898 |   "washbasin",
 899 |   "washer",
 900 |   "water bottle",
 901 |   "water jug",
 902 |   "water tower",
 903 |   "whiskey jug",
 904 |   "whistle",
 905 |   "wig",
 906 |   "window screen",
 907 |   "window shade",
 908 |   "Windsor tie",
 909 |   "wine bottle",
 910 |   "wing",
 911 |   "wok",
 912 |   "wooden spoon",
 913 |   "wool",
 914 |   "worm fence",
 915 |   "wreck",
 916 |   "yawl",
 917 |   "yurt",
 918 |   "web site",
 919 |   "comic book",
 920 |   "crossword puzzle",
 921 |   "street sign",
 922 |   "traffic light",
 923 |   "book jacket",
 924 |   "menu",
 925 |   "plate",
 926 |   "guacamole",
 927 |   "consomme",
 928 |   "hot pot",
 929 |   "trifle",
 930 |   "ice cream",
 931 |   "ice lolly",
 932 |   "French loaf",
 933 |   "bagel",
 934 |   "pretzel",
 935 |   "cheeseburger",
 936 |   "hotdog",
 937 |   "mashed potato",
 938 |   "head cabbage",
 939 |   "broccoli",
 940 |   "cauliflower",
 941 |   "zucchini",
 942 |   "spaghetti squash",
 943 |   "acorn squash",
 944 |   "butternut squash",
 945 |   "cucumber",
 946 |   "artichoke",
 947 |   "bell pepper",
 948 |   "cardoon",
 949 |   "mushroom",
 950 |   "Granny Smith",
 951 |   "strawberry",
 952 |   "orange",
 953 |   "lemon",
 954 |   "fig",
 955 |   "pineapple",
 956 |   "banana",
 957 |   "jackfruit",
 958 |   "custard apple",
 959 |   "pomegranate",
 960 |   "hay",
 961 |   "carbonara",
 962 |   "chocolate sauce",
 963 |   "dough",
 964 |   "meat loaf",
 965 |   "pizza",
 966 |   "potpie",
 967 |   "burrito",
 968 |   "red wine",
 969 |   "espresso",
 970 |   "cup",
 971 |   "eggnog",
 972 |   "alp",
 973 |   "bubble",
 974 |   "cliff",
 975 |   "coral reef",
 976 |   "geyser",
 977 |   "lakeside",
 978 |   "promontory",
 979 |   "sandbar",
 980 |   "seashore",
 981 |   "valley",
 982 |   "volcano",
 983 |   "ballplayer",
 984 |   "groom",
 985 |   "scuba diver",
 986 |   "rapeseed",
 987 |   "daisy",
 988 |   "yellow lady's slipper",
 989 |   "corn",
 990 |   "acorn",
 991 |   "hip",
 992 |   "buckeye",
 993 |   "coral fungus",
 994 |   "agaric",
 995 |   "gyromitra",
 996 |   "stinkhorn",
 997 |   "earthstar",
 998 |   "hen-of-the-woods",
 999 |   "bolete",
1000 |   "ear",
1001 |   "toilet tissue"
1002 | ]
1003 | 


--------------------------------------------------------------------------------