├── .clang-format ├── .formatter.exs ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── INSTALL.md ├── LICENSE ├── Makefile ├── README.md ├── c_src └── xav │ ├── audio_converter.c │ ├── audio_converter.h │ ├── channel_layout.c │ ├── channel_layout.h │ ├── decoder.c │ ├── decoder.h │ ├── encoder.c │ ├── encoder.h │ ├── reader.c │ ├── reader.h │ ├── utils.c │ ├── utils.h │ ├── video_converter.c │ ├── video_converter.h │ ├── xav_decoder.c │ ├── xav_decoder.h │ ├── xav_encoder.c │ ├── xav_encoder.h │ ├── xav_reader.c │ ├── xav_reader.h │ ├── xav_video_converter.c │ └── xav_video_converter.h ├── codecov.yml ├── lib ├── xav.ex └── xav │ ├── decoder.ex │ ├── decoder_nif.ex │ ├── encoder.ex │ ├── encoder_nif.ex │ ├── frame.ex │ ├── packet.ex │ ├── reader.ex │ ├── reader_nif.ex │ ├── video_converter.ex │ └── video_converter_nif.ex ├── mix.exs ├── mix.lock └── test ├── decoder_test.exs ├── encoder_test.exs ├── fixtures ├── decoder │ ├── sample_h264.h264 │ └── sample_h265.h265 ├── encoder │ └── audio │ │ ├── input-s16le.raw │ │ └── reference.al ├── one_frame.mp4 ├── sample_av1.mkv ├── sample_h264.h264 ├── sample_h264.mkv ├── sample_h264.mp4 ├── sample_vp8.webm ├── sample_vp9.webm ├── stt │ ├── README.md │ ├── harvard.mp3 │ ├── harvard.wav │ └── melnet_sample_0.mp3 └── video_converter │ ├── frame_360x240.yuv │ └── frame_480x360.yuv ├── reader_test.exs ├── test_helper.exs └── video_converter_test.exs /.clang-format: -------------------------------------------------------------------------------- 1 | ColumnLimit: 100 -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: 
-------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: push 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | name: lint / ubuntu-latest / OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} 9 | strategy: 10 | matrix: 11 | otp: ["25"] 12 | elixir: ["1.14"] 13 | steps: 14 | - uses: actions/checkout@v2 15 | - uses: erlef/setup-beam@v1 16 | with: 17 | otp-version: ${{matrix.otp}} 18 | elixir-version: ${{matrix.elixir}} 19 | - run: sudo apt update && sudo apt install libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavdevice-dev 20 | - name: Cache dialyzer artifacts 21 | uses: actions/cache@v4 22 | with: 23 | path: _dialyzer 24 | key: ${{ runner.os }}-dialyzer-${{ hashFiles('**/mix.lock') }} 25 | restore-keys: | 26 | ${{ runner.os }}-dialyzer- 27 | - run: mix deps.get 28 | - run: mix credo 29 | - run: mix format --check-formatted 30 | - run: mix dialyzer 31 | - run: mix docs 2>&1 | (! grep -q "warning:") 32 | 33 | test-linux: 34 | runs-on: ubuntu-22.04 35 | name: test-linux-x86-64 / ubuntu-22.04 / OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} 36 | strategy: 37 | matrix: 38 | otp: ["25"] 39 | elixir: ["1.14"] 40 | env: 41 | MIX_ENV: test 42 | steps: 43 | - uses: actions/checkout@v2 44 | - uses: erlef/setup-beam@v1 45 | with: 46 | otp-version: ${{matrix.otp}} 47 | elixir-version: ${{matrix.elixir}} 48 | - run: sudo apt update && sudo apt install libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavdevice-dev 49 | - run: mix deps.get 50 | - run: mix test 51 | 52 | test-linux-coverage: 53 | runs-on: ubuntu-24.04 54 | name: test-linux-x86-64 / ubuntu-24.04 / OTP ${{matrix.otp}} / Elixir ${{matrix.elixir}} 55 | strategy: 56 | matrix: 57 | otp: ["25"] 58 | elixir: ["1.14"] 59 | env: 60 | MIX_ENV: test 61 | steps: 62 | - uses: actions/checkout@v2 63 | - uses: erlef/setup-beam@v1 64 | with: 65 | otp-version: ${{matrix.otp}} 66 | elixir-version: ${{matrix.elixir}} 67 | - run: sudo apt update && sudo 
apt install libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavdevice-dev 68 | - run: mix deps.get 69 | - run: mix coveralls.json 70 | - name: Upload coverage reports to Codecov 71 | uses: codecov/codecov-action@v4.0.1 72 | with: 73 | token: ${{ secrets.CODECOV_TOKEN }} 74 | 75 | test-macos: 76 | runs-on: macos-13 77 | name: test-macos-x86-64 / macos-13 / OTP latest / Elixir latest 78 | env: 79 | MIX_ENV: test 80 | # MacOS runners seem to have static IP addresses 81 | # which results in GitHub rate limiting our requests 82 | # for downloading prebuilt XLA binaries. 83 | # Adding token seems to help. 84 | XLA_HTTP_HEADERS: "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" 85 | steps: 86 | - uses: actions/checkout@v2 87 | - run: brew install ffmpeg elixir 88 | - run: mix deps.get 89 | - run: mix test 90 | 91 | test-macos-arm: 92 | runs-on: macos-14 93 | name: test-macos-arm / macos-14 / OTP latest / Elixir latest 94 | env: 95 | MIX_ENV: test 96 | XLA_HTTP_HEADERS: "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" 97 | steps: 98 | - uses: actions/checkout@v2 99 | - run: brew install ffmpeg elixir 100 | - run: mix deps.get 101 | - run: mix test 102 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 
20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 23 | xav-*.tar 24 | 25 | # Temporary files, for example, from tests. 26 | /tmp/ 27 | 28 | .vscode/ 29 | ffmpeg_build/ 30 | .elixir_ls/ 31 | _dialyzer/ 32 | .cache/ 33 | compile_commands.json 34 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | Xav requires FFmpeg development packages to be installed on your system. 2 | You can install them with one of the following one-liners. 3 | 4 | On some platforms, we use pkg-config to determine ffmpeg's include and lib directories. 5 | 6 | **Fedora** 7 | 8 | ```bash 9 | dnf install pkg-config ffmpeg-devel ffmpeg-libs 10 | ``` 11 | 12 | **Ubuntu** 13 | 14 | ```bash 15 | apt install libavcodec-dev libavformat-dev libavutil-dev libswscale-dev libavdevice-dev 16 | ``` 17 | 18 | **MacOS x86_64** 19 | 20 | ```bash 21 | brew install ffmpeg 22 | ``` 23 | 24 | **MacOS arm64** 25 | 26 | ```bash 27 | brew install pkg-config ffmpeg 28 | ``` 29 | 30 | **Windows** 31 | 32 | Windows is not supported but PRs are welcomed. 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 Elixir WebRTC Developers 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Env vars provided by elixir_make 2 | # see https://hexdocs.pm/elixir_make/Mix.Tasks.Compile.ElixirMake.html#module-default-environment-variables 3 | # ERTS_INCLUDE_DIR 4 | # MIX_APP_PATH 5 | 6 | XAV_DIR = c_src/xav 7 | PRIV_DIR = $(MIX_APP_PATH)/priv 8 | XAV_DECODER_SO = $(PRIV_DIR)/libxavdecoder.so 9 | XAV_ENCODER_SO = $(PRIV_DIR)/libxavencoder.so 10 | XAV_READER_SO = $(PRIV_DIR)/libxavreader.so 11 | XAV_VIDEO_CONVERTER_SO = $(PRIV_DIR)/libxavvideoconverter.so 12 | 13 | # uncomment to compile with debug logs 14 | # XAV_DEBUG_LOGS = -DXAV_DEBUG=1 15 | 16 | DECODER_HEADERS = $(XAV_DIR)/xav_decoder.h $(XAV_DIR)/decoder.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h 17 | DECODER_SOURCES = $(XAV_DIR)/xav_decoder.c $(XAV_DIR)/decoder.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c $(XAV_DIR)/channel_layout.c 18 | 19 | ENCODER_HEADERS = $(XAV_DIR)/xav_encoder.h 
$(XAV_DIR)/encoder.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h 20 | ENCODER_SOURCES = $(XAV_DIR)/xav_encoder.c $(XAV_DIR)/encoder.c $(XAV_DIR)/utils.c $(XAV_DIR)/channel_layout.c 21 | 22 | READER_HEADERS = $(XAV_DIR)/xav_reader.h $(XAV_DIR)/reader.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/audio_converter.h $(XAV_DIR)/utils.h $(XAV_DIR)/channel_layout.h 23 | READER_SOURCES = $(XAV_DIR)/xav_reader.c $(XAV_DIR)/reader.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/audio_converter.c $(XAV_DIR)/utils.c 24 | 25 | VIDEO_CONVERTER_HEADERS = $(XAV_DIR)/xav_video_converter.h $(XAV_DIR)/video_converter.h $(XAV_DIR)/utils.h 26 | VIDEO_CONVERTER_SOURCES = $(XAV_DIR)/xav_video_converter.c $(XAV_DIR)/video_converter.c $(XAV_DIR)/utils.c 27 | 28 | CFLAGS = $(XAV_DEBUG_LOGS) -fPIC -shared 29 | IFLAGS = -I$(ERTS_INCLUDE_DIR) -I$(XAV_DIR) 30 | LDFLAGS = -lavcodec -lswscale -lavutil -lavformat -lavdevice -lswresample 31 | 32 | # Flags for MacOS 33 | ifeq ($(shell uname -s),Darwin) 34 | ifeq ($(shell uname -m),arm64) 35 | IFLAGS += $$(pkg-config --cflags-only-I libavcodec libswscale libavutil libavformat libavdevice libswresample) 36 | LFLAGS += $$(pkg-config --libs-only-L libavcodec libswscale libavutil libavformat libavdevice libswresample) 37 | CFLAGS += -undefined dynamic_lookup 38 | else 39 | CFLAGS += -undefined dynamic_lookup 40 | endif 41 | endif 42 | 43 | # Flags for Fedora 44 | ifneq (,$(wildcard /etc/fedora-release)) 45 | IFLAGS += $$(pkg-config --cflags-only-I libavcodec libswscale libavutil libavformat libavdevice libswresample) 46 | LFLAGS += $$(pkg-config --libs-only-L libavcodec libswscale libavutil libavformat libavdevice libswresample) 47 | endif 48 | 49 | all: $(XAV_DECODER_SO) $(XAV_READER_SO) $(XAV_VIDEO_CONVERTER_SO) $(XAV_ENCODER_SO) 50 | 51 | $(XAV_DECODER_SO): Makefile $(DECODER_SOURCES) $(DECODER_HEADERS) 52 | mkdir -p $(PRIV_DIR) 53 | $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(DECODER_SOURCES) -o $(XAV_DECODER_SO) $(LDFLAGS) 54 | 55 | $(XAV_READER_SO): Makefile 
$(READER_SOURCES) $(READER_HEADERS) 56 | mkdir -p $(PRIV_DIR) 57 | $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(READER_SOURCES) -o $(XAV_READER_SO) $(LDFLAGS) 58 | 59 | $(XAV_VIDEO_CONVERTER_SO): Makefile $(VIDEO_CONVERTER_SOURCES) $(VIDEO_CONVERTER_HEADERS) 60 | mkdir -p $(PRIV_DIR) 61 | $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(VIDEO_CONVERTER_SOURCES) -o $(XAV_VIDEO_CONVERTER_SO) $(LDFLAGS) 62 | 63 | 64 | $(XAV_ENCODER_SO): Makefile $(ENCODER_SOURCES) $(ENCODER_HEADERS) 65 | mkdir -p $(PRIV_DIR) 66 | $(CC) $(CFLAGS) $(IFLAGS) $(LFLAGS) $(ENCODER_SOURCES) -o $(XAV_ENCODER_SO) $(LDFLAGS) 67 | 68 | format: 69 | clang-format -i $(XAV_DIR)/* 70 | 71 | .PHONY: format 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Xav 2 | 3 | [![Hex.pm](https://img.shields.io/hexpm/v/xav.svg)](https://hex.pm/packages/xav) 4 | [![API Docs](https://img.shields.io/badge/api-docs-yellow.svg?style=flat)](https://hexdocs.pm/xav) 5 | [![CI](https://img.shields.io/github/actions/workflow/status/elixir-webrtc/xav/ci.yml?logo=github&label=CI)](https://github.com/elixir-webrtc/xav/actions/workflows/ci.yml) 6 | [![codecov](https://codecov.io/gh/elixir-webrtc/xav/graph/badge.svg?token=2AG2acRhOf)](https://codecov.io/gh/elixir-webrtc/xav) 7 | 8 | Elixir wrapper over FFmpeg for reading audio and video files. 9 | 10 | See an interview with an FFmpeg enthusiast: https://youtu.be/9kaIXkImCAM 11 | 12 | ## Installation 13 | 14 | Make sure you have installed FFmpeg (ver.
4.x - 7.x) development packages on your system 15 | (see [here](INSTALL.md) for installation one-liners) and add Xav to the list of your dependencies: 16 | 17 | ```elixir 18 | def deps do 19 | [ 20 | {:xav, "~> 0.10.0"}, 21 | # Add Nx if you want to have Xav.Frame.to_nx/1 22 | {:nx, ">= 0.0.0"} 23 | ] 24 | end 25 | ``` 26 | 27 | ## Usage 28 | 29 | Decode 30 | 31 | ```elixir 32 | decoder = Xav.Decoder.new(:vp8, out_format: :rgb24) 33 | {:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>) 34 | ``` 35 | 36 | Decode with audio resampling 37 | 38 | ```elixir 39 | decoder = Xav.Decoder.new(:opus, out_format: :flt, out_sample_rate: 16_000) 40 | {:ok, %Xav.Frame{} = frame} = Xav.Decoder.decode(decoder, <<"somebinary">>) 41 | ``` 42 | 43 | Read from a file: 44 | 45 | ```elixir 46 | r = Xav.Reader.new!("./some_mp4_file.mp4") 47 | {:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r) 48 | tensor = Xav.Frame.to_nx(frame) 49 | Kino.Image.new(tensor) 50 | ``` 51 | 52 | Read from a camera: 53 | 54 | ```elixir 55 | r = Xav.Reader.new!("/dev/video0", device?: true, out_format: :rgb24) 56 | {:ok, %Xav.Frame{} = frame} = Xav.Reader.next_frame(r) 57 | tensor = Xav.Frame.to_nx(frame) 58 | Kino.Image.new(tensor) 59 | ``` 60 | 61 | Speech to text: 62 | 63 | ```elixir 64 | {:ok, whisper} = Bumblebee.load_model({:hf, "openai/whisper-tiny"}) 65 | {:ok, featurizer} = Bumblebee.load_featurizer({:hf, "openai/whisper-tiny"}) 66 | {:ok, tokenizer} = Bumblebee.load_tokenizer({:hf, "openai/whisper-tiny"}) 67 | {:ok, generation_config} = Bumblebee.load_generation_config({:hf, "openai/whisper-tiny"}) 68 | 69 | serving = 70 | Bumblebee.Audio.speech_to_text_whisper(whisper, featurizer, tokenizer, generation_config, 71 | defn_options: [compiler: EXLA] 72 | ) 73 | 74 | # Read a couple of frames. 75 | # See https://hexdocs.pm/bumblebee/Bumblebee.Audio.WhisperFeaturizer.html for default sampling rate. 
76 | frames = 77 | Xav.Reader.stream!("sample.mp3", read: :audio, out_format: :flt, out_channels: 1, out_sample_rate: 16_000) 78 | |> Stream.take(200) 79 | |> Enum.map(fn frame -> Xav.Frame.to_nx(frame) end) 80 | 81 | batch = Nx.Batch.concatenate(frames) 82 | batch = Nx.Defn.jit_apply(&Function.identity/1, [batch]) 83 | Nx.Serving.run(serving, batch) 84 | ``` 85 | 86 | ## Development 87 | 88 | To make `clangd` aware of the header files used in your project, you can create a `compile_commands.json` file. 89 | `clangd` uses this file to know the compiler flags, include paths, and other compilation options for each source file. 90 | 91 | ### Install bear 92 | 93 | The easiest way to generate `compile_commands.json` from a Makefile is to use the `bear` tool. `bear` is a tool that records the compiler calls during a build and creates the `compile_commands.json` file. 94 | 95 | You can install `bear` with your package manager: 96 | 97 | - __macOS__: brew install bear 98 | - __Ubuntu/Debian__: sudo apt install bear 99 | - __Fedora__: sudo dnf install bear 100 | 101 | ### Generate compile_commands.json 102 | 103 | After installing bear, you can run it alongside your make command to capture the necessary information. 
104 | 105 | ```bash 106 | bear -- mix compile 107 | ``` 108 | -------------------------------------------------------------------------------- /c_src/xav/audio_converter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "audio_converter.h" 8 | #include "channel_layout.h" 9 | #include "utils.h" 10 | /* Allocates an AudioConverter with swr_ctx set to NULL; the remaining fields are filled in by audio_converter_init(). NOTE(review): the XAV_ALLOC() result is dereferenced without a NULL check - confirm the allocator cannot fail here. */ 11 | struct AudioConverter *audio_converter_alloc() { 12 | struct AudioConverter *converter = 13 | (struct AudioConverter *)XAV_ALLOC(sizeof(struct AudioConverter)); 14 | converter->swr_ctx = NULL; 15 | return converter; 16 | } 17 | /* Creates the swresample context, records the input/output parameters on c, sets the channel-layout, sample-rate and sample-format options on the context, and returns the swr_init() result. */ 18 | int audio_converter_init(struct AudioConverter *c, struct ChannelLayout in_chlayout, 19 | int in_sample_rate, enum AVSampleFormat in_sample_fmt, 20 | struct ChannelLayout out_chlayout, int out_sample_rate, 21 | enum AVSampleFormat out_sample_fmt) { 22 | c->swr_ctx = swr_alloc(); 23 | c->in_sample_rate = in_sample_rate; 24 | c->out_sample_rate = out_sample_rate; 25 | c->out_chlayout = out_chlayout; 26 | c->out_sample_fmt = out_sample_fmt; 27 | /* NOTE(review): swr_alloc() above is not checked for NULL and the av_opt_set_* return values below are ignored; presumably any failure is expected to surface through swr_init() - confirm. */ 28 | #if LIBAVUTIL_VERSION_MAJOR >= 58 29 | av_opt_set_chlayout(c->swr_ctx, "in_chlayout", &in_chlayout.layout, 0); 30 | av_opt_set_chlayout(c->swr_ctx, "out_chlayout", &out_chlayout.layout, 0); 31 | c->out_channels = out_chlayout.layout.nb_channels; 32 | #else 33 | av_opt_set_channel_layout(c->swr_ctx, "in_channel_layout", in_chlayout.layout, 0); 34 | av_opt_set_channel_layout(c->swr_ctx, "out_channel_layout", out_chlayout.layout, 0); 35 | c->out_channels = av_get_channel_layout_nb_channels(out_chlayout.layout); 36 | #endif 37 | 38 | av_opt_set_int(c->swr_ctx, "in_sample_rate", in_sample_rate, 0); 39 | av_opt_set_int(c->swr_ctx, "out_sample_rate", out_sample_rate, 0); 40 | 41 | av_opt_set_sample_fmt(c->swr_ctx, "in_sample_fmt", in_sample_fmt, 0); 42 | av_opt_set_sample_fmt(c->swr_ctx, "out_sample_fmt", out_sample_fmt, 0); 43 | 44 | return swr_init(c->swr_ctx); 45 | } 46 | 47 | int
audio_converter_convert(struct AudioConverter *c, AVFrame *src_frame, uint8_t ***out_data, 48 | int *out_samples, int *out_size) { 49 | 50 | #if LIBAVUTIL_VERSION_MAJOR >= 58 51 | int out_nb_channels = c->out_chlayout.layout.nb_channels; 52 | #else 53 | int out_nb_channels = av_get_channel_layout_nb_channels(c->out_chlayout.layout); 54 | #endif 55 | 56 | uint8_t **out_data_tmp = NULL; 57 | int max_out_nb_samples = swr_get_out_samples(c->swr_ctx, src_frame->nb_samples); 58 | int out_bytes_per_sample = av_get_bytes_per_sample(c->out_sample_fmt); 59 | 60 | // Some parts of ffmpeg require buffers to by divisible by 32 61 | // to use fast/aligned SIMD routines - this is what align option is used for. 62 | // See https://stackoverflow.com/questions/35678041/what-is-linesize-alignment-meaning 63 | // Because we return the binary straight to the Erlang, we can disable it. 64 | int ret = av_samples_alloc_array_and_samples(&out_data_tmp, NULL, out_nb_channels, 65 | max_out_nb_samples, c->out_sample_fmt, 1); 66 | 67 | if (ret < 0) { 68 | XAV_LOG_DEBUG("Couldn't allocate array for out samples."); 69 | return ret; 70 | } 71 | 72 | *out_samples = swr_convert(c->swr_ctx, out_data_tmp, max_out_nb_samples, 73 | (const uint8_t **)src_frame->data, src_frame->nb_samples); 74 | 75 | if (*out_samples < 0) { 76 | XAV_LOG_DEBUG("Couldn't convert samples: %d", *out_samples); 77 | av_freep(&out_data_tmp[0]); 78 | return -1; 79 | } 80 | 81 | XAV_LOG_DEBUG("Converted %d samples per channel", *out_samples); 82 | 83 | *out_size = *out_samples * out_bytes_per_sample * out_nb_channels; 84 | 85 | *out_data = out_data_tmp; 86 | 87 | return 0; 88 | } 89 | 90 | void audio_converter_free(struct AudioConverter **converter) { 91 | if (*converter != NULL) { 92 | struct AudioConverter *c = *converter; 93 | 94 | if (c->swr_ctx != NULL) { 95 | swr_free(&c->swr_ctx); 96 | } 97 | 98 | XAV_FREE(c); 99 | *converter = NULL; 100 | } 101 | } 102 | 
-------------------------------------------------------------------------------- /c_src/xav/audio_converter.h: -------------------------------------------------------------------------------- 1 | #ifndef CONVERTER_H 2 | #define CONVERTER_H 3 | #include 4 | #include 5 | #include 6 | 7 | #include "channel_layout.h" 8 | /* swresample context together with the conversion parameters recorded by audio_converter_init(). */ 9 | struct AudioConverter { 10 | SwrContext *swr_ctx; 11 | int64_t in_sample_rate; 12 | int64_t out_sample_rate; 13 | int64_t out_channels; 14 | struct ChannelLayout out_chlayout; 15 | enum AVSampleFormat out_sample_fmt; 16 | }; 17 | /* Allocates a converter whose swr_ctx is NULL; release it with audio_converter_free(). */ 18 | struct AudioConverter *audio_converter_alloc(void); 19 | /* Creates and configures the swresample context for the given input/output layout, sample rate and sample format; returns the swr_init() result. */ 20 | int audio_converter_init(struct AudioConverter *c, struct ChannelLayout in_chlayout, 21 | int in_sample_rate, enum AVSampleFormat in_sample_fmt, 22 | struct ChannelLayout out_chlayout, int out_sample_rate, 23 | enum AVSampleFormat out_sample_fmt); 24 | 25 | /** 26 | * Converts AVFrame to the output format. 27 | * 28 | * @param c audio converter 29 | * @param src_frame decoded source frame 30 | * @param out_data buffer where audio samples are written after conversion. 31 | * We always convert to the packed format, so only *out_data[0] is set. 32 | * It is allocated internally with av_samples_alloc_array_and_samples() and the samples have to be freed with av_freep(&(*out_data)[0]). NOTE(review): the pointer array itself is heap-allocated as well, so callers should presumably also release it with av_freep(out_data) afterwards - confirm. 33 | * @param out_samples number of samples per channel in out_data buffer. 34 | * @param out_size size of the out_data buffer in bytes. 35 | * This is the same as *out_samples * bytes_per_sample(out_format) * out_channels 36 | * @return 0 on success and negative value on error.
37 | */ 38 | int audio_converter_convert(struct AudioConverter *c, AVFrame *src_frame, uint8_t ***out_data, 39 | int *out_samples, int *out_size); 40 | /* Frees the swresample context (if any) and the converter, then sets *converter to NULL. */ 41 | void audio_converter_free(struct AudioConverter **converter); 42 | #endif 43 | -------------------------------------------------------------------------------- /c_src/xav/channel_layout.c: -------------------------------------------------------------------------------- 1 | #include "channel_layout.h" 2 | /* Resolves a channel layout by name into *layout. Returns 1 on success and 0 when FFmpeg cannot resolve the name - note that this is the opposite of the 0-on-success convention used by the other helpers in this file. */ 3 | int xav_get_channel_layout(const char *name, struct ChannelLayout *layout) { 4 | #if LIBAVUTIL_VERSION_MAJOR >= 58 5 | if (av_channel_layout_from_string(&layout->layout, name) < 0) { 6 | return 0; 7 | } 8 | #else 9 | layout->layout = av_get_channel_layout(name); 10 | if (layout->layout == 0) { 11 | return 0; 12 | } 13 | #endif 14 | 15 | return 1; 16 | } 17 | /* Copies the codec context's channel layout into *layout. With libavutil >= 58 the av_channel_layout_copy() result is returned; the legacy branch always returns 0. */ 18 | int xav_get_channel_layout_from_context(struct ChannelLayout *layout, const AVCodecContext *ctx) { 19 | #if LIBAVUTIL_VERSION_MAJOR >= 58 20 | return av_channel_layout_copy(&layout->layout, &ctx->ch_layout); 21 | #else 22 | layout->layout = ctx->channel_layout; 23 | return 0; 24 | #endif 25 | } 26 | /* Copies *layout into the codec context. With libavutil >= 58 the av_channel_layout_copy() result is returned; the legacy branch always returns 0. */ 27 | int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout) { 28 | #if LIBAVUTIL_VERSION_MAJOR >= 58 29 | return av_channel_layout_copy(&ctx->ch_layout, &layout->layout); 30 | #else 31 | ctx->channel_layout = layout->layout; 32 | return 0; 33 | #endif 34 | } 35 | /* Stores FFmpeg's default layout for the given channel count in *layout. Always returns 0. */ 36 | int xav_set_default_channel_layout(struct ChannelLayout *layout, int channels) { 37 | #if LIBAVUTIL_VERSION_MAJOR >= 58 38 | av_channel_layout_default(&layout->layout, channels); 39 | #else 40 | layout->layout = av_get_default_channel_layout(channels); 41 | #endif 42 | return 0; 43 | } 44 | /* Copies *layout into the frame. With libavutil >= 58 the av_channel_layout_copy() result is returned; the legacy branch always returns 0. */ 45 | int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout) { 46 | #if LIBAVUTIL_VERSION_MAJOR >= 58 47 | return av_channel_layout_copy(&frame->ch_layout, &layout->layout); 48 | #else 49 | frame->channel_layout = layout->layout; 50 | return 0; 51 | #endif
52 | } 53 | -------------------------------------------------------------------------------- /c_src/xav/channel_layout.h: -------------------------------------------------------------------------------- 1 | #ifndef XAV_CHANNEL_LAYOUT_H 2 | #define XAV_CHANNEL_LAYOUT_H 3 | #include 4 | #include 5 | 6 | struct ChannelLayout { 7 | #if LIBAVUTIL_VERSION_MAJOR >= 58 8 | AVChannelLayout layout; 9 | #else 10 | uint64_t layout; 11 | #endif 12 | }; 13 | 14 | int xav_get_channel_layout(const char *name, struct ChannelLayout *layout); 15 | int xav_get_channel_layout_from_context(struct ChannelLayout *layout, const AVCodecContext *ctx); 16 | int xav_set_channel_layout(AVCodecContext *ctx, struct ChannelLayout *layout); 17 | int xav_set_default_channel_layout(struct ChannelLayout *layout, int channels); 18 | int xav_set_frame_channel_layout(AVFrame *frame, struct ChannelLayout *layout); 19 | #endif -------------------------------------------------------------------------------- /c_src/xav/decoder.c: -------------------------------------------------------------------------------- 1 | #include "decoder.h" 2 | #include "utils.h" 3 | #include "video_converter.h" 4 | 5 | static int init_converter(struct Decoder *decoder); 6 | 7 | struct Decoder *decoder_alloc() { 8 | struct Decoder *decoder = (struct Decoder *)XAV_ALLOC(sizeof(struct Decoder)); 9 | 10 | decoder->codec = NULL; 11 | decoder->c = NULL; 12 | 13 | return decoder; 14 | } 15 | 16 | int decoder_init(struct Decoder *decoder, const AVCodec *codec, int channels) { 17 | decoder->media_type = codec->type; 18 | decoder->codec = codec; 19 | 20 | decoder->c = avcodec_alloc_context3(decoder->codec); 21 | if (!decoder->c) { 22 | return -1; 23 | } 24 | 25 | if (codec->type == AVMEDIA_TYPE_AUDIO && channels != -1) { 26 | struct ChannelLayout ch_layout; 27 | xav_set_default_channel_layout(&ch_layout, channels); 28 | xav_set_channel_layout(decoder->c, &ch_layout); 29 | } 30 | 31 | decoder->frame = av_frame_alloc(); 32 | if 
(!decoder->frame) { 33 | return -1; 34 | } 35 | 36 | decoder->pkt = av_packet_alloc(); 37 | if (!decoder->pkt) { 38 | return -1; 39 | } 40 | 41 | return avcodec_open2(decoder->c, decoder->codec, NULL); 42 | } 43 | 44 | int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame) { 45 | int ret; 46 | ret = avcodec_send_packet(decoder->c, pkt); 47 | if (ret != 0) { 48 | return -2; 49 | } 50 | 51 | return avcodec_receive_frame(decoder->c, frame); 52 | } 53 | 54 | int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count) { 55 | int ret = avcodec_send_packet(decoder->c, NULL); 56 | if (ret != 0) { 57 | return ret; 58 | } 59 | 60 | while (1) { 61 | ret = avcodec_receive_frame(decoder->c, frames[*frames_count]); 62 | if (ret == AVERROR_EOF) { 63 | break; 64 | } else if (ret < 0) { 65 | return ret; 66 | } 67 | 68 | *frames_count += 1; 69 | } 70 | 71 | return 0; 72 | } 73 | 74 | void decoder_free_frame(struct Decoder *decoder) { 75 | if (decoder->frame != NULL) { 76 | av_frame_unref(decoder->frame); 77 | } 78 | if (decoder->pkt != NULL) { 79 | av_packet_unref(decoder->pkt); 80 | } 81 | } 82 | 83 | void decoder_free(struct Decoder **decoder) { 84 | XAV_LOG_DEBUG("Freeing Decoder object"); 85 | if (*decoder != NULL) { 86 | struct Decoder *d = *decoder; 87 | 88 | if (d->c != NULL) { 89 | avcodec_free_context(&d->c); 90 | } 91 | 92 | if (d->pkt != NULL) { 93 | av_packet_free(&d->pkt); 94 | } 95 | 96 | if (d->frame != NULL) { 97 | av_frame_free(&d->frame); 98 | } 99 | 100 | XAV_FREE(d); 101 | *decoder = NULL; 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /c_src/xav/decoder.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "audio_converter.h" 5 | #include "channel_layout.h" 6 | #include "utils.h" 7 | 8 | #define MAX_FLUSH_BUFFER 16 9 | 10 | struct Decoder { 11 | enum AVMediaType media_type; 12 | AVFrame *frame; 
13 | AVPacket *pkt; 14 | const AVCodec *codec; 15 | AVCodecContext *c; 16 | }; 17 | 18 | struct Decoder *decoder_alloc(); 19 | 20 | int decoder_init(struct Decoder *decoder, const AVCodec *codec, int channels); 21 | 22 | int decoder_decode(struct Decoder *decoder, AVPacket *pkt, AVFrame *frame); 23 | 24 | int decoder_flush(struct Decoder *decoder, AVFrame **frames, int *frames_count); 25 | 26 | void decoder_free_frame(struct Decoder *decoder); 27 | 28 | void decoder_free(struct Decoder **decoder); -------------------------------------------------------------------------------- /c_src/xav/encoder.c: -------------------------------------------------------------------------------- 1 | #include "encoder.h" 2 | 3 | struct Encoder *encoder_alloc() { 4 | struct Encoder *encoder = XAV_ALLOC(sizeof(struct Encoder)); 5 | encoder->c = NULL; 6 | encoder->codec = NULL; 7 | encoder->num_packets = 0; 8 | encoder->max_num_packets = 8; 9 | encoder->packets = XAV_ALLOC(encoder->max_num_packets * sizeof(AVPacket *)); 10 | 11 | for (int i = 0; i < encoder->max_num_packets; i++) { 12 | encoder->packets[i] = av_packet_alloc(); 13 | } 14 | 15 | return encoder; 16 | } 17 | 18 | int encoder_init(struct Encoder *encoder, struct EncoderConfig *config) { 19 | encoder->codec = config->codec; 20 | 21 | encoder->c = avcodec_alloc_context3(encoder->codec); 22 | if (!encoder->c) { 23 | return -1; 24 | } 25 | 26 | if (encoder->codec->type == AVMEDIA_TYPE_VIDEO) { 27 | encoder->c->width = config->width; 28 | encoder->c->height = config->height; 29 | encoder->c->pix_fmt = config->format; 30 | encoder->c->time_base = config->time_base; 31 | 32 | if (config->gop_size > 0) { 33 | encoder->c->gop_size = config->gop_size; 34 | } 35 | 36 | if (config->max_b_frames >= 0) { 37 | encoder->c->max_b_frames = config->max_b_frames; 38 | } 39 | } else { 40 | encoder->c->sample_fmt = config->sample_format; 41 | encoder->c->sample_rate = config->sample_rate; 42 | xav_set_channel_layout(encoder->c, 
&config->channel_layout); 43 | } 44 | 45 | if (config->profile != FF_PROFILE_UNKNOWN) { 46 | encoder->c->profile = config->profile; 47 | } 48 | 49 | AVDictionary *opts = NULL; 50 | if (strcmp(encoder->codec->name, "libx265") == 0) { 51 | char x265_params[256] = "log-level=warning"; 52 | if (config->gop_size > 0) { 53 | sprintf(x265_params + strlen(x265_params), ":keyint=%d", config->gop_size); 54 | } 55 | 56 | if (config->max_b_frames >= 0) { 57 | sprintf(x265_params + strlen(x265_params), ":bframes=%d", config->max_b_frames); 58 | } 59 | 60 | av_dict_set(&opts, "x265-params", x265_params, 0); 61 | } 62 | 63 | return avcodec_open2(encoder->c, encoder->codec, &opts); 64 | } 65 | 66 | int encoder_encode(struct Encoder *encoder, AVFrame *frame) { 67 | int ret = avcodec_send_frame(encoder->c, frame); 68 | if (ret < 0) { 69 | return ret; 70 | } 71 | 72 | encoder->num_packets = 0; 73 | 74 | while (1) { 75 | ret = avcodec_receive_packet(encoder->c, encoder->packets[encoder->num_packets]); 76 | if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { 77 | break; 78 | } else if (ret < 0) { 79 | return ret; 80 | } 81 | 82 | if (++encoder->num_packets >= encoder->max_num_packets) { 83 | encoder->max_num_packets *= 2; 84 | encoder->packets = 85 | XAV_REALLOC(encoder->packets, encoder->max_num_packets * sizeof(AVPacket *)); 86 | for (int i = encoder->num_packets; i < encoder->max_num_packets; i++) { 87 | encoder->packets[i] = av_packet_alloc(); 88 | } 89 | } 90 | } 91 | 92 | return 0; 93 | } 94 | 95 | void encoder_free(struct Encoder **encoder) { 96 | if (*encoder != NULL) { 97 | struct Encoder *e = *encoder; 98 | 99 | if (e->c != NULL) { 100 | avcodec_free_context(&e->c); 101 | } 102 | 103 | for (int i = 0; i < e->max_num_packets; i++) { 104 | av_packet_free(&e->packets[i]); 105 | } 106 | 107 | XAV_FREE(e); 108 | *encoder = NULL; 109 | } 110 | } -------------------------------------------------------------------------------- /c_src/xav/encoder.h: 
-------------------------------------------------------------------------------- 1 | #include "channel_layout.h" 2 | #include "utils.h" 3 | #include 4 | 5 | struct Encoder { 6 | const AVCodec *codec; 7 | AVCodecContext *c; 8 | int num_packets; 9 | int max_num_packets; 10 | AVPacket **packets; 11 | }; 12 | 13 | struct EncoderConfig { 14 | enum AVMediaType media_type; 15 | const AVCodec *codec; 16 | int width; 17 | int height; 18 | enum AVPixelFormat format; 19 | enum AVSampleFormat sample_format; 20 | AVRational time_base; 21 | int gop_size; 22 | int max_b_frames; 23 | int profile; 24 | int sample_rate; 25 | struct ChannelLayout channel_layout; 26 | }; 27 | 28 | struct Encoder *encoder_alloc(); 29 | 30 | int encoder_init(struct Encoder *encoder, struct EncoderConfig *encoder_config); 31 | 32 | int encoder_encode(struct Encoder *encoder, AVFrame *frame); 33 | 34 | void encoder_free(struct Encoder **encoder); -------------------------------------------------------------------------------- /c_src/xav/reader.c: -------------------------------------------------------------------------------- 1 | #include "reader.h" 2 | #include "utils.h" 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | static int init_converter(struct Reader *reader); 9 | 10 | struct Reader *reader_alloc() { 11 | 12 | struct Reader *reader = (struct Reader *)XAV_ALLOC(sizeof(struct Reader)); 13 | 14 | reader->path = NULL; 15 | reader->frame = NULL; 16 | reader->pkt = NULL; 17 | reader->codec = NULL; 18 | reader->c = NULL; 19 | reader->fmt_ctx = NULL; 20 | reader->input_format = NULL; 21 | reader->options = NULL; 22 | 23 | return reader; 24 | } 25 | 26 | int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag, 27 | enum AVMediaType media_type) { 28 | int ret; 29 | reader->path = XAV_ALLOC(path_size + 1); 30 | memcpy(reader->path, path, path_size); 31 | reader->path[path_size] = '\0'; 32 | 33 | reader->media_type = media_type; 34 | 35 | if (device_flag == 1) { 36 | 
avdevice_register_all(); 37 | reader->input_format = av_find_input_format("v4l2"); 38 | av_dict_set(&reader->options, "framerate", "10", 0); 39 | } 40 | 41 | XAV_LOG_DEBUG("Trying to open %s", reader->path); 42 | 43 | if (avformat_open_input(&reader->fmt_ctx, reader->path, reader->input_format, NULL) < 0) { 44 | return -1; 45 | } 46 | 47 | if (avformat_find_stream_info(reader->fmt_ctx, NULL) < 0) { 48 | return -2; 49 | } 50 | 51 | reader->stream_idx = av_find_best_stream(reader->fmt_ctx, media_type, -1, -1, &reader->codec, 0); 52 | if (reader->stream_idx < 0) { 53 | return -2; 54 | } 55 | 56 | reader->c = avcodec_alloc_context3(reader->codec); 57 | if (!reader->c) { 58 | return -2; 59 | } 60 | 61 | AVStream *stream = reader->fmt_ctx->streams[reader->stream_idx]; 62 | 63 | // If avg_frame_rate is valid, use it; otherwise, calculate it from time_base. 64 | if (stream->avg_frame_rate.num != 0 && stream->avg_frame_rate.den != 0) { 65 | reader->framerate = stream->avg_frame_rate; 66 | } else { 67 | reader->framerate = av_inv_q(stream->time_base); 68 | } 69 | 70 | // TODO why is this actually needed? 
71 | if (avcodec_parameters_to_context(reader->c, 72 | reader->fmt_ctx->streams[reader->stream_idx]->codecpar) < 0) { 73 | return -2; 74 | } 75 | 76 | reader->frame = av_frame_alloc(); 77 | if (!reader->frame) { 78 | return -2; 79 | } 80 | 81 | reader->pkt = av_packet_alloc(); 82 | if (!reader->pkt) { 83 | return -2; 84 | } 85 | 86 | if (avcodec_open2(reader->c, reader->codec, NULL) < 0) { 87 | return -2; 88 | } 89 | 90 | return 0; 91 | } 92 | 93 | int reader_next_frame(struct Reader *reader) { 94 | XAV_LOG_DEBUG("Trying to receive frame"); 95 | 96 | int ret = avcodec_receive_frame(reader->c, reader->frame); 97 | 98 | if (ret == 0) { 99 | XAV_LOG_DEBUG("Received frame"); 100 | return 0; 101 | } else if (ret == AVERROR_EOF) { 102 | XAV_LOG_DEBUG("EOF"); 103 | return ret; 104 | } else if (ret != AVERROR(EAGAIN)) { 105 | XAV_LOG_DEBUG("Error when trying to receive frame"); 106 | return ret; 107 | } else { 108 | XAV_LOG_DEBUG("Need more data"); 109 | } 110 | 111 | int frame_ready = 0; 112 | while (!frame_ready && (ret = av_read_frame(reader->fmt_ctx, reader->pkt)) >= 0) { 113 | 114 | if (reader->pkt->stream_index != reader->stream_idx) { 115 | continue; 116 | } 117 | 118 | XAV_LOG_DEBUG("Read packet from input. Sending to decoder"); 119 | 120 | ret = avcodec_send_packet(reader->c, reader->pkt); 121 | if (ret < 0) { 122 | return ret; 123 | } 124 | 125 | // it's unclear when av_packet_unref should 126 | // be called - right after calling avcodec_send_packet 127 | // or after receiving the last decoded frame using 128 | // avcodec_receive_frame? 129 | // 130 | // according to docs for avcodec_send_packet 131 | // 132 | // Ownership of the packet remains with the caller, 133 | // and the decoder will not write to the packet. 134 | // The decoder may create a reference to the packet data 135 | // (or copy it if the packet is not reference-counted). 136 | // Unlike with older APIs, the packet is always fully consumed. 
137 | // 138 | // so it sounds like we can call av_packet_unref 139 | // right after avcodec_send_packet as packet is always 140 | // fully consumed 141 | av_packet_unref(reader->pkt); 142 | 143 | XAV_LOG_DEBUG("Trying to receive frame"); 144 | 145 | ret = avcodec_receive_frame(reader->c, reader->frame); 146 | 147 | if (ret == 0) { 148 | XAV_LOG_DEBUG("Successfully received frame"); 149 | frame_ready = 1; 150 | } else if (ret == AVERROR_EOF) { 151 | XAV_LOG_DEBUG("EOF"); 152 | return ret; 153 | } else if (ret != AVERROR(EAGAIN)) { 154 | XAV_LOG_DEBUG("Error when trying to receive frame"); 155 | return ret; 156 | } else { 157 | XAV_LOG_DEBUG("Need more data"); 158 | } 159 | } 160 | 161 | if (ret == AVERROR_EOF) { 162 | XAV_LOG_DEBUG("EOF. Flushing decoder"); 163 | 164 | ret = avcodec_send_packet(reader->c, NULL); 165 | if (ret < 0) { 166 | return ret; 167 | } 168 | 169 | XAV_LOG_DEBUG("Trying to receive frame"); 170 | ret = avcodec_receive_frame(reader->c, reader->frame); 171 | 172 | if (ret == AVERROR_EOF) { 173 | XAV_LOG_DEBUG("EOF"); 174 | return ret; 175 | } else if (ret == AVERROR(EAGAIN)) { 176 | XAV_LOG_DEBUG("Need more data"); 177 | } else if (ret < 0) { 178 | return ret; 179 | } else { 180 | XAV_LOG_DEBUG("Received frame"); 181 | } 182 | } 183 | 184 | return 0; 185 | } 186 | 187 | int reader_seek(struct Reader *reader, double time_in_seconds) { 188 | AVRational time_base = reader->fmt_ctx->streams[reader->stream_idx]->time_base; 189 | 190 | // keep floating time precision by multiplying with the internal AV_TIME_BASE (1_000_000) 191 | // and convert to the same time_base for the stream we're using in `av_seek_frame` because we're 192 | // explicitly specifying the stream index. 
for further information, see param docs in 193 | // [`av_seek_frame`](https://ffmpeg.org/doxygen/7.0/group__lavf__decoding.html#gaa23f7619d8d4ea0857065d9979c75ac8) 194 | int64_t seek_pos = 195 | av_rescale_q((int64_t)(time_in_seconds * AV_TIME_BASE), AV_TIME_BASE_Q, time_base); 196 | 197 | avcodec_flush_buffers(reader->c); 198 | 199 | if (av_seek_frame(reader->fmt_ctx, reader->stream_idx, seek_pos, AVSEEK_FLAG_BACKWARD) < 0) { 200 | XAV_LOG_DEBUG("Error while seeking to position %f / %f seconds", seek_pos, time_in_seconds); 201 | return -1; 202 | } 203 | 204 | // we have to read frames from the last keyframe until the desired timestamp 205 | while (av_read_frame(reader->fmt_ctx, reader->pkt) >= 0) { 206 | 207 | if (reader->pkt->stream_index != reader->stream_idx) { 208 | continue; 209 | } 210 | 211 | reader->pkt->flags |= AV_PKT_FLAG_DISCARD; 212 | int ret = avcodec_send_packet(reader->c, reader->pkt); 213 | if (ret < 0) { 214 | return ret; 215 | } 216 | 217 | int64_t current_pos = reader->pkt->pts != AV_NOPTS_VALUE ? 
reader->pkt->pts : reader->pkt->dts; 218 | 219 | if (current_pos >= seek_pos) { 220 | break; 221 | } 222 | } 223 | 224 | av_packet_unref(reader->pkt); 225 | return 0; 226 | } 227 | 228 | void reader_free_frame(struct Reader *reader) { 229 | if (reader->frame != NULL) { 230 | av_frame_unref(reader->frame); 231 | } 232 | } 233 | 234 | void reader_free(struct Reader **reader) { 235 | XAV_LOG_DEBUG("Freeing Reader object"); 236 | if (*reader != NULL) { 237 | struct Reader *r = *reader; 238 | 239 | if (r->c != NULL) { 240 | avcodec_free_context(&r->c); 241 | } 242 | 243 | if (r->pkt != NULL) { 244 | av_packet_free(&r->pkt); 245 | } 246 | 247 | if (r->frame != NULL) { 248 | av_frame_free(&r->frame); 249 | } 250 | 251 | if (r->fmt_ctx != NULL) { 252 | avformat_close_input(&r->fmt_ctx); 253 | } 254 | 255 | if (r->path != NULL) { 256 | XAV_FREE(r->path); 257 | } 258 | 259 | XAV_FREE(r); 260 | *reader = NULL; 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /c_src/xav/reader.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "utils.h" 14 | 15 | struct Reader { 16 | char *path; 17 | AVFrame *frame; 18 | AVPacket *pkt; 19 | const AVCodec *codec; 20 | AVCodecContext *c; 21 | AVFormatContext *fmt_ctx; 22 | int stream_idx; 23 | const AVInputFormat *input_format; 24 | AVDictionary *options; 25 | enum AVMediaType media_type; 26 | AVRational framerate; 27 | }; 28 | 29 | struct Reader *reader_alloc(); 30 | 31 | int reader_init(struct Reader *reader, unsigned char *path, size_t path_size, int device_flag, 32 | enum AVMediaType media_type); 33 | 34 | int reader_next_frame(struct Reader *reader); 35 | 36 | int reader_seek(struct Reader *reader, double time_in_seconds); 37 | 38 | void reader_free_frame(struct Reader *reader); 39 | 40 | void 
reader_free(struct Reader **reader); 41 | -------------------------------------------------------------------------------- /c_src/xav/utils.c: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include 3 | #include 4 | #include 5 | 6 | ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term) { 7 | ERL_NIF_TERM ok_term = enif_make_atom(env, "ok"); 8 | return enif_make_tuple(env, 2, ok_term, data_term); 9 | } 10 | 11 | ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason) { 12 | ERL_NIF_TERM error_term = enif_make_atom(env, "error"); 13 | ERL_NIF_TERM reason_term = enif_make_atom(env, reason); 14 | return enif_make_tuple(env, 2, error_term, reason_term); 15 | } 16 | 17 | ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg) { 18 | ERL_NIF_TERM reason = enif_make_atom(env, msg); 19 | return enif_raise_exception(env, reason); 20 | } 21 | 22 | int xav_nif_get_atom(ErlNifEnv *env, ERL_NIF_TERM term, char **value) { 23 | unsigned int atom_len; 24 | if (!enif_get_atom_length(env, term, &atom_len, ERL_NIF_LATIN1)) { 25 | return 0; 26 | } 27 | 28 | char *atom_value = (char *)XAV_ALLOC((atom_len + 1) * sizeof(char *)); 29 | if (!enif_get_atom(env, term, atom_value, atom_len + 1, ERL_NIF_LATIN1)) { 30 | XAV_FREE(atom_value); 31 | return 0; 32 | } 33 | 34 | *value = atom_value; 35 | return 1; 36 | } 37 | 38 | int xav_nif_get_string(ErlNifEnv *env, ERL_NIF_TERM term, char **value) { 39 | ErlNifBinary bin; 40 | if (!enif_inspect_binary(env, term, &bin)) { 41 | return 0; 42 | } 43 | 44 | char *str_value = (char *)XAV_ALLOC((bin.size + 1) * sizeof(char *)); 45 | memcpy(str_value, bin.data, bin.size); 46 | str_value[bin.size] = '\0'; 47 | 48 | *value = str_value; 49 | return 1; 50 | } 51 | 52 | ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples, 53 | int out_size, enum AVSampleFormat out_format, int pts) { 54 | ERL_NIF_TERM data_term; 55 | 56 | unsigned char *ptr = 
enif_make_new_binary(env, out_size, &data_term); 57 | memcpy(ptr, out_data[0], out_size); 58 | 59 | ERL_NIF_TERM samples_term = enif_make_int(env, out_samples); 60 | ERL_NIF_TERM format_term = enif_make_atom(env, av_get_sample_fmt_name(out_format)); 61 | ERL_NIF_TERM pts_term = enif_make_int(env, pts); 62 | 63 | return enif_make_tuple(env, 4, data_term, format_term, samples_term, pts_term); 64 | } 65 | 66 | ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame) { 67 | ERL_NIF_TERM data_term; 68 | 69 | int payload_size = av_image_get_buffer_size(frame->format, frame->width, frame->height, 1); 70 | unsigned char *ptr = enif_make_new_binary(env, payload_size, &data_term); 71 | 72 | av_image_copy_to_buffer(ptr, payload_size, (const uint8_t *const *)frame->data, 73 | (const int *)frame->linesize, frame->format, frame->width, frame->height, 74 | 1); 75 | 76 | ERL_NIF_TERM format_term = enif_make_atom(env, av_get_pix_fmt_name(frame->format)); 77 | ERL_NIF_TERM height_term = enif_make_int(env, frame->height); 78 | ERL_NIF_TERM width_term = enif_make_int(env, frame->width); 79 | ERL_NIF_TERM pts_term = enif_make_int64(env, frame->pts); 80 | return enif_make_tuple(env, 5, data_term, format_term, width_term, height_term, pts_term); 81 | } 82 | 83 | ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet) { 84 | ERL_NIF_TERM data_term; 85 | 86 | unsigned char *ptr = enif_make_new_binary(env, packet->size, &data_term); 87 | 88 | memcpy(ptr, packet->data, packet->size); 89 | 90 | ERL_NIF_TERM dts = enif_make_int(env, packet->dts); 91 | ERL_NIF_TERM pts = enif_make_int(env, packet->pts); 92 | ERL_NIF_TERM is_keyframe = 93 | enif_make_atom(env, packet->flags & AV_PKT_FLAG_KEY ? 
"true" : "false"); 94 | return enif_make_tuple(env, 4, data_term, dts, pts, is_keyframe); 95 | } 96 | 97 | int xav_get_nb_channels(const AVFrame *frame) { 98 | #if LIBAVUTIL_VERSION_MAJOR >= 58 99 | return frame->ch_layout.nb_channels; 100 | #else 101 | return frame->channels; 102 | #endif 103 | } -------------------------------------------------------------------------------- /c_src/xav/utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef XAV_DEBUG 11 | #define XAV_LOG_DEBUG(X, ...) \ 12 | fprintf(stderr, "[XAV DEBUG %s] %s:%d " X "\n", __TIME__, __FILE__, __LINE__, ##__VA_ARGS__) 13 | #else 14 | #define XAV_LOG_DEBUG(...) 15 | #endif 16 | 17 | #define XAV_ALLOC(X) enif_alloc(X) 18 | #define XAV_REALLOC(X, Y) enif_realloc(X, Y) 19 | #define XAV_FREE(X) enif_free(X) 20 | ERL_NIF_TERM xav_nif_ok(ErlNifEnv *env, ERL_NIF_TERM data_term); 21 | ERL_NIF_TERM xav_nif_error(ErlNifEnv *env, char *reason); 22 | ERL_NIF_TERM xav_nif_raise(ErlNifEnv *env, char *msg); 23 | int xav_nif_get_atom(ErlNifEnv *env, ERL_NIF_TERM term, char **value); 24 | int xav_nif_get_string(ErlNifEnv *env, ERL_NIF_TERM term, char **value); 25 | ERL_NIF_TERM xav_nif_video_frame_to_term(ErlNifEnv *env, AVFrame *frame); 26 | ERL_NIF_TERM xav_nif_audio_frame_to_term(ErlNifEnv *env, uint8_t **out_data, int out_samples, 27 | int out_size, enum AVSampleFormat out_format, int pts); 28 | ERL_NIF_TERM xav_nif_packet_to_term(ErlNifEnv *env, AVPacket *packet); 29 | int xav_get_nb_channels(const AVFrame *frame); 30 | -------------------------------------------------------------------------------- /c_src/xav/video_converter.c: -------------------------------------------------------------------------------- 1 | #include "video_converter.h" 2 | #include "utils.h" 3 | 4 | static inline unsigned int video_converter_resolution_changed(struct VideoConverter 
*converter, 5 | AVFrame *frame) { 6 | return converter->in_format != frame->format || converter->in_width != frame->width || 7 | converter->in_height != frame->height; 8 | } 9 | 10 | struct VideoConverter *video_converter_alloc() { 11 | struct VideoConverter *converter = 12 | (struct VideoConverter *)XAV_ALLOC(sizeof(struct VideoConverter)); 13 | if (converter) { 14 | converter->sws_ctx = NULL; 15 | converter->dst_frame = av_frame_alloc(); 16 | } 17 | return converter; 18 | } 19 | 20 | int video_converter_init(struct VideoConverter *converter, int in_width, int in_height, 21 | enum AVPixelFormat in_format, int out_width, int out_height, 22 | enum AVPixelFormat out_format) { 23 | converter->in_width = in_width; 24 | converter->in_height = in_height; 25 | converter->in_format = in_format; 26 | 27 | converter->out_width = out_width; 28 | converter->out_height = out_height; 29 | converter->out_format = out_format; 30 | 31 | AVFrame *dst_frame = converter->dst_frame; 32 | av_frame_unref(dst_frame); 33 | 34 | dst_frame->format = out_format; 35 | 36 | if (out_width == -1 && out_height == -1) { 37 | dst_frame->width = in_width; 38 | dst_frame->height = in_height; 39 | } else if (out_width == -1) { 40 | int width = in_width * out_height / in_height; 41 | width = width + (width % 2); 42 | 43 | dst_frame->width = width; 44 | dst_frame->height = out_height; 45 | } else if (out_height == -1) { 46 | int height = in_height * out_width / in_width; 47 | height = height + (height % 2); 48 | 49 | dst_frame->width = out_width; 50 | dst_frame->height = height; 51 | } else { 52 | dst_frame->width = out_width; 53 | dst_frame->height = out_height; 54 | } 55 | 56 | int ret = av_frame_get_buffer(dst_frame, 0); 57 | if (ret < 0) 58 | return ret; 59 | 60 | converter->sws_ctx = 61 | sws_getContext(in_width, in_height, in_format, dst_frame->width, dst_frame->height, 62 | dst_frame->format, SWS_BILINEAR, NULL, NULL, NULL); 63 | 64 | if (!converter->sws_ctx) { 65 | XAV_LOG_DEBUG("Couldn't get sws 
context"); 66 | return -1; 67 | } 68 | 69 | return 0; 70 | } 71 | 72 | int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame) { 73 | int ret; 74 | 75 | if (video_converter_resolution_changed(converter, src_frame)) { 76 | XAV_LOG_DEBUG("Frame resolution changed"); 77 | sws_freeContext(converter->sws_ctx); 78 | ret = video_converter_init(converter, src_frame->width, src_frame->height, src_frame->format, 79 | converter->out_width, converter->out_height, converter->out_format); 80 | if (ret < 0) { 81 | return ret; 82 | } 83 | } 84 | 85 | converter->dst_frame->pts = src_frame->pts; 86 | 87 | // is this (const uint8_t * const*) cast really correct? 88 | return sws_scale(converter->sws_ctx, (const uint8_t *const *)src_frame->data, src_frame->linesize, 89 | 0, src_frame->height, converter->dst_frame->data, 90 | converter->dst_frame->linesize); 91 | } 92 | 93 | void video_converter_free(struct VideoConverter **converter) { 94 | struct VideoConverter *vc = *converter; 95 | if (vc != NULL) { 96 | if (vc->sws_ctx != NULL) { 97 | sws_freeContext((*converter)->sws_ctx); 98 | } 99 | 100 | if (vc->dst_frame != NULL) { 101 | av_frame_free(&(*converter)->dst_frame); 102 | } 103 | 104 | XAV_FREE(vc); 105 | *converter = NULL; 106 | } 107 | } -------------------------------------------------------------------------------- /c_src/xav/video_converter.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct VideoConverter { 9 | struct SwsContext *sws_ctx; 10 | int in_width; 11 | int in_height; 12 | enum AVPixelFormat in_format; 13 | int out_width; 14 | int out_height; 15 | enum AVPixelFormat out_format; 16 | AVFrame *dst_frame; 17 | }; 18 | 19 | struct VideoConverter *video_converter_alloc(); 20 | 21 | int video_converter_init(struct VideoConverter *converter, int in_width, int in_height, 22 | enum AVPixelFormat in_format, int out_width, int out_height, 
23 | enum AVPixelFormat out_format); 24 | 25 | int video_converter_convert(struct VideoConverter *converter, AVFrame *src_frame); 26 | 27 | void video_converter_free(struct VideoConverter **converter); 28 | -------------------------------------------------------------------------------- /c_src/xav/xav_decoder.c: -------------------------------------------------------------------------------- 1 | #include "xav_decoder.h" 2 | #include "audio_converter.h" 3 | 4 | ErlNifResourceType *xav_decoder_resource_type; 5 | 6 | static int init_audio_converter(struct XavDecoder *xav_decoder); 7 | static int init_video_converter(struct XavDecoder *xav_decoder, AVFrame *frame); 8 | 9 | void free_frames(AVFrame **frames, int size) { 10 | for (int i = 0; i < size; i++) { 11 | av_frame_unref(frames[i]); 12 | } 13 | } 14 | 15 | ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 16 | if (argc != 7) { 17 | return xav_nif_raise(env, "invalid_arg_count"); 18 | } 19 | 20 | ERL_NIF_TERM ret; 21 | char *codec_name = NULL; 22 | char *out_format = NULL; 23 | int channels; 24 | 25 | // resolve codec 26 | if (!xav_nif_get_atom(env, argv[0], &codec_name)) { 27 | return xav_nif_raise(env, "failed_to_get_atom"); 28 | } 29 | 30 | const AVCodec *codec = avcodec_find_decoder_by_name(codec_name); 31 | if (codec == NULL) { 32 | ret = xav_nif_raise(env, "unknown_codec"); 33 | goto clean; 34 | } 35 | 36 | if (codec->type != AVMEDIA_TYPE_VIDEO && codec->type != AVMEDIA_TYPE_AUDIO) { 37 | ret = xav_nif_raise(env, "unsupported_media_type"); 38 | goto clean; 39 | } 40 | 41 | if (!enif_get_int(env, argv[1], &channels)) { 42 | ret = xav_nif_raise(env, "failed_to_get_int"); 43 | goto clean; 44 | } 45 | 46 | // resolve output format 47 | if (!xav_nif_get_atom(env, argv[2], &out_format)) { 48 | ret = xav_nif_raise(env, "failed_to_get_atom"); 49 | goto clean; 50 | } 51 | 52 | enum AVPixelFormat out_video_fmt = AV_PIX_FMT_NONE; 53 | enum AVSampleFormat out_audo_fmt = AV_SAMPLE_FMT_NONE; 54 | if 
(codec->type == AVMEDIA_TYPE_VIDEO && strcmp(out_format, "nil") != 0) { 55 | out_video_fmt = av_get_pix_fmt(out_format); 56 | if (out_video_fmt == AV_PIX_FMT_NONE) { 57 | ret = xav_nif_raise(env, "unknown_out_format"); 58 | goto clean; 59 | } 60 | } else if (codec->type == AVMEDIA_TYPE_AUDIO && strcmp(out_format, "nil") != 0) { 61 | out_audo_fmt = av_get_sample_fmt(out_format); 62 | if (out_audo_fmt == AV_SAMPLE_FMT_NONE) { 63 | ret = xav_nif_raise(env, "unknown_out_format"); 64 | goto clean; 65 | } 66 | } 67 | 68 | // resolve other params 69 | int out_sample_rate; 70 | if (!enif_get_int(env, argv[3], &out_sample_rate)) { 71 | ret = xav_nif_raise(env, "invalid_out_sample_rate"); 72 | goto clean; 73 | } 74 | 75 | int out_channels; 76 | if (!enif_get_int(env, argv[4], &out_channels)) { 77 | ret = xav_nif_raise(env, "invalid_out_channels"); 78 | goto clean; 79 | } 80 | 81 | int out_width; 82 | if (!enif_get_int(env, argv[5], &out_width)) { 83 | ret = xav_nif_raise(env, "failed_to_get_int"); 84 | goto clean; 85 | } 86 | 87 | int out_height; 88 | if (!enif_get_int(env, argv[6], &out_height)) { 89 | ret = xav_nif_raise(env, "failed_to_get_int"); 90 | goto clean; 91 | } 92 | 93 | struct XavDecoder *xav_decoder = 94 | enif_alloc_resource(xav_decoder_resource_type, sizeof(struct XavDecoder)); 95 | xav_decoder->decoder = NULL; 96 | xav_decoder->ac = NULL; 97 | xav_decoder->vc = NULL; 98 | xav_decoder->out_video_fmt = out_video_fmt; 99 | xav_decoder->out_width = out_width; 100 | xav_decoder->out_height = out_height; 101 | xav_decoder->out_audio_fmt = out_audo_fmt; 102 | xav_decoder->out_sample_rate = out_sample_rate; 103 | xav_decoder->out_channels = out_channels; 104 | 105 | xav_decoder->decoder = decoder_alloc(); 106 | if (xav_decoder->decoder == NULL) { 107 | ret = xav_nif_raise(env, "failed_to_allocate_decoder"); 108 | goto clean; 109 | } 110 | 111 | if (decoder_init(xav_decoder->decoder, codec, channels)) { 112 | ret = xav_nif_raise(env, "failed_to_init_decoder"); 113 | 
goto clean; 114 | } 115 | 116 | ret = enif_make_resource(env, xav_decoder); 117 | enif_release_resource(xav_decoder); 118 | 119 | clean: 120 | if (codec_name != NULL) 121 | XAV_FREE(codec_name); 122 | if (out_format != NULL) 123 | XAV_FREE(out_format); 124 | 125 | return ret; 126 | } 127 | 128 | ERL_NIF_TERM convert(ErlNifEnv *env, struct XavDecoder *xav_decoder, AVFrame *frame) { 129 | ERL_NIF_TERM frame_term; 130 | int ret; 131 | 132 | if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_VIDEO) { 133 | XAV_LOG_DEBUG("Converting video to RGB"); 134 | 135 | // no pixel format conversion and no scaling 136 | if (xav_decoder->out_video_fmt == AV_PIX_FMT_NONE && xav_decoder->out_width == -1 && 137 | xav_decoder->out_height == -1) { 138 | return xav_nif_video_frame_to_term(env, frame); 139 | } 140 | 141 | if (xav_decoder->vc == NULL) { 142 | ret = init_video_converter(xav_decoder, frame); 143 | if (ret < 0) { 144 | return xav_nif_raise(env, "failed_to_init_converter"); 145 | } 146 | } 147 | 148 | ret = video_converter_convert(xav_decoder->vc, frame); 149 | if (ret < 0) { 150 | return xav_nif_raise(env, "failed_to_convert"); 151 | } 152 | 153 | frame_term = xav_nif_video_frame_to_term(env, xav_decoder->vc->dst_frame); 154 | 155 | } else if (xav_decoder->decoder->media_type == AVMEDIA_TYPE_AUDIO) { 156 | XAV_LOG_DEBUG("Converting audio to desired out format"); 157 | 158 | uint8_t **out_data; 159 | int out_samples; 160 | int out_size; 161 | 162 | if (xav_decoder->ac == NULL) { 163 | ret = init_audio_converter(xav_decoder); 164 | if (ret < 0) { 165 | return xav_nif_raise(env, "failed_to_init_converter"); 166 | } 167 | } 168 | 169 | ret = audio_converter_convert(xav_decoder->ac, frame, &out_data, &out_samples, &out_size); 170 | if (ret < 0) { 171 | return xav_nif_raise(env, "failed_to_decode"); 172 | } 173 | 174 | frame_term = xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, 175 | xav_decoder->out_audio_fmt, frame->pts); 176 | 177 | av_freep(&out_data[0]); 
178 | } 179 | 180 | return frame_term; 181 | } 182 | /* NIF decode(decoder, data, pts, dts): feeds one packet to the decoder. Returns xav_nif_error "no_keyframe" when decoder_decode reports -2, the atom ok when the decoder needs more input (EAGAIN), raises on any other decode failure, and otherwise returns the converted frame wrapped by xav_nif_ok. */ 183 | ERL_NIF_TERM decode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 184 | ERL_NIF_TERM frame_term; 185 | 186 | if (argc != 4) { 187 | return xav_nif_raise(env, "invalid_arg_count"); 188 | } 189 | 190 | struct XavDecoder *xav_decoder; 191 | if (!enif_get_resource(env, argv[0], xav_decoder_resource_type, (void **)&xav_decoder)) { 192 | return xav_nif_raise(env, "couldnt_get_decoder_resource"); 193 | } 194 | 195 | ErlNifBinary data; 196 | if (!enif_inspect_binary(env, argv[1], &data)) { 197 | return xav_nif_raise(env, "couldnt_inspect_binary"); 198 | } 199 | /* Timestamps are read as 64-bit integers: the packet pts/dts fields are int64_t and enif_get_int fails for values outside the C int range. */ 200 | ErlNifSInt64 pts; 201 | if (!enif_get_int64(env, argv[2], &pts)) { 202 | return xav_nif_raise(env, "couldnt_get_int"); 203 | } 204 | 205 | ErlNifSInt64 dts; 206 | if (!enif_get_int64(env, argv[3], &dts)) { 207 | return xav_nif_raise(env, "couldnt_get_int"); 208 | } 209 | /* The packet borrows the binary's memory; it is only used for the duration of this call. */ 210 | xav_decoder->decoder->pkt->data = data.data; 211 | xav_decoder->decoder->pkt->size = data.size; 212 | xav_decoder->decoder->pkt->pts = pts; 213 | xav_decoder->decoder->pkt->dts = dts; 214 | 215 | int ret = 216 | decoder_decode(xav_decoder->decoder, xav_decoder->decoder->pkt, xav_decoder->decoder->frame); 217 | if (ret == -2) { 218 | return xav_nif_error(env, "no_keyframe"); 219 | } else if (ret == AVERROR(EAGAIN)) { 220 | // Some frames are meant for decoder only 221 | // and they don't include actual video samples. 
222 | decoder_free_frame(xav_decoder->decoder); 223 | return enif_make_atom(env, "ok"); 224 | } else if (ret != 0) { 225 | return xav_nif_raise(env, "failed_to_decode"); 226 | } 227 | 228 | frame_term = convert(env, xav_decoder, xav_decoder->decoder->frame); 229 | 230 | decoder_free_frame(xav_decoder->decoder); 231 | 232 | return xav_nif_ok(env, frame_term); 233 | } 234 | 235 | ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 236 | if (argc != 1) { 237 | return xav_nif_raise(env, "invalid_arg_count"); 238 | } 239 | 240 | struct XavDecoder *xav_decoder; 241 | if (!enif_get_resource(env, argv[0], xav_decoder_resource_type, (void **)&xav_decoder)) { 242 | return xav_nif_raise(env, "couldnt_get_decoder_resource"); 243 | } 244 | 245 | AVFrame *frames[MAX_FLUSH_BUFFER]; 246 | int frames_count = 0; 247 | 248 | for (int i = 0; i < MAX_FLUSH_BUFFER; i++) { 249 | frames[i] = av_frame_alloc(); 250 | } 251 | 252 | int ret = decoder_flush(xav_decoder->decoder, frames, &frames_count); 253 | if (ret < 0) { 254 | free_frames(frames, MAX_FLUSH_BUFFER); 255 | return xav_nif_error(env, "failed_to_flush"); 256 | } 257 | 258 | ERL_NIF_TERM frame_terms[frames_count]; 259 | for (int i = 0; i < frames_count; i++) { 260 | frame_terms[i] = convert(env, xav_decoder, frames[i]); 261 | } 262 | 263 | free_frames(frames, MAX_FLUSH_BUFFER); 264 | 265 | return xav_nif_ok(env, enif_make_list_from_array(env, frame_terms, frames_count)); 266 | } 267 | 268 | ERL_NIF_TERM pixel_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 269 | ERL_NIF_TERM result = enif_make_list(env, 0); 270 | 271 | const AVPixFmtDescriptor *desc = NULL; 272 | 273 | while ((desc = av_pix_fmt_desc_next(desc))) { 274 | ERL_NIF_TERM name = enif_make_atom(env, desc->name); 275 | ERL_NIF_TERM nb_components = enif_make_int(env, desc->nb_components); 276 | ERL_NIF_TERM is_hwaccel = 277 | enif_make_atom(env, desc->flags & AV_PIX_FMT_FLAG_HWACCEL ? 
"true" : "false"); 278 | 279 | result = 280 | enif_make_list_cell(env, enif_make_tuple3(env, name, nb_components, is_hwaccel), result); 281 | } 282 | 283 | return result; 284 | } 285 | 286 | ERL_NIF_TERM sample_formats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 287 | ERL_NIF_TERM result = enif_make_list(env, 0); 288 | 289 | for (int fmt = 0; fmt < AV_SAMPLE_FMT_NB; fmt++) { 290 | enum AVSampleFormat sample_format = (enum AVSampleFormat)fmt; 291 | const char *name = av_get_sample_fmt_name(sample_format); 292 | int nb_bytes = av_get_bytes_per_sample(sample_format); 293 | 294 | ERL_NIF_TERM desc = 295 | enif_make_tuple2(env, enif_make_atom(env, name), enif_make_int(env, nb_bytes)); 296 | 297 | result = enif_make_list_cell(env, desc, result); 298 | } 299 | 300 | return result; 301 | } 302 | 303 | ERL_NIF_TERM list_decoders(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 304 | ERL_NIF_TERM result = enif_make_list(env, 0); 305 | 306 | const AVCodec *codec = NULL; 307 | void *iter = NULL; 308 | 309 | while ((codec = av_codec_iterate(&iter))) { 310 | if (av_codec_is_decoder(codec)) { 311 | ERL_NIF_TERM name = enif_make_atom(env, codec->name); 312 | ERL_NIF_TERM codec_name = enif_make_atom(env, avcodec_get_name(codec->id)); 313 | ERL_NIF_TERM long_name = codec->long_name 314 | ? 
enif_make_string(env, codec->long_name, ERL_NIF_LATIN1) 315 | : enif_make_string(env, "", ERL_NIF_LATIN1); 316 | ERL_NIF_TERM media_type = enif_make_atom(env, av_get_media_type_string(codec->type)); 317 | 318 | ERL_NIF_TERM desc = enif_make_tuple4(env, codec_name, name, long_name, media_type); 319 | result = enif_make_list_cell(env, desc, result); 320 | } 321 | } 322 | 323 | return result; 324 | } 325 | 326 | static int init_audio_converter(struct XavDecoder *xav_decoder) { 327 | xav_decoder->ac = audio_converter_alloc(); 328 | 329 | if (xav_decoder->ac == NULL) { 330 | XAV_LOG_DEBUG("Couldn't allocate converter"); 331 | return -1; 332 | } 333 | 334 | int out_sample_rate; 335 | if (xav_decoder->out_sample_rate == 0) { 336 | out_sample_rate = xav_decoder->decoder->c->sample_rate; 337 | } else { 338 | out_sample_rate = xav_decoder->out_sample_rate; 339 | } 340 | 341 | // If user didn't request any specific format, 342 | // just take the original format but in the packed form. 343 | // We need to call this function here, as in the decoder_init we don't know 344 | // what is the sample_fmt yet. 
345 | if (xav_decoder->out_audio_fmt == AV_SAMPLE_FMT_NONE) { 346 | xav_decoder->out_audio_fmt = av_get_alt_sample_fmt(xav_decoder->decoder->c->sample_fmt, 0); 347 | } 348 | 349 | struct ChannelLayout in_chlayout, out_chlayout; 350 | xav_get_channel_layout_from_context(&in_chlayout, xav_decoder->decoder->c); 351 | 352 | if (xav_decoder->out_channels == 0) { 353 | xav_get_channel_layout_from_context(&out_chlayout, xav_decoder->decoder->c); 354 | } else { 355 | xav_set_default_channel_layout(&out_chlayout, xav_decoder->out_channels); 356 | } 357 | 358 | return audio_converter_init(xav_decoder->ac, in_chlayout, xav_decoder->decoder->c->sample_rate, 359 | xav_decoder->decoder->c->sample_fmt, out_chlayout, out_sample_rate, 360 | xav_decoder->out_audio_fmt); 361 | } 362 | 363 | static int init_video_converter(struct XavDecoder *xav_decoder, AVFrame *frame) { 364 | xav_decoder->vc = video_converter_alloc(); 365 | if (xav_decoder->vc == NULL) { 366 | XAV_LOG_DEBUG("Couldn't allocate video converter"); 367 | return -1; 368 | } 369 | 370 | enum AVPixelFormat out_format = xav_decoder->out_video_fmt; 371 | if (out_format == AV_PIX_FMT_NONE) 372 | out_format = frame->format; 373 | 374 | return video_converter_init(xav_decoder->vc, frame->width, frame->height, frame->format, 375 | xav_decoder->out_width, xav_decoder->out_height, out_format); 376 | } 377 | 378 | void free_xav_decoder(ErlNifEnv *env, void *obj) { 379 | XAV_LOG_DEBUG("Freeing XavDecoder object"); 380 | struct XavDecoder *xav_decoder = (struct XavDecoder *)obj; 381 | if (xav_decoder->decoder != NULL) { 382 | decoder_free(&xav_decoder->decoder); 383 | } 384 | 385 | if (xav_decoder->ac != NULL) { 386 | audio_converter_free(&xav_decoder->ac); 387 | } 388 | 389 | if (xav_decoder->vc != NULL) { 390 | video_converter_free(&xav_decoder->vc); 391 | } 392 | } 393 | 394 | static ErlNifFunc xav_funcs[] = {{"new", 7, new}, 395 | {"decode", 4, decode, ERL_NIF_DIRTY_JOB_CPU_BOUND}, 396 | {"flush", 1, flush, 
ERL_NIF_DIRTY_JOB_CPU_BOUND}, 397 | {"pixel_formats", 0, pixel_formats}, 398 | {"sample_formats", 0, sample_formats}, 399 | {"list_decoders", 0, list_decoders}}; 400 | 401 | static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { 402 | xav_decoder_resource_type = 403 | enif_open_resource_type(env, NULL, "XavDecoder", free_xav_decoder, ERL_NIF_RT_CREATE, NULL); 404 | return 0; 405 | } 406 | 407 | ERL_NIF_INIT(Elixir.Xav.Decoder.NIF, xav_funcs, &load, NULL, NULL, NULL); 408 | -------------------------------------------------------------------------------- /c_src/xav/xav_decoder.h: -------------------------------------------------------------------------------- 1 | #include "audio_converter.h" 2 | #include "decoder.h" 3 | #include "video_converter.h" 4 | 5 | #include 6 | 7 | struct XavDecoder { 8 | struct Decoder *decoder; 9 | // Video params 10 | struct VideoConverter *vc; 11 | enum AVPixelFormat out_video_fmt; 12 | int out_width; 13 | int out_height; 14 | // Audio params 15 | struct AudioConverter *ac; 16 | enum AVSampleFormat out_audio_fmt; 17 | int out_sample_rate; 18 | int out_channels; 19 | }; -------------------------------------------------------------------------------- /c_src/xav/xav_encoder.c: -------------------------------------------------------------------------------- 1 | #include "xav_encoder.h" 2 | #include "channel_layout.h" 3 | 4 | ErlNifResourceType *xav_encoder_resource_type; 5 | 6 | static ERL_NIF_TERM packets_to_term(ErlNifEnv *, struct Encoder *); 7 | static int get_profile(enum AVCodecID, const char *); 8 | static ERL_NIF_TERM codec_get_profiles(ErlNifEnv *, const AVCodec *); 9 | static ERL_NIF_TERM codec_get_sample_formats(ErlNifEnv *, const AVCodec *); 10 | static ERL_NIF_TERM codec_get_sample_rates(ErlNifEnv *, const AVCodec *); 11 | static ERL_NIF_TERM codec_get_channel_layouts(ErlNifEnv *, const AVCodec *); 12 | 13 | ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 14 | if (argc != 2) { 15 | return 
xav_nif_raise(env, "invalid_arg_count"); 16 | } 17 | 18 | ERL_NIF_TERM ret; 19 | struct EncoderConfig encoder_config = {0}; 20 | encoder_config.max_b_frames = -1; 21 | encoder_config.profile = FF_PROFILE_UNKNOWN; 22 | 23 | char *codec_name = NULL, *format = NULL, *profile = NULL; 24 | char *channel_layout = NULL; 25 | int codec_id = 0; 26 | 27 | ErlNifMapIterator iter; 28 | ERL_NIF_TERM key, value; 29 | char *config_name = NULL; 30 | int err; 31 | 32 | if (!xav_nif_get_atom(env, argv[0], &codec_name)) { 33 | return xav_nif_raise(env, "failed_to_get_atom"); 34 | } 35 | 36 | if (!enif_is_map(env, argv[1])) { 37 | return xav_nif_raise(env, "failed_to_get_map"); 38 | } 39 | 40 | enif_map_iterator_create(env, argv[1], &iter, ERL_NIF_MAP_ITERATOR_FIRST); 41 | 42 | while (enif_map_iterator_get_pair(env, &iter, &key, &value)) { 43 | if (!xav_nif_get_atom(env, key, &config_name)) { 44 | ret = xav_nif_raise(env, "failed_to_get_map_key"); 45 | goto clean; 46 | } 47 | 48 | if (strcmp(config_name, "width") == 0) { 49 | err = enif_get_int(env, value, &encoder_config.width); 50 | } else if (strcmp(config_name, "height") == 0) { 51 | err = enif_get_int(env, value, &encoder_config.height); 52 | } else if (strcmp(config_name, "format") == 0) { 53 | err = xav_nif_get_atom(env, value, &format); 54 | } else if (strcmp(config_name, "time_base_num") == 0) { 55 | err = enif_get_int(env, value, &encoder_config.time_base.num); 56 | } else if (strcmp(config_name, "time_base_den") == 0) { 57 | err = enif_get_int(env, value, &encoder_config.time_base.den); 58 | } else if (strcmp(config_name, "gop_size") == 0) { 59 | err = enif_get_int(env, value, &encoder_config.gop_size); 60 | } else if (strcmp(config_name, "max_b_frames") == 0) { 61 | err = enif_get_int(env, value, &encoder_config.max_b_frames); 62 | } else if (strcmp(config_name, "profile") == 0) { 63 | err = xav_nif_get_string(env, value, &profile); 64 | } else if (strcmp(config_name, "codec_id") == 0) { 65 | err = enif_get_int(env, value, 
&codec_id); 66 | } else if (strcmp(config_name, "sample_rate") == 0) { 67 | err = enif_get_int(env, value, &encoder_config.sample_rate); 68 | } else if (strcmp(config_name, "channel_layout") == 0) { 69 | err = xav_nif_get_string(env, value, &channel_layout); 70 | } else { 71 | ret = xav_nif_raise(env, "unknown_config_key"); 72 | goto clean; 73 | } 74 | 75 | if (!err) { 76 | ret = xav_nif_raise(env, "couldnt_read_value"); 77 | goto clean; 78 | } 79 | /* Reset after freeing so the cleanup at the clean label never frees this key twice. */ 80 | XAV_FREE(config_name); config_name = NULL; 81 | enif_map_iterator_next(env, &iter); 82 | } 83 | 84 | if (strcmp(codec_name, "nil") == 0) { 85 | encoder_config.codec = avcodec_find_encoder((enum AVCodecID)codec_id); 86 | } else { 87 | encoder_config.codec = avcodec_find_encoder_by_name(codec_name); 88 | } 89 | 90 | if (!encoder_config.codec) { 91 | ret = xav_nif_raise(env, "unknown_codec"); 92 | goto clean; 93 | } 94 | /* format stays NULL when the config map has no format key; treat that as an unknown format instead of passing NULL to FFmpeg. */ 95 | if (encoder_config.codec->type == AVMEDIA_TYPE_VIDEO) { 96 | encoder_config.format = format != NULL ? av_get_pix_fmt(format) : AV_PIX_FMT_NONE; 97 | if (encoder_config.format == AV_PIX_FMT_NONE) { 98 | ret = xav_nif_raise(env, "unknown_format"); 99 | goto clean; 100 | } 101 | } else { 102 | encoder_config.sample_format = format != NULL ? av_get_sample_fmt(format) : AV_SAMPLE_FMT_NONE; 103 | if (encoder_config.sample_format == AV_SAMPLE_FMT_NONE) { 104 | ret = xav_nif_raise(env, "unknown_format"); 105 | goto clean; 106 | } 107 | 108 | if (!xav_get_channel_layout(channel_layout, &encoder_config.channel_layout)) { 109 | ret = xav_nif_raise(env, "unknown_channel_layout"); 110 | goto clean; 111 | } 112 | } 113 | 114 | if (profile) { 115 | encoder_config.profile = get_profile(encoder_config.codec->id, profile); 116 | if (encoder_config.profile == FF_PROFILE_UNKNOWN) { 117 | ret = xav_nif_raise(env, "invalid_profile"); 118 | goto clean; 119 | } 120 | } 121 | /* NOTE(review): the failure paths after this allocation jump to clean without enif_release_resource, so the resource appears to stay allocated; releasing it there would also need frame initialised to NULL first - confirm against free_xav_encoder and encoder_free. */ 122 | struct XavEncoder *xav_encoder = 123 | enif_alloc_resource(xav_encoder_resource_type, sizeof(struct XavEncoder)); 124 | 125 | xav_encoder->encoder = encoder_alloc(); 126 | if (encoder_init(xav_encoder->encoder, &encoder_config) < 0) { 
127 | ret = xav_nif_raise(env, "failed_to_init_encoder"); 128 | goto clean; 129 | } 130 | 131 | xav_encoder->frame = av_frame_alloc(); 132 | /* For audio the frame format is the sample format parsed above, not the pixel-format field. */ 133 | if (encoder_config.codec->type == AVMEDIA_TYPE_AUDIO) { 134 | xav_encoder->frame->format = encoder_config.sample_format; 135 | xav_encoder->frame->nb_samples = xav_encoder->encoder->c->frame_size; 136 | /* For encoder that accepts dynamic frame size, we set it to 1024. */ 137 | if (xav_encoder->frame->nb_samples == 0) { 138 | xav_encoder->frame->nb_samples = 1024; 139 | } 140 | 141 | if (xav_set_frame_channel_layout(xav_encoder->frame, &encoder_config.channel_layout) < 0) { 142 | ret = xav_nif_raise(env, "failed_to_set_channel_layout"); 143 | goto clean; 144 | } 145 | 146 | if (av_frame_get_buffer(xav_encoder->frame, 0) < 0) { 147 | ret = xav_nif_raise(env, "failed_to_get_buffer"); 148 | goto clean; 149 | } 150 | } 151 | 152 | ret = enif_make_resource(env, xav_encoder); 153 | enif_release_resource(xav_encoder); 154 | /* Free every temporary string that was actually allocated. Assumes xav_nif_get_atom / xav_nif_get_string leave the out-pointer untouched on failure - TODO confirm in utils.c. */ 155 | clean: 156 | if (codec_name != NULL) 157 | XAV_FREE(codec_name); 158 | if (format != NULL) 159 | XAV_FREE(format); 160 | if (config_name != NULL) 161 | XAV_FREE(config_name); 162 | if (profile != NULL) 163 | XAV_FREE(profile); 164 | if (channel_layout != NULL) 165 | XAV_FREE(channel_layout); 166 | enif_map_iterator_destroy(env, &iter); 167 | 168 | return ret; 169 | } 170 | /* NIF encode(encoder, data, pts): wraps the input binary in the encoder's frame, encodes it and returns the produced packets via packets_to_term. */ 171 | ERL_NIF_TERM encode(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 172 | int ret; 173 | 174 | if (argc != 3) { 175 | return xav_nif_raise(env, "invalid_arg_count"); 176 | } 177 | 178 | struct XavEncoder *xav_encoder; 179 | if (!enif_get_resource(env, argv[0], xav_encoder_resource_type, (void **)&xav_encoder)) { 180 | return xav_nif_raise(env, "invalid_resource"); 181 | } 182 | 183 | ErlNifBinary input; 184 | if (!enif_inspect_binary(env, argv[1], &input)) { 185 | return xav_nif_raise(env, "failed_to_inspect_binary"); 186 | } 187 | /* 64-bit read: the frame pts field is int64_t and enif_get_int fails outside the C int range. */ 188 | ErlNifSInt64 pts; 189 | if (!enif_get_int64(env, argv[2], &pts)) { 190 | return xav_nif_raise(env, "failed_to_get_int"); 191 | } 192 | 193 | 
AVFrame *frame = xav_encoder->frame; 194 | if (xav_encoder->encoder->codec->type == AVMEDIA_TYPE_VIDEO) { 195 | frame->width = xav_encoder->encoder->c->width; 196 | frame->height = xav_encoder->encoder->c->height; 197 | frame->format = xav_encoder->encoder->c->pix_fmt; 198 | frame->pts = pts; 199 | 200 | ret = av_image_fill_arrays(frame->data, frame->linesize, input.data, frame->format, 201 | frame->width, frame->height, 1); 202 | if (ret < 0) { 203 | return xav_nif_raise(env, "failed_to_fill_arrays"); 204 | } 205 | } else { 206 | frame->pts = pts; 207 | frame->nb_samples = input.size / av_get_bytes_per_sample(xav_encoder->encoder->c->sample_fmt); 208 | 209 | int nb_channels = xav_get_nb_channels(frame); 210 | ret = av_samples_fill_arrays(frame->data, frame->linesize, input.data, nb_channels, 211 | frame->nb_samples, xav_encoder->encoder->c->sample_fmt, 1); 212 | 213 | if (ret < 0) { 214 | return xav_nif_raise(env, "failed_to_fill_arrays"); 215 | } 216 | } 217 | 218 | ret = encoder_encode(xav_encoder->encoder, frame); 219 | if (ret < 0) { 220 | return xav_nif_raise(env, "failed_to_encode"); 221 | } 222 | 223 | return packets_to_term(env, xav_encoder->encoder); 224 | } 225 | 226 | ERL_NIF_TERM flush(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 227 | if (argc != 1) { 228 | return xav_nif_raise(env, "invalid_arg_count"); 229 | } 230 | 231 | struct XavEncoder *xav_encoder; 232 | if (!enif_get_resource(env, argv[0], xav_encoder_resource_type, (void **)&xav_encoder)) { 233 | return xav_nif_raise(env, "invalid_resource"); 234 | } 235 | 236 | int ret = encoder_encode(xav_encoder->encoder, NULL); 237 | if (ret < 0) { 238 | return xav_nif_raise(env, "failed_to_encode"); 239 | } 240 | 241 | return packets_to_term(env, xav_encoder->encoder); 242 | } 243 | 244 | ERL_NIF_TERM list_encoders(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 245 | ERL_NIF_TERM result = enif_make_list(env, 0); 246 | 247 | const AVCodec *codec = NULL; 248 | void *iter = NULL; 249 | 250 
| while ((codec = av_codec_iterate(&iter))) { 251 | if (av_codec_is_encoder(codec)) { 252 | ERL_NIF_TERM name = enif_make_atom(env, codec->name); 253 | ERL_NIF_TERM codec_name = enif_make_atom(env, avcodec_get_name(codec->id)); 254 | ERL_NIF_TERM long_name = codec->long_name 255 | ? enif_make_string(env, codec->long_name, ERL_NIF_LATIN1) 256 | : enif_make_string(env, "", ERL_NIF_LATIN1); 257 | ERL_NIF_TERM media_type = enif_make_atom(env, av_get_media_type_string(codec->type)); 258 | ERL_NIF_TERM codec_id = enif_make_int64(env, codec->id); 259 | ERL_NIF_TERM profiles = codec_get_profiles(env, codec); 260 | ERL_NIF_TERM sample_formats = codec_get_sample_formats(env, codec); 261 | ERL_NIF_TERM sample_rates = codec_get_sample_rates(env, codec); 262 | 263 | ERL_NIF_TERM desc = enif_make_tuple8(env, codec_name, name, long_name, media_type, codec_id, 264 | profiles, sample_formats, sample_rates); 265 | result = enif_make_list_cell(env, desc, result); 266 | } 267 | } 268 | 269 | return result; 270 | } 271 | 272 | void free_xav_encoder(ErlNifEnv *env, void *obj) { 273 | XAV_LOG_DEBUG("Freeing XavEncoder object"); 274 | struct XavEncoder *xav_encoder = (struct XavEncoder *)obj; 275 | 276 | if (xav_encoder->encoder != NULL) { 277 | encoder_free(&xav_encoder->encoder); 278 | } 279 | 280 | if (xav_encoder->frame != NULL) { 281 | av_frame_free(&xav_encoder->frame); 282 | } 283 | } 284 | 285 | static ERL_NIF_TERM packets_to_term(ErlNifEnv *env, struct Encoder *encoder) { 286 | ERL_NIF_TERM ret; 287 | ERL_NIF_TERM *packets = XAV_ALLOC(sizeof(ERL_NIF_TERM) * encoder->num_packets); 288 | for (int i = 0; i < encoder->num_packets; i++) { 289 | packets[i] = xav_nif_packet_to_term(env, encoder->packets[i]); 290 | } 291 | 292 | ret = enif_make_list_from_array(env, packets, encoder->num_packets); 293 | 294 | for (int i = 0; i < encoder->num_packets; i++) 295 | av_packet_unref(encoder->packets[i]); 296 | XAV_FREE(packets); 297 | 298 | return ret; 299 | } 300 | 301 | static int 
get_profile(enum AVCodecID codec, const char *profile_name) { 302 | const AVCodecDescriptor *desc = avcodec_descriptor_get(codec); /* may be NULL when the codec id has no descriptor */ 303 | const AVProfile *profile = desc != NULL ? desc->profiles : NULL; 304 | 305 | if (profile == NULL) { 306 | return FF_PROFILE_UNKNOWN; 307 | } 308 | /* Walk the FF_PROFILE_UNKNOWN-terminated table; falling off the end returns the terminator, i.e. FF_PROFILE_UNKNOWN. */ 309 | while (profile->profile != FF_PROFILE_UNKNOWN) { 310 | if (strcmp(profile->name, profile_name) == 0) { 311 | break; 312 | } 313 | 314 | profile++; 315 | } 316 | 317 | return profile->profile; 318 | } 319 | /* Builds a list of profile names (as strings) for the codec; empty when the codec has no descriptor or no profile table. */ 320 | static ERL_NIF_TERM codec_get_profiles(ErlNifEnv *env, const AVCodec *codec) { 321 | ERL_NIF_TERM result = enif_make_list(env, 0); 322 | 323 | const AVCodecDescriptor *desc = avcodec_descriptor_get(codec->id); 324 | const AVProfile *profile = desc != NULL ? desc->profiles : NULL; 325 | 326 | if (profile == NULL) { 327 | return result; 328 | } 329 | 330 | while (profile->profile != FF_PROFILE_UNKNOWN) { 331 | ERL_NIF_TERM profile_name = enif_make_string(env, profile->name, ERL_NIF_LATIN1); 332 | result = enif_make_list_cell(env, profile_name, result); 333 | 334 | profile++; 335 | } 336 | 337 | return result; 338 | } 339 | /* Builds a list of supported sample format atoms; empty for non-audio codecs or when the codec does not advertise its formats (sample_fmts == NULL), mirroring the NULL check in codec_get_sample_rates. */ 340 | static ERL_NIF_TERM codec_get_sample_formats(ErlNifEnv *env, const AVCodec *codec) { 341 | ERL_NIF_TERM result = enif_make_list(env, 0); 342 | 343 | if (codec->type != AVMEDIA_TYPE_AUDIO || codec->sample_fmts == NULL) { 344 | return result; 345 | } 346 | 347 | const enum AVSampleFormat *sample_format = codec->sample_fmts; 348 | while (*sample_format != AV_SAMPLE_FMT_NONE) { 349 | ERL_NIF_TERM format_name = enif_make_atom(env, av_get_sample_fmt_name(*sample_format)); 350 | result = enif_make_list_cell(env, format_name, result); 351 | 352 | sample_format++; 353 | } 354 | 355 | return result; 356 | } 357 | 358 | static ERL_NIF_TERM codec_get_sample_rates(ErlNifEnv *env, const AVCodec *codec) { 359 | ERL_NIF_TERM result = enif_make_list(env, 0); 360 | 361 | if (codec->type != AVMEDIA_TYPE_AUDIO || codec->supported_samplerates == NULL) { 362 | return result; 363 | } 364 | 365 | const int *sample_rate = 
codec->supported_samplerates; 366 | 367 | while (*sample_rate != 0) { 368 | result = enif_make_list_cell(env, enif_make_int(env, *sample_rate), result); 369 | sample_rate++; 370 | } 371 | 372 | return result; 373 | } 374 | 375 | static ErlNifFunc xav_funcs[] = {{"new", 2, new}, 376 | {"encode", 3, encode}, 377 | {"flush", 1, flush}, 378 | {"list_encoders", 0, list_encoders}}; 379 | 380 | static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { 381 | xav_encoder_resource_type = 382 | enif_open_resource_type(env, NULL, "XavEncoder", free_xav_encoder, ERL_NIF_RT_CREATE, NULL); 383 | return 0; 384 | } 385 | 386 | ERL_NIF_INIT(Elixir.Xav.Encoder.NIF, xav_funcs, &load, NULL, NULL, NULL); -------------------------------------------------------------------------------- /c_src/xav/xav_encoder.h: -------------------------------------------------------------------------------- 1 | #include "encoder.h" 2 | #include "utils.h" 3 | #include 4 | 5 | struct XavEncoder { 6 | struct Encoder *encoder; 7 | AVFrame *frame; 8 | }; 9 | -------------------------------------------------------------------------------- /c_src/xav/xav_reader.c: -------------------------------------------------------------------------------- 1 | #include "xav_reader.h" 2 | 3 | static int init_audio_converter(struct XavReader *xav_reader); 4 | static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame); 5 | 6 | ErlNifResourceType *xav_reader_resource_type; 7 | 8 | ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 9 | if (argc != 6) { 10 | return xav_nif_raise(env, "invalid_arg_count"); 11 | } 12 | 13 | ErlNifBinary bin; 14 | if (!enif_inspect_binary(env, argv[0], &bin)) { 15 | return xav_nif_raise(env, "invalid_path"); 16 | } 17 | 18 | int device_flag; 19 | if (!enif_get_int(env, argv[1], &device_flag)) { 20 | return xav_nif_raise(env, "invalid_device_flag"); 21 | } 22 | 23 | int media_type_flag; 24 | enum AVMediaType media_type; 25 | if (!enif_get_int(env, 
argv[2], &media_type_flag)) { 26 | return xav_nif_raise(env, "invalid_media_type_flag"); 27 | } 28 | 29 | if (media_type_flag == 1) { 30 | media_type = AVMEDIA_TYPE_VIDEO; 31 | } else { 32 | media_type = AVMEDIA_TYPE_AUDIO; 33 | } 34 | 35 | unsigned int out_format_len; 36 | if (!enif_get_atom_length(env, argv[3], &out_format_len, ERL_NIF_LATIN1)) { 37 | return xav_nif_raise(env, "failed_to_get_atom_length"); 38 | } 39 | 40 | char *out_format = (char *)XAV_ALLOC((out_format_len + 1) * sizeof(char *)); 41 | 42 | if (enif_get_atom(env, argv[3], out_format, out_format_len + 1, ERL_NIF_LATIN1) == 0) { 43 | return xav_nif_raise(env, "failed_to_get_atom"); 44 | } 45 | 46 | int out_sample_rate; 47 | if (!enif_get_int(env, argv[4], &out_sample_rate)) { 48 | return xav_nif_raise(env, "invalid_out_sample_rate"); 49 | } 50 | 51 | int out_channels; 52 | if (!enif_get_int(env, argv[5], &out_channels)) { 53 | return xav_nif_raise(env, "invalid_out_channels"); 54 | } 55 | 56 | struct XavReader *xav_reader = 57 | enif_alloc_resource(xav_reader_resource_type, sizeof(struct XavReader)); 58 | xav_reader->reader = NULL; 59 | xav_reader->ac = NULL; 60 | xav_reader->vc = NULL; 61 | xav_reader->out_format = out_format; 62 | xav_reader->out_sample_rate = out_sample_rate; 63 | xav_reader->out_channels = out_channels; 64 | 65 | xav_reader->reader = reader_alloc(); 66 | if (xav_reader->reader == NULL) { 67 | return xav_nif_raise(env, "couldnt_allocate_reader"); 68 | } 69 | 70 | int ret = reader_init(xav_reader->reader, bin.data, bin.size, device_flag, media_type); 71 | 72 | if (ret == -1) { 73 | return xav_nif_error(env, "couldnt_open_avformat_input"); 74 | } else if (ret == -2) { 75 | return xav_nif_raise(env, "couldnt_create_new_reader"); 76 | } 77 | 78 | if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) { 79 | ret = init_audio_converter(xav_reader); 80 | if (ret < 0) { 81 | return xav_nif_raise(env, "couldnt_init_converter"); 82 | } 83 | } 84 | 85 | ERL_NIF_TERM ok_term = 
enif_make_atom(env, "ok"); 86 | ERL_NIF_TERM bit_rate_term = enif_make_int64(env, xav_reader->reader->fmt_ctx->bit_rate); 87 | ERL_NIF_TERM duration_term = 88 | enif_make_int64(env, xav_reader->reader->fmt_ctx->duration / AV_TIME_BASE); 89 | ERL_NIF_TERM codec_term = enif_make_atom(env, xav_reader->reader->codec->name); 90 | ERL_NIF_TERM xav_term = enif_make_resource(env, xav_reader); 91 | enif_release_resource(xav_reader); 92 | 93 | if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) { 94 | ERL_NIF_TERM in_sample_rate_term = enif_make_int(env, xav_reader->reader->c->sample_rate); 95 | ERL_NIF_TERM in_format_term = 96 | enif_make_atom(env, av_get_sample_fmt_name(xav_reader->reader->c->sample_fmt)); 97 | 98 | #if LIBAVUTIL_VERSION_MAJOR >= 58 99 | ERL_NIF_TERM in_channels_term = 100 | enif_make_int(env, xav_reader->reader->c->ch_layout.nb_channels); 101 | #else 102 | ERL_NIF_TERM in_channels_term = enif_make_int(env, xav_reader->reader->c->channels); 103 | #endif 104 | 105 | ERL_NIF_TERM out_format_term = 106 | enif_make_atom(env, av_get_sample_fmt_name(xav_reader->ac->out_sample_fmt)); 107 | ERL_NIF_TERM out_sample_rate_term = enif_make_int(env, xav_reader->ac->out_sample_rate); 108 | ERL_NIF_TERM out_channels_term = enif_make_int(env, xav_reader->ac->out_channels); 109 | return enif_make_tuple(env, 11, ok_term, xav_term, in_format_term, out_format_term, 110 | in_sample_rate_term, out_sample_rate_term, in_channels_term, 111 | out_channels_term, bit_rate_term, duration_term, codec_term); 112 | 113 | } else if (xav_reader->reader->media_type == AVMEDIA_TYPE_VIDEO) { 114 | ERL_NIF_TERM in_format_term = 115 | enif_make_atom(env, av_get_pix_fmt_name(xav_reader->reader->c->pix_fmt)); 116 | ERL_NIF_TERM out_format_term = enif_make_atom(env, "rgb24"); 117 | ERL_NIF_TERM framerate_num_term = enif_make_int(env, xav_reader->reader->framerate.num); 118 | ERL_NIF_TERM framerate_den_term = enif_make_int(env, xav_reader->reader->framerate.den); 119 | ERL_NIF_TERM 
framerate_term = enif_make_tuple(env, 2, framerate_num_term, framerate_den_term); 120 | 121 | return enif_make_tuple(env, 8, ok_term, xav_term, in_format_term, out_format_term, 122 | bit_rate_term, duration_term, codec_term, framerate_term); 123 | } else { 124 | return xav_nif_raise(env, "unknown_media_type"); 125 | } 126 | } 127 | 128 | ERL_NIF_TERM next_frame(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 129 | ERL_NIF_TERM frame_term; 130 | 131 | if (argc != 1) { 132 | return xav_nif_raise(env, "invalid_arg_count"); 133 | } 134 | 135 | struct XavReader *xav_reader; 136 | if (!enif_get_resource(env, argv[0], xav_reader_resource_type, (void **)&xav_reader)) { 137 | return xav_nif_raise(env, "couldnt_get_reader_resource"); 138 | } 139 | 140 | int ret = reader_next_frame(xav_reader->reader); 141 | 142 | if (ret == AVERROR_EOF) { 143 | return xav_nif_error(env, "eof"); 144 | } else if (ret != 0) { 145 | return xav_nif_raise(env, "receive_frame"); 146 | } 147 | 148 | // convert 149 | if (xav_reader->reader->media_type == AVMEDIA_TYPE_VIDEO) { 150 | XAV_LOG_DEBUG("Converting video to RGB"); 151 | 152 | if (xav_reader->vc == NULL) { 153 | ret = init_video_converter(xav_reader, xav_reader->reader->frame); 154 | if (ret < 0) { 155 | return xav_nif_raise(env, "failed_to_init_converter"); 156 | } 157 | } 158 | 159 | ret = video_converter_convert(xav_reader->vc, xav_reader->reader->frame); 160 | if (ret <= 0) { 161 | return xav_nif_raise(env, "failed_to_read"); 162 | } 163 | 164 | frame_term = xav_nif_video_frame_to_term(env, xav_reader->vc->dst_frame); 165 | } else if (xav_reader->reader->media_type == AVMEDIA_TYPE_AUDIO) { 166 | XAV_LOG_DEBUG("Converting audio to desired out format"); 167 | 168 | uint8_t **out_data; 169 | int out_samples; 170 | int out_size; 171 | 172 | ret = audio_converter_convert(xav_reader->ac, xav_reader->reader->frame, &out_data, 173 | &out_samples, &out_size); 174 | if (ret < 0) { 175 | return xav_nif_raise(env, "failed_to_read"); 176 | } 
177 | 178 | frame_term = 179 | xav_nif_audio_frame_to_term(env, out_data, out_samples, out_size, 180 | xav_reader->ac->out_sample_fmt, xav_reader->reader->frame->pts); 181 | av_freep(&out_data[0]); 182 | } 183 | 184 | reader_free_frame(xav_reader->reader); 185 | 186 | return xav_nif_ok(env, frame_term); 187 | } 188 | 189 | ERL_NIF_TERM seek(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 190 | ERL_NIF_TERM frame_term; 191 | 192 | if (argc != 2) { 193 | return xav_nif_raise(env, "invalid_arg_count"); 194 | } 195 | 196 | struct XavReader *xav_reader; 197 | if (!enif_get_resource(env, argv[0], xav_reader_resource_type, (void **)&xav_reader)) { 198 | return xav_nif_raise(env, "couldnt_get_reader_resource"); 199 | } 200 | 201 | double time_in_seconds; 202 | if (!enif_get_double(env, argv[1], &time_in_seconds)) { 203 | return xav_nif_raise(env, "invalid_time_in_seconds"); 204 | } 205 | 206 | int ret = reader_seek(xav_reader->reader, time_in_seconds); 207 | 208 | if (ret < 0) { 209 | return xav_nif_raise(env, "failed to seek"); 210 | } 211 | 212 | return enif_make_atom(env, "ok"); 213 | } 214 | 215 | static int init_audio_converter(struct XavReader *xav_reader) { 216 | xav_reader->ac = audio_converter_alloc(); 217 | 218 | if (xav_reader->ac == NULL) { 219 | XAV_LOG_DEBUG("Couldn't allocate converter"); 220 | return -1; 221 | } 222 | 223 | int out_sample_rate; 224 | if (xav_reader->out_sample_rate == 0) { 225 | out_sample_rate = xav_reader->reader->c->sample_rate; 226 | } else { 227 | out_sample_rate = xav_reader->out_sample_rate; 228 | } 229 | 230 | enum AVSampleFormat out_sample_fmt; 231 | if (strcmp(xav_reader->out_format, "nil") == 0) { 232 | out_sample_fmt = av_get_alt_sample_fmt(xav_reader->reader->c->sample_fmt, 0); 233 | } else { 234 | out_sample_fmt = av_get_sample_fmt(xav_reader->out_format); 235 | if (out_sample_fmt == AV_SAMPLE_FMT_NONE) { 236 | return -1; 237 | } 238 | } 239 | 240 | struct ChannelLayout in_chlayout, out_chlayout; 241 | #if 
LIBAVUTIL_VERSION_MAJOR >= 58 242 | in_chlayout.layout = xav_reader->reader->c->ch_layout; 243 | if (xav_reader->out_channels == 0) { 244 | out_chlayout.layout = in_chlayout.layout; 245 | } else { 246 | av_channel_layout_default(&out_chlayout.layout, xav_reader->out_channels); 247 | } 248 | #else 249 | in_chlayout.layout = xav_reader->reader->c->channel_layout; 250 | 251 | if (xav_reader->reader->c->channel_layout == 0 && xav_reader->reader->c->channels > 0) { 252 | // In newer FFmpeg versions, 0 means that the order of channels is 253 | // unspecified but there still might be information about channels number. 254 | // Let's check againts it and take default channel order for the given channels number. 255 | // This is also what newer FFmpeg versions do under the hood when passing 256 | // unspecified channel order. 257 | XAV_LOG_DEBUG("Channel layout unset. Setting to default for channels number: %d", 258 | xav_reader->reader->c->channels); 259 | in_chlayout.layout = av_get_default_channel_layout(xav_reader->reader->c->channels); 260 | } else if (xav_reader->reader->c->channel_layout == 0) { 261 | XAV_LOG_DEBUG("Both channel layout and channels are unset. 
Cannot init converter."); 262 | return -1; 263 | } 264 | 265 | if (xav_reader->out_channels == 0) { 266 | out_chlayout.layout = in_chlayout.layout; 267 | } else { 268 | out_chlayout.layout = av_get_default_channel_layout(xav_reader->out_channels); 269 | } 270 | #endif 271 | 272 | return audio_converter_init(xav_reader->ac, in_chlayout, xav_reader->reader->c->sample_rate, 273 | xav_reader->reader->c->sample_fmt, out_chlayout, out_sample_rate, 274 | out_sample_fmt); 275 | } 276 | 277 | static int init_video_converter(struct XavReader *xav_reader, AVFrame *frame) { 278 | xav_reader->vc = video_converter_alloc(); 279 | if (xav_reader->vc == NULL) { 280 | XAV_LOG_DEBUG("Couldn't allocate video converter"); 281 | return -1; 282 | } 283 | 284 | return video_converter_init(xav_reader->vc, frame->width, frame->height, frame->format, 285 | frame->width, frame->height, AV_PIX_FMT_RGB24); 286 | } 287 | 288 | void free_xav_reader(ErlNifEnv *env, void *obj) { 289 | XAV_LOG_DEBUG("Freeing XavReader object"); 290 | struct XavReader *xav_reader = (struct XavReader *)obj; 291 | if (xav_reader->reader != NULL) { 292 | reader_free(&xav_reader->reader); 293 | } 294 | 295 | if (xav_reader->ac != NULL) { 296 | audio_converter_free(&xav_reader->ac); 297 | } 298 | 299 | if (xav_reader->vc != NULL) { 300 | video_converter_free(&xav_reader->vc); 301 | } 302 | } 303 | 304 | static ErlNifFunc xav_funcs[] = {{"new", 6, new}, 305 | {"next_frame", 1, next_frame, ERL_NIF_DIRTY_JOB_CPU_BOUND}, 306 | {"seek", 2, seek, ERL_NIF_DIRTY_JOB_CPU_BOUND}}; 307 | 308 | static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { 309 | 310 | xav_reader_resource_type = 311 | enif_open_resource_type(env, NULL, "XavReader", free_xav_reader, ERL_NIF_RT_CREATE, NULL); 312 | return 0; 313 | } 314 | 315 | ERL_NIF_INIT(Elixir.Xav.Reader.NIF, xav_funcs, &load, NULL, NULL, NULL); 316 | -------------------------------------------------------------------------------- /c_src/xav/xav_reader.h: 
-------------------------------------------------------------------------------- 1 | #include "audio_converter.h" 2 | #include "reader.h" 3 | #include "video_converter.h" 4 | 5 | struct XavReader { 6 | struct Reader *reader; 7 | struct AudioConverter *ac; 8 | struct VideoConverter *vc; 9 | char *out_format; 10 | int out_sample_rate; 11 | int out_channels; 12 | }; -------------------------------------------------------------------------------- /c_src/xav/xav_video_converter.c: -------------------------------------------------------------------------------- 1 | #include "xav_video_converter.h" 2 | 3 | ErlNifResourceType *xav_video_converter_resource_type; 4 | 5 | static int init_video_converter(struct XavVideoConverter *converter) { 6 | converter->vc = video_converter_alloc(); 7 | if (converter->vc == NULL) { 8 | return -1; 9 | } 10 | 11 | AVFrame *in_frame = converter->frame; 12 | 13 | enum AVPixelFormat out_pix_fmt = converter->out_format; 14 | if (out_pix_fmt == AV_PIX_FMT_NONE) { 15 | out_pix_fmt = in_frame->format; 16 | } 17 | 18 | return video_converter_init(converter->vc, in_frame->width, in_frame->height, in_frame->format, 19 | converter->out_width, converter->out_height, out_pix_fmt); 20 | } 21 | 22 | ERL_NIF_TERM new (ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 23 | if (argc != 3) { 24 | return xav_nif_error(env, "invalid_arg_count"); 25 | } 26 | 27 | ERL_NIF_TERM ret; 28 | enum AVPixelFormat pix_fmt = AV_PIX_FMT_NONE; 29 | int width, height; 30 | char *format = NULL; 31 | 32 | if (!xav_nif_get_atom(env, argv[0], &format)) { 33 | return xav_nif_raise(env, "failed_to_get_atom"); 34 | } 35 | 36 | if (strcmp(format, "nil") != 0) { 37 | pix_fmt = av_get_pix_fmt(format); 38 | if (pix_fmt == AV_PIX_FMT_NONE) { 39 | ret = xav_nif_raise(env, "unknown_format"); 40 | goto clean; 41 | } 42 | } 43 | 44 | if (!enif_get_int(env, argv[1], &width)) { 45 | ret = xav_nif_raise(env, "failed_to_get_int"); 46 | goto clean; 47 | } 48 | 49 | if (!enif_get_int(env, 
argv[2], &height)) { 50 | ret = xav_nif_raise(env, "failed_to_get_int"); 51 | goto clean; 52 | } 53 | 54 | struct XavVideoConverter *xav_video_converter = 55 | enif_alloc_resource(xav_video_converter_resource_type, sizeof(struct XavVideoConverter)); 56 | xav_video_converter->vc = NULL; 57 | xav_video_converter->frame = av_frame_alloc(); 58 | xav_video_converter->out_format = pix_fmt; 59 | xav_video_converter->out_width = width; 60 | xav_video_converter->out_height = height; 61 | 62 | ret = enif_make_resource(env, xav_video_converter); 63 | enif_release_resource(xav_video_converter); 64 | 65 | clean: 66 | XAV_FREE(format); 67 | 68 | return ret; 69 | } 70 | 71 | ERL_NIF_TERM convert(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { 72 | if (argc != 5) { 73 | return xav_nif_raise(env, "invalid_arg_count"); 74 | } 75 | 76 | struct XavVideoConverter *xav_video_converter; 77 | if (!enif_get_resource(env, argv[0], xav_video_converter_resource_type, 78 | (void **)&xav_video_converter)) { 79 | return xav_nif_raise(env, "couldnt_get_converter_resource"); 80 | } 81 | 82 | ERL_NIF_TERM ret; 83 | ErlNifBinary in_data; 84 | int width, height; 85 | char *format = NULL; 86 | enum AVPixelFormat pix_fmt; 87 | 88 | if (!enif_inspect_binary(env, argv[1], &in_data)) { 89 | return xav_nif_raise(env, "failed_to_inspect_binary"); 90 | } 91 | 92 | if (!enif_get_int(env, argv[2], &width)) { 93 | return xav_nif_raise(env, "failed_to_get_int"); 94 | } 95 | 96 | if (!enif_get_int(env, argv[3], &height)) { 97 | return xav_nif_raise(env, "failed_to_get_int"); 98 | } 99 | 100 | if (!xav_nif_get_atom(env, argv[4], &format)) { 101 | return xav_nif_raise(env, "failed_to_get_atom"); 102 | } 103 | 104 | pix_fmt = av_get_pix_fmt(format); 105 | if (pix_fmt == AV_PIX_FMT_NONE) { 106 | ret = xav_nif_raise(env, "unknown_format"); 107 | goto clean; 108 | } 109 | 110 | AVFrame *src_frame = xav_video_converter->frame; 111 | src_frame->width = width; 112 | src_frame->height = height; 113 | 
src_frame->format = pix_fmt; 114 | 115 | int int_ret = av_image_fill_arrays(src_frame->data, src_frame->linesize, in_data.data, 116 | src_frame->format, width, height, 1); 117 | 118 | if (int_ret < 0) { 119 | ret = xav_nif_raise(env, "failed_to_fill_arrays"); 120 | goto clean; 121 | } 122 | 123 | if (xav_video_converter->vc == NULL) { 124 | if (init_video_converter(xav_video_converter) < 0) { 125 | ret = xav_nif_raise(env, "failed_to_init_converter"); 126 | goto clean; 127 | } 128 | } 129 | 130 | if (video_converter_convert(xav_video_converter->vc, src_frame) < 0) { 131 | ret = xav_nif_raise(env, "failed_to_convert"); 132 | goto clean; 133 | } 134 | 135 | ret = xav_nif_video_frame_to_term(env, xav_video_converter->vc->dst_frame); 136 | 137 | clean: 138 | if (format != NULL) 139 | XAV_FREE(format); 140 | 141 | return ret; 142 | } 143 | 144 | void free_xav_video_converter(ErlNifEnv *env, void *obj) { 145 | XAV_LOG_DEBUG("Freeing XavVideoConverter object"); 146 | struct XavVideoConverter *xav_video_converter = (struct XavVideoConverter *)obj; 147 | if (xav_video_converter->vc != NULL) { 148 | video_converter_free(&xav_video_converter->vc); 149 | } 150 | 151 | av_frame_free(&xav_video_converter->frame); 152 | } 153 | 154 | static ErlNifFunc xav_funcs[] = {{"new", 3, new}, 155 | {"convert", 5, convert, ERL_NIF_DIRTY_JOB_CPU_BOUND}}; 156 | 157 | static int load(ErlNifEnv *env, void **priv, ERL_NIF_TERM load_info) { 158 | xav_video_converter_resource_type = enif_open_resource_type( 159 | env, NULL, "XavVideoConverter", free_xav_video_converter, ERL_NIF_RT_CREATE, NULL); 160 | return 0; 161 | } 162 | 163 | ERL_NIF_INIT(Elixir.Xav.VideoConverter.NIF, xav_funcs, &load, NULL, NULL, NULL); -------------------------------------------------------------------------------- /c_src/xav/xav_video_converter.h: -------------------------------------------------------------------------------- 1 | #include "utils.h" 2 | #include "video_converter.h" 3 | 4 | struct XavVideoConverter { 5 | 
struct VideoConverter *vc; 6 | enum AVPixelFormat out_format; 7 | int out_width; 8 | int out_height; 9 | AVFrame *frame; 10 | }; -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | codecov: 2 | require_ci_to_pass: false 3 | 4 | comment: 5 | layout: "header, diff, files, footer" 6 | behavior: default 7 | 8 | coverage: 9 | status: 10 | project: 11 | default: 12 | informational: true 13 | patch: 14 | default: 15 | informational: true 16 | 17 | github_checks: 18 | annotations: false -------------------------------------------------------------------------------- /lib/xav.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav do 2 | @moduledoc File.read!("README.md") 3 | 4 | @type encoder :: %{ 5 | codec: atom(), 6 | name: atom(), 7 | long_name: String.t(), 8 | media_type: atom(), 9 | profiles: [String.t()], 10 | sample_formats: [atom()] 11 | } 12 | 13 | @type decoder :: %{ 14 | codec: atom(), 15 | name: atom(), 16 | long_name: String.t(), 17 | media_type: atom() 18 | } 19 | 20 | @doc """ 21 | Get all available pixel formats. 22 | 23 | The result is a list of 3-element tuples `{name, nb_components, hw_accelerated_format?}`: 24 | * `name` - The name of the pixel format. 25 | * `nb_components` - The number of the components in the pixel format. 26 | * `hw_accelerated_format?` - Whether the pixel format is a hardware accelerated format. 27 | """ 28 | @spec pixel_formats() :: [{atom(), integer(), boolean()}] 29 | def pixel_formats(), do: Xav.Decoder.NIF.pixel_formats() |> Enum.reverse() 30 | 31 | @doc """ 32 | Get all available audio sample formats. 33 | 34 | The result is a list of 2-element tuples `{name, nb_bytes}`: 35 | * `name` - The name of the sample format. 36 | * `nb_bytes` - The number of bytes per sample. 
37 | """ 38 | @spec sample_formats() :: [{atom(), integer()}] 39 | def sample_formats(), do: Xav.Decoder.NIF.sample_formats() |> Enum.reverse() 40 | 41 | @doc """ 42 | List all decoders. 43 | """ 44 | @spec list_decoders() :: [decoder()] 45 | def list_decoders() do 46 | Xav.Decoder.NIF.list_decoders() 47 | |> Enum.map(fn {codec, name, long_name, media_type} -> 48 | %{ 49 | codec: codec, 50 | name: name, 51 | long_name: List.to_string(long_name), 52 | media_type: media_type 53 | } 54 | end) 55 | |> Enum.reverse() 56 | end 57 | 58 | @doc """ 59 | List all encoders. 60 | """ 61 | @spec list_encoders() :: [encoder()] 62 | def list_encoders() do 63 | Xav.Encoder.NIF.list_encoders() 64 | |> Enum.map(fn {family_name, name, long_name, media_type, _codec_id, profiles, sample_formats, 65 | sample_rates} -> 66 | %{ 67 | codec: family_name, 68 | name: name, 69 | long_name: List.to_string(long_name), 70 | media_type: media_type, 71 | profiles: profiles |> Enum.map(&List.to_string/1) |> Enum.reverse(), 72 | sample_formats: Enum.reverse(sample_formats), 73 | sample_rates: Enum.reverse(sample_rates) 74 | } 75 | end) 76 | |> Enum.reverse() 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /lib/xav/decoder.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Decoder do 2 | @moduledoc """ 3 | Audio/video decoder. 4 | """ 5 | 6 | @typedoc """ 7 | Supported codecs. 8 | 9 | To get the list of available decoders see `Xav.list_decoders/0`. 10 | """ 11 | @type codec() :: atom() 12 | 13 | @type t() :: reference() 14 | 15 | @decoder_options_schema [ 16 | channels: [ 17 | type: :pos_integer, 18 | doc: """ 19 | The number of channels of the encoded audio. 20 | 21 | Some decoders require this field to be set by the user. (e.g. `G711`) 22 | """ 23 | ], 24 | out_format: [ 25 | type: :atom, 26 | doc: """ 27 | Output format of the samples. 28 | 29 | In case of video, it's the pixel format. 
In case of audio, it's the sample format. 30 | 31 | To get the list of supported pixel formats use `Xav.pixel_formats/0`, 32 | and for sample formats `Xav.sample_formats/0`. 33 | """ 34 | ], 35 | out_sample_rate: [ 36 | type: :pos_integer, 37 | doc: """ 38 | Audio sample rate. 39 | 40 | If not specified, the sample rate of the input stream will be used. 41 | """ 42 | ], 43 | out_channels: [ 44 | type: :pos_integer, 45 | doc: """ 46 | Number of audio channels. 47 | 48 | If not specified, the number of channels of the input stream will be used. 49 | 50 | Audio samples are always in the packed form - 51 | samples from different channels are interleaved in the same, single binary: 52 | 53 | ``` 54 | <> 55 | ``` 56 | 57 | An alternative would be to return a list of binaries, where 58 | each binary represents different channel: 59 | ``` 60 | [ 61 | <>, 62 | <>, 63 | <> 64 | ] 65 | ``` 66 | """ 67 | ], 68 | out_width: [ 69 | type: :pos_integer, 70 | doc: "Scale the output video frame to the provided width." 71 | ], 72 | out_height: [ 73 | type: :pos_integer, 74 | doc: "Scale the output video frame to the provided height." 75 | ] 76 | ] 77 | 78 | @doc """ 79 | Creates a new decoder. 80 | 81 | `codec` is any audio/video decoder supported by `FFmpeg`. 82 | 83 | `opts` can be used to specify desired output parameters:\n#{NimbleOptions.docs(@decoder_options_schema)} 84 | """ 85 | @spec new(codec(), Keyword.t()) :: t() 86 | def new(codec, opts \\ []) when is_atom(codec) do 87 | opts = NimbleOptions.validate!(opts, @decoder_options_schema) 88 | 89 | Xav.Decoder.NIF.new( 90 | codec, 91 | opts[:channels] || -1, 92 | opts[:out_format], 93 | opts[:out_sample_rate] || 0, 94 | opts[:out_channels] || 0, 95 | opts[:out_width] || -1, 96 | opts[:out_height] || -1 97 | ) 98 | end 99 | 100 | @doc """ 101 | Decodes an audio/video frame. 102 | 103 | Some video frames are meant for decoder only and will not 104 | contain actual video samples. 
105 | Some audio frames might require more data to be converted 106 | to the desired output format. 107 | In both cases, `:ok` term is returned and more data needs to be provided. 108 | """ 109 | @spec decode(t(), binary(), pts: integer(), dts: integer()) :: 110 | :ok | {:ok, Xav.Frame.t()} | {:error, atom()} 111 | def decode(decoder, data, opts \\ []) do 112 | pts = opts[:pts] || 0 113 | dts = opts[:dts] || 0 114 | 115 | case Xav.Decoder.NIF.decode(decoder, data, pts, dts) do 116 | :ok -> 117 | :ok 118 | 119 | {:ok, {data, format, width, height, pts}} -> 120 | {:ok, Xav.Frame.new(data, format, width, height, pts)} 121 | 122 | # Sometimes, audio converter might not return data immediately. 123 | {:ok, {"", _format, _samples, _pts}} -> 124 | :ok 125 | 126 | {:ok, {data, format, samples, pts}} -> 127 | {:ok, Xav.Frame.new(data, format, samples, pts)} 128 | 129 | {:error, _reason} = error -> 130 | error 131 | end 132 | end 133 | 134 | @doc """ 135 | Flushes the decoder. 136 | 137 | Flushing signals end of stream and forces the decoder to return 138 | the buffered frames if there're any. 139 | """ 140 | @spec flush(t()) :: {:ok, [Xav.Frame.t()]} | {:error, atom()} 141 | def flush(decoder) do 142 | with {:ok, frames} <- Xav.Decoder.NIF.flush(decoder) do 143 | frames = 144 | Enum.map(frames, fn {data, format, width, height, pts} -> 145 | Xav.Frame.new(data, format, width, height, pts) 146 | end) 147 | 148 | {:ok, frames} 149 | end 150 | end 151 | 152 | @doc """ 153 | Same as `flush/1` but raises an exception on error. 
154 | """ 155 | def flush!(decoder) do 156 | case flush(decoder) do 157 | {:ok, frames} -> frames 158 | {:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}" 159 | end 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /lib/xav/decoder_nif.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Decoder.NIF do 2 | @moduledoc false 3 | 4 | @on_load :__on_load__ 5 | 6 | def __on_load__ do 7 | path = :filename.join(:code.priv_dir(:xav), ~c"libxavdecoder") 8 | :ok = :erlang.load_nif(path, 0) 9 | end 10 | 11 | def new( 12 | _codec, 13 | _channels, 14 | _out_format, 15 | _out_sample_rate, 16 | _out_channels, 17 | _out_width, 18 | _out_height 19 | ) do 20 | :erlang.nif_error(:undef) 21 | end 22 | 23 | def decode(_decoder, _data, _pts, _dts), do: :erlang.nif_error(:undef) 24 | 25 | def flush(_decoder), do: :erlang.nif_error(:undef) 26 | 27 | def pixel_formats(), do: :erlang.nif_error(:undef) 28 | 29 | def sample_formats(), do: :erlang.nif_error(:undef) 30 | 31 | def list_decoders(), do: :erlang.nif_error(:undef) 32 | end 33 | -------------------------------------------------------------------------------- /lib/xav/encoder.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Encoder do 2 | @moduledoc """ 3 | Audio/Video encoder. 4 | 5 | Currently, it only supports video encoding. 6 | """ 7 | 8 | @type t :: reference() 9 | 10 | @type codec :: atom() 11 | @type encoder_options :: Keyword.t() 12 | 13 | @video_encoder_schema [ 14 | width: [ 15 | type: :pos_integer, 16 | required: true, 17 | doc: "Width of the video samples." 18 | ], 19 | height: [ 20 | type: :pos_integer, 21 | required: true, 22 | doc: "Height of the video samples." 23 | ], 24 | format: [ 25 | type: :atom, 26 | required: true, 27 | doc: "Pixel format of the video samples." 
28 | ], 29 | time_base: [ 30 | type: {:tuple, [:pos_integer, :pos_integer]}, 31 | required: true, 32 | doc: """ 33 | Time base of the video stream. 34 | 35 | It is a rational represented as a tuple of two positive integers `{numerator, denominator}`. 36 | It represents the number of ticks `denominator` in `numerator` seconds, e.g. `{1, 90000}` represents 37 | 90000 ticks in 1 second. 38 | 39 | It is used for the decoding and presentation timestamps of the video frames. For video frames with constant 40 | frame rate, choose a timebase of `{1, frame_rate}`. 41 | """ 42 | ], 43 | gop_size: [ 44 | type: :pos_integer, 45 | doc: """ 46 | Group of pictures length. 47 | 48 | Determines the interval in which I-Frames (or keyframes) are inserted in 49 | the stream, e.g. a value of 50 means an I-Frame will be inserted at the 1st frame, 50 | the 51st frame, the 101st frame, and so on. 51 | """ 52 | ], 53 | max_b_frames: [ 54 | type: :non_neg_integer, 55 | doc: """ 56 | Maximum number of consecutive B-Frames to insert between non-B-Frames. 57 | 58 | A value of 0 disables the insertion of B-Frames. 59 | """ 60 | ], 61 | profile: [ 62 | type: :string, 63 | doc: """ 64 | The encoder's profile. 65 | 66 | A profile defines the capabilities and features an encoder can use to 67 | target specific applications (e.g. `live video`) 68 | 69 | To get the list of available profiles for an encoder, see `Xav.list_encoders/0` 70 | """ 71 | ] 72 | ] 73 | 74 | @audio_encoder_schema [ 75 | format: [ 76 | type: :atom, 77 | required: true, 78 | doc: "Sample format of the audio samples." 79 | ], 80 | sample_rate: [ 81 | type: :pos_integer, 82 | default: 44_100, 83 | doc: """ 84 | Number of samples per second. 85 | 86 | To get the list of supported sample rates for an encoder, see `Xav.list_encoders/0` 87 | """ 88 | ], 89 | profile: [ 90 | type: :string, 91 | doc: """ 92 | The encoder's profile.
93 | 94 | To get the list of available profiles for an encoder, see `Xav.list_encoders/0` 95 | """ 96 | ], 97 | channel_layout: [ 98 | type: :string, 99 | required: true, 100 | doc: """ 101 | Channel layout of the audio samples. 102 | 103 | For possible values, check [this](https://ffmpeg.org/ffmpeg-utils.html#Channel-Layout). 104 | """ 105 | ] 106 | ] 107 | 108 | @doc """ 109 | Create a new encoder. 110 | 111 | To get the list of available encoders, see `Xav.list_encoders/0`. 112 | 113 | It accepts the following options:\n#{NimbleOptions.docs(@video_encoder_schema)} 114 | """ 115 | @spec new(codec(), Keyword.t()) :: t() 116 | def new(codec, opts) do 117 | {codec, codec_id, media_type} = validate_codec!(codec) 118 | 119 | nif_options = 120 | case media_type do 121 | :video -> 122 | opts = NimbleOptions.validate!(opts, @video_encoder_schema) 123 | {time_base_num, time_base_den} = opts[:time_base] 124 | 125 | opts 126 | |> Map.new() 127 | |> Map.delete(:time_base) 128 | |> Map.merge(%{time_base_num: time_base_num, time_base_den: time_base_den}) 129 | 130 | :audio -> 131 | opts 132 | |> NimbleOptions.validate!(@audio_encoder_schema) 133 | |> Map.new() 134 | end 135 | 136 | if codec_id do 137 | Xav.Encoder.NIF.new(nil, Map.put(nif_options, :codec_id, codec_id)) 138 | else 139 | Xav.Encoder.NIF.new(codec, nif_options) 140 | end 141 | end 142 | 143 | @doc """ 144 | Encodes a frame. 145 | 146 | The return value may be an empty list in case the encoder 147 | needs more frames to produce a packet. 148 | """ 149 | @spec encode(t(), Xav.Frame.t()) :: [Xav.Packet.t()] 150 | def encode(encoder, frame) do 151 | encoder 152 | |> Xav.Encoder.NIF.encode(frame.data, frame.pts) 153 | |> to_packets() 154 | end 155 | 156 | @doc """ 157 | Flush the encoder. 
158 | """ 159 | @spec flush(t()) :: [Xav.Packet.t()] 160 | def flush(encoder) do 161 | encoder 162 | |> Xav.Encoder.NIF.flush() 163 | |> to_packets() 164 | end 165 | 166 | defp to_packets(result) do 167 | Enum.map(result, fn {data, dts, pts, keyframe?} -> 168 | %Xav.Packet{data: data, dts: dts, pts: pts, keyframe?: keyframe?} 169 | end) 170 | end 171 | 172 | defp validate_codec!(codec) do 173 | Xav.Encoder.NIF.list_encoders() 174 | |> Enum.find_value(fn {codec_family, encoder_name, _, media_type, codec_id, _profiles, 175 | _sample_formats, _sample_rates} -> 176 | cond do 177 | media_type not in [:video, :audio] -> nil 178 | encoder_name == codec -> {encoder_name, nil, media_type} 179 | codec_family == codec -> {codec, codec_id, media_type} 180 | true -> nil 181 | end 182 | end) 183 | |> case do 184 | nil -> raise ArgumentError, "Unknown codec: #{inspect(codec)}" 185 | result -> result 186 | end 187 | end 188 | end 189 | -------------------------------------------------------------------------------- /lib/xav/encoder_nif.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Encoder.NIF do 2 | @moduledoc false 3 | 4 | @on_load :__on_load__ 5 | 6 | def __on_load__ do 7 | path = :filename.join(:code.priv_dir(:xav), ~c"libxavencoder") 8 | :ok = :erlang.load_nif(path, 0) 9 | end 10 | 11 | def new(_codec, _params), do: :erlang.nif_error(:undef) 12 | 13 | def encode(_encoder, _data, _pts), do: :erlang.nif_error(:undef) 14 | 15 | def flush(_encoder), do: :erlang.nif_error(:undef) 16 | 17 | def list_encoders(), do: :erlang.nif_error(:undef) 18 | end 19 | -------------------------------------------------------------------------------- /lib/xav/frame.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Frame do 2 | @moduledoc """ 3 | Audio/video frame. 4 | """ 5 | 6 | @typedoc """ 7 | Possible audio samples formats. 
8 | 9 | To get the complete list of sample formats, check `Xav.sample_formats/0`. 10 | """ 11 | @type audio_format() :: atom() 12 | 13 | @typedoc """ 14 | Possible video frame formats. 15 | 16 | To get the complete list of pixel formats, check `Xav.pixel_formats/0`. 17 | 18 | An example of a pixel format is `:rgb24`. 19 | """ 20 | @type video_format() :: atom() 21 | 22 | @type format() :: audio_format() | video_format() 23 | 24 | @type width :: non_neg_integer() | nil 25 | @type height :: non_neg_integer() | nil 26 | 27 | @type t() :: %__MODULE__{ 28 | type: :audio | :video, 29 | data: binary(), 30 | format: format(), 31 | width: width(), 32 | height: height(), 33 | samples: integer() | nil, 34 | pts: integer() 35 | } 36 | 37 | defstruct [ 38 | :type, 39 | :data, 40 | :format, 41 | :width, 42 | :height, 43 | :samples, 44 | :pts 45 | ] 46 | 47 | @doc """ 48 | Creates a new audio/video frame. 49 | """ 50 | @spec new(binary(), format(), non_neg_integer(), non_neg_integer(), integer()) :: t() 51 | def new(data, format, width, height, pts) do 52 | %__MODULE__{ 53 | type: :video, 54 | data: data, 55 | format: format, 56 | width: width, 57 | height: height, 58 | pts: pts 59 | } 60 | end 61 | 62 | @spec new(binary(), format(), integer(), integer()) :: t() 63 | def new(data, format, samples, pts) do 64 | %__MODULE__{ 65 | type: :audio, 66 | data: data, 67 | format: format, 68 | samples: samples, 69 | pts: pts 70 | } 71 | end 72 | 73 | if Code.ensure_loaded?(Nx) do 74 | @doc """ 75 | Converts a frame to an Nx tensor. 76 | 77 | In case of a video frame, dimension names of the newly created tensor are `[:height, :width, :channels]`. 
78 | 79 | For video frames, the only supported pixel formats are: 80 | * `:rgb24` 81 | * `:bgr24` 82 | """ 83 | @spec to_nx(t()) :: Nx.Tensor.t() 84 | def to_nx(%__MODULE__{type: :video, format: format} = frame) 85 | when format in [:rgb24, :bgr24] do 86 | frame.data 87 | |> Nx.from_binary(:u8) 88 | |> Nx.reshape({frame.height, frame.width, 3}, names: [:height, :width, :channels]) 89 | end 90 | 91 | def to_nx(%__MODULE__{type: :audio} = frame) do 92 | Nx.from_binary(frame.data, normalize_format(frame.format)) 93 | end 94 | 95 | defp normalize_format(:flt), do: :f32 96 | defp normalize_format(:fltp), do: :f32 97 | defp normalize_format(:dbl), do: :f64 98 | defp normalize_format(:dblp), do: :f64 99 | defp normalize_format(:u8p), do: :u8 100 | defp normalize_format(:s16p), do: :s16 101 | defp normalize_format(:s32p), do: :s32 102 | defp normalize_format(:s64p), do: :s64 103 | defp normalize_format(format), do: format 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /lib/xav/packet.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Packet do 2 | @moduledoc """ 3 | A module representing compressed audio/video data. 4 | """ 5 | 6 | @type t :: %__MODULE__{ 7 | data: binary(), 8 | dts: integer(), 9 | pts: integer(), 10 | keyframe?: boolean() 11 | } 12 | 13 | defstruct [:data, :dts, :pts, :keyframe?] 14 | 15 | @spec new(Enumerable.t()) :: t() 16 | def new(opts) do 17 | struct!(%__MODULE__{}, opts) 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/xav/reader.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Reader do 2 | @moduledoc """ 3 | Audio/video file reader.
4 | """ 5 | 6 | @reader_options_schema [ 7 | read: [ 8 | type: {:in, [:audio, :video]}, 9 | default: :video, 10 | doc: "The type of the stream to read from the input, either `video` or `audio`" 11 | ], 12 | device?: [ 13 | type: :boolean, 14 | default: false, 15 | doc: "Whether the path points to the camera" 16 | ], 17 | out_format: [ 18 | type: :atom, 19 | doc: """ 20 | The output format of the audio samples. For a list of available 21 | sample formats check `Xav.sample_formats/0`. 22 | 23 | For video samples, it is always `:rgb24`. 24 | """ 25 | ], 26 | out_sample_rate: [ 27 | type: :pos_integer, 28 | doc: "The output sample rate of the audio samples" 29 | ], 30 | out_channels: [ 31 | type: :pos_integer, 32 | doc: "The output number of channels of the audio samples" 33 | ] 34 | ] 35 | 36 | @type t() :: %__MODULE__{ 37 | reader: reference(), 38 | in_format: atom(), 39 | out_format: atom(), 40 | in_sample_rate: integer() | nil, 41 | out_sample_rate: integer() | nil, 42 | in_channels: integer() | nil, 43 | out_channels: integer() | nil, 44 | bit_rate: integer(), 45 | duration: integer(), 46 | codec: atom(), 47 | framerate: {integer(), integer()} | nil 48 | } 49 | 50 | @enforce_keys [:reader, :in_format, :out_format, :bit_rate, :duration, :codec] 51 | defstruct @enforce_keys ++ 52 | [:in_sample_rate, :out_sample_rate, :in_channels, :out_channels, :framerate] 53 | 54 | @doc """ 55 | The same as `new/1` but raises on error. 56 | """ 57 | @spec new!(String.t(), Keyword.t()) :: t() 58 | def new!(path, opts \\ []) do 59 | case new(path, opts) do 60 | {:ok, reader} -> reader 61 | {:error, reason} -> raise "Couldn't create a new reader. Reason: #{inspect(reason)}" 62 | end 63 | end 64 | 65 | @doc """ 66 | Creates a new audio/video reader. 67 | 68 | Both reading from a file and from a video camera are supported. 69 | In case of using a video camera, the v4l2 driver is required, and FPS are 70 | locked to 10. 71 | 72 | Microphone input is not supported. 
73 | 74 | The following options can be provided:\n#{NimbleOptions.docs(@reader_options_schema)} 75 | """ 76 | @spec new(String.t(), Keyword.t()) :: {:ok, t()} | {:error, term()} 77 | def new(path, opts \\ []) do 78 | with {:ok, opts} <- NimbleOptions.validate(opts, @reader_options_schema) do 79 | do_create_reader(path, opts) 80 | end 81 | end 82 | 83 | @doc """ 84 | Reads and decodes the next frame. 85 | """ 86 | @spec next_frame(t()) :: {:ok, Xav.Frame.t()} | {:error, :eof} 87 | def next_frame(%__MODULE__{reader: ref} = reader) do 88 | case Xav.Reader.NIF.next_frame(ref) do 89 | {:ok, {data, format, width, height, pts}} -> 90 | format = normalize_format(format) 91 | {:ok, Xav.Frame.new(data, format, width, height, pts)} 92 | 93 | {:ok, {"", _format, _samples, _pts}} -> 94 | # Sometimes, audio converter might not return data immediately. 95 | # Hence, call until we succeed. 96 | next_frame(reader) 97 | 98 | {:ok, {data, format, samples, pts}} -> 99 | format = normalize_format(format) 100 | {:ok, Xav.Frame.new(data, format, samples, pts)} 101 | 102 | {:error, :eof} = err -> 103 | err 104 | end 105 | end 106 | 107 | @doc """ 108 | Seeks the reader to the given time in seconds 109 | """ 110 | @spec seek(t(), float()) :: :ok | {:error, term()} 111 | def seek(%__MODULE__{reader: ref}, time_in_seconds) do 112 | Xav.Reader.NIF.seek(ref, time_in_seconds) 113 | end 114 | 115 | @doc """ 116 | Creates a new reader stream. 117 | 118 | Check `new/1` for the available options. 119 | """ 120 | @spec stream!(String.t(), Keyword.t()) :: Enumerable.t() 121 | def stream!(path, opts \\ []) do 122 | Stream.resource( 123 | fn -> 124 | case new(path, opts) do 125 | {:ok, reader} -> 126 | reader 127 | 128 | {:error, reason} -> 129 | raise "Couldn't create a new Xav.Reader stream. 
Reason: #{inspect(reason)}" 130 | end 131 | end, 132 | fn reader -> 133 | case next_frame(reader) do 134 | {:ok, frame} -> {[frame], reader} 135 | {:error, :eof} -> {:halt, reader} 136 | end 137 | end, 138 | fn _reader -> :ok end 139 | ) 140 | end 141 | 142 | defp do_create_reader(path, opts) do 143 | out_sample_rate = opts[:out_sample_rate] || 0 144 | out_channels = opts[:out_channels] || 0 145 | 146 | case Xav.Reader.NIF.new( 147 | path, 148 | to_int(opts[:device?]), 149 | to_int(opts[:read]), 150 | opts[:out_format], 151 | out_sample_rate, 152 | out_channels 153 | ) do 154 | {:ok, reader, in_format, out_format, in_sample_rate, out_sample_rate, in_channels, 155 | out_channels, bit_rate, duration, codec} -> 156 | {:ok, 157 | %__MODULE__{ 158 | reader: reader, 159 | in_format: in_format, 160 | out_format: out_format, 161 | in_sample_rate: in_sample_rate, 162 | out_sample_rate: out_sample_rate, 163 | in_channels: in_channels, 164 | out_channels: out_channels, 165 | bit_rate: bit_rate, 166 | duration: duration, 167 | codec: to_human_readable(codec) 168 | }} 169 | 170 | {:ok, reader, in_format, out_format, bit_rate, duration, codec, framerate} -> 171 | {:ok, 172 | %__MODULE__{ 173 | reader: reader, 174 | in_format: in_format, 175 | out_format: out_format, 176 | bit_rate: bit_rate, 177 | duration: duration, 178 | codec: to_human_readable(codec), 179 | framerate: framerate 180 | }} 181 | 182 | {:error, _reason} = err -> 183 | err 184 | end 185 | end 186 | 187 | defp to_human_readable(:libdav1d), do: :av1 188 | defp to_human_readable(:mp3float), do: :mp3 189 | defp to_human_readable(other), do: other 190 | 191 | defp to_int(:video), do: 1 192 | defp to_int(:audio), do: 0 193 | defp to_int(true), do: 1 194 | defp to_int(false), do: 0 195 | 196 | # Use the same formats as Nx 197 | defp normalize_format(:flt), do: :f32 198 | defp normalize_format(:dbl), do: :f64 199 | defp normalize_format(other), do: other 200 | end 201 | 
-------------------------------------------------------------------------------- /lib/xav/reader_nif.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.Reader.NIF do 2 | @moduledoc false 3 | 4 | @on_load :__on_load__ 5 | 6 | def __on_load__ do 7 | path = :filename.join(:code.priv_dir(:xav), ~c"libxavreader") 8 | :ok = :erlang.load_nif(path, 0) 9 | end 10 | 11 | def new(_path, _device, _video, _out_format, _out_sample_rate, _out_channels), 12 | do: :erlang.nif_error(:undef) 13 | 14 | def next_frame(_reader), do: :erlang.nif_error(:undef) 15 | 16 | def seek(_reader, _time_in_seconds), do: :erlang.nif_error(:undef) 17 | end 18 | -------------------------------------------------------------------------------- /lib/xav/video_converter.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.VideoConverter do 2 | @moduledoc """ 3 | Video samples converter. 4 | 5 | It supports pixel format conversion and/or scaling. 6 | """ 7 | 8 | alias Xav.Frame 9 | alias Xav.VideoConverter.NIF 10 | 11 | @type t :: %__MODULE__{ 12 | converter: reference(), 13 | out_format: Frame.video_format(), 14 | out_width: Frame.width(), 15 | out_height: Frame.height() 16 | } 17 | 18 | @converter_schema [ 19 | out_width: [ 20 | type: :pos_integer, 21 | required: false, 22 | doc: """ 23 | scale the video frame to this width 24 | 25 | If `out_width` and `out_height` are both not provided, scaling is not performed. If one of the 26 | dimensions is `nil`, the other will be calculated based on the input dimensions as 27 | to keep the aspect ratio. 28 | """ 29 | ], 30 | out_height: [ 31 | type: :pos_integer, 32 | required: false, 33 | doc: "scale the video frame to this height" 34 | ], 35 | out_format: [ 36 | type: :atom, 37 | required: false, 38 | doc: "video format to convert to (e.g. 
`:rgb24`)" 39 | ] 40 | ] 41 | 42 | defstruct [:converter, :out_format, :out_width, :out_height] 43 | 44 | @doc """ 45 | Creates a new video converter. 46 | 47 | The following options can be passed:\n#{NimbleOptions.docs(@converter_schema)} 48 | """ 49 | @spec new(Keyword.t()) :: t() 50 | def new(converter_opts) do 51 | opts = NimbleOptions.validate!(converter_opts, @converter_schema) 52 | 53 | if is_nil(opts[:out_format]) and is_nil(opts[:out_width]) and is_nil(opts[:out_height]) do 54 | raise "At least one of `out_format`, `out_width` or `out_height` must be provided" 55 | end 56 | 57 | converter = NIF.new(opts[:out_format], opts[:out_width] || -1, opts[:out_height] || -1) 58 | 59 | %__MODULE__{ 60 | converter: converter, 61 | out_format: opts[:out_format], 62 | out_width: opts[:out_width], 63 | out_height: opts[:out_height] 64 | } 65 | end 66 | 67 | @doc """ 68 | Converts a video frame. 69 | """ 70 | @spec convert(t(), Frame.t()) :: Frame.t() 71 | def convert( 72 | %__MODULE__{out_format: format, out_width: nil, out_height: nil}, 73 | %Frame{format: format} = frame 74 | ), 75 | do: frame 76 | 77 | def convert(%__MODULE__{converter: converter}, frame) do 78 | {data, out_format, width, height, _pts} = 79 | NIF.convert(converter, frame.data, frame.width, frame.height, frame.format) 80 | 81 | %Frame{ 82 | type: frame.type, 83 | data: data, 84 | format: out_format, 85 | width: width, 86 | height: height, 87 | pts: frame.pts 88 | } 89 | end 90 | end 91 | -------------------------------------------------------------------------------- /lib/xav/video_converter_nif.ex: -------------------------------------------------------------------------------- 1 | defmodule Xav.VideoConverter.NIF do 2 | @moduledoc false 3 | 4 | @on_load :__on_load__ 5 | 6 | def __on_load__ do 7 | path = :filename.join(:code.priv_dir(:xav), ~c"libxavvideoconverter") 8 | :ok = :erlang.load_nif(path, 0) 9 | end 10 | 11 | def new(_format, _width, _height), do: :erlang.nif_error(:undef) 12 | 13 | def 
convert(_converter, _frame, _width, _height, _pix_format), do: :erlang.nif_error(:undef) 14 | end 15 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Xav.MixProject do 2 | use Mix.Project 3 | 4 | @version "0.10.0" 5 | @source_url "https://github.com/elixir-webrtc/xav" 6 | 7 | def project do 8 | [ 9 | app: :xav, 10 | version: @version, 11 | elixir: "~> 1.14", 12 | start_permanent: Mix.env() == :prod, 13 | description: "Elixir audio/video library built on top of FFmpeg", 14 | package: package(), 15 | compilers: [:elixir_make] ++ Mix.compilers(), 16 | deps: deps(), 17 | 18 | # docs 19 | docs: docs(), 20 | source_url: @source_url, 21 | 22 | # dialyzer 23 | dialyzer: [ 24 | plt_local_path: "_dialyzer", 25 | plt_core_path: "_dialyzer", 26 | plt_add_apps: [:nx] 27 | ], 28 | 29 | # code coverage 30 | test_coverage: [tool: ExCoveralls], 31 | preferred_cli_env: [ 32 | coveralls: :test, 33 | "coveralls.detail": :test, 34 | "coveralls.post": :test, 35 | "coveralls.html": :test, 36 | "coveralls.json": :test 37 | ] 38 | ] 39 | end 40 | 41 | def application do 42 | [ 43 | extra_applications: [:logger] 44 | ] 45 | end 46 | 47 | defp package do 48 | [ 49 | files: ~w(lib .formatter.exs mix.exs README* LICENSE* c_src Makefile), 50 | licenses: ["Apache-2.0"], 51 | links: %{"GitHub" => "https://github.com/elixir-webrtc/xav"} 52 | ] 53 | end 54 | 55 | defp deps do 56 | [ 57 | {:nx, "~> 0.7", optional: true}, 58 | {:elixir_make, "~> 0.7", runtime: false}, 59 | {:nimble_options, "~> 1.0"}, 60 | 61 | # dev/test 62 | # bumblebee and exla for testing speech to text 63 | {:bumblebee, "~> 0.6", only: :test}, 64 | {:exla, ">= 0.0.0", only: :test}, 65 | # other 66 | {:excoveralls, "~> 0.18.0", only: [:dev, :test], runtime: false}, 67 | {:ex_doc, ">= 0.0.0", runtime: false, only: :dev}, 68 | {:credo, ">= 0.0.0", runtime: false, only: :dev}, 69 | {:dialyxir, 
">= 0.0.0", runtime: false, only: :dev} 70 | ] 71 | end 72 | 73 | defp docs do 74 | [ 75 | main: "readme", 76 | extras: ["README.md", "INSTALL.md"], 77 | source_ref: "v#{@version}", 78 | formatters: ["html"] 79 | ] 80 | end 81 | end 82 | -------------------------------------------------------------------------------- /mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "axon": {:hex, :axon, "0.7.0", "2e2c6d93b4afcfa812566b8922204fa022b60081e86ebd411df4db7ea30f5457", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.9", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "ee9857a143c9486597ceff434e6ca833dc1241be6158b01025b8217757ed1036"}, 3 | "bumblebee": {:hex, :bumblebee, "0.6.0", "1c731313308ff9fde2effc4a2f366742dbd78e227b84e980eb2804f6b9281724", [:mix], [{:axon, "~> 0.7.0", [hex: :axon, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}, {:nx_image, "~> 0.1.0", [hex: :nx_image, repo: "hexpm", optional: false]}, {:nx_signal, "~> 0.2.0", [hex: :nx_signal, repo: "hexpm", optional: false]}, {:progress_bar, "~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: false]}, {:safetensors, "~> 0.1.3", [hex: :safetensors, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.4", [hex: :tokenizers, repo: "hexpm", optional: false]}, {:unpickler, "~> 0.1.0", [hex: :unpickler, repo: "hexpm", optional: false]}, {:unzip, "~> 0.12.0", [hex: :unzip, repo: "hexpm", optional: false]}], "hexpm", "a8b863179d314e9615b00291d5dcd2dc043b294edc25b4483d5c88d1c8d21c89"}, 4 | "bunt": {:hex, :bunt, "1.0.0", 
"081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, 5 | "castore": {:hex, :castore, "1.0.9", "5cc77474afadf02c7c017823f460a17daa7908e991b0cc917febc90e466a375c", [:mix], [], "hexpm", "5ea956504f1ba6f2b4eb707061d8e17870de2bee95fb59d512872c2ef06925e7"}, 6 | "complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"}, 7 | "credo": {:hex, :credo, "1.7.7", "771445037228f763f9b2afd612b6aa2fd8e28432a95dbbc60d8e03ce71ba4446", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8bc87496c9aaacdc3f90f01b7b0582467b69b4bd2441fe8aae3109d843cc2f2e"}, 8 | "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, 9 | "dialyxir": {:hex, :dialyxir, "1.4.3", "edd0124f358f0b9e95bfe53a9fcf806d615d8f838e2202a9f430d59566b6b53b", [:mix], [{:erlex, ">= 0.2.6", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "bf2cfb75cd5c5006bec30141b131663299c661a864ec7fbbc72dfa557487a986"}, 10 | "earmark_parser": {:hex, :earmark_parser, "1.4.41", "ab34711c9dc6212dda44fcd20ecb87ac3f3fce6f0ca2f28d4a00e4154f8cd599", [:mix], [], "hexpm", "a81a04c7e34b6617c2792e291b5a2e57ab316365c2644ddc553bb9ed863ebefa"}, 11 | "elixir_make": {:hex, :elixir_make, "0.8.4", "4960a03ce79081dee8fe119d80ad372c4e7badb84c493cc75983f9d3bc8bde0f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", 
"6e7f1d619b5f61dfabd0a20aa268e575572b542ac31723293a4c1a567d5ef040"}, 12 | "erlex": {:hex, :erlex, "0.2.7", "810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, 13 | "ex_doc": {:hex, :ex_doc, "0.34.2", "13eedf3844ccdce25cfd837b99bea9ad92c4e511233199440488d217c92571e8", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "5ce5f16b41208a50106afed3de6a2ed34f4acfd65715b82a0b84b49d995f95c1"}, 14 | "excoveralls": {:hex, :excoveralls, "0.18.2", "86efd87a0676a3198ff50b8c77620ea2f445e7d414afa9ec6c4ba84c9f8bdcc2", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "230262c418f0de64077626a498bd4fdf1126d5c2559bb0e6b43deac3005225a4"}, 15 | "exla": {:hex, :exla, "0.9.1", "1e8ecd2a6106e86ec1d132fd80cc3992c6c5a8b3b6b1867abd12bf650e6ccd67", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.8.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "a44f10f2eafe802dab325b86eaf746ec578a408467731a83f4ddec9b05d50667"}, 16 | "file_system": {:hex, :file_system, "1.0.0", "b689cc7dcee665f774de94b5a832e578bd7963c8e637ef940cd44327db7de2cd", [:mix], [], "hexpm", 
"6752092d66aec5a10e662aefeed8ddb9531d79db0bc145bb8c40325ca1d8536d"}, 17 | "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, 18 | "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, 19 | "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, 20 | "makeup_erlang": {:hex, :makeup_erlang, "1.0.1", "c7f58c120b2b5aa5fd80d540a89fdf866ed42f1f3994e4fe189abebeab610839", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "8a89a1eeccc2d798d6ea15496a6e4870b75e014d1af514b1b71fa33134f57814"}, 21 | "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, 22 | "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, 23 | "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, 24 | "nx": {:hex, :nx, "0.9.1", 
"b5296f178d24ded118d5fd5c3977bb65c7f6ad8113eff4cb1401ac1770eb837a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f67ca8fcf09f73000e9a59a19f93ad8e0e581f4993e008527a4a6f280c71c467"}, 25 | "nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"}, 26 | "nx_signal": {:hex, :nx_signal, "0.2.0", "e1ca0318877b17c81ce8906329f5125f1e2361e4c4235a5baac8a95ee88ea98e", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "7247e5e18a177a59c4cb5355952900c62fdeadeb2bad02a9a34237b68744e2bb"}, 27 | "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"}, 28 | "progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"}, 29 | "rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"}, 30 | "safetensors": {:hex, :safetensors, "0.1.3", "7ff3c22391e213289c713898481d492c9c28a49ab1d0705b72630fb8360426b2", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: 
false]}], "hexpm", "fe50b53ea59fde4e723dd1a2e31cfdc6013e69343afac84c6be86d6d7c562c14"}, 31 | "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, 32 | "tokenizers": {:hex, :tokenizers, "0.5.1", "b0975d92b4ee5b18e8f47b5d65b9d5f1e583d9130189b1a2620401af4e7d4b35", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "5f08d97cc7f2ed3d71d370d68120da6d3de010948ccf676c9c0eb591ba4bacc9"}, 33 | "unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"}, 34 | "unzip": {:hex, :unzip, "0.12.0", "beed92238724732418b41eba77dcb7f51e235b707406c05b1732a3052d1c0f36", [:mix], [], "hexpm", "95655b72db368e5a84951f0bed586ac053b55ee3815fd96062fce10ce4fc998d"}, 35 | "xla": {:hex, :xla, "0.8.0", "fef314d085dd3ee16a0816c095239938f80769150e15db16dfaa435553d7cb16", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "739c61c8d93b97e12ba0369d10e76130224c208f1a76ad293e3581f056833e57"}, 36 | } 37 | -------------------------------------------------------------------------------- /test/decoder_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Xav.DecoderTest do 2 | use ExUnit.Case, async: true 3 | 4 | @vp8_keyframe <<80, 188, 0, 157, 1, 42, 128, 2, 224, 1, 57, 107, 0, 47, 28, 34, 22, 22, 34, 102, 5 | 18, 32, 212, 14, 239, 198, 191, 249, 103, 67, 12, 209, 59, 136, 119, 231, 148, 6 | 71, 190, 250, 205, 56, 167, 146, 195, 27, 246, 83, 183, 213, 135, 180, 90, 89, 7 | 245, 142, 137, 25, 231, 188, 158, 
193, 127, 253, 250, 159, 78, 244, 58, 228, 8 | 245, 85, 17, 60, 238, 231, 248, 173, 93, 56, 91, 8, 237, 147, 88, 153, 113, 51, 9 | 216, 209, 121, 113, 94, 108, 141, 79, 9, 69, 31, 25, 96, 117, 112, 75, 211, 110, 10 | 238, 92, 14, 106, 206, 195, 197, 4, 70, 79, 77, 249, 99, 68, 15, 195, 242, 233, 11 | 38, 42, 163, 136, 195, 132, 32, 246, 164, 116, 192, 41, 214, 49, 201, 5, 11, 85, 12 | 98, 239, 249, 93, 48, 135, 223, 250, 190, 150, 98, 51, 157, 195, 88, 142, 143, 13 | 65, 59, 47, 177, 210, 96, 210, 107, 101, 173, 129, 161, 29, 165, 113, 16, 41, 14 | 122, 27, 101, 179, 39, 71, 55, 169, 216, 178, 226, 50, 215, 188, 228, 234, 204, 15 | 196, 240, 38, 134, 50, 101, 13, 137, 128, 221, 34, 83, 63, 67, 223, 196, 25, 8, 16 | 225, 56, 23, 219, 177, 10, 8, 218, 243, 57, 141, 171, 100, 227, 140, 14, 152, 17 | 105, 93, 35, 153, 244, 190, 142, 63, 74, 38, 201, 221, 96, 62, 104, 126, 13, 15, 18 | 79, 137, 138, 205, 173, 133, 205, 115, 114, 142, 132, 68, 17, 52, 48, 14, 105, 19 | 114, 139, 116, 185, 237, 119, 206, 148, 180, 228, 141, 209, 104, 141, 14, 32, 20 | 241, 10, 184, 90, 153, 173, 142, 1, 206, 93, 206, 17, 148, 237, 180, 49, 70, 8, 21 | 152, 211, 167, 131, 244, 59, 117, 113, 82, 128, 130, 124, 64, 50, 202, 107, 152, 22 | 201, 11, 176, 213, 201, 15, 130, 121, 85, 92, 208, 132, 248, 87, 48, 82, 244, 23 | 135, 180, 58, 60, 230, 114, 218, 10, 108, 57, 168, 216, 133, 76, 140, 71, 120, 24 | 138, 173, 21, 34, 251, 4, 167, 220, 87, 152, 179, 71, 134, 80, 66, 105, 203, 62, 25 | 215, 213, 170, 108, 109, 47, 90, 231, 176, 252, 185, 253, 132, 139, 14, 108, 45, 26 | 61, 186, 144, 53, 101, 166, 85, 205, 189, 73, 87, 53, 14, 142, 10, 112, 225, 27 | 172, 10, 125, 178, 32, 128, 50, 10, 253, 31, 211, 23, 74, 138, 221, 141, 71, 46, 28 | 46, 78, 221, 103, 198, 30, 244, 41, 243, 50, 72, 204, 129, 238, 165, 56, 252, 3, 29 | 58, 152, 143, 189, 142, 105, 16, 20, 77, 46, 47, 148, 139, 77, 11, 197, 106, 30 | 167, 38, 123, 110, 249, 169, 133, 235, 148, 221, 126, 90, 8, 123, 23, 207, 177, 31 | 104, 
148, 162, 243, 132, 202, 23, 25, 112, 12, 160, 184, 229, 77, 145, 115, 204, 32 | 100, 53, 176, 68, 35, 131, 237, 25, 57, 112, 247, 223, 135, 19, 102, 161, 71, 33 | 223, 40, 204, 61, 206, 180, 196, 33, 19, 86, 191, 156, 234, 73, 124, 250, 252, 34 | 194, 113, 124, 5, 176, 58, 44, 193, 137, 128, 191, 143, 180, 219, 24, 172, 120, 35 | 102, 131, 15, 8, 12, 250, 182, 107, 93, 178, 192, 231, 49, 161, 105, 30, 29, 94, 36 | 38, 232, 49, 24, 99, 101, 115, 202, 237, 127, 60, 222, 168, 5, 14, 226, 74, 29, 37 | 8, 1, 56, 137, 42, 115, 142, 182, 34, 163, 3, 138, 73, 60, 192, 61, 136, 197, 38 | 151, 218, 90, 103, 155, 78, 37, 242, 147, 118, 212, 19, 37, 164, 85, 15, 122, 39 | 108, 190, 66, 167, 233, 44, 7, 227, 237, 27, 136, 167, 172, 204, 118, 115, 144, 40 | 17, 104, 95, 94, 165, 14, 123, 199, 69, 56, 127, 232, 29, 150, 219, 252, 141, 41 | 92, 95, 4, 8, 183, 244, 72, 200, 33, 246, 171, 150, 162, 120, 4, 175, 140, 94, 42 | 38, 211, 201, 203, 239, 185, 159, 148, 8, 246, 238, 143, 183, 58, 164, 132, 245, 43 | 213, 67, 69, 42, 189, 248, 227, 25, 52, 97, 151, 75, 42, 212, 104, 249, 72, 39, 44 | 22, 56, 252, 155, 173, 172, 126, 154, 5, 68, 60, 181, 153, 22, 214, 200, 174, 45 | 70, 160, 113, 250, 157, 52, 34, 195, 233, 3, 147, 62, 78, 34, 218, 164, 183, 46 | 230, 105, 30, 183, 50, 160, 250, 159, 73, 33, 241, 58, 22, 169, 195, 203, 255, 47 | 3, 41, 102, 213, 119, 162, 98, 84, 37, 219, 233, 86, 243, 185, 177, 153, 69, 48 | 212, 97, 239, 27, 56, 173, 210, 59, 76, 88, 18, 93, 185, 72, 104, 215, 221, 22, 49 | 131, 216, 19, 86, 210, 0, 187, 47, 2, 182, 157, 40, 171, 87, 68, 191, 230, 167, 50 | 225, 38, 32, 153, 78, 165, 190, 187, 240, 94, 35, 28, 217, 35, 40, 236, 218, 81, 51 | 252, 42, 39, 14, 222, 254, 98, 231, 102, 168, 176, 147, 225, 19, 5, 137, 197, 9, 52 | 218, 73, 143, 186, 225, 176, 221, 53, 21, 14, 237, 138, 136, 193, 21, 87, 144, 53 | 221, 236, 202, 69, 2, 101, 197, 203, 10, 190, 80, 43, 18, 113, 167, 45, 162, 22, 54 | 26, 154, 153, 81, 76, 242, 106, 217, 93, 193, 112, 163, 13, 
223, 161, 124, 93, 55 | 246, 67, 224, 190, 60, 143, 128, 188, 110, 191, 30, 25, 251, 221, 128, 76, 176, 56 | 220, 252, 194, 248, 170, 113, 243, 187, 213, 209, 135, 102, 182, 167, 255, 139, 57 | 77, 37, 173, 195, 231, 67, 182, 172, 121, 238, 104, 213, 120, 61, 138, 193, 190, 58 | 8, 15, 82, 60, 172, 68, 43, 14, 57, 244, 32, 41, 212, 196, 186, 155, 252, 85, 59 | 172, 235, 68, 113, 152, 136, 209, 158, 171, 180, 209, 165, 157, 124, 125, 4, 60 | 217, 43, 140, 143, 204, 223, 115, 186, 33, 220, 176, 170, 2, 222, 49, 4, 52, 61 | 220, 148, 39, 30, 135, 243, 235, 248, 88, 242, 92, 102, 171, 4, 251, 11, 134, 62 | 165, 42, 32, 161, 12, 25, 12, 128, 60, 190, 147, 184, 251, 81, 203, 247, 15, 63 | 234, 230, 68, 222, 179, 179, 66, 167, 126, 237, 32, 111, 50, 98, 247, 225, 26, 64 | 117, 133, 98, 42, 177, 25, 167, 137, 172, 47, 91, 123, 230, 176, 198, 252, 187, 65 | 160, 25, 13, 249, 103, 118, 195, 141, 217, 138, 197, 11, 249, 44, 79, 102, 188, 66 | 149, 31, 158, 29, 145, 155, 68, 159, 158, 223, 250, 173, 98, 148, 129, 72, 149, 67 | 21, 193, 171, 112, 191, 88, 26, 152, 211, 14, 134, 173, 187, 250, 189, 47, 28, 68 | 156, 160, 241, 65, 108, 91, 112, 198, 206, 197, 140, 50, 217, 206, 196, 93, 250, 69 | 141, 63, 57, 214, 225, 8, 209, 163, 139, 15, 25, 255, 150, 4, 29, 252, 181, 48, 70 | 193, 6, 135, 133, 7, 89, 233, 178, 167, 10, 70, 31, 91, 251, 216, 141, 82, 4, 71 | 214, 30, 44, 97, 35, 204, 161, 149, 108, 166, 206, 146, 149, 236, 145, 161, 72, 72 | 105, 151, 194, 125, 101, 81, 186, 15, 23, 64, 183, 98, 79, 154, 159, 233, 107, 73 | 79, 212, 127, 131, 125, 60, 43, 90, 130, 18, 229, 224, 1, 105, 105, 84, 51, 227, 74 | 80, 236, 195, 190, 138, 240, 185, 113, 67, 78, 224, 65, 241, 106, 110, 193, 210, 75 | 40, 102, 98, 120, 1, 239, 92, 83, 180, 208, 116, 248, 17, 13, 178, 139, 232, 76 | 227, 140, 56, 114, 232, 63, 83, 83, 211, 205, 11, 213, 20, 239, 66, 35, 133, 86, 77 | 25, 158, 164, 156, 88, 62, 106, 109, 229, 218, 71, 3, 249, 232, 179, 4, 129, 94, 78 | 13, 221, 243, 191, 176, 143, 42, 
230, 205, 184, 43, 97, 230, 123, 195, 50, 157, 79 | 226, 20, 9, 34, 127, 90, 97, 236, 50, 95, 88, 222, 224, 13, 86, 221, 156, 90, 80 | 24, 241, 9, 127, 193, 53, 164, 203, 217, 222, 16, 142, 168, 13, 57, 44, 58, 149, 81 | 40, 105, 55, 148, 55, 149, 52, 72, 3, 5, 106, 147, 198, 104, 103, 34, 180, 92, 82 | 253, 87, 0, 34, 132, 177, 186, 225, 99, 133, 90, 70, 215, 202, 133, 87, 119, 83 | 163, 161, 188, 242, 24, 65, 173, 248, 170, 202, 43, 244, 29, 62, 206, 99, 212, 84 | 34, 141, 215, 201, 9, 91, 136, 18, 129, 33, 216, 15, 254, 211, 41, 92, 35, 78, 85 | 28, 86, 243, 141, 142, 55, 162, 114, 6, 23, 30, 214, 120, 8, 136, 191, 77, 27, 86 | 79, 81, 85, 116, 150, 171, 113, 130, 6, 218, 24, 105, 162, 182, 82, 136, 172, 87 | 68, 85, 175, 215, 62, 60, 253, 7, 150, 27, 46, 30, 109, 177, 174, 77, 51, 167, 88 | 100, 0, 28, 46, 103, 94, 236, 140, 129, 28, 116, 205, 45, 85, 236, 234, 240, 7, 89 | 158, 92, 131, 23, 244, 215, 192, 156, 58, 1, 205, 157, 171, 8, 179, 132, 151, 90 | 254, 201, 80, 207, 206, 86, 43, 27, 103, 16, 96, 17, 234, 28, 19, 178, 141, 12, 91 | 231, 244, 180, 224, 254, 170, 116, 14, 21, 119, 90, 99, 69, 30, 122, 152, 173, 92 | 250, 198, 250, 129, 127, 122, 130, 236, 229, 54, 143, 245, 226, 236, 106, 85, 93 | 29, 7, 93, 149, 211, 204, 235, 153, 151, 19, 170, 121, 91, 74, 24, 0, 219, 22, 94 | 226, 118, 227, 209, 248, 162, 170, 1, 49, 79, 111, 236, 21, 24, 53, 58, 190, 94, 95 | 137, 162, 96, 212, 50, 129, 222, 199, 115, 170, 113, 18, 119, 54, 103, 140, 28, 96 | 21, 135, 227, 122, 73, 33, 209, 120, 44, 46, 29, 105, 153, 191, 142, 252, 9, 97 | 249, 6, 182, 63, 153, 146, 84, 231, 30, 148, 44, 59, 24, 237, 104, 216, 162, 22, 98 | 206, 32, 158, 167, 144, 123, 35, 1, 135, 164, 144, 224, 59, 204, 178, 14, 248, 99 | 24, 66, 72, 161, 136, 144, 154, 250, 148, 99, 42, 211, 161, 74, 133, 104, 246, 100 | 148, 180, 29, 85, 1, 35, 121, 140, 65, 87, 108, 43, 135, 93, 147, 56, 104, 44, 101 | 4, 124, 214, 179, 166, 146, 31, 155, 28, 235, 123, 77, 113, 194, 82, 139, 74, 6, 102 | 85, 57, 
9, 234, 67, 9, 109, 43, 182, 245, 113, 140, 100, 149, 204, 230, 11, 72, 103 | 153, 6, 177, 145, 194, 30, 193, 97, 215, 80, 1, 185, 141, 31, 42, 164, 172, 40, 104 | 103, 74, 186, 228, 239, 81, 49, 38, 71, 58, 5, 184, 235, 77, 73, 31, 56, 177, 105 | 102, 236, 44, 148, 84, 204, 177, 142, 150, 222, 174, 52, 193, 245, 248, 12, 92, 106 | 198, 70, 171, 219, 20, 75, 77, 117, 68, 170, 214, 47, 229, 18, 77, 52, 215, 28, 107 | 190, 90, 161, 64, 52, 182, 42, 140, 218, 183, 212, 187, 116, 54, 153, 99, 184, 108 | 69, 135, 172, 127, 234, 210, 216, 29, 107, 22, 121, 116, 147, 5, 20, 46, 123, 109 | 71, 108, 68, 134, 49, 137, 36, 79, 226, 190, 223, 78, 22, 96, 211, 208, 106, 62, 110 | 187, 0, 120, 196, 137, 54, 15, 195, 1, 241, 131, 129, 62, 232, 224, 99, 113, 17, 111 | 189, 58, 217, 13, 238, 82, 213, 197, 130, 209, 159, 59, 54, 142, 116, 35, 44, 112 | 147, 183, 8, 98, 74, 182, 89, 232, 79, 31, 195, 81, 79, 138, 56, 161, 250, 222, 113 | 15, 104, 205, 223, 249, 218, 72, 106, 101, 105, 11, 230, 46, 116, 234, 202, 187, 114 | 208, 11, 235, 125, 145, 85, 35, 235, 66, 203, 60, 39, 197, 10, 119, 14, 230, 78, 115 | 11, 86, 67, 109, 143, 245, 153, 110, 128, 136, 122, 97, 174, 73, 185, 169, 70, 116 | 183, 125, 129, 184, 180, 0, 0, 54, 200, 216, 241, 53, 74, 31, 97, 250, 49, 106, 117 | 140, 213, 229, 113, 14, 133, 170, 103, 9, 111, 125, 70, 126, 193, 134, 49, 176, 118 | 104, 44, 39, 184, 202, 189, 117, 216, 78, 216, 212, 84, 59, 35, 230, 223, 213, 119 | 133, 0, 75, 113, 111, 106, 125, 153, 242, 76, 4, 18, 161, 158, 100, 192, 89, 120 | 170, 153, 146, 62, 45, 251, 43, 216, 208, 230, 17, 43, 101, 50, 183, 42, 212, 121 | 37, 123, 76, 50, 240, 82, 84, 112, 12, 243, 194, 35, 49, 76, 28, 75, 89, 104, 122 | 107, 23, 89, 14, 226, 70, 60, 79, 67, 255, 193, 171, 114, 41, 50, 20, 133, 55, 123 | 92, 117, 109, 196, 49, 224, 27, 144, 142, 72, 20, 114, 250, 208, 107, 226, 225, 124 | 55, 41, 105, 102, 142, 242, 253, 206, 110, 87, 11, 234, 56, 251, 85, 231, 45, 1, 125 | 173, 4, 216, 17, 16, 49, 139, 84, 177, 
100, 216, 220, 129, 248, 161, 18, 156, 126 | 162, 149, 128, 145, 110, 91, 226, 16, 246, 173, 128, 162, 215, 216, 118, 132, 127 | 99, 197, 157, 173, 150, 202, 151, 40, 56, 27, 7, 40, 226, 159, 18, 77, 186, 124, 128 | 175, 205, 198, 247, 154, 193, 254, 235, 13, 213, 45, 229, 166, 194, 28, 5, 228, 129 | 191, 102, 141, 21, 101, 229, 36, 223, 231, 20, 110, 26, 17, 64, 237, 85, 247, 130 | 198, 194, 204, 160, 2, 27, 246, 210, 18, 144, 10, 194, 249, 254, 119, 15, 22, 131 | 26, 9, 31, 136, 86, 236, 25, 214, 65, 134, 11, 108, 188, 252, 180, 190, 98, 238, 132 | 126, 193, 7, 234, 219, 47, 102, 186, 83, 249, 109, 248, 12, 84, 0, 40, 247, 169, 133 | 8, 131, 183, 214, 128, 170, 250, 6, 179, 220, 6, 57, 178, 3, 167, 125, 123, 176, 134 | 171, 21, 33, 152, 92, 65, 180, 89, 101, 237, 125, 219, 7, 100, 195, 80, 81, 29, 135 | 160, 109, 121, 179, 215, 176, 232, 230, 218, 93, 197, 90, 2, 114, 31, 197, 80, 136 | 19, 240, 232, 255, 189, 246, 98, 255, 170, 107, 112, 106, 238, 27, 24, 135, 141, 137 | 10, 25, 251, 109, 224, 70, 151, 182, 64, 92, 105, 63, 61, 226, 113, 193, 243, 138 | 235, 51, 16, 48, 253, 209, 160, 124, 10, 61, 192, 12, 125, 57, 16, 177, 123, 139 | 213, 22, 120, 109, 9, 217, 28, 161, 237, 7, 217, 22, 206, 88, 46, 49, 91, 95, 140 | 11, 39, 49, 32, 69, 92, 224, 236, 170, 4, 162, 61, 248, 80, 94, 16, 104, 16, 34, 141 | 192, 214, 24, 141, 37, 137, 88, 112, 49, 56, 110, 179, 90, 4, 241, 142, 35, 107, 142 | 200, 206, 129, 170, 138, 24, 107, 138, 171, 87, 243, 29, 37, 158, 75, 167, 37, 143 | 232, 13, 218, 97, 17, 104, 12, 85, 38, 57, 31, 103, 54, 149, 30, 62, 47, 204, 144 | 209, 68, 27, 179, 28, 15, 194, 142, 162, 81, 253, 80, 153, 209, 205, 1, 125, 24, 145 | 74, 3, 251, 117, 144, 180, 238, 179, 24, 43, 139, 155, 53, 216, 231, 49, 173, 146 | 96, 39, 203, 249, 129, 199, 170, 246, 18, 76, 195, 77, 179, 58, 200, 154, 15, 147 | 16, 68, 1, 236, 133, 206, 137, 186, 125, 203, 164, 92, 180, 155, 194, 69, 76, 148 | 144, 98, 64, 249, 0, 173, 26, 217, 108, 225, 47, 167, 144, 60, 187, 117, 92, 
149 | 105, 152, 35, 123, 33, 30, 137, 17, 94, 135, 156, 169, 60, 123, 45, 14, 22, 178, 150 | 89, 57, 130, 180, 243, 12, 73, 71, 181, 196, 50, 226, 231, 144, 77, 138, 222, 151 | 212, 39, 72, 148, 167, 131, 242, 101, 77, 141, 193, 4, 241, 118, 203, 130, 18, 152 | 141, 51, 169, 211, 78, 134, 166, 187, 63, 79, 216, 178, 173, 138, 104, 48, 200, 153 | 15, 239, 38, 67, 74, 219, 28, 51, 40, 170, 219, 164, 88, 186, 236, 163, 254, 154 | 191, 181, 13, 151, 18, 108, 46, 134, 189, 103, 94, 28, 88, 181, 201, 86, 76, 1, 155 | 7, 59, 35, 39, 180, 63, 183, 100, 111, 235, 158, 182, 19, 93, 213, 87, 76, 10, 156 | 151, 207, 121, 218, 226, 139, 127, 21, 233, 63, 24, 192, 252, 85, 171, 107, 165, 157 | 207, 212, 246, 139, 84, 203, 177, 203, 51, 118, 150, 1, 209, 200, 219, 202, 59, 158 | 164, 222, 24, 121, 174, 101, 138, 176, 255, 164, 138, 154, 106, 18, 21, 136, 159 | 193, 250, 229, 170, 157, 86, 106, 26, 55, 180, 254, 107, 232, 123, 126, 16, 221, 160 | 9, 12, 116, 217, 229, 204, 11, 130, 195, 93, 33, 219, 224, 43, 200, 120, 244, 161 | 207, 173, 77, 35, 155, 125, 68, 60, 125, 112, 144, 175, 121, 208, 105, 249, 144, 162 | 217, 210, 41, 173, 19, 117, 92, 219, 249, 115, 219, 181, 194, 214, 100, 84, 173, 163 | 76, 18, 176, 45, 182, 35, 132, 83, 145, 141, 15, 152, 67, 171, 68, 204, 147, 164 | 219, 31, 222, 75, 162, 23, 6, 171, 114, 118, 97, 93, 201, 19, 101, 208, 182, 24, 165 | 16, 188, 80, 113, 103, 94, 42, 250, 14, 244, 156, 51, 184, 188, 228, 11, 162, 166 | 253, 54, 95, 0, 250, 143, 251, 22, 129, 181, 146, 75, 60, 67, 110, 173, 110, 167 | 134, 139, 153, 145, 90, 64, 226, 126, 14, 144, 232, 218, 141, 153, 48, 163, 83, 168 | 49, 236, 13, 17, 11, 56, 62, 136, 1, 63, 95, 118, 49, 49, 100, 2, 203, 217, 31, 169 | 32, 141, 14, 144, 0>> 170 | 171 | @vp8_frame <<113, 27, 0, 227, 99, 175, 184, 147, 248, 30, 192, 95, 64, 166, 192, 191, 208, 7, 172 | 132, 38, 186, 200, 217, 201, 141, 2, 254, 56, 91, 187, 127, 94, 174, 121, 213, 17, 173 | 58, 149, 133, 112, 140, 65, 76, 115, 78, 81, 91, 9, 205, 177, 
242, 71, 187, 23, 174 | 190, 12, 164, 107, 84, 36, 86, 122, 106, 96, 58, 241, 248, 8, 240, 15, 2, 12, 231, 175 | 51, 156, 140, 18, 135, 111, 13, 73, 192, 148, 230, 131, 29, 20, 72, 4, 215, 212, 176 | 134, 76, 191, 182, 249, 110, 58, 113, 118, 107, 193, 209, 4, 105, 195, 226, 137, 177 | 208, 188, 78, 43, 238, 90, 37, 68, 221, 199, 148, 3, 92, 244, 212, 101, 16, 143, 7, 178 | 166, 121, 197, 192, 141, 176, 17, 182, 2, 54, 192, 70, 216, 8, 219, 1, 27, 96, 35, 179 | 108, 4, 109, 128, 141, 176, 17, 78, 35, 108, 4, 109, 128, 145, 160, 106, 70, 216, 180 | 8, 192, 128, 155, 23, 16, 148, 9, 158, 75, 62, 103, 230, 84, 114, 179, 210, 117, 181 | 187, 81, 74, 95, 94, 54, 11, 139, 181, 254, 202, 56, 241, 21, 119, 237, 191, 220, 182 | 115, 109, 175, 129, 26, 38, 220, 112, 141, 241, 182, 139, 24, 152, 45, 1, 93, 166, 183 | 93, 46, 187, 32, 145, 117, 169, 77, 32, 175, 203, 27, 209, 14, 181, 244, 136, 34, 184 | 27, 156, 15, 222, 141, 141, 39, 47, 228, 197, 93, 254, 193, 183, 77, 9, 193, 123, 185 | 187, 114, 204, 144, 33, 17, 223, 207, 106, 198, 221, 188, 80, 38, 52, 96, 70, 10, 186 | 90, 120, 92, 15, 46, 10, 109, 160, 214, 95, 10, 155, 83, 203, 168, 47, 122, 237, 187 | 29, 224, 240, 100, 3, 51, 51, 143, 33, 31, 143, 173, 116, 49, 159, 68, 12, 27, 75, 188 | 22, 202, 111, 106, 126, 82, 47, 108, 219, 254, 148, 1, 53, 206, 147, 47, 57, 210, 189 | 74, 109, 225, 250, 88, 204, 211, 212, 73, 183, 23, 153, 145, 191, 219, 243, 155, 190 | 33, 100, 92, 43, 135, 157, 93, 124, 30, 202, 107, 39, 236, 71, 179, 23, 165, 191, 191 | 109, 220, 230, 137, 113, 145, 153, 248, 100, 215, 148, 46, 172, 234, 1, 130, 241, 192 | 5, 236, 11, 190, 213, 45, 254, 106, 95, 253, 37, 73, 237, 193, 79, 19, 203, 230, 193 | 93, 200, 22, 40, 239, 242, 94, 78, 3, 139, 46, 253, 68, 131, 77, 215, 231, 166, 83, 194 | 118, 67, 112, 60, 181, 15, 244, 144, 192, 75, 96, 209, 110, 49, 43, 93, 50, 8, 100, 195 | 157, 143, 101, 226, 94, 173, 148, 126, 71, 70, 128, 174, 141, 234, 41, 43, 43, 158, 196 | 140, 82, 93, 84, 170, 53, 54, 65, 
69, 85, 222, 122, 165, 210, 70, 67, 20, 98, 101, 197 | 105, 224, 152, 223, 178, 20, 126, 153, 130, 131, 64, 32, 247, 215, 119, 54, 132, 198 | 251, 218, 90, 75, 141, 103, 8, 199, 249, 193, 26, 138, 81, 19, 236, 111, 185, 146, 199 | 134, 84, 58, 244, 77, 13, 3, 210, 96, 138, 44, 89, 148, 99, 152, 132, 20, 178, 75, 200 | 6, 163, 164, 156, 253, 23, 132, 202, 168, 160, 145, 16, 209, 165, 78, 98, 244, 118, 201 | 60, 3, 137, 108, 170, 188, 249, 163, 67, 249, 21, 47, 53, 49, 38, 137, 248, 184, 202 | 124, 81, 181, 47, 213, 148, 51, 162, 15, 202, 105, 3, 196, 101, 12, 254, 154, 230, 203 | 218, 109, 0, 169, 34, 141, 182, 122, 208, 245, 5, 37, 168, 80, 69, 204, 81, 238, 204 | 112, 241, 73, 205, 110, 143, 120, 119, 118, 246, 226, 32, 29, 225, 244, 63, 205, 205 | 115, 194, 203, 172, 102, 69, 111, 44, 136, 95, 212, 20, 201, 200, 174, 162, 213, 206 | 53, 140, 120, 140, 68, 229, 63, 242, 24, 230, 105, 84, 163, 154, 241, 184, 237, 207 | 246, 186, 130, 133, 63, 18, 199, 196, 141, 162, 82, 232, 207, 252, 53, 108, 22, 208 | 156, 248, 19, 202, 251, 52, 31, 129, 192, 146, 74, 89, 81, 143, 124, 219, 239, 230, 209 | 49, 101, 21, 247, 80, 129, 162, 125, 98, 200, 197, 197, 126, 161, 54, 103, 53, 78, 210 | 58, 207, 224, 77, 31, 71, 112, 190, 168, 177, 74, 2, 62, 44, 53, 5, 122, 97, 173, 211 | 171, 92, 137, 60, 52, 211, 222, 248, 221, 88, 240, 131, 158, 225, 129, 237, 97, 10, 212 | 123, 46, 85, 130, 163, 224, 250, 178, 34, 142, 228, 109, 48, 112, 66, 169, 110, 6, 213 | 46, 138, 144, 189, 145, 241, 158, 168, 178, 71, 103, 177, 5, 227, 243, 113, 16, 214 | 112, 47, 121, 57, 100, 76, 148, 226, 250, 151, 48, 20, 26, 117, 239, 151, 97, 91, 215 | 126, 246, 15, 22, 128, 147, 167, 189, 208, 190, 248, 152, 239, 180, 233, 71, 222, 216 | 198, 145, 32, 90, 162, 218, 163, 17, 0, 60, 232, 8, 119, 13, 129, 15, 27, 157, 182, 217 | 25, 171, 98, 83, 80, 89, 135, 9, 119, 252, 164, 132, 148, 150, 15, 102, 176, 144, 218 | 234, 138, 103, 151, 7, 54, 167, 246, 212, 57, 245, 250, 99, 60, 58, 56, 204, 47, 219 | 220, 
215, 109, 218, 216, 147, 246, 135, 3, 219, 219, 124, 109, 8, 187, 225, 86, 42, 220 | 147, 101, 65, 155, 43, 48, 19, 163, 127, 121, 96, 127, 160, 233, 78, 247, 100, 84, 221 | 45, 66, 228, 173, 255, 166, 245, 118, 21, 121, 90, 20, 69, 255, 228, 187, 229, 179, 222 | 62, 33, 31, 50, 216, 156, 70, 28, 255, 92, 105, 175, 105, 134, 33, 203, 99, 91, 83, 223 | 240, 155, 98, 107, 131, 89, 56, 242, 118, 49, 16, 29, 130, 234, 0, 180, 138, 0, 93, 224 | 8, 218, 127, 183, 34, 145, 237, 130, 123, 17, 80, 205, 233, 199, 169, 140, 71, 52, 225 | 247, 65, 8, 148, 55, 159, 88, 190, 170, 186, 31, 205, 141, 115, 233, 3, 77, 206, 226 | 160, 61, 29, 53, 148, 218, 64, 49, 196, 150, 164, 231, 238, 245, 183, 223, 71, 223, 227 | 185, 21, 133, 102, 136, 181, 176, 220, 209, 63, 130, 210, 237, 191, 108, 3, 109, 228 | 60, 210, 245, 81, 168, 156, 188, 66, 98, 242, 229, 227, 7, 159, 85, 64, 211, 34, 229 | 168, 215, 177, 22, 224, 19, 93, 96, 177, 229, 161, 48, 17, 27, 65, 176, 138, 141, 230 | 155, 142, 70, 32, 15, 124, 231, 226, 181, 118, 217, 198, 93, 250, 116, 141, 217, 231 | 164, 55, 11, 231, 103, 76, 193, 212, 188, 58, 160, 98, 183, 244, 83, 21, 208, 116, 232 | 117, 29, 179, 177, 21, 53, 151, 245, 168, 203, 221, 48, 102, 71, 34, 180, 160, 176, 233 | 202, 233, 21, 77, 17, 219, 9, 172, 60, 246, 143, 240, 107, 97, 94, 171, 147, 246, 234 | 166, 31, 5, 173, 235, 234, 65, 170, 64, 50, 195, 68, 165, 122, 19, 148, 190, 187, 235 | 119, 41, 13, 45, 63, 74, 29, 196, 183, 111, 243, 187, 194, 157, 188, 205, 106, 119, 236 | 184, 54, 254, 140, 47, 216, 53, 167, 128, 194, 9, 37, 58, 68, 197, 38, 5, 23, 154, 237 | 114, 174, 107, 132, 133, 210, 149, 84, 125, 139, 72, 226, 236, 167, 99, 158, 64, 238 | 196, 145, 137, 184, 100, 97, 24, 75, 145, 203, 85, 22, 79, 159, 212, 6, 34, 49, 44, 239 | 165, 69, 71, 19, 183, 200, 46, 202, 108, 16, 43, 126, 104, 81, 141, 161, 4, 48, 76, 240 | 55, 142, 63, 240, 40, 147, 102, 104, 231, 88, 82, 58, 149, 50, 239, 244, 0, 195, 241 | 108, 187, 105, 144, 52, 161, 109, 14, 253, 78, 62, 
79, 118, 236, 11, 125, 207, 49, 242 | 172, 123, 243, 81, 214, 0, 37, 74, 252, 38, 113, 5, 218, 31, 148, 35, 7, 149, 165, 243 | 47, 81, 112, 36, 37, 110, 47, 58, 118, 186, 191, 203, 248, 224, 152, 132, 72, 28, 244 | 94, 142, 155, 246, 129, 78, 216, 169, 169, 202, 220, 233, 31, 200, 46, 39, 166, 95, 245 | 56, 128, 127, 83, 240, 21, 172, 228, 86, 81, 136, 65, 204, 219, 149, 80, 113, 136, 246 | 51, 160, 35, 15, 189, 30, 239, 230, 103, 108, 232, 196, 136, 69, 69, 7, 227, 49, 247 | 161, 241, 247, 140, 29, 70, 234, 87, 42, 190, 199, 34, 219, 201, 70, 145, 227, 237, 248 | 71, 90, 36, 11, 94, 119, 4, 193, 9, 206, 177, 115, 135, 111, 232, 154, 213, 63, 38, 249 | 210, 71, 215, 114, 161, 1, 72, 130, 135, 86, 50, 237, 52, 235, 161, 73, 41, 93, 250 | 128, 143, 100, 201, 181, 23, 20, 209, 152, 163, 93, 214, 159, 10, 99, 39, 63, 226, 251 | 7, 35, 142, 143, 195, 149, 158, 31, 113, 211, 70, 150, 100, 251, 174, 147, 248, 57, 252 | 197, 182, 127, 97, 169, 4, 188, 63, 180, 193, 14, 155, 212, 244, 32, 195, 185, 34, 253 | 124, 89, 101, 106, 85, 50, 113, 232, 148, 231, 73, 28, 124, 193, 152, 168, 63, 52, 254 | 64, 205, 26, 205, 218, 107, 85, 155, 196, 178, 213, 15, 90, 193, 68, 221, 128, 128, 255 | 23, 43, 52, 152, 253, 78, 32, 8, 71, 255, 228, 82, 174, 34, 7, 3, 222, 34, 77, 68, 256 | 249, 116, 60, 129, 23, 179, 75, 220, 112, 37, 79, 201, 124, 233, 153, 225, 251, 48, 257 | 39, 198, 118, 97, 45, 129, 210, 87, 113, 186, 223, 226, 211, 33, 194, 75, 129, 46, 258 | 122, 164, 228, 85, 9, 11, 155, 105, 17, 215, 246, 37, 6, 75, 123, 145, 243, 35, 8, 259 | 166, 61, 29, 211, 157, 217, 97, 173, 252, 28, 107, 118, 151, 72, 122, 254, 255, 5, 260 | 178, 132, 161, 175, 223, 188, 0, 160, 117, 125, 65, 160, 244, 121, 229, 200, 192, 261 | 159, 21, 81, 91, 126, 44, 86, 131, 155, 31, 39, 41, 75, 75, 214, 44, 66, 249, 107, 262 | 211, 200, 204, 73, 151, 142, 124, 238, 111, 42, 180, 215, 125, 23, 149, 243, 29, 263 | 70, 228, 225, 222, 119, 118, 57, 216, 136, 45, 34, 190, 172, 145, 122, 145, 212, 264 | 68, 76, 
182, 53, 117, 93, 208, 166, 223, 215, 39, 170, 77, 178, 244, 228, 15, 102, 265 | 235, 16, 222, 224, 199, 42, 213, 88, 210, 106, 34, 142, 6, 79, 36, 72, 252, 225, 266 | 233, 4, 251, 111, 195, 87, 210, 14, 88, 137, 95, 75, 233, 68, 161, 52, 66, 144, 267 | 152, 184, 210, 94, 122, 186, 107, 79, 36, 13, 139, 203, 224, 17, 86, 29, 161, 222, 268 | 149, 118, 208, 195, 20, 81, 71, 154, 21, 206, 39, 49, 244, 210, 13, 24, 7, 239, 20, 269 | 15, 160, 2, 250, 212, 65, 61, 243, 90, 63, 68, 193, 38, 187, 247, 69, 179, 0, 193, 270 | 123, 220, 87, 219, 239, 160, 39, 104, 219, 123, 170, 165, 3, 90, 238, 78, 204, 237, 271 | 97, 245, 132, 242, 66, 233, 31, 248, 157, 152, 105, 29, 215, 43, 144, 188, 93, 21, 272 | 189, 153, 239, 52, 171, 162, 19, 58, 200, 80, 35, 215, 177, 168, 199, 195, 241, 273 | 201, 218, 168, 149, 71, 44, 45, 32, 210, 123, 40, 49, 128, 45, 39, 218, 71, 56, 81, 274 | 171, 91, 192, 138, 45, 210, 222, 95, 176, 78, 250, 173, 8, 178, 175, 37, 195, 139, 275 | 204, 209, 62, 212, 129, 253, 48, 153, 77, 67, 196, 165, 46, 34, 53, 202, 246, 66, 276 | 26, 58, 131, 73, 4, 128, 87, 122, 26, 183, 33, 35, 189, 79, 32, 244, 176, 146, 172, 277 | 111, 142, 216, 117, 159, 124, 27, 28, 109, 90, 250, 72, 237, 19, 110, 124, 122, 278 | 138, 148, 104, 243, 245, 220, 124, 157, 132, 105, 120, 55, 162, 70, 22, 27, 207, 279 | 250, 30, 232, 155, 62, 112, 161, 130, 151, 153, 60, 106, 189, 18, 240, 42, 202, 280 | 163, 215, 207, 53, 45, 161, 215, 215, 16, 169, 97, 55, 205, 151, 101, 54, 117, 247, 281 | 157, 108, 185, 56, 27, 101, 84, 150, 2, 37, 191, 153, 173, 174, 228, 68, 0, 217, 282 | 21, 212, 116, 246, 155, 111, 11, 160, 0>> 283 | 284 | @opus_frame <<120, 12, 65, 10, 226, 218, 78, 44, 178, 170, 67, 85, 217, 117, 65, 205, 95, 118, 285 | 107, 76, 36, 55, 13, 188, 245, 18, 11, 194, 57, 176, 212, 48, 198, 41, 85, 192, 286 | 142, 204, 5, 106, 217, 175, 162, 62, 128, 161, 69, 136, 234, 30, 43, 165, 152, 287 | 104, 143>> 288 | 289 | # Use ffmpeg to extract the first frame of the video 290 | # ffmpeg -i 
# Exercises `Xav.Decoder.decode/2` (plus `Xav.Decoder.flush/1` where the test
# only obtains a frame after flushing) against the packet fixtures stored in
# this module's attributes.
describe "decode/2" do
  test "audio" do
    decoder = Xav.Decoder.new(:opus)

    assert {:ok, %Xav.Frame{data: data, samples: 960, pts: 0, format: :flt}} =
             Xav.Decoder.decode(decoder, @opus_frame)

    assert byte_size(data) == 7680
  end

  test "audio with resampling" do
    decoder = Xav.Decoder.new(:opus, out_format: :u8, out_sample_rate: 16_000, out_channels: 1)

    # After changing out_format and out_sample_rate we should get fewer samples
    # and a smaller payload than in the "audio" test above.
    assert {:ok, %Xav.Frame{data: data, samples: 304, pts: 0, format: :u8}} =
             Xav.Decoder.decode(decoder, @opus_frame)

    assert byte_size(data) == 304
  end

  test "video keyframe" do
    decoder = Xav.Decoder.new(:vp8)

    assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :yuv420p}} =
             Xav.Decoder.decode(decoder, @vp8_keyframe)

    # Integer division keeps the expectation an integer, like byte_size/1;
    # `/` would produce a float that only matches through `==` coercion.
    assert byte_size(frame) == div(640 * 480 * 3, 2)
  end

  test "video without prior keyframe" do
    decoder = Xav.Decoder.new(:vp8)

    assert {:error, :no_keyframe} = Xav.Decoder.decode(decoder, @vp8_frame)
  end

  test "h264 video" do
    decoder = Xav.Decoder.new(:h264)

    # No frame is returned for the packet itself; it is only handed out on flush.
    assert :ok = Xav.Decoder.decode(decoder, @h264_frame)

    assert {:ok, [%Xav.Frame{width: 1280, height: 720, pts: 0, format: :yuv420p}]} =
             Xav.Decoder.flush(decoder)
  end

  test "hevc video" do
    decoder = Xav.Decoder.new(:hevc)

    # Same contract as the h264 case: the frame shows up on flush.
    assert :ok = Xav.Decoder.decode(decoder, @h265_frame)

    assert {:ok, [%Xav.Frame{width: 1920, height: 1080, pts: 0, format: :yuv420p}]} =
             Xav.Decoder.flush(decoder)
  end

  test "convert video frame" do
    decoder = Xav.Decoder.new(:vp8, out_format: :rgb24)

    assert {:ok, %Xav.Frame{width: 640, height: 480, pts: 0, data: frame, format: :rgb24}} =
             Xav.Decoder.decode(decoder, @vp8_keyframe)

    assert byte_size(frame) == 640 * 480 * 3
  end

  test "scale video frame" do
    decoder = Xav.Decoder.new(:vp8, out_width: 240, out_height: 180)

    assert {:ok, %Xav.Frame{width: 240, height: 180, pts: 0, data: frame, format: :yuv420p}} =
             Xav.Decoder.decode(decoder, @vp8_keyframe)

    assert byte_size(frame) == div(240 * 180 * 3, 2)
  end
end
# Exercises `Xav.Encoder.encode/2` and `Xav.Encoder.flush/1` for video (h264,
# hevc) and audio (pcm_alaw) encoders.
#
# The encoder is passed around as a reference and never rebound by the calls
# below, so the tests simply map over their input frames and flush once at
# the end instead of threading the encoder through `Stream.transform`.
describe "encode/2" do
  setup do
    frame = %Xav.Frame{
      type: :video,
      data: File.read!("test/fixtures/video_converter/frame_360x240.yuv"),
      format: :yuv420p,
      width: 360,
      height: 240,
      pts: 0
    }

    %{frame: frame}
  end

  test "encode a frame", %{frame: frame} do
    encoder =
      Xav.Encoder.new(:h264,
        width: 360,
        height: 240,
        format: :yuv420p,
        time_base: {1, 25}
      )

    # Nothing is emitted for the first frame; the packet only appears on flush.
    assert [] = Xav.Encoder.encode(encoder, frame)

    assert [
             %Xav.Packet{
               data: data,
               dts: 0,
               pts: 0,
               keyframe?: true
             }
           ] = Xav.Encoder.flush(encoder)

    assert byte_size(data) > 0
  end

  test "encode multiple frames", %{frame: frame} do
    encoder =
      Xav.Encoder.new(:h264,
        width: 360,
        height: 240,
        format: :yuv420p,
        time_base: {1, 25},
        gop_size: 1
      )

    frames = for pts <- 0..2, do: %{frame | pts: pts}

    # Encode everything first, then flush, so the packet order is explicit.
    encoded = Enum.flat_map(frames, &Xav.Encoder.encode(encoder, &1))
    packets = encoded ++ Xav.Encoder.flush(encoder)

    assert length(packets) == 3
    assert Enum.all?(packets, & &1.keyframe?)
  end

  test "no bframes inserted", %{frame: frame} do
    encoder =
      Xav.Encoder.new(:hevc,
        width: 360,
        height: 240,
        format: :yuv420p,
        time_base: {1, 25},
        max_b_frames: 0
      )

    # 20 copies of the fixture frame with pts 0..19.
    frames =
      frame
      |> Stream.iterate(fn previous -> %{previous | pts: previous.pts + 1} end)
      |> Enum.take(20)

    encoded = Enum.flat_map(frames, &Xav.Encoder.encode(encoder, &1))
    packets = encoded ++ Xav.Encoder.flush(encoder)

    assert length(packets) == 20
    assert Enum.all?(packets, &(&1.dts == &1.pts)), "dts should be equal to pts"
  end

  test "encode audio samples" do
    audio_file = "test/fixtures/encoder/audio/input-s16le.raw"
    ref_file = "test/fixtures/encoder/audio/reference.al"

    encoder =
      Xav.Encoder.new(:pcm_alaw,
        format: :s16,
        channel_layout: "mono",
        sample_rate: 8000
      )

    # Feed the raw input in 20-byte slices (the last slice may be shorter).
    encoded =
      audio_file
      |> File.read!()
      |> :binary.bin_to_list()
      |> Enum.chunk_every(20)
      |> Enum.flat_map(fn bytes ->
        frame = %Xav.Frame{
          type: :audio,
          data: :binary.list_to_bin(bytes),
          format: :s16,
          pts: 0
        }

        Xav.Encoder.encode(encoder, frame)
      end)

    # Flushed packets are part of the encoder output and must be compared too.
    # Previously the flush ran as the `after_fun` of Stream.transform/4, whose
    # return value is discarded, so they were silently dropped.
    flushed = Xav.Encoder.flush(encoder)
    encoded_data = Enum.map_join(encoded ++ flushed, & &1.data)

    assert File.read!(ref_file) == encoded_data
  end
end
-------------------------------------------------------------------------------- /test/fixtures/decoder/sample_h265.h265: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/decoder/sample_h265.h265 -------------------------------------------------------------------------------- /test/fixtures/encoder/audio/input-s16le.raw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/encoder/audio/input-s16le.raw -------------------------------------------------------------------------------- /test/fixtures/encoder/audio/reference.al: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/encoder/audio/reference.al -------------------------------------------------------------------------------- /test/fixtures/one_frame.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/one_frame.mp4 -------------------------------------------------------------------------------- /test/fixtures/sample_av1.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_av1.mkv -------------------------------------------------------------------------------- /test/fixtures/sample_h264.h264: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_h264.h264 
-------------------------------------------------------------------------------- /test/fixtures/sample_h264.mkv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_h264.mkv -------------------------------------------------------------------------------- /test/fixtures/sample_h264.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_h264.mp4 -------------------------------------------------------------------------------- /test/fixtures/sample_vp8.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_vp8.webm -------------------------------------------------------------------------------- /test/fixtures/sample_vp9.webm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/elixir-webrtc/xav/a0003fd695073f5052fba09e7de4de638985ce01/test/fixtures/sample_vp9.webm -------------------------------------------------------------------------------- /test/fixtures/stt/README.md: -------------------------------------------------------------------------------- 1 | # STT Fixtures 2 | 3 | * [melnet_sample_0.mp3](https://audio-samples.github.io/) - 22050Hz, 1 channel, fltp 4 | * [harvard.wav](https://www.kaggle.com/datasets/pavanelisetty/sample-audio-files-for-speech-recognition) - 44100Hz, 2 channels, s16 5 | * harvard.mp3 - created by calling ffmpeg -i harvard.wav harvard.mp3 - 44100Hz, 2 channels, fltp 6 | 7 | -------------------------------------------------------------------------------- /test/fixtures/stt/harvard.mp3: 
defmodule Xav.ReaderTest do
  # Tests for `Xav.Reader`: opening inputs, sequential reads, seeking,
  # streaming, conversion to Nx tensors and a speech-to-text round trip.
  use ExUnit.Case, async: true

  @sample_mp4 "./test/fixtures/sample_h264.mp4"

  test "new/1" do
    assert {:ok, %Xav.Reader{}} = Xav.Reader.new(@sample_mp4)
    assert {:error, _reason} = Xav.Reader.new("non_existing_input")
  end

  test "new!/1" do
    assert %Xav.Reader{} = Xav.Reader.new!(@sample_mp4)
    assert_raise RuntimeError, fn -> Xav.Reader.new!("non_existing_input") end
  end

  test "next_frame/1" do
    {:ok, r} = Xav.Reader.new(@sample_mp4)
    # the file has 30fps, try to read 5 seconds (exactly 30 * 5 frames)
    for _i <- 1..(30 * 5), do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
  end

  describe "seek/2" do
    test "works with video" do
      {:ok, r} = Xav.Reader.new(@sample_mp4)
      assert :ok = Xav.Reader.seek(r, 5.0)
      assert {:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)
    end

    test "works with audio" do
      {:ok, r} = Xav.Reader.new("./test/fixtures/stt/harvard.mp3", read: :audio)
      assert :ok = Xav.Reader.seek(r, 5.0)
      assert {:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)
    end

    test "negative timestamp just seeks to beginning of file" do
      {:ok, r} = Xav.Reader.new(@sample_mp4)
      assert :ok = Xav.Reader.seek(r, -5.0)
      assert {:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)
    end

    test "timestamp greater than duration" do
      {:ok, r} = Xav.Reader.new(@sample_mp4)
      # `* 1.0` turns the target into a float, like the literals used above.
      assert :ok = Xav.Reader.seek(r, (r.duration + 1) * 1.0)
      assert {:error, :eof} = Xav.Reader.next_frame(r)
    end

    test "seek back returns same frame" do
      {:ok, r} = Xav.Reader.new(@sample_mp4)
      assert :ok = Xav.Reader.seek(r, 0.0)
      assert {:ok, %Xav.Frame{} = first} = Xav.Reader.next_frame(r)
      assert :ok = Xav.Reader.seek(r, 5.0)
      assert {:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r)
      assert :ok = Xav.Reader.seek(r, 0.0)
      assert {:ok, %Xav.Frame{} = other_first} = Xav.Reader.next_frame(r)
      assert first == other_first
    end
  end

  test "stream!" do
    frames = Xav.Reader.stream!(@sample_mp4)

    # The result has to be asserted; a bare Enum.all?/2 call is discarded and
    # the test could never fail on a non-frame element.
    assert Enum.all?(frames, fn frame -> is_struct(frame, Xav.Frame) end)
  end

  test "to_nx/1" do
    {:ok, r} = Xav.Reader.new(@sample_mp4)
    {:ok, frame} = Xav.Reader.next_frame(r)
    assert %Nx.Tensor{} = Xav.Frame.to_nx(frame)
  end

  test "eof" do
    {:ok, r} = Xav.Reader.new("./test/fixtures/one_frame.mp4")
    assert {:ok, _frame} = Xav.Reader.next_frame(r)
    assert {:error, :eof} = Xav.Reader.next_frame(r)
  end

  # One generated test per {codec, container} fixture.
  @formats [{"h264", "h264"}, {"h264", "mkv"}, {"vp8", "webm"}, {"vp9", "webm"}, {"av1", "mkv"}]
  for {codec, container} <- @formats do
    name = "#{codec} #{container}"
    file = "./test/fixtures/sample_#{codec}.#{container}"

    test name do
      {:ok, r} = Xav.Reader.new(unquote(file))
      # try to read 100 frames
      for _i <- 1..100, do: assert({:ok, %Xav.Frame{}} = Xav.Reader.next_frame(r))
    end
  end

  # This one might take some time on CI
  @tag timeout: 120_000
  test "speech to text" do
    # Build the Whisper serving once and reuse it for every fixture.
    serving = whisper_serving()

    for {path, expected_output} <- [
          {"./test/fixtures/stt/melnet_sample_0.mp3",
           """
           My thought, I have nobody by a beauty and will as you poured. \
           Mr. Rochester has served and that so don't find a simple and \
           devoted aboud to what might in a\
           """},
          {"./test/fixtures/stt/harvard.wav",
           """
           The stale smell of old beer lingers. It takes heat to bring out the odor. \
           A cold dip restores health in zest. A salt pickle tastes fine with ham. \
           Tacos all pastora are my favorite. A zestful food is the hot cross bun.\
           """},
          # This mp3 file results in an empty buffer after first conversion.
          {"./test/fixtures/stt/harvard.mp3",
           """
           The stale smell of old beer lingers. It takes heat to bring out the odor. \
           A cold dip restores health in zest. A salt pickle tastes fine with ham. \
           Tacos all pastora are my favorite. A zestful food is the hot cross bun.\
           """}
        ] do
      assert_transcription(serving, path, expected_output)
    end
  end

  # Loads the whisper-tiny model, featurizer, tokenizer and generation config
  # and wraps them in a speech-to-text serving compiled with EXLA.
  defp whisper_serving do
    repo = {:hf, "openai/whisper-tiny"}

    {:ok, whisper} = Bumblebee.load_model(repo)
    {:ok, featurizer} = Bumblebee.load_featurizer(repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(repo)
    {:ok, generation_config} = Bumblebee.load_generation_config(repo)

    Bumblebee.Audio.speech_to_text_whisper(whisper, featurizer, tokenizer, generation_config,
      defn_options: [compiler: EXLA]
    )
  end

  # Reads the audio at `path` as mono 16 kHz :flt frames, runs it through
  # `serving` and asserts that the single returned chunk equals `expected_output`.
  defp assert_transcription(serving, path, expected_output) do
    batch =
      path
      |> Xav.Reader.stream!(
        read: :audio,
        out_channels: 1,
        out_format: :flt,
        out_sample_rate: 16_000
      )
      |> Enum.map(&Xav.Frame.to_nx/1)
      |> Nx.Batch.concatenate()

    batch = Nx.Defn.jit_apply(&Function.identity/1, [batch])
    assert %{chunks: chunks} = Nx.Serving.run(serving, batch)

    assert [%{text: ^expected_output}] = chunks
  end
end
# Exercises `Xav.VideoConverter.convert/2`: pixel-format conversion, scaling,
# both combined, and reuse of one converter across input resolutions.
describe "convert/2" do
  setup do
    frame_480p = %Xav.Frame{
      type: :video,
      data: File.read!("test/fixtures/video_converter/frame_480x360.yuv"),
      format: :yuv420p,
      width: 480,
      height: 360,
      pts: 0
    }

    %{
      converter: Xav.VideoConverter.new(out_format: :rgb24),
      frame_480p: frame_480p
    }
  end

  test "convert video format", %{converter: converter, frame_480p: frame_480p} do
    assert %Xav.Frame{
             type: :video,
             data: data,
             format: :rgb24,
             width: 480,
             height: 360,
             pts: 0
           } = Xav.VideoConverter.convert(converter, frame_480p)

    assert byte_size(data) == 480 * 360 * 3
  end

  test "converter re-init on resolution change", %{converter: converter, frame_480p: frame_480p} do
    frame_360p = %Xav.Frame{
      type: :video,
      data: File.read!("test/fixtures/video_converter/frame_360x240.yuv"),
      format: :yuv420p,
      width: 360,
      height: 240
    }

    # The same converter is fed two different input resolutions in a row.
    assert %Xav.Frame{format: :rgb24, data: ref_frame1} =
             Xav.VideoConverter.convert(converter, frame_480p)

    assert %Xav.Frame{format: :rgb24, data: ref_frame2} =
             Xav.VideoConverter.convert(converter, frame_360p)

    assert byte_size(ref_frame1) == 480 * 360 * 3
    assert byte_size(ref_frame2) == 360 * 240 * 3
  end

  test "scale video frame", %{frame_480p: frame_480p} do
    # Only the width is given; the asserted height 276 keeps the 480x360 ratio.
    converter = Xav.VideoConverter.new(out_width: 368)

    assert %Xav.Frame{
             type: :video,
             format: :yuv420p,
             data: data,
             width: 368,
             height: 276
           } = Xav.VideoConverter.convert(converter, frame_480p)

    # Integer division keeps the expectation an integer, like byte_size/1;
    # `/` would produce a float that only matches through `==` coercion.
    assert byte_size(data) == div(368 * 276 * 3, 2)
  end

  test "scale and convert video frame", %{frame_480p: frame_480p} do
    converter = Xav.VideoConverter.new(out_width: 360, out_height: 240, out_format: :rgb24)

    assert %Xav.Frame{
             type: :video,
             format: :rgb24,
             data: data,
             width: 360,
             height: 240
           } = Xav.VideoConverter.convert(converter, frame_480p)

    # Check the produced buffer as well, as the other rgb24 tests do.
    assert byte_size(data) == 360 * 240 * 3
  end
end