├── .gitignore ├── LICENSE ├── README.md ├── build_executables.py ├── docker ├── Dockerfile ├── README.md ├── app.py ├── requirements.txt ├── startup.sh └── wsgi.py ├── docs ├── coordinate.png ├── equirect.jpg ├── fisheye.jpg ├── motion1.jpg ├── motion2.jpg ├── motion3.jpg ├── spatial-audio-rfc.md ├── spherical-video-rfc.md ├── spherical-video-v2-rfc.md └── vr180.md ├── requirements.txt ├── setup.py ├── spatial-audio ├── LICENSE ├── NOTICE ├── README.md ├── ambisonic-correction-filters │ ├── correction_filter_0.wav │ ├── correction_filter_1.wav │ ├── correction_filter_2.wav │ └── correction_filter_3.wav ├── raw-symmetric-cube-hrirs │ ├── E-35_A-135.wav │ ├── E-35_A-45.wav │ ├── E-35_A135.wav │ ├── E-35_A45.wav │ ├── E35_A-135.wav │ ├── E35_A-45.wav │ ├── E35_A135.wav │ ├── E35_A45.wav │ └── cube.config ├── symmetric-ambisonic-binaural-decoder │ ├── binaural_decoder_0.wav │ ├── binaural_decoder_1.wav │ ├── binaural_decoder_2.wav │ └── binaural_decoder_3.wav └── third_party │ ├── LICENSE │ ├── azi_135_ele_-35_DFC.wav │ ├── azi_135_ele_35_DFC.wav │ ├── azi_45_ele_-35_DFC.wav │ └── azi_45_ele_35_DFC.wav └── spatialmedia ├── LICENSE ├── README.md ├── __init__.py ├── __main__.py ├── gui.py ├── metadata_utils.py ├── mpeg ├── __init__.py ├── box.py ├── constants.py ├── container.py ├── mpeg4_container.py ├── sa3d.py └── sv3d.py └── spatial_media_metadata_injector.spec /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | /.venv 4 | /dist 5 | /build 6 | 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Google Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | 6 | Unless required by applicable law or agreed to in writing, software 7 | distributed under the License is distributed on an "AS IS" BASIS, 8 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 9 | See the License for the specific language governing permissions and 10 | limitations under the License. 11 | 12 | 13 | Apache License 14 | Version 2.0, January 2004 15 | http://www.apache.org/licenses/ 16 | 17 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 18 | 19 | 1. Definitions. 20 | 21 | "License" shall mean the terms and conditions for use, reproduction, 22 | and distribution as defined by Sections 1 through 9 of this document. 23 | 24 | "Licensor" shall mean the copyright owner or entity authorized by 25 | the copyright owner that is granting the License. 26 | 27 | "Legal Entity" shall mean the union of the acting entity and all 28 | other entities that control, are controlled by, or are under common 29 | control with that entity. For the purposes of this definition, 30 | "control" means (i) the power, direct or indirect, to cause the 31 | direction or management of such entity, whether by contract or 32 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 33 | outstanding shares, or (iii) beneficial ownership of such entity. 34 | 35 | "You" (or "Your") shall mean an individual or Legal Entity 36 | exercising permissions granted by this License. 37 | 38 | "Source" form shall mean the preferred form for making modifications, 39 | including but not limited to software source code, documentation 40 | source, and configuration files. 
41 | 42 | "Object" form shall mean any form resulting from mechanical 43 | transformation or translation of a Source form, including but 44 | not limited to compiled object code, generated documentation, 45 | and conversions to other media types. 46 | 47 | "Work" shall mean the work of authorship, whether in Source or 48 | Object form, made available under the License, as indicated by a 49 | copyright notice that is included in or attached to the work 50 | (an example is provided in the Appendix below). 51 | 52 | "Derivative Works" shall mean any work, whether in Source or Object 53 | form, that is based on (or derived from) the Work and for which the 54 | editorial revisions, annotations, elaborations, or other modifications 55 | represent, as a whole, an original work of authorship. For the purposes 56 | of this License, Derivative Works shall not include works that remain 57 | separable from, or merely link (or bind by name) to the interfaces of, 58 | the Work and Derivative Works thereof. 59 | 60 | "Contribution" shall mean any work of authorship, including 61 | the original version of the Work and any modifications or additions 62 | to that Work or Derivative Works thereof, that is intentionally 63 | submitted to Licensor for inclusion in the Work by the copyright owner 64 | or by an individual or Legal Entity authorized to submit on behalf of 65 | the copyright owner. For the purposes of this definition, "submitted" 66 | means any form of electronic, verbal, or written communication sent 67 | to the Licensor or its representatives, including but not limited to 68 | communication on electronic mailing lists, source code control systems, 69 | and issue tracking systems that are managed by, or on behalf of, the 70 | Licensor for the purpose of discussing and improving the Work, but 71 | excluding communication that is conspicuously marked or otherwise 72 | designated in writing by the copyright owner as "Not a Contribution." 73 | 74 | "Contributor" shall mean Licensor and any individual or Legal Entity 75 | on behalf of whom a Contribution has been received by Licensor and 76 | subsequently incorporated within the Work. 77 | 78 | 2. Grant of Copyright License. Subject to the terms and conditions of 79 | this License, each Contributor hereby grants to You a perpetual, 80 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 81 | copyright license to reproduce, prepare Derivative Works of, 82 | publicly display, publicly perform, sublicense, and distribute the 83 | Work and such Derivative Works in Source or Object form. 84 | 85 | 3. Grant of Patent License. Subject to the terms and conditions of 86 | this License, each Contributor hereby grants to You a perpetual, 87 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 88 | (except as stated in this section) patent license to make, have made, 89 | use, offer to sell, sell, import, and otherwise transfer the Work, 90 | where such license applies only to those patent claims licensable 91 | by such Contributor that are necessarily infringed by their 92 | Contribution(s) alone or by combination of their Contribution(s) 93 | with the Work to which such Contribution(s) was submitted. 
If You 94 | institute patent litigation against any entity (including a 95 | cross-claim or counterclaim in a lawsuit) alleging that the Work 96 | or a Contribution incorporated within the Work constitutes direct 97 | or contributory patent infringement, then any patent licenses 98 | granted to You under this License for that Work shall terminate 99 | as of the date such litigation is filed. 100 | 101 | 4. Redistribution. You may reproduce and distribute copies of the 102 | Work or Derivative Works thereof in any medium, with or without 103 | modifications, and in Source or Object form, provided that You 104 | meet the following conditions: 105 | 106 | (a) You must give any other recipients of the Work or 107 | Derivative Works a copy of this License; and 108 | 109 | (b) You must cause any modified files to carry prominent notices 110 | stating that You changed the files; and 111 | 112 | (c) You must retain, in the Source form of any Derivative Works 113 | that You distribute, all copyright, patent, trademark, and 114 | attribution notices from the Source form of the Work, 115 | excluding those notices that do not pertain to any part of 116 | the Derivative Works; and 117 | 118 | (d) If the Work includes a "NOTICE" text file as part of its 119 | distribution, then any Derivative Works that You distribute must 120 | include a readable copy of the attribution notices contained 121 | within such NOTICE file, excluding those notices that do not 122 | pertain to any part of the Derivative Works, in at least one 123 | of the following places: within a NOTICE text file distributed 124 | as part of the Derivative Works; within the Source form or 125 | documentation, if provided along with the Derivative Works; or, 126 | within a display generated by the Derivative Works, if and 127 | wherever such third-party notices normally appear. The contents 128 | of the NOTICE file are for informational purposes only and 129 | do not modify the License. You may add Your own attribution 130 | notices within Derivative Works that You distribute, alongside 131 | or as an addendum to the NOTICE text from the Work, provided 132 | that such additional attribution notices cannot be construed 133 | as modifying the License. 134 | 135 | You may add Your own copyright statement to Your modifications and 136 | may provide additional or different license terms and conditions 137 | for use, reproduction, or distribution of Your modifications, or 138 | for any such Derivative Works as a whole, provided Your use, 139 | reproduction, and distribution of the Work otherwise complies with 140 | the conditions stated in this License. 141 | 142 | 5. Submission of Contributions. Unless You explicitly state otherwise, 143 | any Contribution intentionally submitted for inclusion in the Work 144 | by You to the Licensor shall be under the terms and conditions of 145 | this License, without any additional terms or conditions. 146 | Notwithstanding the above, nothing herein shall supersede or modify 147 | the terms of any separate license agreement you may have executed 148 | with Licensor regarding such Contributions. 149 | 150 | 6. Trademarks. This License does not grant permission to use the trade 151 | names, trademarks, service marks, or product names of the Licensor, 152 | except as required for reasonable and customary use in describing the 153 | origin of the Work and reproducing the content of the NOTICE file. 154 | 155 | 7. Disclaimer of Warranty. 
Unless required by applicable law or 156 | agreed to in writing, Licensor provides the Work (and each 157 | Contributor provides its Contributions) on an "AS IS" BASIS, 158 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 159 | implied, including, without limitation, any warranties or conditions 160 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 161 | PARTICULAR PURPOSE. You are solely responsible for determining the 162 | appropriateness of using or redistributing the Work and assume any 163 | risks associated with Your exercise of permissions under this License. 164 | 165 | 8. Limitation of Liability. In no event and under no legal theory, 166 | whether in tort (including negligence), contract, or otherwise, 167 | unless required by applicable law (such as deliberate and grossly 168 | negligent acts) or agreed to in writing, shall any Contributor be 169 | liable to You for damages, including any direct, indirect, special, 170 | incidental, or consequential damages of any character arising as a 171 | result of this License or out of the use or inability to use the 172 | Work (including but not limited to damages for loss of goodwill, 173 | work stoppage, computer failure or malfunction, or any and all 174 | other commercial damages or losses), even if such Contributor 175 | has been advised of the possibility of such damages. 176 | 177 | 9. Accepting Warranty or Additional Liability. While redistributing 178 | the Work or Derivative Works thereof, You may choose to offer, 179 | and charge a fee for, acceptance of support, warranty, indemnity, 180 | or other liability obligations and/or rights consistent with this 181 | License. However, in accepting such obligations, You may act only 182 | on Your own behalf and on Your sole responsibility, not on behalf 183 | of any other Contributor, and only if You agree to indemnify, 184 | defend, and hold each Contributor harmless for any liability 185 | incurred by, or claims asserted against, such Contributor by reason 186 | of your accepting any such warranty or additional liability. 
187 | 188 | END OF TERMS AND CONDITIONS 189 | 190 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spatial Media 2 | 3 | A collection of specifications and tools for 360° video and spatial audio, including: 4 | 5 | - [Spatial Audio](docs/spatial-audio-rfc.md) metadata specification 6 | - [Spherical Video](docs/spherical-video-rfc.md) metadata specification 7 | - [Spherical Video V2](docs/spherical-video-v2-rfc.md) metadata specification 8 | - [VR180 Video Format](docs/vr180.md) VR180 video format 9 | - [Spatial Media tools](spatialmedia/) for injecting spatial media metadata in media files 10 | -------------------------------------------------------------------------------- /build_executables.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | import time 5 | import platform 6 | import subprocess 7 | import shutil 8 | 9 | def get_platform_name(): 10 | """Get standardized platform name""" 11 | if sys.platform.startswith('win'): 12 | return 'windows' 13 | elif sys.platform.startswith('darwin'): 14 | return 'macos' 15 | elif sys.platform.startswith('linux'): 16 | return 'linux' 17 | return sys.platform 18 | 19 | def retry_rmtree(directory_name, max_retries=3, delay=1): 20 | """Retry removing directory tree with multiple attempts""" 21 | for attempt in range(max_retries): 22 | try: 23 | if os.path.exists(directory_name): 24 | shutil.rmtree(directory_name) 25 | return True 26 | except PermissionError as error: 27 | if attempt == max_retries - 1: 28 | print(f"Warning: Could not remove {directory_name}: {error}") 29 | return False 30 | print(f"Retrying removal of {directory_name} in {delay} seconds...") 31 | time.sleep(delay) 32 | return False 33 | 34 | def clean_build_directories(): 35 | """Clean up build directories""" 36 | directories_to_clean = ['build', 'dist'] 37 | for directory_name in directories_to_clean: 38 | if not retry_rmtree(directory_name): 39 | print(f"Warning: Proceeding without cleaning {directory_name}") 40 | 41 | def get_executable_name(): 42 | """Get platform-specific executable name""" 43 | platform_name = get_platform_name() 44 | if platform_name == 'windows': 45 | return 'Spatial Media Metadata Injector.exe' 46 | elif platform_name == 'macos': 47 | return 'Spatial Media Metadata Injector.app' 48 | else: 49 | return 'Spatial Media Metadata Injector' 50 | 51 | def build_executable(): 52 | """Build the executable for the current platform""" 53 | # Clean previous builds 54 | try: 55 | clean_build_directories() 56 | except Exception as error: 57 | print(f"Warning: Error during cleanup: {error}") 58 | print("Attempting to continue with build...") 59 | 60 | # Get the specification file path 61 | specification_file = os.path.join('spatialmedia', 'spatial_media_metadata_injector.spec') 62 | 63 | # Build command 64 | command = ['pyinstaller', '--clean', specification_file] 65 | 66 | try: 67 | subprocess.check_call(command) 68 | platform_name = get_platform_name() 69 | exe_name = get_executable_name() 70 | print(f"Successfully built executable for {platform_name}") 71 | 72 | # Show output location 73 | print(f"Output: ./dist/{exe_name}") 74 | 75 | # Set executable permissions for Unix-like systems 76 | if platform_name in ('linux', 'macos'): 77 | output_path = os.path.join('dist', exe_name) 78 | if os.path.exists(output_path): 79 | os.chmod(output_path, 0o755) 
80 | 81 | except subprocess.CalledProcessError as error: 82 | print(f"Error building executable: {error}") 83 | sys.exit(1) 84 | 85 | if __name__ == "__main__": 86 | build_executable() 87 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # Install App 3 | ############################################################################## 4 | FROM python:3.14.0a2-alpine3.20 5 | WORKDIR /spatialmediatools/app 6 | ENV PATH="${PATH}:/spatialmediatools/app" 7 | 8 | RUN apk update && \ 9 | apk upgrade && \ 10 | apk --no-cache add --virtual wget unzip ca-certificates 11 | 12 | COPY ./requirements.txt /spatialmediatools/app/requirements.txt 13 | RUN python -m venv spatialmediatools 14 | RUN source spatialmediatools/bin/activate 15 | RUN spatialmediatools/bin/python -m pip install --upgrade pip 16 | RUN spatialmediatools/bin/python -m pip install -r requirements.txt 17 | RUN spatialmediatools/bin/python -m pip install -I gunicorn 18 | 19 | COPY ./app.py /spatialmediatools/app 20 | COPY ./wsgi.py /spatialmediatools/app 21 | COPY ./startup.sh /spatialmediatools/app 22 | RUN chmod 777 /spatialmediatools/app/startup.sh 23 | RUN mkdir ./data 24 | 25 | ############################################################################## 26 | # Download and extract Spatial Metadata Tools Code 27 | ############################################################################## 28 | ENV GIT_URL="https://github.com/google/spatial-media/archive/refs/heads/master.zip" 29 | ENV APP_DIR="/spatialmediatools/app" 30 | 31 | RUN wget --no-check-certificate -O spatialmediatools.zip $GIT_URL; 32 | RUN unzip $APP_DIR/spatialmediatools.zip; 33 | 34 | ############################################################################## 35 | # Clean up of unneeded packages and download 36 | ############################################################################## 37 | RUN rm -rf /var/cache/apk/*; 38 | RUN rm $APP_DIR/spatialmediatools.zip 39 | RUN apk del wget unzip ca-certificates; 40 | 41 | ############################################################################## 42 | # Run app.py 43 | ############################################################################## 44 | #CMD [ "spatialmediatools/bin/python", "app.py" ] 45 | ENTRYPOINT [ "/spatialmediatools/app/startup.sh" ] 46 | -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- 1 | This is the first attempt at taking a different path for the Spatial Media Tools and creating a Docker container to use the [CLI commands](https://github.com/google/spatial-media/tree/master/spatialmedia#spatial-media-metadata-injector) to inject the Spatial Media metadata required for VR360/180 video with or without ambisonic audio. 2 | 3 | This should remove any OS specific requirements for Python TK that are tied to different Python versions in use. It will be based on the latest available Python/Alpine image at the time of release. 
4 | 5 | To build this image clone this repository to a machine with Docker installed and run the following from this ./docker folder where the Dockerfile exists: 6 | 7 | `docker build -t spatialmedia/tools .` 8 | 9 | To run this newly built image in Docker use the following command: 10 | 11 | **Note:** Map an OS path in the first section of the -v flag to /app/data within the container and ensure that it has read/write access. 12 | 13 | ``` 14 | docker run -it \ 15 | -p 8888:5000 \ 16 | --net=bridge \ 17 | -h spatialmedia \ 18 | --name SpatialMediaTools \ 19 | -v /path/to/OS/folder:/spatialmediatools/app/data \ 20 | -d spatialmedia/tools 21 | ``` 22 | 23 | Once the image is running copy a file to inject to the above OS path and run the following to connect to the running image: 24 | 25 | `docker exec -it SpatialMediaTools sh` 26 | 27 | Change to the directory where the code was installed to in the image: 28 | 29 | `cd spatial-media-master` 30 | 31 | Using the [CLI commands](https://github.com/google/spatial-media/tree/master/spatialmedia#spatial-media-metadata-injector) as a reference attempt to inject the spatial media metadata into the video file you copied to the above path. Example: 32 | 33 | `python spatialmedia -i /spatialmediatools/app/data/ /spatialmediatools/app/data/` 34 | -------------------------------------------------------------------------------- /docker/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask 2 | app = Flask(__name__) 3 | 4 | @app.route("/") 5 | def hello(): 6 | return "
Hello There!
" 7 | 8 | if __name__ == "__main__": 9 | app.run(host='0.0.0.0') 10 | -------------------------------------------------------------------------------- /docker/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | gunicorn -------------------------------------------------------------------------------- /docker/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | /spatialmediatools/app/spatialmediatools/bin/gunicorn wsgi:app -w 2 --threads 2 -b 0.0.0.0:5000 3 | -------------------------------------------------------------------------------- /docker/wsgi.py: -------------------------------------------------------------------------------- 1 | from app import app 2 | 3 | if __name__ == "__main__": 4 | app.run() 5 | -------------------------------------------------------------------------------- /docs/coordinate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/coordinate.png -------------------------------------------------------------------------------- /docs/equirect.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/equirect.jpg -------------------------------------------------------------------------------- /docs/fisheye.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/fisheye.jpg -------------------------------------------------------------------------------- /docs/motion1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/motion1.jpg -------------------------------------------------------------------------------- /docs/motion2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/motion2.jpg -------------------------------------------------------------------------------- /docs/motion3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/docs/motion3.jpg -------------------------------------------------------------------------------- /docs/spatial-audio-rfc.md: -------------------------------------------------------------------------------- 1 | # Spatial Audio RFC (draft) 2 | *This document describes an open metadata scheme by which MP4 multimedia containers may accommodate spatial and head-locked stereo audio. Comments are welcome on the [spatial-media-discuss](https://groups.google.com/forum/#!forum/spatial-media-discuss) mailing list or by [filing an issue](https://github.com/google/spatial-media/issues) on GitHub.* 3 | 4 | ------------------------------------------------------ 5 | 6 | ## Metadata Format 7 | 8 | ### MP4 9 | Spatial audio metadata is stored in a new box, `SA3D`, defined in this RFC. 10 | 11 | #### Spatial Audio Box (SA3D) 12 | ##### Definition 13 | Box Type: `SA3D` 14 | Container: Sound Sample Description box (e.g., `mp4a`, `lpcm`, `sowt`, etc.) 
15 | Mandatory: No 16 | Quantity: Zero or one 17 | 18 | When present, provides additional information about the spatial audio content contained in this audio track. 19 | 20 | ##### Syntax 21 | ``` 22 | aligned(8) class SpatialAudioBox extends Box(‘SA3D’) { 23 | unsigned int(8) version; 24 | unsigned int(8) ambisonic_type; 25 | unsigned int(32) ambisonic_order; 26 | unsigned int(8) ambisonic_channel_ordering; 27 | unsigned int(8) ambisonic_normalization; 28 | unsigned int(32) num_channels; 29 | for (i = 0; i < num_channels; i++) { 30 | unsigned int(32) channel_map; 31 | } 32 | } 33 | ``` 34 | 35 | ##### Semantics 36 | - `version` is an 8-bit unsigned integer that specifies the version of this box. Must be set to `0`. 37 | 38 | - `head_locked_stereo` is a 1-bit flag used to indicate that the stored audio track contains head-locked stereo audio in addition to ambisonics audio. The flag should be set if the track contains head-locked stereo and unset otherwise. 39 | 40 | - `ambisonic_type` is a 7-bit unsigned integer that specifies the type of ambisonic audio represented; the following values are defined: 41 | 42 | | `ambisonic_type` | Ambisonic Type Description | 43 | |:-----------------|:---------------------------| 44 | | `0` | **Periphonic**: Indicates that the audio stored is a periphonic ambisonic sound field (i.e., full 3D). | 45 | 46 | - `ambisonic_order` is a 32-bit unsigned integer that specifies the order of the ambisonic sound field. If the `ambisonic_type` is `0` (*periphonic*), this is a non-negative integer representing the periphonic ambisonic order; in this case, it should take a value of `sqrt(n) - 1`, where `n` is the number of channels in the represented ambisonic audio data. For example, a *periphonic* ambisonic sound field with `ambisonic_order = 1` requires `(ambisonic_order + 1)^2 = 4` ambisonic components. 47 | 48 | - `ambisonic_channel_ordering` is an 8-bit integer specifying the channel ordering (i.e., spherical harmonics component ordering) used in the represented ambisonic audio data; the following values are defined: 49 | 50 | | `ambisonic_channel_ordering` | Channel Ordering Description | 51 | |:-----------------------------|:-----------------------------| 52 | | `0` | **ACN**: The channel ordering used is the *Ambisonic Channel Number* (ACN) system. In this, given a spherical harmonic of degree `l` and order `m`, the corresponding ordering index `n` is given by `n = l * (l + 1) + m`. | 53 | 54 | - `ambisonic_normalization` is an 8-bit unsigned integer specifying the normalization (i.e., spherical harmonics normalization) used in the represented ambisonic audio data; the following values are defined: 55 | 56 | | `ambisonic_normalization` | Normalization Description | 57 | |:--------------------------|:--------------------------| 58 | | `0` | **SN3D**: The normalization used is *Schmidt semi-normalization* (SN3D). In this, the spherical harmonic of degree `l` and order `m` is normalized according to `sqrt((2 - δ(m)) * ((l - m)! / (l + m)!))`, where `δ(m)` is the *Kronecker delta* function, such that `δ(0) = 1` and `δ(m) = 0` otherwise. | 59 | 60 | - `num_channels` is a 32-bit unsigned integer specifying the number of audio channels contained in the given audio track. 61 | 62 | - `channel_map` is a sequence of 32-bit unsigned integers that maps audio channels in a given audio track to ambisonic components, given the defined `ambisonic_channel_ordering`. The sequence of `channel_map` values should match the channel sequence within the given audio track. 
63 | 64 | For the example case of `ambisonic_type = 0` (Periphonic), consider a 4-channel audio track containing ambisonic components *W*, *X*, *Y*, *Z* at channel indexes `0`, `1`, `2`, `3`, respectively. For `ambisonic_channel_ordering = 0` (ACN), the ordering of components should be *W*, *Y*, *Z*, *X*, so the `channel_map` sequence should be `0`, `2`, `3`, `1`. 65 | 66 | As a simpler example, for a 4-channel audio track containing ambisonic components *W*, *Y*, *Z*, *X* at channel indexes `0`, `1`, `2`, `3`, respectively, the `channel_map` sequence should be specified as `0`, `1`, `2`, `3` when `ambisonic_channel_ordering = 0` (ACN). 67 | 68 | For the example case of `ambisonic_type = 0` (Periphonic) with `head_locked_stereo = 1`, the stored audio will consist of `4` ambisonic components *W*, *Y*, *Z*, *X* in addition to head-locked stereo components *L* and *R*. In this case, the SA3D atom will define `num_channels = 6` and a `channel_map` specified as `0`, `1`, `2`, `3`, `4`, `5` indicating that the channels are laid out in the file as *W*, *Y*, *Z*, *X*, *L*, *R*. This representation extends to different layouts of ambisonics and head-locked stereo components. For example, a channel layout of `4`, `5`, `0`, `1`, `2`, `3` indicates that the layout of the stored audio is *L*, *R*, *W*, *Y*, *Z*, *X*. 69 | 70 | ##### Example 71 | 72 | Here is an example MP4 box hierarchy for a file containing the `SA3D` box: 73 | 74 | - moov 75 | - trak 76 | - mdia 77 | - minf 78 | - stbl 79 | - stsd 80 | - mp4a 81 | - esds 82 | - SA3D 83 | 84 | where the `SA3D` box has the following data: 85 | 86 | | Field Name | Value | 87 | |:-----------|:-----| 88 | | `version` | `0` | 89 | | `ambisonic_type` | `0` | 90 | | `ambisonic_order` | `1` | 91 | | `ambisonic_channel_ordering` | `0` | 92 | | `ambisonic_normalization` | `0` | 93 | | `num_channels` | `4` | 94 | | `channel_map` | `0` | 95 | | `channel_map` | `2` | 96 | | `channel_map` | `3` | 97 | | `channel_map` | `1` | 98 | 99 | ------------------------------------------------------ 100 | 101 | ## Appendix 1 - Ambisonics 102 | The traditional notion of ambisonics is used, where the sound field is represented by spherical harmonics coefficients using the *associated Legendre polynomials* (without *Condon-Shortley phase*) as the basis functions. Thus, the spherical harmonic of degree `l` and order `m` at elevation `E` and azimuth `A` is given by: 103 | 104 | N(l, abs(m)) * P(l, abs(m), sin(E)) * T(m, A) 105 | 106 | where: 107 | - `N(l, m)` is the spherical harmonics normalization function used. 108 | - `P(l, m, x)` is the (unnormalized) *associated Legendre polynomial*, without *Condon-Shortley phase*, of degree `l` and order `m` evaluated at `x`. 109 | - `T(m, x)` is `sin(-m * x)` for `m < 0` and `cos(m * x)` otherwise. 110 | 111 | ### Conventions 112 | #### Azimuth 113 | - `A = 0`: The source is in front of the listener. 114 | - `A` in `(0, pi/2)`: The source is in the forward-left quadrant. 115 | - `A` in `(pi/2, pi)`: The source is in the back-left quadrant. 116 | - `A` in `(-pi/2, 0)`: The source is in the forward-right quadrant. 117 | - `A` in `(-pi, -pi/2)`: The source is in the back-right quadrant. 118 | 119 | #### Elevation 120 | - `E = 0`: The source is in the horizontal plane. 121 | - `E` in `(0, pi/2]`: The source is above the listener. 122 | - `E` in `[-pi/2, 0)`: The source is below the listener. 
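
The conventions above are easy to sanity-check in code. The following Python sketch (illustrative only; not part of this specification or of the repository's injector) computes the ACN index `n = l * (l + 1) + m`, the SN3D normalization factor, and the periphonic channel count, and confirms that the ACN ordering of the first-order components is *W*, *Y*, *Z*, *X*, as used in the `channel_map` examples above.

```python
import math

def acn_index(degree: int, order: int) -> int:
    """ACN index n = l * (l + 1) + m for spherical harmonic degree l, order m."""
    assert -degree <= order <= degree
    return degree * (degree + 1) + order

def sn3d_factor(degree: int, order: int) -> float:
    """SN3D normalization: sqrt((2 - delta(m)) * (l - |m|)! / (l + |m|)!)."""
    m = abs(order)
    delta = 1 if m == 0 else 0
    return math.sqrt((2 - delta) * math.factorial(degree - m) / math.factorial(degree + m))

def num_periphonic_channels(ambisonic_order: int) -> int:
    """A periphonic sound field of order N uses (N + 1)^2 channels."""
    return (ambisonic_order + 1) ** 2

# First-order ambisonics: ACN ordering is W(0,0), Y(1,-1), Z(1,0), X(1,1).
assert [acn_index(l, m) for l, m in [(0, 0), (1, -1), (1, 0), (1, 1)]] == [0, 1, 2, 3]
assert num_periphonic_channels(1) == 4
assert sn3d_factor(0, 0) == 1.0
```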
123 | -------------------------------------------------------------------------------- /docs/spherical-video-rfc.md: -------------------------------------------------------------------------------- 1 | # Spherical Video RFC 2 | **Note: This metadata scheme is superseded by the [Spherical Video V2](spherical-video-v2-rfc.md) metadata specification.** 3 | 4 | *This document describes an open metadata scheme by which Matroska-like and MP4 multimedia containers may accommodate spherical video. Comments are welcome on the [spatial-media-discuss](https://groups.google.com/forum/#!forum/spatial-media-discuss) mailing list or by [filing an issue](https://github.com/google/spatial-media/issues) on GitHub.* 5 | 6 | 7 | ------------------------------------------------------ 8 | 9 | ## Metadata Format 10 | Two kinds of metadata are needed to represent various characteristics of a spherical video: Global and Local metadata. Global metadata is stored in an XML format, namespaced as . 11 | 12 | Example: 13 | 14 | 16 | ... 17 | 18 | 19 | 20 | Local metadata is stored either as metadata tracks or along with the video frames (see [Local Metadata](#LocalMetadata) below). 21 | 22 | ## Global Metadata 23 | Global Metadata is metadata that applies to the file or track as a whole. It is stored in the container as defined in the following sections. 24 | 25 | 26 | ### Matroska/WebM 27 | Global XML metadata is stored using Matroska/WebM's "Tags" mechanism, having the following structure: 28 | 29 | - [Tags](http://matroska.org/technical/specs/tagging/index.html#Tags) 30 | - [Tag](http://matroska.org/technical/specs/tagging/index.html#Tag) 31 | - [Targets](http://matroska.org/technical/specs/tagging/index.html#Targets) 32 | - [TargetType](http://matroska.org/technical/specs/tagging/index.html#TargetType) 33 | - "Track" 34 | - [TagTrackUID](http://matroska.org/technical/specs/tagging/index.html#TagTrackUID) 35 | - 36 | - [SimpleTag](http://matroska.org/technical/specs/tagging/index.html#SimpleTag) 37 | - [TagName](http://matroska.org/technical/specs/tagging/index.html#TagName) 38 | - "spherical-video" or "SPHERICAL-VIDEO" 39 | - [TagString](http://matroska.org/technical/specs/tagging/index.html#TagString) 40 | - <xml data> 41 | 42 | ### MP4 43 | Spherical video metadata is stored in a uniquely-identified *moov.trak.uuid* box to avoid collisions with other potential metadata. This box shall cite the UUID value `ffcc8263-f855-4a93-8814-587a02521fdd`. The XML metadata itself is written within the *uuid* leaf as a UTF-8 string. 44 | 45 | - moov 46 | - ... 47 | - trak 48 | - uuid[`ffcc8263-f855-4a93-8814-587a02521fdd`] 49 | - ... 50 | 51 | ### Allowed Global Metadata Elements 52 | 53 | 54 | | **Name** | **Description** | **Type** | **Required** | **Default** | **V1.0 Requirements** | 55 | |----------|-----------------|----------|--------------|-------------|-----------------------| 56 | |Spherical | Flag indicating if the video is a spherical video | Boolean | Yes | - | Must be `true`. | 57 | |Stitched | Flag indicating if the video is stitched. | Boolean | Yes | - | Must be `true`. | 58 | |StitchingSoftware| Software used to stitch the spherical video. | String | Yes | - | | 59 | |ProjectionType| Projection type used in the video frames. | String | Yes | - | Must be `equirectangular`. | 60 | |[StereoMode](#StereoMode)| Description of stereoscopic 3D layout. | String | No | `mono` | Must be `mono`, `left-right`, or `top-bottom`. | 61 | |SourceCount|Number of cameras used to create the spherical video. 
| Integer | No | - | | 62 | |[InitialViewHeadingDegrees](#InitialView)|The heading angle of the initial view in degrees. | Integer | No | 0 | | 63 | |[InitialViewPitchDegrees](#InitialView)|The pitch angle of the initial view in degrees. | Integer | No | 0 | | 64 | |[InitialViewRollDegrees](#InitialView)|The roll angle of the initial view in degrees. | Integer | No | 0 | | 65 | |Timestamp | Epoch timestamp of when the first frame in the video was recorded. | Integer | No | - | | 66 | |FullPanoWidthPixels|Width of the encoded video frame in pixels.|Integer|No| See [Stereo Mode](#StereoMode).| | 67 | |FullPanoHeightPixels|Height of the encoded video frame in pixels.|Integer|No| See [Stereo Mode](#StereoMode).| | 68 | |CroppedAreaImageWidthPixels|Width of the video frame to display (e.g. cropping). | Integer | No | See [Stereo Mode](#StereoMode). | | 69 | |CroppedAreaImageHeightPixels|Height of the video frame to display (e.g. cropping). | Integer | No | See [Stereo Mode](#StereoMode). | | 70 | |CroppedAreaLeftPixels|Column where the left edge of the image was cropped from the full sized panorama|Integer|No|0| | 71 | |CroppedAreaTopPixels|Row where the top edge of the image was cropped from the full sized panorama|Integer|No|0| | 72 | 73 | #### Stereo Mode 74 | 75 | [SEI Frame Packing Arragement](http://www.itu.int/ITU-T/recommendations/rec.aspx?rec=10635) and the [StereoMode](http://www.matroska.org/technical/specs/index.html#StereoMode) tag for Matroska/WebM video files can be used to describe the left/right frame layout. To include non-h264 MPEG-4 files an additional StereoMode tag will override the native stereo configuration. The supported StereoMode values are shown below with the corresponding native values. 76 | 77 | | **Name** | **Description** |Equivalent MKV Value | Equivalent h264 SEI FPI | 78 | |----------|-----------------|---------------------|-------------------------| 79 | | mono | Whole frame contains a single mono view.| 0 | - | 80 | | left-right | Left half contains the left eye while the right half contains the right eye.| 1 | 3 | 81 | | top-bottom | The top half contains the left eye and the bottom half contains the right eye. | 3 | 4 | 82 | 83 | Cropping, initial view, and projection properties are shared across the left/right eyes. Each video frame is divided into the left/right eye regions then cropping and view information is applied treating each region as a separate video frame. Default cropping information varies with the StereoMode tag as shown below. 84 | 85 | | **Name** | **mono** | **left-right** | **top-bottom** | 86 | |----------------------|-----------------|----------------------|----------------------| 87 | |CroppedAreaImageWidth |Container Width. |Half Container Width. |Container Width. | 88 | |CroppedAreaImageHeight|Container Height.|Container Height. |Half Container Height.| 89 | |FullPanoWidthPixels |Container Width. |Half Container Width. |Container Width. | 90 | |FullPanoHeightPixels |Container Height.|Container Height. |Half Container Height.| 91 | 92 | #### Initial View 93 | 94 | The default initial viewport is set such that the frame center occurs at the view center. A diagram of the rotation model for an equirectangular projection is shown below. 
95 | 96 | Heading 97 | -180 0 180 98 | 90 +-------------+-------------+ 99 | | | | 100 | P | | o> | 101 | i | ^ | 102 | t 0 +-------------X-------------+ 103 | c | | | 104 | h | | | 105 | | | | 106 | -90 +-------------+-------------+ 107 | 108 | X - the default camera center 109 | ^ - the default up vector 110 | o - the image center for a pitch of 45 and a heading of 90 111 | > - the up vector for a rotation of 90 degrees. 112 | 113 | ### Local Metadata 114 | Version 1 supports the following Local Metadata: 115 | 116 | - GPS (latitude, longitude, altitude) 117 | - Director's Cut (viewport for each frame) 118 | - Hotspot (plaint text, including HTML) 119 | 120 | ### Two Types of Local Metadata 121 | These are two types of local metadata: (1) strictly per-frame and (2) arbitrary local metadata (perhaps sampled at certain intervals -- in other words, not strictly per-frame). Both types of local metadata may be used concurrently, depending on the author's needs and available metadata granularity. 122 | 123 | ### Specification of Strictly Per-Frame Metadata 124 | In this case, metadata content is stored at a frame-level accuracy: there is one chunk of metadata content for every frame. 125 | 126 | #### WebM/Matroska 127 | Metadata content will go into the [BlockAdditional](http://matroska.org/technical/specs/index.html#BlockAdditional) element of the corresponding [Block](http://matroska.org/technical/specs/index.html#Block) to which the metadata belongs. 128 | 129 | #### MP4 130 | User data unregistered SEI message syntax from the ISO-14496-10:2005 (see D.1.6). This message is an SEI message of Payload Type 5. 131 | 132 | ### Specifications of Local Metadata Sampled at Intervals 133 | In this case, the metadata content not available at frame-level accuracy, but rather sampled as a certain time interval. 134 | 135 | #### WebM/Matroska 136 | Metadata content should be stored as a separate metadata track. The metadata track entry must have the following values for specified fields: 137 | - Track type: `0x21` (WebVTT Metadata as mentioned [here](http://www.webmproject.org/docs/container/)) 138 | - CodecID: `D_WEBVTT/METADATA` 139 | 140 | Each metadata chunk must be stored as either [Blocks](http://matroska.org/technical/specs/index.html#Block) or [SimpleBlocks](http://matroska.org/technical/specs/index.html#SimpleBlock), per the Matroska specification, with the exception that no lacing is permitted. Metadata blocks should always be key frames and must be indicated accordingly in the flags -- depending on whether it's a SimpleBlock or a Block. 141 | 142 | #### MP4 143 | Create a track with ComponentSubType set to "meta" for Timed Text Metadata. The box structure is as follows: 144 | 145 | - mdia 146 | - mdhd 147 | - hdlr 148 | - minf 149 | - nmhd 150 | - dinf 151 | - dref 152 | - url 153 | - stbl 154 | - stsd 155 | - gpsd 156 | - stts 157 | - stsc 158 | - stsz 159 | - stco 160 | 161 | ### Local Metadata Specification 162 | Local metadata is stored as a binary stream. 163 | 164 | Header bits (1 to 32): flags indicating which metadata are present. 165 | 166 | - Bit 1: Flag for GPS data 167 | - Bit 2: Flag for Director's Cut data 168 | - Bit 3: Flag for Hotspot Data 169 | - Bit 4 to 32: Reserved for future use 170 | 171 | Depending on which flags are set, the actual metadata will follow in the exact order of the flags. 172 | 173 | Each block of local metadata will be preceded by the length of that particular block of data. This will allow parsers to skip metadata blocks. 
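
As a rough illustration of the flag-plus-length-prefixed layout described above, here is a minimal Python sketch of a reader. The 32-bit big-endian header and length fields, and the mapping of bits 1 to 3 onto the most significant bits, are assumptions drawn from the example diagram that follows rather than from normative text.

```python
import struct

GPS_FLAG = 1 << 31            # bit 1 ("g" in the diagram below)
DIRECTORS_CUT_FLAG = 1 << 30  # bit 2 ("d")
HOTSPOT_FLAG = 1 << 29        # bit 3 ("h")

def parse_local_metadata(payload: bytes) -> dict:
    """Split one local-metadata chunk into its length-prefixed blocks."""
    (header,) = struct.unpack_from(">I", payload, 0)
    offset = 4
    blocks = {}
    for name, flag in (("gps", GPS_FLAG),
                       ("directors_cut", DIRECTORS_CUT_FLAG),
                       ("hotspot", HOTSPOT_FLAG)):
        if header & flag:
            (length,) = struct.unpack_from(">I", payload, offset)
            offset += 4
            blocks[name] = payload[offset:offset + length]
            offset += length  # the length prefix lets a parser skip any block
    return blocks
```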
174 | 175 | Example: 176 | 177 | 0 1 2 3 178 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 179 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 180 | |g|d|h| Reserved | 181 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 182 | | GPS Data Length (if Bit 0 set) | 183 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 184 | | | 185 | | GPS Data (if Bit 0 set) | 186 | | | 187 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 188 | | Director's Cut Data Length (if Bit 1 set) | 189 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 190 | | | 191 | | Director's Cut Data (if Bit 1 set) | 192 | | | 193 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 194 | | Hotspot Data Length (if Bit 2 set) | 195 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 196 | | | 197 | | Hotspot Data (if Bit 2 set) | 198 | | | 199 | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 200 | 201 | The binary format for representing GPS data can be found in the [MPEG3.1 GPS-V Spec](http://wg11.sc29.org/mpeg-v/?page_id=2087). Exact specification for Director's Cut and Hotspot data are yet to be determined. 202 | 203 | ### Audio 204 | Audio can be stored in the following two ways, and metadata is needed to signal which format is used and how many streams exist. For example, the four channels of the ambisonic B format might be stored and compressed as two stereo streams. 205 | 206 | - Stereo, 2 channels, compressed as AAC 207 | - 5.1, 6 channels, compressed as AAC 208 | 209 | #### Matroska/WebM: 210 | Specification - (http://matroska.org/technical/specs/notes.html#3D) 211 | Stereo Modes - (http://www.matroska.org/technical/specs/index.html#StereoMode) 212 | 213 | #### MP4: 214 | See ISO/IEC 14496-10 for SEI message 215 | See ISO/IEC 14496-12 for ISO BMFF 216 | 217 | # Appendix 1 - Global Metadata Sample 218 | 219 | 222 | true 223 | true 224 | 225 | OpenCV for Windows v2.4.9 226 | 227 | equirectangular 228 | 6 229 | 90 230 | 0 231 | 0 232 | 1400454971 233 | 234 | 1920 235 | 236 | 237 | 1080 238 | 239 | 1900 240 | 960 241 | 15 242 | 60 243 | 244 | 245 | 246 | # Appendix 2 - Matroska/WebM Local Metadata Track Sample 247 | 248 | 249 | 3 250 | 0x21 251 | D_WEBVTT/METADATA 252 | 253 | ... 254 | (http://matroska.org/technical/specs/index.html#simpleblock_structure) 255 | 256 | 257 | 258 | -------------------------------------------------------------------------------- /docs/spherical-video-v2-rfc.md: -------------------------------------------------------------------------------- 1 | # Spherical Video V2 RFC 2 | *This document describes a revised open metadata scheme by which MP4 (ISOBMFF) 3 | and WebM (Matroska) multimedia containers may accommodate spherical videos. Comments are welcome by 4 | discussing on the [Spatial Media Google 5 | group](https://groups.google.com/forum/#!forum/spatial-media-discuss) or by 6 | filing an [issue](https://github.com/google/spatial-media/issues/new) on 7 | GitHub.* 8 | 9 | ------------------------------------------------------ 10 | 11 | ## Metadata Format 12 | 13 | ### MP4 (ISOBMFF) 14 | Spherical video metadata is stored in a new box, `sv3d`, defined in this RFC, in 15 | an MP4 (ISOBMFF) container. The metadata is applicable to individual video 16 | tracks in the container. 
Since many spherical videos are also stereoscopic, this 17 | RFC also defines an additional optional box, `st3d`, to specify metadata 18 | specific to stereoscopic rendering. 19 | 20 | As the V2 specification stores its metadata in a different location, it is 21 | possible for a file to contain both the V1 and V2 metadata. If both V1 and V2 22 | metadata are contained they should contain semantically equivalent information, 23 | with V2 taking priority when they differ. 24 | 25 | #### Stereoscopic 3D Video Box (st3d) 26 | ##### Definition 27 | Box Type: `st3d` 28 | Container: VisualSampleEntry (e.g. `avc1`, `mp4v`, `apcn`) 29 | Mandatory: No 30 | Quantity: Zero or one 31 | 32 | Stores additional information about stereoscopic rendering in this video track. 33 | This box must come after non-optional boxes defined by the ISOBMFF 34 | specification and before optional boxes at the end of the VisualSampleEntry 35 | definition such as the CleanApertureBox and PixelAspectRatioBox. 36 | 37 | ##### Syntax 38 | ``` 39 | aligned(8) class Stereoscopic3D extends FullBox(‘st3d’, 0, 0) { 40 | unsigned int(8) stereo_mode; 41 | } 42 | ``` 43 | 44 | ##### Semantics 45 | 46 | - `stereo_mode` is an 8-bit unsigned integer that specifies the stereo frame 47 | layout. The values 0 to 255 are reserved for current and future layouts. The 48 | following values are defined: 49 | 50 | | `stereo_mode` | Stereo Mode Description | 51 | |:-----------------|:---------------------------| 52 | | `0` | **Monoscopic**: Indicates the video frame contains a single monoscopic view. | 53 | | `1` | **Stereoscopic Top-Bottom**: Indicates the video frame contains a stereoscopic view storing the left eye on top half of the frame and right eye at the bottom half of the frame.| 54 | | `2` | **Stereoscopic Left-Right**: Indicates the video frame contains a stereoscopic view storing the left eye on left half of the frame and right eye on the right half of the frame.| 55 | | `3` | **Stereoscopic Stereo-Custom**: Indicates the video frame contains a stereoscopic view storing left and right eyes in the frame, but its layout is application dependent, and needs to be determined elsewhere. For example, this must be used with a mesh projection that contains a mesh for each eye. In this case the layout information is stored in the meshes instead of explicitly described here.| 56 | | `4` | **Stereoscopic Right-Left**: Indicates the video frame contains a stereoscopic view storing the right eye on the left half of the frame and the left eye on the right half of the frame.| 57 | 58 | #### Spherical Video Box (sv3d) 59 | ##### Definition 60 | Box Type: `sv3d` 61 | Container: VisualSampleEntry (e.g. `avc1`, `mp4v`, `apcn`) 62 | Mandatory: No 63 | Quantity: Zero or one 64 | 65 | Stores additional information about spherical video content contained in this 66 | video track. This box must come after non-optional boxes defined by the ISOBMFF 67 | specification and before optional boxes at the end of the VisualSampleEntry 68 | definition such as the CleanApertureBox and PixelAspectRatioBox. This box should 69 | be placed after the Stereoscopic3D box if one is present. 70 | 71 | 72 | ##### Syntax 73 | ``` 74 | aligned(8) class SphericalVideoBox extends Box(‘sv3d’) { 75 | } 76 | ``` 77 | 78 | #### Spherical Video Header (svhd) 79 | ##### Definition 80 | Box Type: `svhd` 81 | Container: `sv3d` 82 | Mandatory: Yes 83 | Quantity: Exactly one 84 | 85 | Contains spherical video information unrelated to the projection format. 
86 | 87 | ##### Syntax 88 | ``` 89 | aligned(8) class SphericalVideoHeader extends FullBox(‘svhd’, 0, 0) { 90 | string metadata_source; 91 | } 92 | ``` 93 | 94 | ##### Semantics 95 | 96 | - `metadata_source` is a null-terminated string in UTF-8 characters which 97 | identifies the tool used to create the SV3D metadata. 98 | 99 | #### Projection Box (proj) 100 | ##### Definition 101 | Box Type: `proj` 102 | Container: `sv3d` 103 | Mandatory: Yes 104 | Quantity: Exactly one 105 | 106 | Container for projection information about the spherical video content. 107 | This container must contain exactly one subtype of the Projection Data Box 108 | (e.g. an `equi` box) that defines the spherical projection. 109 | 110 | ##### Syntax 111 | ``` 112 | aligned(8) class Projection extends Box(‘proj’) { 113 | } 114 | ``` 115 | 116 | #### Projection Header Box (prhd) 117 | ##### Definition 118 | Box Type: `prhd` 119 | Container: `proj` 120 | Mandatory: Yes 121 | Quantity: Exactly one 122 | 123 | Contains projection information about the spherical video content that is 124 | independent of the video projection. 125 | 126 | ##### Syntax 127 | ``` 128 | aligned(8) class ProjectionHeader extends FullBox(‘prhd’, 0, 0) { 129 | int(32) pose_yaw_degrees; 130 | int(32) pose_pitch_degrees; 131 | int(32) pose_roll_degrees; 132 | } 133 | ``` 134 | 135 | ##### Semantics 136 | 137 | - Pose values are 16.16 fixed point values measuring rotation in degrees. These 138 | rotations transform the the projection as follows: 139 | - `pose_yaw_degrees` counter-clockwise rotation in degrees around the up vector, 140 | restricted to -180.0 to 180.0 141 | - `pose_pitch_degrees` counter-clockwise rotation in degrees around the right 142 | vector post yaw transform, restricted to -90.0 to 90.0 143 | - `pose_roll_degrees` clockwise-rotation in degrees around the forward 144 | vector post yaw and pitch transform, restricted to -180.0 to 180.0 145 | 146 | #### Projection Data Box 147 | ##### Definition 148 | Box Type: Projection Dependent Identifier 149 | Container: `proj` 150 | Mandatory: Yes 151 | Quantity: Exactly one 152 | 153 | Base class for all projection data boxes. Any new projection must subclass this 154 | type with a unique `proj_type`. There must not be more than one subclass of a 155 | ProjectionDataBox in a given `proj` box. 156 | 157 | ##### Syntax 158 | ``` 159 | aligned(8) class ProjectionDataBox(unsigned int(32) proj_type, unsigned int(8)version, unsigned int(24) flags) 160 | extends FullBox(proj_type, version, flags) { 161 | } 162 | ``` 163 | 164 | #### Cubemap Projection Box (cbmp) 165 | ##### Definition 166 | Box Type: `cbmp` 167 | Container: `proj` 168 | 169 | Specifies that the track uses a cubemap projection and contains additional 170 | projection dependent information. The 171 | [cubemap's](https://en.wikipedia.org/wiki/Cube_mapping) face layout is 172 | defined by a unique `layout` value. 173 | 174 | ##### Syntax 175 | ``` 176 | aligned(8) class CubemapProjection ProjectionDataBox(‘cbmp’, 0, 0) { 177 | unsigned int(32) layout; 178 | unsigned int(32) padding; 179 | } 180 | ``` 181 | 182 | ##### Semantics 183 | - `layout` is a 32-bit unsigned integer describing the layout of cube faces. The 184 | values 0 to 255 are reserved for current and future layouts. 185 | - a value of `0` corresponds to a grid with 3 columns and 2 rows. Faces are 186 | oriented upwards for the front, left, right, and back faces. 
The up face is 187 | oriented so the top of the face is forwards and the down face is oriented 188 | so the top of the face is to the back. 189 |
<table>
  <tr><td>right face</td><td>left face</td><td>up face</td></tr>
  <tr><td>down face</td><td>front face</td><td>back face</td></tr>
</table>
203 | 204 | - `padding` is a 32-bit unsigned integer measuring the number of pixels to pad 205 | from the edge of each cube face. 206 | 207 | #### Equirectangular Projection Box (equi) 208 | ##### Definition 209 | Box Type: `equi` 210 | Container: `proj` 211 | 212 | Specifies that the track uses an equirectangular projection. The 213 | [equirectangular projection]( 214 | https://en.wikipedia.org/wiki/Equirectangular_projection) should be arranged 215 | such that the default pose has the forward vector in the center of the frame, 216 | the up vector at top of the frame, and the right vector towards the right of the 217 | frame. 218 | 219 | ##### Syntax 220 | ``` 221 | aligned(8) class EquirectangularProjection ProjectionDataBox(‘equi’, 0, 0) { 222 | unsigned int(32) projection_bounds_top; 223 | unsigned int(32) projection_bounds_bottom; 224 | unsigned int(32) projection_bounds_left; 225 | unsigned int(32) projection_bounds_right; 226 | } 227 | ``` 228 | 229 | ##### Semantics 230 | 231 | - The projection bounds use 0.32 fixed point values. These values represent the 232 | proportion of projection cropped from each edge not covered by the video 233 | frame. For an uncropped frame all values are 0. 234 | - `projection_bounds_top` is the amount from the top of the frame to crop 235 | - `projection_bounds_bottom` is the amount from the bottom of the frame to 236 | crop; must be less than 0xFFFFFFFF - projection_bounds_top 237 | - `projection_bounds_left` is the amount from the left of the frame to crop 238 | - `projection_bounds_right` is the amount from the right of the frame to crop; 239 | must be less than 0xFFFFFFFF - projection_bounds_left 240 | 241 | #### Mesh Projection Box (mshp) 242 | ##### Definition 243 | Box Type: `mshp` 244 | Container: `proj` 245 | 246 | Specifies that the track uses mesh projection. A mesh projection describes the 247 | video projection in the form of a 3D mesh and associated metadata. 248 | 249 | ##### Syntax 250 | ``` 251 | aligned(8) class MeshProjection ProjectionDataBox(‘mshp’, 0, 0) { 252 | unsigned int(32) crc; 253 | unsigned int(32) encoding_four_cc; 254 | 255 | // All bytes below this point are compressed according to 256 | // the algorithm specified by the encoding_four_cc field. 257 | MeshBox() meshes[]; // At least 1 mesh box must be present. 258 | Box(); // further boxes as needed 259 | } 260 | ``` 261 | 262 | ##### Semantics 263 | - `crc` is the CRC32 of every byte following the CRC until the end of the 264 | MeshProjection box. 265 | - `encoding_four_cc` is the encoding/compression algorithm used for all bytes 266 | that follow this field until the end of the MeshProjection box. 267 | 268 | Supported compression algorithms are: 269 | - 'raw ' (0x72617720) signals no compression or encoding. 270 | - 'dfl8' (0x64666c38) signals raw deflate compression that does not have zlib 271 | or gzip headers or a checksum. (https://tools.ietf.org/html/rfc1951) 272 | 273 | - `meshes` contain the projection meshes for rendering. If there is only one 274 | mesh box, it represents the monocular view and the stereo mode 275 | field expressed separately in the media container is used to determine 276 | the left and right eye view in case of stereo content. If the mshp box 277 | contains two mesh boxes, the first box represents the left eye mesh and the 278 | second box represents the right eye mesh. 
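
To make the packing of the fields after the FullBox version/flags concrete, here is an illustrative Python sketch; `pack_mshp_payload` and its arguments are hypothetical names, the serialized `mesh` box bytes are assumed to be available already, and a real muxer would still wrap the result in the box size/type/version/flags header.

```python
import struct
import zlib

def pack_mshp_payload(mesh_boxes: bytes, compress: bool = True) -> bytes:
    """Assemble crc + encoding_four_cc + (optionally deflated) mesh box bytes."""
    if compress:
        four_cc = b"dfl8"
        # Raw DEFLATE stream: negative wbits gives no zlib/gzip header or checksum (RFC 1951).
        compressor = zlib.compressobj(9, zlib.DEFLATED, -15)
        body = compressor.compress(mesh_boxes) + compressor.flush()
    else:
        four_cc = b"raw "
        body = mesh_boxes
    # The crc covers every byte that follows the crc field itself.
    crc = zlib.crc32(four_cc + body) & 0xFFFFFFFF
    return struct.pack(">I", crc) + four_cc + body
```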
279 | 280 | #### Mesh Box (mesh) 281 | ##### Definition 282 | Box Type: `mesh` 283 | Container: `mshp` 284 | Mandatory: Yes 285 | Quantity: One or Two 286 | 287 | Contains vertex and texture coordinate information required to render the 288 | projected video correctly. 289 | 290 | A 3D mesh consists of the following information: 291 | 292 | * Total number of unique vertices. 293 | * For each unique vertex: 294 | * X, Y, Z, U, V coordinates as floating point. 295 | * Number of vertex lists used to describe the projection. 296 | * For each vertex list: 297 | * Texture ID indicating which texture to sample from. 298 | * Triangle render method/type (triangle/strip/fan). 299 | * Number of vertex indices in this list. 300 | * For each vertex: 301 | * Index into the unique vertex list. 302 | 303 | A texture ID could refer to the current video frame or a static image. This 304 | allows portions of the spherical scene to be dynamic and other portions to be 305 | static. 306 | 307 | The multiple texture scheme helps cameras that do not capture the entire 308 | spherical field of view (360 degrees horizontal and 180 degrees vertical). Such 309 | cameras can replace the uncaptured portion of the spherical field of view with 310 | a static image. 311 | 312 | ##### Syntax 313 | ``` 314 | aligned(8) class Mesh Box(‘mesh’) { 315 | const unsigned int(1) reserved = 0; 316 | unsigned int(31) coordinate_count; 317 | for (i = 0; i < coordinate_count; i++) { 318 | float(32) coordinate; 319 | } 320 | const unsigned int(1) reserved = 0; 321 | unsigned int(31) vertex_count; 322 | for (i = 0; i < vertex_count; i++) { 323 | unsigned int(ccsb) x_index_delta; 324 | unsigned int(ccsb) y_index_delta; 325 | unsigned int(ccsb) z_index_delta; 326 | unsigned int(ccsb) u_index_delta; 327 | unsigned int(ccsb) v_index_delta; 328 | } 329 | const unsigned int(1) padding[]; 330 | 331 | const unsigned int(1) reserved = 0; 332 | unsigned int(31) vertex_list_count; 333 | for (i = 0; i < vertex_list_count; i++) { 334 | unsigned int(8) texture_id; 335 | unsigned int(8) index_type; 336 | const unsigned int(1) reserved = 0; 337 | unsigned int(31) index_count; 338 | for (j = 0; j < index_count; j++) { 339 | unsigned int(vcsb) index_as_delta; 340 | } 341 | const unsigned int(1) padding[]; 342 | } 343 | } 344 | ``` 345 | 346 | ##### Semantics 347 | 348 | - `reserved` are fields where all bits are set to 0. A MeshProjection box 349 | version change is required if any of these bits are allowed to be set to 1 350 | in a future revision of the spec. 351 | - `coordinate_count` is the number of floating point values used in the 352 | vertices. 353 | - `coordinate` is a floating point value used in mesh vertices. 354 | 355 | - `vertex_count` is the number of position (x,y,z) and texture coordinate (u,v) 356 | pairings used in the projection mesh. 357 | - `ccsb` coordinate count size in bits = `ceil(log2(coordinate_count * 2))` 358 | - `x_index_delta` is a delta from the previous x_index into the 359 | list of coordinates. For the first element, the previous index is assumed to 360 | be zero. These integers are encoded in a zig-zag scheme, similar to 361 | [Protocol Buffers's signed integers] 362 | (https://developers.google.com/protocol-buffers/docs/encoding#signed-integers). 363 | An integer `n` that is greater than or equal to 0 is encoded as `n * 2`. An 364 | integer `n` that is less than 0 is encoded as `-n * 2 - 1`. 
365 | - `y_index_delta` is a delta from the previous y_index and has the same encoding 366 | and initial index as `x_index_delta` 367 | - `z_index_delta` is a delta from the previous z_index and has the same encoding 368 | and initial index as `x_index_delta` 369 | - `u_index_delta` is a delta from the previous u_index and has the same encoding 370 | and initial index as `x_index_delta` 371 | - `v_index_delta` is a delta from the previous v_index and has the same encoding 372 | and initial index as `x_index_delta` 373 | - `padding` contains 0-7 bits to align to the next byte boundary. 374 | - `vertex_list_count` is the number of vertex index lists that describe the 375 | projection mesh. 376 | - `texture_id` is the Texture ID the UV coordinates refer to. 377 | * 0 for video frames in this track. 378 | * >0 reserved 379 | - `index_type` specifies what the indices refer to. The valid values are: 380 | * 0: Triangles 381 | * 1: Triangle Strip 382 | * 2: Triangle Fan 383 | - `index_count` is the number of vertex indices in this vertex list. 384 | - `vcsb` vertex count size in bits = `ceil(log2(vertex_count * 2))` 385 | - `index_as_delta` is a delta from the previous index into the 386 | list of unique vertices. This field has the same encoding 387 | and initial index as `x_index_delta`. 388 | 389 | ##### Notes: 390 | 391 | * All fields are big-endian most-significant-bit first. 392 | * Parsers should ignore boxes they don't know about. 393 | * Parsers should ignore extra bytes at the end of a box. 394 | * Boxes / fields may be added to the end of a box without incrementing the 395 | version number. 396 | * (x,y,z) coordinates are expressed in an OpenGL-style right-handed coordinate 397 | system where -Z is forward, +X is right and +Y is up. Triangles must be specified 398 | with counter-clockwise winding order. 399 | * (u,v) coordinates are also expressed in an OpenGL-style texture coordinate 400 | system, where the lower-left corner is the origin (0,0), and the upper-right is 401 | (1,1). 402 | * (u,v) coordinates, encoded in this box, need to be adjusted based on the 403 | stereo mode of the stream before rendering. 404 | * If the stereo mode is monoscopic: 405 | * No (u,v) coordinate adjustments are required. 406 | * If the stereo mode is left-right: 407 | * left u' = u * 0.5 408 | * right u' = u * 0.5 + 0.5 409 | * If the stereo mode is top-bottom: 410 | * left v' = v * 0.5 + 0.5 411 | * right v' = v * 0.5 412 | * If the stereo mode is stereo-custom: 413 | * Two mesh boxes must be present in the mshp box. 414 | * Unmodified (u,v) coordinates from the first mesh box are used for 415 | rendering the left eye, and unmodified (u,v) coordinates from the second mesh 416 | box are used for rendering the right eye. 417 | 418 | ### Example 419 | 420 | Here is an example box hierarchy for a file containing the SV3D metadata for a 421 | monoscopic equirectangular video: 422 | 423 | ``` 424 | [moov: Movie Box] 425 | [mdia: Media Box] 426 | [minf: Media Information Box] 427 | [stbl: Sample Table Box] 428 | [stsd: Sample Table Sample Descriptor] 429 | [avc1: Advance Video Coding Box] 430 | [avcC: AVC Configuration Box] 431 | ... 
432 | [st3d: Stereoscopic 3D Video Box] 433 | stereo_mode = 0 434 | [sv3d: Spherical Video Box] 435 | [svhd: Spherical Video Header Box] 436 | metadata_source = "Spherical Metadata Tooling" 437 | [proj: Projection Box] 438 | [prhd: Projection Header Box] 439 | pose_yaw_degrees = 0 440 | pose_pitch_degrees = 0 441 | pose_roll_degrees = 0 442 | [equi: Equirectangular Projection Box] 443 | projection_bounds_top = 0 444 | projection_bounds_bottom = 0 445 | projection_bounds_left = 0 446 | projection_bounds_right = 0 447 | [pasp: Pixel Aspect Ratio Box] 448 | ... 449 | ``` 450 | 451 | ### WebM (Matroska) 452 | Spherical video metadata is stored in a new master element, `Projection`, 453 | placed inside a video track's `Video` master element. 454 | 455 | 456 | As the V2 specification stores its metadata in a different location, it is 457 | possible for a file to contain both the V1 and V2 metadata. If both V1 and 458 | V2 metadata are contained they should contain semantically equivalent 459 | information, with V2 taking priority when they differ. 460 | 461 | Stereo mode is specified using the existing `StereoMode` element specified in 462 | the Matroska spec. Only `StereoMode` values that have the same meaning as the 463 | ones specified in the `st3d` box are allowed at this time. (e.g. 0 - mono, 464 | 1- left-right, 3 - top-bottom, 15 (provisional) - stereo-custom). 465 | 466 | #### `Projection` master element 467 | ##### Definition 468 | ID: 0x7670 469 | Level: 4 470 | Mandatory: No 471 | Type: Master 472 | Default: N/A 473 | Minver: 4 474 | WebM: Yes 475 | Container: `Video` master element 476 | 477 | Describes the video projection details. Used to render spherical and VR videos. 478 | 479 | #### `ProjectionType` element 480 | ##### Definition 481 | ID: 0x7671 482 | Level: 5 483 | Mandatory: Yes 484 | Type: uinteger 485 | Default: 0 486 | Minver: 4 487 | WebM: Yes 488 | Container: `Projection` master element 489 | 490 | Describes the projection used for this video track. 491 | 492 | ##### Semantics 493 | `ProjectionType` is an enum. The valid values are: 494 | 495 | * 0: Rectangular 496 | * 1: Equirectangular 497 | * 2: Cubemap 498 | * 3: Mesh 499 | 500 | 501 | #### `ProjectionPrivate` element 502 | ##### Definition 503 | ID: 0x7672 504 | Level: 5 505 | Mandatory: No 506 | Type: binary 507 | Default: N/A 508 | Minver: 4 509 | WebM: Yes 510 | Container: `Projection` master element 511 | 512 | Private data that only applies to a specific projection. 513 | 514 | ##### Semantics 515 | * If `ProjectionType` equals 0 (Rectangular), then this element must not be 516 | present. 517 | * If `ProjectionType` equals 1 (Equirectangular), then this element may be 518 | present. If the element is present, then it must contain the same binary data 519 | that would be stored inside an ISOBMFF Equirectangular Projection Box 520 | ('equi'). If the element is not present, then the content must be treated as 521 | if an element containing 20 zero bytes was present (i.e. a version 0 'equi' 522 | box with no flags set and all projection_bounds fields set to 0). 523 | * If `ProjectionType` equals 2 (Cubemap), then this element must be present 524 | and contain the same binary data that would be stored inside an ISOBMFF 525 | Cubemap Projection Box ('cbmp'). 526 | * If `ProjectionType` equals 3 (Mesh), then this element must be present 527 | and contain the same binary data that would be stored inside an ISOBMFF 528 | Mesh Projection Box ('mshp'). 
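To make the 20-zero-byte default above concrete, here is a small Python sketch of the binary payload an equirectangular `ProjectionPrivate` element would carry. The function name is illustrative; it writes only the FullBox version/flags and the four 0.32 fixed-point bounds, in big-endian order as in ISOBMFF.

```python
import struct

def equi_projection_private(bounds_top=0, bounds_bottom=0,
                            bounds_left=0, bounds_right=0) -> bytes:
    """Version/flags plus the four projection_bounds fields of an 'equi' box."""
    version_and_flags = struct.pack(">I", 0)  # version 0, no flags set
    bounds = struct.pack(">4I", bounds_top, bounds_bottom,
                         bounds_left, bounds_right)
    return version_and_flags + bounds

# When the element is absent, readers must behave as if exactly these
# 20 zero bytes were present.
assert equi_projection_private() == bytes(20)
```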
529 | 530 | Note: ISOBMFF box size and fourcc fields are not included in the binary 531 | data, but the FullBox version and flag fields are. This is to avoid 532 | redundant framing information while preserving versioning and semantics 533 | between the two container formats. 534 | 535 | #### `ProjectionPoseYaw` element 536 | ##### Definition 537 | ID: 0x7673 538 | Level: 5 539 | Mandatory: No 540 | Type: float 541 | Default: 0.0 542 | Minver: 4 543 | WebM: Yes 544 | Container: Projection master element 545 | 546 | Specifies a yaw rotation to the projection. 547 | 548 | ##### Semantics 549 | Value represents a counter-clockwise rotation, in degrees, around the up vector. 550 | This rotation must be applied before any `ProjectionPosePitch` or 551 | `ProjectionPoseRoll` rotations. The value of this field should be in the 552 | -180 to 180 degree range. 553 | 554 | #### `ProjectionPosePitch` element 555 | ##### Definition 556 | ID: 0x7674 557 | Level: 5 558 | Mandatory: No 559 | Type: float 560 | Default: 0.0 561 | Minver: 4 562 | WebM: Yes 563 | Container: Projection master element 564 | 565 | Specifies a pitch rotation to the projection. 566 | 567 | ##### Semantics 568 | Value represents a counter-clockwise rotation, in degrees, around the right 569 | vector. This rotation must be applied after the `ProjectionPoseYaw` rotation 570 | and before the `ProjectionPoseRoll` rotation. The value of this field 571 | should be in the -90 to 90 degree range. 572 | 573 | #### `ProjectionPoseRoll` element 574 | ##### Definition 575 | ID: 0x7675 576 | Level: 5 577 | Mandatory: No 578 | Type: float 579 | Default: 0.0 580 | Minver: 4 581 | WebM: Yes 582 | Container: Projection master element 583 | 584 | Specifies a roll rotation to the projection. 585 | 586 | ##### Semantics 587 | Value represents a clockwise rotation, in degrees, around the forward 588 | vector. This rotation must be applied after the `ProjectionPoseYaw` and 589 | `ProjectionPosePitch` rotations. The value of this field should be in 590 | the -180 to 180 degree range. 591 | 592 | ### Example 593 | 594 | Here is an example element hierarchy for a file containing the Projection 595 | metadata for a stereo left-right equirectangular video: 596 | 597 | ``` 598 | [Segment] 599 | [Tracks] 600 | [TrackEntry] 601 | ... 602 | [Video] 603 | ... 604 | [StereoMode value = 1] 605 | [Projection] 606 | [ProjectionType value = 1] 607 | [ProjectionPrivate] 608 | flags = 0 609 | version = 0 610 | projection_bounds_top = 0 611 | projection_bounds_bottom = 0 612 | projection_bounds_left = 0 613 | projection_bounds_right = 0 614 | ``` 615 | -------------------------------------------------------------------------------- /docs/vr180.md: -------------------------------------------------------------------------------- 1 | # VR180 Video Format 2 | 3 | # 1. Introduction 4 | 5 | VR180 cameras are a new category of VR camera that use two wide angle cameras to 6 | capture the world as you see it with point and shoot simplicity. This document 7 | describes the video format output by these devices. The choice considers the 8 | following aspects: 9 | 10 | * **FOV**: VR180 cameras capture sub-360 FOV rather than full 360. It is 11 | important to retain the original pixel density of the camera sensors in 12 | order to provide high pixel density for VR viewing. 13 | * **Projection**: Different versions of VR180 cameras may have different lens 14 | and different camera projections. As such the file format should be 15 | camera-independent. 
16 | * **Motion**: The cameras can often be non-stationary due to unintentional 17 | shakes or intentional motion, for example, handheld capture of events or 18 | people. To avoid motion sickness, camera motion metadata should be saved for 19 | stabilized playback. 20 | * **Playback**: The file format should be friendly enough for local playback 21 | so that manufacturers can easily build their apps. Android and iOS should 22 | have an easy way to play the raw video. 23 | 24 | VR180 videos contain two types of metadata to jointly define the projection from 25 | video frames to their partial viewports within a spherical coordinate system. 26 | 27 | 1. **A global static projection** that defines the mapping from the pixels to 28 | local spherical coordinate systems, typically to only a sub-180 FOV part. 29 | The [Spherical Metadata V2 30 | Spec](https://github.com/google/spatial-media/blob/master/docs/spherical-video-v2-rfc.md) 31 | is adopted here to encode this global metadata. (See details in [section 32 | 2](#2-mesh-projection)). 33 | 2. **A dynamic orientation stream** that defines the rotation between the local 34 | coordinate system of each frame and the world coordinate system. A new 35 | [Camera Motion Metadata 36 | track](https://developers.google.com/streetview/publish/camm-spec) is 37 | created for encoding such per-frame metadata. (See [section 38 | 3](#3-camera-motion-metadata)). 39 | 40 | # 2. Mesh Projection 41 | 42 | The [Spherical Metadata V2 43 | Spec](https://github.com/google/spatial-media/blob/master/docs/spherical-video-v2-rfc.md) 44 | should be present in the file to define the static global projection of 45 | individual frames to their local spherical coordinate system. Among the allowed 46 | projection types by Spherical Metadata V2, the VR180 Video format requires a 47 | mesh projection, which is most generic and works for fisheye projection. 48 | 49 | | | 50 | :-------------------------------------------------------: | :---------------------: 51 | (a) 360 equirectangular | (b) fisheye mesh projection 52 | 53 | Figure 1. Example of video frame in typical equirectangular format and the mesh 54 | format. 55 | 56 | By using the mesh projection type, the cameras can save the raw pixels in 57 | side-by-side or over-under format in the video, and let the projection meshes 58 | define the back-projection from pixels to the 3D directions. This not only 59 | preserves the pixel density of the camera sensors, but also saves production 60 | cost and power consumption by shaving off expensive reprojection computation. To 61 | render such videos, player clients simply need to draw the saved per-eye mesh 62 | with their corresponding image as texture. To be specific in VR180: 63 | 64 | * Dual stereo mesh: video files contain two meshes, one mesh for each eye. 65 | * Fisheye projection: geometry-wise, the video frames are simple 66 | concatenations of left and right views with possible crop and rescale, but 67 | there is no other type of warping (e.g. de-fisheye). 68 | * Stereo mode: for better compatibility with video streaming services that are 69 | optimized for 16:9, landscape LEFT-RIGHT is preferred over portrait 70 | TOP-BOTTOM. 71 | 72 | ## Mesh Generation 73 | 74 | Once the cameras are calibrated, the mesh vertices can be generated by 75 | straightforward back-projection for a grid of coordinates that cover the valid 76 | image portion (inside 180 image circle). 
Refer to the 77 | [appendix](#appendix-mesh-generation-demo) for a complete Matlab demo code for 78 | producing a full mesh for a fisheye camera. Below is the pseudo code for getting 79 | a single mesh vertex. 80 | 81 | ```matlab 82 | % Returns the mesh vertex for an image point image_x, image_y for an eye. 83 | % (width, height) : the size of the image of an eye. 84 | % (image_x, image_y): image coordinate where (0, 0) and (width, height) are top-left 85 | % and bottom-right corner respectively. 86 | % eye_camera : the calibrated camera for an eye (left or right) 87 | function [x, y, z, u, v] = GetMeshVertex(width, height, image_x, image_y, eye_camera) 88 | % Unit ray direction corresponding the pixel 89 | [x, y, z] = PixelToRay(eye_camera, image_x, image_y); 90 | % Negate Y and Z IF the camera parameterization follows standard Computer Vision 91 | % convention where Y points down and Z points forward. This is to account the 92 | % difference with OpenGL coordinate system. 93 | [x, y, z] = [x, -y, -z]; 94 | % Normalized OpenGL coordinate for the pixel, where the V coordinate needs to be flipped. 95 | u = image_x / width; 96 | v = (height - image_y) / height; 97 | end 98 | ``` 99 | 100 | * Although the video frame is a concatenation of left and right eye images 101 | (LEFT-RIGHT or OVER-UNDER), the mesh for each eye should be generated as if 102 | they are separate images. 103 | * A coarse mesh is preferred over a full-resolution mesh. Downsampled meshes 104 | work well as long as the resolution is reasonable, and they are more 105 | efficient for playback. A typical mesh resolution is a 40x40 grid. 106 | 107 | # 3. Camera Motion Metadata 108 | 109 | Camera rotations during video capture in a world coordinate system can be 110 | embedded as video metadata. This metadata is particularly important for 111 | hand-held VR video: 112 | 113 | * By using camera rotation metadata, the player can render the video frames at 114 | the exact orientation they were captured. The compensation of the camera 115 | rotation essentially keeps the distant background static. Our experiments 116 | have shown such stabilized viewing significantly reduces the motion sickness 117 | issue for VR. 118 | * It is important to have high quality rotation data (including correct 119 | gravity vector), otherwise the playback can cause motion sickness or be 120 | disorienting. This basically requires a well-calibrated IMU along with 121 | on-device sensor fusion. 122 | 123 | | | | | 124 | :-: | :--:| :--: 125 | 126 | Figure 2. Three equirectangular stereo views generated according to their 127 | rotations. 128 | 129 | ## Camera Motion Metadata Track 130 | 131 | We have created a new [Camera Motion Metadata 132 | Track](https://developers.google.com/streetview/publish/camm-spec) for storing 133 | various kinds of camera motion metadata, including camera orientation, gyroscope 134 | reading, accelerometer readings, etc. The custom metadata track can be 135 | identified by the new Camera Motion Metadata (camm) Sample Entry box. 136 | 137 | In the application of VR180 camera, each video contains such a metadata track to 138 | store camera rotation data. Each data sample in the metadata track is 139 | represented as bitstream in the following format 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 164 | 165 |
| Fields | Description |
| :----- | :---------- |
| `int32 reserved;` | Should be 0. |
| `float32 angle_axis[3];` | Angle-axis orientation in radians representing the rotation from the camera coordinate system to the world coordinate system. Let M be the 3x3 rotation matrix corresponding to the angle-axis vector. For any ray X in the local coordinate system, the ray direction in world coordinates is M * X. Such orientation information can be obtained by running 3DoF sensor fusion on the device; after integrating the IMU readings, only the integrated global orientation needs to be recorded. Example C++ code for converting a rotation matrix to the expected angle axis using Eigen3: `Eigen::Matrix3f M = get_current_rotation_matrix(); Eigen::AngleAxisf aa(M); Eigen::Vector3f angle_axis = aa.angle() * aa.axis();` |
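As a concrete illustration of the sample layout in the table above, the following Python sketch packs one orientation sample. It is illustrative only: the byte order follows the little-endian note further below, and writing the bytes into an MP4 sample is out of scope.

```python
import struct

def pack_vr180_camm_sample(angle_axis) -> bytes:
    """int32 reserved (0) followed by float32 angle_axis[3], little-endian."""
    ax, ay, az = angle_axis
    return struct.pack("<i3f", 0, ax, ay, az)

# A zero rotation (identity orientation) packs to 16 zero bytes.
assert pack_vr180_camm_sample((0.0, 0.0, 0.0)) == bytes(16)
```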
166 | 167 | * The coordinate systems are right-hand sided. The camera coordinate system is 168 | defined as X pointing right, Y pointing downward, and Z pointing forward. 169 | The Y-axis of the global coordinate system should point down along the 170 | gravity vector. 171 | 172 | ![image alt text](coordinate.png) 173 | 174 | * IMU readings are typically in its own IMU coordinate system, and necessary 175 | rotation is needed to map them to the camera coordinate system if the two 176 | coordinate systems are different. 177 | * To have a consistent viewing experience, we recommend resetting the yaw 178 | angle for each new video recording, such that the orientation of the first 179 | frame has a yaw angle of 0. 180 | * All fields are little-endian and least significant bit first, and the 32-bit 181 | floating points are of IEEE 754-1985 format. The video recorder should 182 | maintain a struct of these fields in memory and copy the raw data to video 183 | packets. 184 | 185 | Synchronization between metadata and video frames. 186 | 187 | * Video track and metadata track are synchronized by the presentation 188 | timestamp of the video and metadata samples. 189 | * Given the camera orientation for a discrete set of metadata presentation 190 | time, the continuous orientation for any given time is defined by linear 191 | interpolation of neighboring camera orientations. When rendering a video 192 | frame, player should obtain the frame rotation by linear interpolation using 193 | the presentation time of the video frame. 194 | * Typical presentation time for a video frame is the start of frame exposure, 195 | which does not take into account of exposure time and rolling shutter. When 196 | per-frame exposure time and rolling shutter are known, better 197 | correspondences can be achieved by adjusting the presentation time of the 198 | video frames to the middle of frame exposure duration: 199 | exposure_start_of_first_row + (pixel_exposure_time + rolling_shutter_skew) 200 | /2. 201 | 202 | # 4. Identifying VR180 Videos 203 | 204 | Below is an example box structure of a VR180 video: 205 | 206 | ``` 207 | [moov] 208 | [trak] // video track 209 | [mdia] 210 | [minf] 211 | [stbl] 212 | [stsd] 213 | [avc1] 214 | [st3d] // spherical metadata v2 215 | [sv3d] // spherical metadata v2 216 | ... 217 | [trak] // audio track 218 | ... 219 | [trak] // camera motion data track 220 | [mdia] 221 | [hdlr] // handler = ‘meta’ 222 | [minf] 223 | [stbl] 224 | [stsd] 225 | [camm] // camera motion sample entry 226 | ``` 227 | 228 | The VR180 videos can be identified for custom processing or playback by the 229 | existence and the content of Spherical Video Metadata V2. Optionally, the camera 230 | motion metadata track provides the stabilization that aligns the video frames 231 | with a fixed world orientation. 232 | 233 | # Appendix - Mesh Generation Demo 234 | 235 | ```matlab 236 | % Demo code for mesh generation from a fisheye camera. The format of the mesh 237 | % vertices and triangle indices are generated according to the definition of 238 | % ProjectionMesh in Spherical Video V2 ( 239 | % https://github.com/google/spatial-media/blob/master/docs/spherical-video-v2-rfc.md) 240 | % 241 | % Note for stereo image that are composed of two sub-images for left and right 242 | % eye, the meshes should be generated from the individual cameras that describe 243 | % the sub-images of each eye as if they are separated. 
244 | % 245 | % Please note that Computer Vision typically uses a coordinate system such X 246 | % points right, Y points downward, and Z points forward, which has negated Y and 247 | % Z compared to OpenGL. To generate a mesh from such a camera, Y and Z 248 | % coordinates need to be negated, and texture coordinate V needs to be flipped 249 | % similarly. 250 | % 251 | function spherical_mesh_demo() 252 | % Example fisheye camera for the demo. 253 | fisheye_camera = demo_camera(); 254 | 255 | % Mesh resolution. 256 | grid_size_x = 40; 257 | grid_size_y = 40; 258 | 259 | % Generate the vertices and triangle indices from the camera. 260 | [vertices, tri] = generate_mesh(fisheye_camera, grid_size_x, grid_size_y); 261 | 262 | % Plot the UV triangulation in the image space. 263 | figure(1); 264 | u = reshape([vertices(:).u], grid_size_y, grid_size_x); 265 | v = reshape([vertices(:).v], grid_size_y, grid_size_x); 266 | trimesh(tri, u * fisheye_camera.image_size(1), ... 267 | v * fisheye_camera.image_size(2)); 268 | set(gca, 'xlim', [0, fisheye_camera.image_size(1)],... 269 | 'ylim', [0, fisheye_camera.image_size(2)]); 270 | axis equal; 271 | 272 | % Plot the mesh in 3D. 273 | figure(2); 274 | x = reshape([vertices(:).x], grid_size_y, grid_size_x); 275 | y = reshape([vertices(:).y], grid_size_y, grid_size_x); 276 | z = reshape([vertices(:).z], grid_size_y, grid_size_x); 277 | trimesh(tri, x, y, z); 278 | axis equal; 279 | end 280 | 281 | 282 | % Generate the mesh vertices and triangle indices using a grid in the 283 | % intersection of 180 image circle and the image rectangle. 284 | function [vertices, tri] = generate_mesh(fisheye_camera, grid_size_x,... 285 | grid_size_y) 286 | % Struct for the mesh vertex 287 | vertices = struct('u', {}, 'v', {}, 'x', {}, 'y', {}, 'z', {}); 288 | 289 | % The radius along x-axis and y-axis, assuming an ellipse shape. 290 | radius = image_circle(fisheye_camera); 291 | 292 | % The vertical boundary of the image circle/ellipse. 293 | ymin = max(0, fisheye_camera.principal_point(2) - radius(2)); 294 | ymax = min(fisheye_camera.image_size(2),... 295 | fisheye_camera.principal_point(2) + radius(2)); 296 | 297 | for i = 1 : grid_size_y; 298 | % Y coordinate in the image. 299 | yi = ymin + (i - 1) * (ymax - ymin) / (grid_size_y - 1); 300 | % Y coordinate relative to image center. 301 | yc = yi - fisheye_camera.principal_point(2); 302 | 303 | % Horizontal boundary on the image circle along the given y coordinate. 304 | rx = radius(1) * sqrt(1 - yc^2 / (radius(2)^2)); 305 | xmin = max(0, fisheye_camera.principal_point(1) - rx); 306 | xmax = min(fisheye_camera.image_size(1), ... 307 | fisheye_camera.principal_point(1) + rx); 308 | 309 | % Generate evenly spaced vertices along the horizontal line. 310 | for j = 1 : grid_size_x; 311 | % X coordinate 312 | xj = xmin + (j - 1) * (xmax - xmin) / (grid_size_x - 1); 313 | point = pixel_to_ray(fisheye_camera, xj, yi); 314 | 315 | % X, Y, Z, U, V for each vertex. To account for the difference between 316 | % normal Computer Vision coordinate and OpenGL coordinate, the Y and Z 317 | % coordinate of the needs to be negated, and V needs to be flipped. 318 | % If you are already using an OpenGL like coordinate system, this will 319 | % not be needed. 
320 | vertices(i, j).x = point(1); 321 | vertices(i, j).y = - point(2); 322 | vertices(i, j).z = - point(3); 323 | vertices(i, j).u = xj / fisheye_camera.image_size(1); 324 | vertices(i, j).v = 1 - yi / fisheye_camera.image_size(2); 325 | end 326 | end 327 | 328 | % Generate triangle indices for the mesh. 329 | for j = 0 : grid_size_x - 2; 330 | for i = 0 : grid_size_y - 2; 331 | % Split the quad (i , i + 1) x (j, j + 1) to two triangles: 332 | tri(end + 1, :) = [grid_size_y * j + i + 1,... 333 | grid_size_y * (j + 1) + i + 1,... 334 | grid_size_y * j + i + 2]; 335 | tri(end + 1, :) = [grid_size_y * j + i + 2,... 336 | grid_size_y * (j + 1) + i + 1,... 337 | grid_size_y * (j + 1) + i + 2]; 338 | end 339 | end 340 | end 341 | 342 | % The example camera uses a typical Computer Vision fisheye camera model, which 343 | % projects a 3D points in the world coordinate system as follows: 344 | % 345 | % 1. Transform world_point to camera coordinate system: 346 | % camera_point = ... 347 | % camera.world_to_camera_rotation * (world_point - camera.position); 348 | % 2. Fisheye mapping. 349 | % theta = atan2(norm(camera_point(1:2)), camera_point(3)); 350 | % 3. Radial distortion factors 351 | % d = camera.radial_distortion 352 | % normalized_r = theta + d(1) * theta^3 + d(2) * theta^5 + d(3) * theta^7. 353 | % normalized_x = normalized_r * camera_point(1) / norm(camera_point(1:2)); 354 | % normalized_y = normalized_r * camera_point(2) / norm(camera_point(1:2)); 355 | % 4. Map the normalized coordinate to pixels 356 | % x = camera.focal_length * normalized_x + camera.principal_point(1) 357 | % y = camera.focal_length * camera.pixel_aspect_ratio * normalized_y ... 358 | % + camera.principal_point(2); 359 | function fisheye_camera = demo_camera() 360 | fisheye_camera = struct('image_size', [2160, 2160], ... 361 | 'principal_point', [1080, 1080], ... 362 | 'pixel_aspect_ratio', 1.2, ... 363 | 'focal_length', 828, ... 364 | 'radial_distortion', [-0.032, -0.00243, 0.001],... 365 | 'world_to_camera_rotation', eye(3), ... 366 | 'position', [0, 0, 0]); 367 | end 368 | 369 | % Map a pixel (x, y) to the ray direction in the world coordinate. 370 | % 371 | % Note this needs to be modified for cameras with different parametrization. 372 | function point = pixel_to_ray(fisheye_camera, x, y) 373 | % Normalized Y coordinate. 374 | yn = (y - fisheye_camera.principal_point(2))/ fisheye_camera.focal_length ... 375 | / fisheye_camera.pixel_aspect_ratio; 376 | 377 | % Normalized X coordinate. 378 | xn = (x - fisheye_camera.principal_point(1)) / fisheye_camera.focal_length; 379 | 380 | % Normalized distance to image center. 381 | rn = sqrt(xn * xn + yn * yn); 382 | 383 | % Solve for the angle theta between the viewing ray and the optical axis 384 | % that satisfies: 385 | % rn = theta + theta^3 * d(1) + theta^5 * d(2) + theta^7 * d(3); 386 | % The example uses just 3 parameters, but it can easily extended to more. 387 | d = fisheye_camera.radial_distortion; 388 | theta = roots([d(3), 0, d(2), 0, d(1), 0, 1.0, -rn]); 389 | 390 | % Take the smallest positive real solution. 391 | theta = min(theta(find(imag(theta) == 0 & real(theta) > 0))); 392 | % Degenerate case in exact image center. 393 | if isempty(theta); theta = 0; end; 394 | 395 | % Generate the point in the world coordinate. 396 | point = [sin(theta) * xn / rn; sin(theta) * yn / rn; cos(theta)]; 397 | 398 | % Apply the inverse rotation to transform it from camera to world. 
399 | point = fisheye_camera.world_to_camera_rotation' * point; 400 | end 401 | 402 | % Calculate X- and Y-radius of the image circle/ellipse for a fisheye camera. 403 | % 404 | % Note the image circle logic needs to be modified for cameras with different 405 | % parametrization, for example, having non-zero skew. 406 | function radius = image_circle(fisheye_camera) 407 | % Half of the desired image circle. Note the maximum image circle should be 408 | % at most 180 degrees, but it is OK to make it smaller to avoid peripheral 409 | % with poor quality. 410 | theta = pi / 2; 411 | % Normalized distance to the image center. 412 | d = fisheye_camera.radial_distortion; 413 | normalized_r = theta + theta^3 * d(1) + theta^5 * d(2) + theta^7 * d(3); 414 | % Radius along X and Y axes. 415 | radius = normalized_r * fisheye_camera.focal_length... 416 | * [1, fisheye_camera.pixel_aspect_ratio]; 417 | end 418 | ``` 419 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyInstaller 2 | tk 3 | pillow 4 | packaging 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup(name='spatialmedia', 4 | version='2.1a1', 5 | description='Specifications and tools for 360 video and spatial audio.', 6 | author='Google Inc', 7 | license='Apache License 2.0', 8 | url='https://github.com/google/spatial-media', 9 | packages=['spatialmedia', 'spatialmedia.mpeg'] 10 | ) 11 | -------------------------------------------------------------------------------- /spatial-audio/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
203 | -------------------------------------------------------------------------------- /spatial-audio/NOTICE: -------------------------------------------------------------------------------- 1 | ==================== 2 | Open Source Licenses 3 | ==================== 4 | 5 | This software may use portions of the following libraries subject to the accompanying licenses: 6 | 7 | **************************** 8 | SADIE BINAURAL MEASUREMENTS 9 | **************************** 10 | Apache License 11 | 12 | Version 2.0, January 2004 13 | 14 | http://www.apache.org/licenses/ 15 | 16 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 17 | 18 | 1. Definitions. 19 | 20 | "License" shall mean the terms and conditions for use, reproduction, and 21 | distribution as defined by Sections 1 through 9 of this document. 22 | 23 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 24 | owner that is granting the License. 25 | 26 | "Legal Entity" shall mean the union of the acting entity and all other entities 27 | that control, are controlled by, or are under common control with that entity. 28 | For the purposes of this definition, "control" means (i) the power, direct or 29 | indirect, to cause the direction or management of such entity, whether by 30 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 31 | outstanding shares, or (iii) beneficial ownership of such entity. 32 | 33 | "You" (or "Your") shall mean an individual or Legal Entity exercising 34 | permissions granted by this License. 35 | 36 | "Source" form shall mean the preferred form for making modifications, including 37 | but not limited to software source code, documentation source, and configuration 38 | files. 39 | 40 | "Object" form shall mean any form resulting from mechanical transformation or 41 | translation of a Source form, including but not limited to compiled object code, 42 | generated documentation, and conversions to other media types. 43 | 44 | "Work" shall mean the work of authorship, whether in Source or Object form, made 45 | available under the License, as indicated by a copyright notice that is included 46 | in or attached to the work (an example is provided in the Appendix below). 47 | 48 | "Derivative Works" shall mean any work, whether in Source or Object form, that 49 | is based on (or derived from) the Work and for which the editorial revisions, 50 | annotations, elaborations, or other modifications represent, as a whole, an 51 | original work of authorship. For the purposes of this License, Derivative Works 52 | shall not include works that remain separable from, or merely link (or bind by 53 | name) to the interfaces of, the Work and Derivative Works thereof. 54 | 55 | "Contribution" shall mean any work of authorship, including the original version 56 | of the Work and any modifications or additions to that Work or Derivative Works 57 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 58 | by the copyright owner or by an individual or Legal Entity authorized to submit 59 | on behalf of the copyright owner. 
For the purposes of this definition, 60 | "submitted" means any form of electronic, verbal, or written communication sent 61 | to the Licensor or its representatives, including but not limited to 62 | communication on electronic mailing lists, source code control systems, and 63 | issue tracking systems that are managed by, or on behalf of, the Licensor for 64 | the purpose of discussing and improving the Work, but excluding communication 65 | that is conspicuously marked or otherwise designated in writing by the copyright 66 | owner as "Not a Contribution." 67 | 68 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 69 | of whom a Contribution has been received by Licensor and subsequently 70 | incorporated within the Work. 71 | 72 | 2. Grant of Copyright License. Subject to the terms and conditions of this 73 | License, each Contributor hereby grants to You a perpetual, worldwide, 74 | non-exclusive, no-charge, royalty-free, irrevocable copyright license to 75 | reproduce, prepare Derivative Works of, publicly display, publicly perform, 76 | sublicense, and distribute the Work and such Derivative Works in Source or 77 | Object form. 78 | 79 | 3. Grant of Patent License. Subject to the terms and conditions of this License, 80 | each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, 81 | no-charge, royalty-free, irrevocable (except as stated in this section) patent 82 | license to make, have made, use, offer to sell, sell, import, and otherwise 83 | transfer the Work, where such license applies only to those patent claims 84 | licensable by such Contributor that are necessarily infringed by their 85 | Contribution(s) alone or by combination of their Contribution(s) with the Work 86 | to which such Contribution(s) was submitted. If You institute patent litigation 87 | against any entity (including a cross-claim or counterclaim in a lawsuit) 88 | alleging that the Work or a Contribution incorporated within the Work 89 | constitutes direct or contributory patent infringement, then any patent licenses 90 | granted to You under this License for that Work shall terminate as of the date 91 | such litigation is filed. 92 | 93 | 4. Redistribution. 
You may reproduce and distribute copies of the Work or 94 | Derivative Works thereof in any medium, with or without modifications, and in 95 | Source or Object form, provided that You meet the following conditions: 96 | 97 | You must give any other recipients of the Work or Derivative Works a copy of 98 | this License; and 99 | You must cause any modified files to carry prominent notices stating that You 100 | changed the files; and 101 | You must retain, in the Source form of any Derivative Works that You distribute, 102 | all copyright, patent, trademark, and attribution notices from the Source form 103 | of the Work, excluding those notices that do not pertain to any part of the 104 | Derivative Works; and 105 | If the Work includes a "NOTICE" text file as part of its distribution, then any 106 | Derivative Works that You distribute must include a readable copy of the 107 | attribution notices contained within such NOTICE file, excluding those notices 108 | that do not pertain to any part of the Derivative Works, in at least one of the 109 | following places: within a NOTICE text file distributed as part of the 110 | Derivative Works; within the Source form or documentation, if provided along 111 | with the Derivative Works; or, within a display generated by the Derivative 112 | Works, if and wherever such third-party notices normally appear. The contents of 113 | the NOTICE file are for informational purposes only and do not modify the 114 | License. You may add Your own attribution notices within Derivative Works that 115 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 116 | provided that such additional attribution notices cannot be construed as 117 | modifying the License. 118 | 119 | You may add Your own copyright statement to Your modifications and may provide 120 | additional or different license terms and conditions for use, reproduction, or 121 | distribution of Your modifications, or for any such Derivative Works as a whole, 122 | provided Your use, reproduction, and distribution of the Work otherwise complies 123 | with the conditions stated in this License. 124 | 5. Submission of Contributions. Unless You explicitly state otherwise, any 125 | Contribution intentionally submitted for inclusion in the Work by You to the 126 | Licensor shall be under the terms and conditions of this License, without any 127 | additional terms or conditions. Notwithstanding the above, nothing herein shall 128 | supersede or modify the terms of any separate license agreement you may have 129 | executed with Licensor regarding such Contributions. 130 | 131 | 6. Trademarks. This License does not grant permission to use the trade names, 132 | trademarks, service marks, or product names of the Licensor, except as required 133 | for reasonable and customary use in describing the origin of the Work and 134 | reproducing the content of the NOTICE file. 135 | 136 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in 137 | writing, Licensor provides the Work (and each Contributor provides its 138 | Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 139 | KIND, either express or implied, including, without limitation, any warranties 140 | or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 141 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 142 | appropriateness of using or redistributing the Work and assume any risks 143 | associated with Your exercise of permissions under this License. 144 | 145 | 8. Limitation of Liability. In no event and under no legal theory, whether in 146 | tort (including negligence), contract, or otherwise, unless required by 147 | applicable law (such as deliberate and grossly negligent acts) or agreed to in 148 | writing, shall any Contributor be liable to You for damages, including any 149 | direct, indirect, special, incidental, or consequential damages of any character 150 | arising as a result of this License or out of the use or inability to use the 151 | Work (including but not limited to damages for loss of goodwill, work stoppage, 152 | computer failure or malfunction, or any and all other commercial damages or 153 | losses), even if such Contributor has been advised of the possibility of such 154 | damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or 157 | Derivative Works thereof, You may choose to offer, and charge a fee for, 158 | acceptance of support, warranty, indemnity, or other liability obligations 159 | and/or rights consistent with this License. However, in accepting such 160 | obligations, You may act only on Your own behalf and on Your sole 161 | responsibility, not on behalf of any other Contributor, and only if You agree to 162 | indemnify, defend, and hold each Contributor harmless for any liability incurred 163 | by, or claims asserted against, such Contributor by reason of your accepting any 164 | such warranty or additional liability. 165 | 166 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /spatial-audio/README.md: -------------------------------------------------------------------------------- 1 | # Spatial Audio Resources 2 | Google VR Audio enables Ambisonic spatial audio playback in [Google VR SDK](https://developers.google.com/vr/), [YouTube 360/VR](http://yt.be/spatialaudiovrhelp), and [Omnitone](https://googlechrome.github.io/omnitone/#home). It provides ambisonic binaural decoding via Head Related Transfer Functions (HRTFs). This repository contains information and resources that can be used in order to build binaural preview software or to directly monitor the binaural output when creating content in Digital Audio Workstations (DAWs). 3 | 4 | # Contents of the directories 5 | ## raw symmetric cube hrirs 6 | This is a set of binaural measurements (Head Related Impulse Responses or HRIRs) taken from a cube loudspeaker configuration. This configuration is used in the Google VR Audio first-order ambisonic binaural decoder. 7 | 8 | This set has been derived from the [SADIE binaural measurements](https://www.york.ac.uk/sadie-project/binaural.html) and is provided as individual time-domain FIR filters. 9 | 10 | ### Specification & modifications: 11 | subject: 002 (KU100 binaural head) 12 | sampling rate: 48kHz 13 | bit depth: 16 bit 14 | length: 256 samples 15 | fade-in: none 16 | fade-out: half-hann (16 samples) 17 | symmetric: yes (left hemisphere only) 18 | applied gain: 0dB (none) 19 | 20 | Also provided is a preset configuration file for the [ambiX](https://github.com/kronihias/ambix) binaural decoder plugin. 
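A quick way to confirm that a local copy of these HRIRs matches the specification above is to inspect one with Python's standard `wave` module. The path shown is relative to the `spatial-audio` directory; this is just a sanity check, not required tooling.

```python
import wave

with wave.open("raw-symmetric-cube-hrirs/E35_A45.wav", "rb") as hrir:
    assert hrir.getframerate() == 48000  # 48 kHz sampling rate
    assert hrir.getsampwidth() == 2      # 16-bit samples
    assert hrir.getnframes() == 256      # 256-sample FIR filter
```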
21 | 22 | **Note**: Using these filters directly (or via the [ambiX](https://github.com/kronihias/ambix) preset) with your ambisonic mix will **not** result in an output equivalent to Google VR Audio unless you use phase-matched shelf-filters, as recommended by A. J. Heller, R. Lee and E. M. Benjamin in their paper "[Is My Decoder Ambisonic?](http://www.ai.sri.com/ajh/ambisonics/BLaH3.pdf)". To solve that problem, we also provide a set called *symmetric ambisonic binaural decoder* which contains shelf-filtered, symmetric spherical harmonic HRIRs which can be directly applied to an ambisonic mix via a simple filter operation (please see below). 23 | 24 | ## symmetric ambisonic binaural decoder 25 | This set represents spherical harmonic HRIR filters used for first-order ambisonic binaural decode in Google VR Audio. To produce the binaural output, these filters should be applied to an ambisonic mix directly and the output routed to the stereo L & R channels as shown in the following diagram: 26 | 27 | ![symmetric binaural ambisonic decoder - signal flow diagram](https://cloud.githubusercontent.com/assets/26985965/24811254/2143fb12-1b7a-11e7-99a0-cef55c3f8365.png) 28 | 29 | The filtering operation (* symbol) can be done using freely available tools, for example, free multichannel convolver plugins like [LAConvolver](http://audio.lernvall.com/) (mac) or [Freeverb3](http://www.nongnu.org/freeverb3/) (win). The dotted red line with the Ø symbol denotes 180° phase inversion. 30 | 31 | ## ambisonic correction filters 32 | This is a set of filters which compensate for the change of HRTFs in the binaural ambisonic decoder. If the ambisonic audio has been mixed against our previous THRIVE HRTFs, applying these filters to the ambiX tracks will minimize the timbral changes after the switch to SADIE KU100 HRTFs. For example, the below diagram shows a frequency response of a decoded sound fields containing a single, broadband sound source in front of the listener. If no compensation is used, switching the binaural decoder from THRIVE to SADIE KU100 will result in spectral changes: 33 | 34 | ![decoder output before correction](https://cloud.githubusercontent.com/assets/26985965/24811252/21399d66-1b7a-11e7-953c-2357d6be8f3a.png) 35 | 36 | After applying the compensation filters, frequency response differences will be minimized: 37 | 38 | ![decoder output after correction](https://cloud.githubusercontent.com/assets/26985965/24811250/2137f42a-1b7a-11e7-9ff5-e09718293401.png) 39 | 40 | The filtering operation (* symbol) can be done using freely available tools, for example, free multichannel convolver plugins like [LAConvolver](http://audio.lernvall.com/) (mac) or [Freeverb3](http://www.nongnu.org/freeverb3/) (win). 41 | 42 | ![ambiX correction filters - signal flow diagram](https://cloud.githubusercontent.com/assets/26985965/24811249/21269c84-1b7a-11e7-8139-91e4fd0d0a20.png) 43 | 44 | # FAQs 45 | ### Why did you change HRTFs? 46 | Binaural decoding of ambisonic spatial audio relies on Head Related Transfer Functions (or their time-domain equivalents - HRIRs) which are unique to each individual. However, our studies showed that some datasets perform better on average than other datasets, in the case when an individualized set is not available. 47 | 48 | We tested a large number of available HRTF datasets in a mobile VR application and using a variety of different sound samples. 
A panel of 53 experienced listeners (20 of whom were expert assessors) compared the overall subjective audio quality of the interactive binaural renders. We found that SADIE KU100 set performed best for majority of the participants: 49 | 50 | ![perceived decoder quality](https://cloud.githubusercontent.com/assets/26985965/24811253/213b6628-1b7a-11e7-9079-f7a906e963a0.png) 51 | 52 | ### Should I use HRTF monitoring when creating ambisonic content for binaural reproduction (e.g. YouTube 360/VR)? 53 | When creating content for YouTube 360/VR it is recommended that you monitor your ambisonic audio using the provided *symmetric ambisonic binaural decoder* and/or preview the final mix directly on YouTube to control the timbre (coloration) and loudness of your mix (please see below). 54 | 55 | ### What is timbral coloration of HRTFs? 56 | HRTFs add specific coloration to the decoded binaural output signal. However, this coloration is dependent on the HRTF set used (due to individual nature of HRTFs). For example, here is an example HRTF frequency response of the THRIVE set and a similar response of the SADIE KU100 set: 57 | 58 | ![HRTF coloration](https://cloud.githubusercontent.com/assets/26985965/24811251/2139476c-1b7a-11e7-865b-f37ac10b197f.png) 59 | 60 | ### Can I adjust my existing ambisonic mix to make it sound the same with the new binaural decoder? 61 | If you already have produced an ambisonic soundtrack using our THRIVE HRTF set for monitoring, you can simply correct the frequency response of your ambiX tracks to match the frequency response of the new SADIE KU100 HRTF set. This can be done by applying the filters we share in the [ambisonic correction filters](#ambisonic-correction-filters) directory. 62 | 63 | ### What is expected loudness of the Google VR Audio binaural decoder? 64 | Due to HRTF processing, the output amplitude of your Ambisonic track may differ depending which HRTF set you use. That is why it is important to monitor the binaural output when working on an ambisonic soundtrack using the binaural decoder which is going to be used to render your ambisonic content. Our binaural decoder's loudness should be the same as the loudness of a standard M-S stereo decoder. 65 | 66 | For example, the table below shows loudness measured in compliance with the [ITU-R BS.1770-4 recommendation](https://www.itu.int/dms_pubrec/itu-r/rec/bs/R-REC-BS.1770-4-201510-I!!PDF-E.pdf) and absolute amplitude peak values when a 0.5 normalized pink noise bursts signal is played at 8 different spatial locations around the listener: 67 | 68 | |Decoder | Loudness [dB LUFS] | Absolute Peak Amplitude | 69 | |------------|--------------------|-------------------------| 70 | |M-S Stereo |-10.59 |0.5000 | 71 | |THRIVE |-10.99 |0.9478 | 72 | |SADIE KU100 |-11.20 |0.9642 | 73 | 74 | Please note, although the loudness is matched between different decoders and the stereo decoder, the binaural output may result in the peak amplitude exceeding 0dB FS. 75 | 76 | ### Is there third party software for monitoring binaural output when working with ambisonic audio for YouTube? 77 | Some third party tools like [BlueRippleSound O3A View](http://www.blueripplesound.com/products/o3a-view-vst), [Noisemakers AmbiHead](http://www.noisemakers.fr/ambi-head/) and [SoundParticles](http://soundparticles.com/) (coming soon) implement their own binaural preview tools using the HRTFs from this repository which match those used by YouTube 360/VR. 
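For readers who would rather prototype their own preview than use the tools above, below is a rough NumPy/SciPy sketch of the decode signal flow described in the *symmetric ambisonic binaural decoder* section. Treat the details as assumptions to verify against the diagram: it presumes the four `binaural_decoder_*.wav` filters are mono and map, in index order, to the ambiX/ACN channels W, Y, Z, X, and that the 180° phase inversion applies to the first-order Y (left-right) path, as is usual for a left/right-symmetric decoder. Gain normalization is omitted.

```python
import numpy as np
from scipy.io import wavfile
from scipy.signal import fftconvolve

def decode_foa_to_binaural(ambix: np.ndarray, filter_dir: str) -> np.ndarray:
    """ambix: (num_samples, 4) float array in ACN/SN3D order (W, Y, Z, X)."""
    filters = []
    for i in range(4):
        _, h = wavfile.read(f"{filter_dir}/binaural_decoder_{i}.wav")
        filters.append(h.astype(np.float64))  # assumed mono, one per channel
    convolved = [fftconvolve(ambix[:, i], filters[i]) for i in range(4)]
    left = convolved[0] + convolved[1] + convolved[2] + convolved[3]
    # Assumed symmetry: the right ear reuses the same filters with the
    # left-right (Y) contribution phase-inverted.
    right = convolved[0] - convolved[1] + convolved[2] + convolved[3]
    return np.stack([left, right], axis=1)
```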
78 | -------------------------------------------------------------------------------- /spatial-audio/ambisonic-correction-filters/correction_filter_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/ambisonic-correction-filters/correction_filter_0.wav -------------------------------------------------------------------------------- /spatial-audio/ambisonic-correction-filters/correction_filter_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/ambisonic-correction-filters/correction_filter_1.wav -------------------------------------------------------------------------------- /spatial-audio/ambisonic-correction-filters/correction_filter_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/ambisonic-correction-filters/correction_filter_2.wav -------------------------------------------------------------------------------- /spatial-audio/ambisonic-correction-filters/correction_filter_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/ambisonic-correction-filters/correction_filter_3.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E-35_A-135.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E-35_A-135.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E-35_A-45.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E-35_A-45.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E-35_A135.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E-35_A135.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E-35_A45.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E-35_A45.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E35_A-135.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E35_A-135.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E35_A-45.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E35_A-45.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E35_A135.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E35_A135.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/E35_A45.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/raw-symmetric-cube-hrirs/E35_A45.wav -------------------------------------------------------------------------------- /spatial-audio/raw-symmetric-cube-hrirs/cube.config: -------------------------------------------------------------------------------- 1 | # Google VR Audio cube preset for ambiX Binaural Decoder 2 | #GLOBAL 3 | /global_hrtf_gain 1 4 | /coeff_scale sn3d 5 | /coeff_seq acn 6 | #END 7 | 8 | #HRTF 9 | E35_A135.wav 1 0 0 10 | E35_A-135.wav 1 0 0 11 | E-35_A135.wav 1 0 0 12 | E-35_A-135.wav 1 0 0 13 | E35_A45.wav 1 0 0 14 | E35_A-45.wav 1 0 0 15 | E-35_A45.wav 1 0 0 16 | E-35_A-45.wav 1 0 0 17 | #END 18 | 19 | #DECODERMATRIX 20 | 0.125 0.216495 0.21653 -0.216495 21 | 0.125 -0.216495 0.21653 -0.216495 22 | 0.125 0.216495 -0.21653 -0.216495 23 | 0.125 -0.216495 -0.21653 -0.216495 24 | 0.125 0.216495 0.21653 0.216495 25 | 0.125 -0.216495 0.21653 0.216495 26 | 0.125 0.216495 -0.21653 0.216495 27 | 0.125 -0.216495 -0.21653 0.216495 28 | #END -------------------------------------------------------------------------------- /spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_0.wav -------------------------------------------------------------------------------- /spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_1.wav -------------------------------------------------------------------------------- /spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_2.wav -------------------------------------------------------------------------------- /spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/symmetric-ambisonic-binaural-decoder/binaural_decoder_3.wav 
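As a companion to the `cube.config` preset above, the sketch below spells out what an ambiX-style binaural decoder does with it: the 8×4 `#DECODERMATRIX` (inputs in ACN order with SN3D scaling, per the `/coeff_seq` and `/coeff_scale` settings) mixes the first-order ambiX channels down to eight virtual loudspeakers at the cube vertices, and each speaker feed is then convolved with the HRIR listed for that vertex in the `#HRTF` section. This is a hedged sketch under assumptions, not the plugin's implementation: it assumes the raw cube HRIR WAVs are stereo (left ear, right ear), equal in length, and ordered to match the matrix rows; `mix_ambix.wav` and `cube_binaural.wav` are placeholder names.

```python
# Sketch of the cube.config decode: ambiX (ACN/SN3D) -> 8 cube speaker feeds -> HRIR convolution.
import numpy as np
import soundfile as sf
from scipy.signal import fftconvolve

# HRIR files in the order of the #HRTF section (assumed to match the matrix rows).
HRIR_FILES = ["E35_A135.wav", "E35_A-135.wav", "E-35_A135.wav", "E-35_A-135.wav",
              "E35_A45.wav", "E35_A-45.wav", "E-35_A45.wav", "E-35_A-45.wav"]

# Rows of the #DECODERMATRIX block: one row per virtual speaker, columns in ACN order (W, Y, Z, X).
D = np.array([[0.125,  0.216495,  0.21653, -0.216495],
              [0.125, -0.216495,  0.21653, -0.216495],
              [0.125,  0.216495, -0.21653, -0.216495],
              [0.125, -0.216495, -0.21653, -0.216495],
              [0.125,  0.216495,  0.21653,  0.216495],
              [0.125, -0.216495,  0.21653,  0.216495],
              [0.125,  0.216495, -0.21653,  0.216495],
              [0.125, -0.216495, -0.21653,  0.216495]])

ambix, rate = sf.read("mix_ambix.wav")   # placeholder name; shape (samples, 4), ACN order
feeds = ambix @ D.T                      # shape (samples, 8): one feed per cube vertex

left = right = 0.0
for feed, path in zip(feeds.T, HRIR_FILES):
    hrir, _ = sf.read(path)              # assumed shape (taps, 2): left ear, right ear
    left = left + fftconvolve(feed, hrir[:, 0])
    right = right + fftconvolve(feed, hrir[:, 1])

sf.write("cube_binaural.wav", np.column_stack([left, right]), rate)
```

Note that this reproduces the raw (non-shelf-filtered) decode; as the note in the README above explains, it is the shelf-filtered *symmetric ambisonic binaural decoder* set that actually matches the Google VR Audio output.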
-------------------------------------------------------------------------------- /spatial-audio/third_party/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 
180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /spatial-audio/third_party/azi_135_ele_-35_DFC.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/third_party/azi_135_ele_-35_DFC.wav -------------------------------------------------------------------------------- /spatial-audio/third_party/azi_135_ele_35_DFC.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/third_party/azi_135_ele_35_DFC.wav -------------------------------------------------------------------------------- /spatial-audio/third_party/azi_45_ele_-35_DFC.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/third_party/azi_45_ele_-35_DFC.wav -------------------------------------------------------------------------------- /spatial-audio/third_party/azi_45_ele_35_DFC.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/google/spatial-media/642ec170222cea711661b15f70d1bf9c2f1cb00b/spatial-audio/third_party/azi_45_ele_35_DFC.wav -------------------------------------------------------------------------------- /spatialmedia/LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 
135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 
194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /spatialmedia/README.md: -------------------------------------------------------------------------------- 1 | # Spatial Media Metadata Injector 2 | 3 | A tool for manipulating spatial media 4 | ([spherical video](../docs/spherical-video-rfc.md) and 5 | [spatial audio](../docs/spatial-audio-rfc.md)) metadata in MP4 and MOV files. 6 | It can be used to inject spatial media metadata into a file or validate metadata 7 | in an existing file. 8 | 9 | ## Usage 10 | 11 | [Python 2.7](https://www.python.org/downloads/) must be used to run the tool. 12 | From within the directory above `spatialmedia`: 13 | 14 | #### Help 15 | 16 | python spatialmedia -h 17 | 18 | Prints help and usage information. 19 | 20 | #### Examine 21 | 22 | python spatialmedia <files...> 23 | 24 | For each file specified, prints the spatial media metadata contained in the file. 25 | 26 | #### Inject 27 | 28 | python spatialmedia -i [--stereo=(none|top-bottom|left-right)] [--spatial-audio] <input> <output> 29 | 30 | Saves a version of `<input>` injected with spatial media metadata to `<output>`. 31 | `<input>` and `<output>` must not be the same file. 32 | 33 | ##### --stereo 34 | 35 | Selects the left/right eye frame layout; see the `StereoMode` element in the 36 | [Spherical Video RFC](../docs/spherical-video-rfc.md) for more information. 37 | 38 | Options: 39 | 40 | - `none`: Mono frame layout. 41 | 42 | - `top-bottom`: Top half contains the left eye and bottom half contains the 43 | right eye. 44 | 45 | - `left-right`: Left half contains the left eye and right half contains the 46 | right eye. 47 | 48 | ##### --spatial-audio 49 | 50 | Enables injection of spatial audio metadata. If enabled, the file must contain a 51 | 4-channel first-order ambisonics audio track with ACN channel ordering and SN3D 52 | normalization; see the [Spatial Audio RFC](../docs/spatial-audio-rfc.md) for 53 | more information. 54 | 55 | ## Building the standalone GUI application 56 | 57 | Install [PyInstaller](http://pythonhosted.org/PyInstaller/), then run the 58 | following: 59 | 60 | pyinstaller spatial_media_metadata_injector.spec 61 | -------------------------------------------------------------------------------- /spatialmedia/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
17 | 18 | # Ensure the package is available on the current path or is installed. 19 | import os 20 | import sys 21 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 22 | 23 | __all__ = ["metadata_utils", "mpeg"] 24 | 25 | import spatialmedia.metadata_utils 26 | import spatialmedia.mpeg 27 | -------------------------------------------------------------------------------- /spatialmedia/__main__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """Spatial Media Metadata Injector 18 | 19 | Tool for examining and injecting spatial media metadata in MP4/MOV files. 20 | """ 21 | 22 | import argparse 23 | import os 24 | import re 25 | import sys 26 | 27 | path = os.path.dirname(sys.modules[__name__].__file__) 28 | path = os.path.join(path, '..') 29 | sys.path.insert(0, path) 30 | from spatialmedia import metadata_utils 31 | 32 | 33 | def console(contents): 34 | print(contents) 35 | 36 | 37 | def main(): 38 | """Main function for printing and injecting spatial media metadata.""" 39 | 40 | parser = argparse.ArgumentParser( 41 | usage= 42 | "%(prog)s [options] [files...]\n\nBy default prints out spatial media " 43 | "metadata from specified files.") 44 | parser.add_argument( 45 | "-i", 46 | "--inject", 47 | action="store_true", 48 | help= 49 | "injects spatial media metadata into the first file specified (.mp4 or " 50 | ".mov) and saves the result to the second file specified") 51 | parser.add_argument( 52 | "-2", 53 | "--v2", 54 | action="store_true", 55 | help= 56 | "Uses v2 of the video metadata spec") 57 | video_group = parser.add_argument_group("Spherical Video") 58 | video_group.add_argument("-s", 59 | "--stereo", 60 | action="store", 61 | dest="stereo_mode", 62 | metavar="STEREO-MODE", 63 | choices=["none", "top-bottom", "left-right"], 64 | default="none", 65 | help="stereo mode (none | top-bottom | left-right)") 66 | video_group.add_argument("-p", 67 | "--projection", 68 | action="store", 69 | dest="projection", 70 | choices=["none", "equirectangular"], 71 | default="equirectangular", 72 | help="projection (none | equirectangular)") 73 | video_group.add_argument( 74 | "-c", 75 | "--crop", 76 | action="store", 77 | default=None, 78 | help= 79 | "crop region. Must specify 6 integers in the form of \"w:h:f_w:f_h:x:y\"" 80 | " where w=CroppedAreaImageWidthPixels h=CroppedAreaImageHeightPixels " 81 | "f_w=FullPanoWidthPixels f_h=FullPanoHeightPixels " 82 | "x=CroppedAreaLeftPixels y=CroppedAreaTopPixels") 83 | audio_group = parser.add_argument_group("Spatial Audio") 84 | audio_group.add_argument( 85 | "-a", 86 | "--spatial-audio", 87 | action="store_true", 88 | help= 89 | "spatial audio. 
First-order periphonic ambisonics with ACN channel " 90 | "ordering and SN3D normalization") 91 | parser.add_argument("file", nargs="+", help="input/output files") 92 | 93 | args = parser.parse_args() 94 | 95 | if args.inject: 96 | if len(args.file) != 2: 97 | console("Injecting metadata requires both an input file and output file.") 98 | return 99 | 100 | metadata = metadata_utils.Metadata(args.projection, args.stereo_mode) 101 | if not args.v2: 102 | metadata.projection = None 103 | metadata.stereo_mode = None 104 | metadata.video = metadata_utils.generate_spherical_xml(args.projection, 105 | args.stereo_mode, 106 | args.crop) 107 | 108 | if args.spatial_audio: 109 | parsed_metadata = metadata_utils.parse_metadata(args.file[0], console) 110 | if not metadata.audio: 111 | spatial_audio_description = metadata_utils.get_spatial_audio_description( 112 | parsed_metadata.num_audio_channels) 113 | if spatial_audio_description.is_supported: 114 | metadata.audio = metadata_utils.get_spatial_audio_metadata( 115 | spatial_audio_description.order, 116 | spatial_audio_description.has_head_locked_stereo) 117 | else: 118 | console("Audio has %d channel(s) and is not a supported " 119 | "spatial audio format." % (parsed_metadata.num_audio_channels)) 120 | return 121 | 122 | if metadata.video or metadata.projection or metadata.stereo_mode: 123 | metadata_utils.inject_metadata(args.file[0], args.file[1], metadata, 124 | console) 125 | else: 126 | console("Failed to generate metadata.") 127 | return 128 | 129 | if len(args.file) > 0: 130 | for input_file in args.file: 131 | if args.spatial_audio: 132 | parsed_metadata = metadata_utils.parse_metadata(input_file, console) 133 | metadata.audio = metadata_utils.get_spatial_audio_description( 134 | parsed_metadata.num_channels) 135 | 136 | metadata_utils.parse_metadata(input_file, console) 137 | return 138 | 139 | parser.print_help() 140 | return 141 | 142 | 143 | if __name__ == "__main__": 144 | main() 145 | -------------------------------------------------------------------------------- /spatialmedia/gui.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | """Spatial Media Metadata Injector GUI 19 | 20 | GUI application for examining/injecting spatial media metadata in MP4/MOV files. 
21 | """ 22 | 23 | import ntpath 24 | import os 25 | import sys 26 | import traceback 27 | 28 | try: 29 | # python 3 30 | import tkinter as tk 31 | from tkinter import filedialog, messagebox, ttk 32 | import configparser 33 | except ImportError: 34 | # python 2 35 | import Tkinter as tk 36 | from tkFont import Font, nametofont 37 | import tkMessageBox as messagebox 38 | import tkFileDialog as filedialog 39 | import ttk 40 | except ImportError: 41 | print("Tkinter library is not available.") 42 | exit(0) 43 | 44 | 45 | path = os.path.dirname(sys.modules[__name__].__file__) 46 | path = os.path.join(path, "..") 47 | sys.path.insert(0, path) 48 | from spatialmedia import metadata_utils 49 | 50 | SPATIAL_AUDIO_LABEL = "My video has spatial audio (ambiX ACN/SN3D format)" 51 | HEAD_LOCKED_STEREO_LABEL = "with head-locked stereo" 52 | 53 | 54 | class Console(): 55 | def __init__(self): 56 | self.log = [] 57 | 58 | def append(self, text): 59 | print(text.encode("utf-8")) 60 | self.log.append(text) 61 | 62 | 63 | class Application(tk.Frame): 64 | def action_open(self): 65 | """Triggers open file dialog, reading new files' metadata.""" 66 | tmp_in_files = filedialog.askopenfilenames(**self.open_options) 67 | if not tmp_in_files: 68 | return 69 | 70 | # Process first file to show in the UI 71 | self.in_file = tmp_in_files[0] 72 | self.all_files = tmp_in_files # Store all selected files 73 | 74 | self.set_message(f"Selected {len(tmp_in_files)} files. Current file: {ntpath.basename(self.in_file)}") 75 | 76 | console = Console() 77 | parsed_metadata = metadata_utils.parse_metadata(self.in_file, console.append) 78 | 79 | metadata = None 80 | audio_metadata = None 81 | if parsed_metadata: 82 | metadata = parsed_metadata.video 83 | audio_metadata = parsed_metadata.audio 84 | 85 | for line in console.log: 86 | if "Error" in line: 87 | self.set_error("Failed to load file %s" % ntpath.basename(self.in_file)) 88 | self.var_spherical.set(0) 89 | self.var_spatial_audio.set(0) 90 | self.disable_state() 91 | self.button_open.configure(state="normal") 92 | return 93 | 94 | self.enable_state() 95 | self.checkbox_spherical.configure(state="normal") 96 | 97 | infile = os.path.abspath(self.in_file) 98 | file_extension = os.path.splitext(infile)[1].lower() 99 | 100 | self.var_spherical.set(1) 101 | self.spatial_audio_description = metadata_utils.get_spatial_audio_description( 102 | parsed_metadata.num_audio_channels 103 | ) 104 | 105 | if not metadata: 106 | self.var_3d.set(0) 107 | 108 | if not audio_metadata: 109 | self.var_spatial_audio.set(0) 110 | 111 | if metadata: 112 | metadata = next(iter(metadata.values())) 113 | 114 | if metadata.get("Spherical", "") == "true": 115 | self.var_spherical.set(1) 116 | else: 117 | self.var_spherical.set(0) 118 | 119 | if metadata.get("StereoMode", "") == "top-bottom": 120 | self.var_3d.set(1) 121 | else: 122 | self.var_3d.set(0) 123 | 124 | if audio_metadata: 125 | self.var_spatial_audio.set(1) 126 | print(audio_metadata.get_metadata_string()) 127 | 128 | self.update_state() 129 | 130 | def action_inject_delay(self): 131 | """Process all selected files for injection.""" 132 | stereo = None 133 | if self.var_3d.get(): 134 | stereo = "top-bottom" 135 | 136 | metadata = metadata_utils.Metadata() 137 | metadata.video = metadata_utils.generate_spherical_xml(stereo=stereo) 138 | 139 | if self.var_spatial_audio.get(): 140 | metadata.audio = metadata_utils.get_spatial_audio_metadata( 141 | self.spatial_audio_description.order, 142 | self.spatial_audio_description.has_head_locked_stereo, 
143 | ) 144 | 145 | console = Console() 146 | success_count = 0 147 | 148 | for input_file in self.all_files: 149 | split_filename = os.path.splitext(ntpath.basename(input_file)) 150 | base_filename = split_filename[0] 151 | extension = split_filename[1] 152 | 153 | # Create output filename for each file 154 | # Fix: Use self.save_file directly as it's already the correct directory path 155 | output_file = os.path.join( 156 | #os.path.dirname(self.save_file), # Remove os.path.dirname() call to fix directory path issue 157 | self.save_file, # Remove os.path.dirname() call 158 | f"{base_filename}_injected{extension}" 159 | ) 160 | 161 | try: 162 | metadata_utils.inject_metadata( 163 | input_file, output_file, metadata, console.append 164 | ) 165 | success_count += 1 166 | except Exception as e: 167 | console.append(f"Error processing {ntpath.basename(input_file)}: {str(e)}") 168 | 169 | self.set_message( 170 | f"Successfully processed {success_count} out of {len(self.all_files)} files" 171 | ) 172 | self.button_open.configure(state="normal") 173 | self.update_state() 174 | 175 | def action_inject(self): 176 | """Inject metadata into new save files.""" 177 | # Ask for output directory instead of single file 178 | self.save_file = filedialog.askdirectory(title="Select Output Directory") 179 | if not self.save_file: 180 | return 181 | 182 | self.set_message(f"Processing {len(self.all_files)} files...") 183 | 184 | # Launch injection on a separate thread after disabling buttons 185 | self.disable_state() 186 | self.master.after(100, self.action_inject_delay) 187 | 188 | def action_set_spherical(self): 189 | self.update_state() 190 | 191 | def action_set_spatial_audio(self): 192 | self.update_state() 193 | 194 | def action_set_3d(self): 195 | self.update_state() 196 | 197 | def enable_state(self): 198 | self.button_open.configure(state="normal") 199 | 200 | def disable_state(self): 201 | self.checkbox_spherical.configure(state="disabled") 202 | self.checkbox_spatial_audio.configure(state="disabled") 203 | self.checkbox_3D.configure(state="disabled") 204 | self.button_inject.configure(state="disabled") 205 | self.button_open.configure(state="disabled") 206 | 207 | def update_state(self): 208 | self.checkbox_spherical.configure(state="normal") 209 | if self.var_spherical.get(): 210 | self.checkbox_3D.configure(state="normal") 211 | self.button_inject.configure(state="normal") 212 | if self.spatial_audio_description.is_supported: 213 | self.checkbox_spatial_audio.configure(state="normal") 214 | else: 215 | self.checkbox_3D.configure(state="disabled") 216 | self.button_inject.configure(state="disabled") 217 | self.checkbox_spatial_audio.configure(state="disabled") 218 | if self.spatial_audio_description.has_head_locked_stereo: 219 | self.label_spatial_audio.configure( 220 | text="{}\n{}".format(SPATIAL_AUDIO_LABEL, HEAD_LOCKED_STEREO_LABEL) 221 | ) 222 | else: 223 | self.label_spatial_audio.configure(text=SPATIAL_AUDIO_LABEL) 224 | 225 | def set_error(self, text): 226 | self.label_message["text"] = text 227 | self.label_message.config(fg="red") 228 | 229 | def set_message(self, text): 230 | self.label_message["text"] = text 231 | self.label_message.config(fg="blue") 232 | 233 | def create_widgets(self): 234 | """Sets up GUI contents.""" 235 | 236 | row = 0 237 | column = 0 238 | 239 | PAD_X = 10 240 | 241 | row = row + 1 242 | column = 0 243 | self.label_message = tk.Label(self) 244 | self.label_message["text"] = "Click Open to open your 360 video." 
245 | self.label_message.grid( 246 | row=row, 247 | column=column, 248 | rowspan=1, 249 | columnspan=2, 250 | padx=PAD_X, 251 | pady=10, 252 | sticky="w", 253 | ) 254 | 255 | row = row + 1 256 | separator = tk.Frame(self, relief=tk.GROOVE, bd=1, height=2, bg="white") 257 | separator.grid(columnspan=row, padx=PAD_X, pady=4, sticky="n" + "e" + "s" + "w") 258 | 259 | # Spherical Checkbox 260 | row += 1 261 | self.label_spherical = tk.Label(self, anchor="w") 262 | self.label_spherical["text"] = "My video is spherical (360)" 263 | self.label_spherical.grid( 264 | row=row, column=column, padx=PAD_X, pady=7, sticky="w" 265 | ) 266 | column += 1 267 | 268 | self.var_spherical = tk.IntVar() 269 | self.checkbox_spherical = tk.Checkbutton(self, variable=self.var_spherical) 270 | self.checkbox_spherical["command"] = self.action_set_spherical 271 | self.checkbox_spherical.grid(row=row, column=column, padx=PAD_X, pady=2) 272 | 273 | # 3D 274 | row = row + 1 275 | column = 0 276 | self.label_3D = tk.Label(self, anchor="w") 277 | self.label_3D["text"] = "My video is stereoscopic 3D (top/bottom layout)" 278 | self.label_3D.grid(row=row, column=column, padx=PAD_X, pady=7, sticky="w") 279 | column += 1 280 | 281 | self.var_3d = tk.IntVar() 282 | self.checkbox_3D = tk.Checkbutton(self, variable=self.var_3d) 283 | self.checkbox_3D["command"] = self.action_set_3d 284 | self.checkbox_3D.grid(row=row, column=column, padx=PAD_X, pady=2) 285 | 286 | # Spatial Audio Checkbox 287 | row += 1 288 | column = 0 289 | self.label_spatial_audio = tk.Label(self, anchor="w", justify=tk.LEFT) 290 | self.label_spatial_audio["text"] = SPATIAL_AUDIO_LABEL 291 | self.label_spatial_audio.grid( 292 | row=row, column=column, padx=PAD_X, pady=7, sticky="w" 293 | ) 294 | 295 | column += 1 296 | self.var_spatial_audio = tk.IntVar() 297 | self.checkbox_spatial_audio = tk.Checkbutton( 298 | self, variable=self.var_spatial_audio 299 | ) 300 | self.checkbox_spatial_audio["command"] = self.action_set_spatial_audio 301 | self.checkbox_spatial_audio.grid(row=row, column=column, padx=0, pady=0) 302 | 303 | row = row + 1 304 | separator = tk.Frame(self, relief=tk.GROOVE, bd=1, height=2, bg="white") 305 | separator.grid( 306 | columnspan=row, padx=PAD_X, pady=10, sticky="n" + "e" + "s" + "w" 307 | ) 308 | 309 | # Button Frame 310 | column = 0 311 | row = row + 1 312 | buttons_frame = tk.Frame(self) 313 | buttons_frame.grid(row=row, column=0, columnspan=3, padx=PAD_X, pady=10) 314 | 315 | style = ttk.Style() 316 | style.configure("TButton", foreground="black") 317 | 318 | self.button_open = ttk.Button(buttons_frame) 319 | self.button_open["text"] = "Open" 320 | self.button_open["command"] = self.action_open 321 | self.button_open.grid(row=0, column=0, padx=14, pady=2) 322 | 323 | self.button_inject = ttk.Button(buttons_frame) 324 | self.button_inject["text"] = "Inject metadata" 325 | self.button_inject["command"] = self.action_inject 326 | self.button_inject.grid(row=0, column=1, padx=14, pady=2) 327 | 328 | def __init__(self, master=None): 329 | master.wm_title("Spatial Media Metadata Injector") 330 | master.config(menu=tk.Menu(master)) 331 | self.title = "Spatial Media Metadata Injector" 332 | self.open_options = {} 333 | self.open_options["filetypes"] = [("Videos", ("*.mov", "*.mp4"))] 334 | self.open_options["multiple"] = True # Enable multiple file selection 335 | 336 | self.save_options = {} 337 | 338 | tk.Frame.__init__(self, master) 339 | self.create_widgets() 340 | self.pack() 341 | 342 | self.in_file = None 343 | self.all_files = [] # 
Store all selected files 344 | self.disable_state() 345 | self.enable_state() 346 | master.attributes("-topmost", True) 347 | master.focus_force() 348 | self.after(50, lambda: master.attributes("-topmost", False)) 349 | self.spatial_audio_description = None 350 | 351 | 352 | def report_callback_exception(self, *args): 353 | exception = traceback.format_exception(*args) 354 | messagebox.showerror("Error", exception) 355 | 356 | 357 | def main(): 358 | root = tk.Tk() 359 | root.tk.call('tk', 'scaling', 2.0) 360 | tk.report_callback_exception = report_callback_exception 361 | app = Application(master=root) 362 | app.mainloop() 363 | 364 | 365 | if __name__ == "__main__": 366 | main() -------------------------------------------------------------------------------- /spatialmedia/mpeg/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import spatialmedia.mpeg.sa3d 19 | import spatialmedia.mpeg.box 20 | import spatialmedia.mpeg.constants 21 | import spatialmedia.mpeg.container 22 | import spatialmedia.mpeg.mpeg4_container 23 | 24 | load = mpeg4_container.load 25 | 26 | Box = box.Box 27 | SA3DBox = sa3d.SA3DBox 28 | Container = container.Container 29 | Mpeg4Container = mpeg4_container.Mpeg4Container 30 | 31 | __all__ = ["box", "mpeg4", "container", "constants", "sa3d"] 32 | -------------------------------------------------------------------------------- /spatialmedia/mpeg/box.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | """MPEG processing classes. 19 | 20 | Tool for loading mpeg4 files and manipulating atoms. 21 | """ 22 | 23 | import io 24 | import struct 25 | 26 | from spatialmedia.mpeg import constants 27 | 28 | def load(fh, position, end): 29 | """Loads the box located at a position in a mp4 file. 30 | 31 | Args: 32 | fh: file handle, input file handle. 33 | position: int or None, current file position. 34 | 35 | Returns: 36 | box: box, box from loaded file location or None. 
37 | """ 38 | if position is None: 39 | position = fh.tell() 40 | 41 | fh.seek(position) 42 | header_size = 8 43 | size = struct.unpack(">I", fh.read(4))[0] 44 | name = fh.read(4) 45 | 46 | if size == 1: 47 | size = struct.unpack(">Q", fh.read(8))[0] 48 | header_size = 16 49 | 50 | if size < 8: 51 | print("Error, invalid size {} in {} at {}".format(size, name, position)) 52 | return None 53 | 54 | if (position + size) > end: 55 | print("Error: Leaf box size exceeds bounds.") 56 | return None 57 | 58 | new_box = Box() 59 | new_box.name = name 60 | new_box.position = position 61 | new_box.header_size = header_size 62 | new_box.content_size = size - header_size 63 | new_box.contents = None 64 | 65 | return new_box 66 | 67 | 68 | class Box(object): 69 | """MPEG4 box contents and behaviour true for all boxes.""" 70 | 71 | def __init__(self): 72 | self.name = "" 73 | self.position = 0 74 | self.header_size = 0 75 | self.content_size = 0 76 | self.contents = None 77 | 78 | def content_start(self): 79 | return self.position + self.header_size 80 | 81 | def save(self, in_fh, out_fh, delta): 82 | """Save box contents prioritizing set contents. 83 | 84 | Args: 85 | in_fh: file handle, source to read box contents from. 86 | out_fh: file handle, destination for written box contents. 87 | delta: int, index update amount. 88 | """ 89 | if self.header_size == 16: 90 | out_fh.write(struct.pack(">I", 1)) 91 | out_fh.write(self.name) 92 | out_fh.write(struct.pack(">Q", self.size())) 93 | elif self.header_size == 8: 94 | out_fh.write(struct.pack(">I", self.size())) 95 | out_fh.write(self.name) 96 | 97 | if self.content_start(): 98 | in_fh.seek(self.content_start()) 99 | 100 | if self.name == constants.TAG_STCO: 101 | stco_copy(in_fh, out_fh, self, delta) 102 | elif self.name == constants.TAG_CO64: 103 | co64_copy(in_fh, out_fh, self, delta) 104 | elif self.contents: 105 | out_fh.write(self.contents) 106 | else: 107 | tag_copy(in_fh, out_fh, self.content_size) 108 | 109 | def set(self, new_contents): 110 | """Sets / overwrites the box contents.""" 111 | self.contents = new_contents 112 | self.content_size = len(contents) 113 | 114 | def size(self): 115 | """Total size of a box. 116 | 117 | Returns: 118 | Int, total size in bytes of the box. 119 | """ 120 | return self.header_size + self.content_size 121 | 122 | def print_structure(self, indent=""): 123 | """Prints the box structure.""" 124 | size1 = self.header_size 125 | size2 = self.content_size 126 | print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2)) 127 | 128 | 129 | def tag_copy(in_fh, out_fh, size): 130 | """Copies a block of data from in_fh to out_fh. 131 | 132 | Args: 133 | in_fh: file handle, source of uncached file contents. 134 | out_fh: file handle, destination for saved file. 135 | size: int, amount of data to copy. 136 | """ 137 | 138 | # On 32-bit systems reading / writing is limited to 2GB chunks. 139 | # To prevent overflow, read/write 64 MB chunks. 140 | block_size = 64 * 1024 * 1024 141 | while (size > block_size): 142 | contents = in_fh.read(block_size) 143 | out_fh.write(contents) 144 | size = size - block_size 145 | 146 | contents = in_fh.read(size) 147 | out_fh.write(contents) 148 | 149 | 150 | def index_copy(in_fh, out_fh, box, mode, mode_length, delta=0): 151 | """Update and copy index table for stco/co64 files. 152 | 153 | Args: 154 | in_fh: file handle, source to read index table from. 155 | out_fh: file handle, destination for index file. 156 | box: box, stco/co64 box to copy. 
--------------------------------------------------------------------------------
/spatialmedia/mpeg/constants.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright 2016 Google Inc. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | """MPEG-4 constants."""
19 |
20 | TRAK_TYPE_VIDE = b"vide"
21 |
22 | # Leaf types.
23 | TAG_STCO = b"stco"
24 | TAG_CO64 = b"co64"
25 | TAG_FREE = b"free"
26 | TAG_MDAT = b"mdat"
27 | TAG_XML = b"xml "
28 | TAG_HDLR = b"hdlr"
29 | TAG_FTYP = b"ftyp"
30 | TAG_ESDS = b"esds"
31 | TAG_SOUN = b"soun"
32 | TAG_VIDE = b"vide"
33 | TAG_SA3D = b"SA3D"
34 |
35 | TAG_PRHD = b"prhd"
36 | TAG_EQUI = b"equi"
37 | TAG_SVHD = b"svhd"
38 | TAG_ST3D = b"st3d"
39 |
40 | # Container types.
41 | TAG_MOOV = b"moov"
42 | TAG_UDTA = b"udta"
43 | TAG_META = b"meta"
44 | TAG_TRAK = b"trak"
45 | TAG_MDIA = b"mdia"
46 | TAG_MINF = b"minf"
47 | TAG_STBL = b"stbl"
48 | TAG_STSD = b"stsd"
49 | TAG_UUID = b"uuid"
50 | TAG_WAVE = b"wave"
51 |
52 | TAG_SV3D = b"sv3d"
53 | TAG_PROJ = b"proj"
54 |
55 |
56 | # Sound sample descriptions.
57 | TAG_NONE = b"NONE"
58 | TAG_RAW_ = b"raw "
59 | TAG_TWOS = b"twos"
60 | TAG_SOWT = b"sowt"
61 | TAG_FL32 = b"fl32"
62 | TAG_FL64 = b"fl64"
63 | TAG_IN24 = b"in24"
64 | TAG_IN32 = b"in32"
65 | TAG_ULAW = b"ulaw"
66 | TAG_ALAW = b"alaw"
67 | TAG_LPCM = b"lpcm"
68 | TAG_MP4A = b"mp4a"
69 | TAG_OPUS = b"Opus"
70 |
71 | # Video sample descriptions.
72 | TAG_AVC1 = b"avc1"
73 | TAG_VP09 = b"vp09"
74 | TAG_AV01 = b"av01"
75 | TAG_HEV1 = b"hev1"
76 | TAG_DVH1 = b"dvh1"
77 | TAG_APCN = b"apcn"
78 | TAG_APCH = b"apch"
79 | TAG_APCS = b"apcs"
80 | TAG_APCO = b"apco"
81 | TAG_AP4H = b"ap4h"
82 | TAG_AP4X = b"ap4x"
83 |
84 | SOUND_SAMPLE_DESCRIPTIONS = frozenset([
85 |     TAG_NONE,
86 |     TAG_RAW_,
87 |     TAG_TWOS,
88 |     TAG_SOWT,
89 |     TAG_FL32,
90 |     TAG_FL64,
91 |     TAG_IN24,
92 |     TAG_IN32,
93 |     TAG_ULAW,
94 |     TAG_ALAW,
95 |     TAG_LPCM,
96 |     TAG_MP4A,
97 |     TAG_OPUS,
98 | ])
99 |
100 | VIDEO_SAMPLE_DESCRIPTIONS = frozenset([
101 |     TAG_NONE,
102 |     TAG_AVC1,
103 |     TAG_VP09,
104 |     TAG_AV01,
105 |     TAG_HEV1,
106 |     TAG_DVH1,
107 |     TAG_APCN,
108 |     TAG_APCH,
109 |     TAG_APCS,
110 |     TAG_APCO,
111 |     TAG_AP4H,
112 |     TAG_AP4X
113 | ])
114 |
115 | CONTAINERS_LIST = frozenset([
116 |     TAG_MDIA,
117 |     TAG_MINF,
118 |     TAG_MOOV,
119 |     TAG_STBL,
120 |     TAG_STSD,
121 |     TAG_TRAK,
122 |     TAG_UDTA,
123 |     TAG_WAVE,
124 |     TAG_SV3D,
125 |     TAG_PROJ
126 | ]).union(SOUND_SAMPLE_DESCRIPTIONS).union(VIDEO_SAMPLE_DESCRIPTIONS)
127 |
--------------------------------------------------------------------------------
/spatialmedia/mpeg/container.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright 2016 Google Inc. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | """MPEG processing classes.
19 |
20 | Functions for loading MPEG files and manipulating boxes.
21 | """
22 |
23 | import struct
24 |
25 | from spatialmedia.mpeg import box
26 | from spatialmedia.mpeg import constants
27 | from spatialmedia.mpeg import sa3d
28 | from spatialmedia.mpeg import sv3d
29 |
30 | def load(fh, position, end):
31 |     if position is None:
32 |         position = fh.tell()
33 |
34 |     fh.seek(position)
35 |     header_size = 8
36 |     size = struct.unpack(">I", fh.read(4))[0]
37 |     name = fh.read(4)
38 |     is_box = name not in constants.CONTAINERS_LIST
39 |     # Handle the mp4a decompressor setting (wave -> mp4a).
40 | if name == constants.TAG_MP4A and size == 12: 41 | is_box = True 42 | if is_box: 43 | if name == constants.TAG_SA3D: 44 | return sa3d.load(fh, position, end) 45 | if sv3d.is_supported_box_name(name): 46 | return sv3d.load(fh, position, end) 47 | return box.load(fh, position, end) 48 | 49 | if size == 1: 50 | size = struct.unpack(">Q", fh.read(8))[0] 51 | header_size = 16 52 | 53 | if size < 8: 54 | print("Error, invalid size", size, "in", name, "at", position) 55 | return None 56 | 57 | if (position + size) > end: 58 | print("Error: Container box size exceeds bounds.") 59 | return None 60 | 61 | padding = 0 62 | if name == constants.TAG_STSD: 63 | padding = 8 64 | if name in constants.SOUND_SAMPLE_DESCRIPTIONS: 65 | current_pos = fh.tell() 66 | fh.seek(current_pos + 8) 67 | sample_description_version = struct.unpack(">h", fh.read(2))[0] 68 | fh.seek(current_pos) 69 | 70 | if sample_description_version == 0: 71 | padding = 28 72 | elif sample_description_version == 1: 73 | padding = 28 + 16 74 | elif sample_description_version == 2: 75 | padding = 64 76 | else: 77 | print("Unsupported sample description version:", 78 | sample_description_version) 79 | if name in constants.VIDEO_SAMPLE_DESCRIPTIONS: 80 | current_pos = fh.tell() 81 | fh.seek(current_pos + 8) 82 | sample_description_version = struct.unpack(">h", fh.read(2))[0] 83 | fh.seek(current_pos) 84 | 85 | if sample_description_version == 0: 86 | padding = 78 87 | else: 88 | print("Unsupported video sample description version:", 89 | sample_description_version) 90 | 91 | new_box = Container() 92 | new_box.name = name 93 | new_box.position = position 94 | new_box.header_size = header_size 95 | new_box.content_size = size - header_size 96 | new_box.padding = padding 97 | new_box.contents = load_multiple( 98 | fh, position + header_size + padding, position + size) 99 | 100 | if new_box.contents is None: 101 | return None 102 | 103 | return new_box 104 | 105 | 106 | def load_multiple(fh, position=None, end=None): 107 | loaded = list() 108 | while (position + 4 < end): 109 | new_box = load(fh, position, end) 110 | if new_box is None: 111 | print("Error, failed to load box.") 112 | return None 113 | loaded.append(new_box) 114 | position = new_box.position + new_box.size() 115 | 116 | return loaded 117 | 118 | 119 | class Container(box.Box): 120 | """MPEG4 container box contents / behaviour.""" 121 | 122 | def __init__(self, padding=0, header_size=0): 123 | self.name = "" 124 | self.position = 0 125 | self.header_size = header_size 126 | self.content_size = 0 127 | self.contents = list() 128 | self.padding = padding 129 | 130 | def resize(self): 131 | """Recomputes the box size and recurses on contents.""" 132 | self.content_size = self.padding 133 | for element in self.contents: 134 | if isinstance(element, Container): 135 | element.resize() 136 | self.content_size += element.size() 137 | 138 | def print_box(self, console): 139 | for child in self.contents: 140 | child.print_box(console) 141 | 142 | def print_structure(self, indent=""): 143 | """Prints the box structure and recurses on contents.""" 144 | size1 = self.header_size 145 | size2 = self.content_size 146 | print("{0} {1} [{2}, {3}]".format(indent, self.name, size1, size2)) 147 | 148 | size = len(self.contents) 149 | for i in range(size): 150 | next_indent = indent 151 | 152 | next_indent = next_indent.replace("├", "│") 153 | next_indent = next_indent.replace("└", " ") 154 | next_indent = next_indent.replace("─", " ") 155 | 156 | if i == (size - 1): 157 | next_indent = 
next_indent + " └──" 158 | else: 159 | next_indent = next_indent + " ├──" 160 | 161 | element = self.contents[i] 162 | element.print_structure(next_indent) 163 | 164 | def remove(self, tag): 165 | """Removes a tag recursively from all containers.""" 166 | new_contents = [] 167 | self.content_size = 0 168 | for element in self.contents: 169 | if element.name != tag: 170 | new_contents.append(element) 171 | if isinstance(element, Container): 172 | element.remove(tag) 173 | self.content_size += element.size() 174 | self.contents = new_contents 175 | 176 | def add(self, element): 177 | """Adds an element, merging with containers of the same type. 178 | 179 | Returns: 180 | Int, increased size of container. 181 | """ 182 | for content in self.contents: 183 | if content.name == element.name: 184 | if isinstance(content, container_leaf): 185 | return content.merge(element) 186 | print("Error, cannot merge leafs.") 187 | return False 188 | 189 | self.contents.append(element) 190 | return True 191 | 192 | def merge(self, element): 193 | """Merges structure with container. 194 | 195 | Returns: 196 | Int, increased size of container. 197 | """ 198 | assert(self.name == element.name) 199 | assert(isinstance(element, container_box)) 200 | for sub_element in element.contents: 201 | if not self.add(sub_element): 202 | return False 203 | 204 | return True 205 | 206 | def save(self, in_fh, out_fh, delta): 207 | """Saves box to out_fh reading uncached content from in_fh. 208 | 209 | Args: 210 | in_fh: file handle, source of uncached file contents. 211 | out_fh: file_hande, destination for saved file. 212 | delta: int, file change size for updating stco and co64 files. 213 | """ 214 | if self.header_size == 16: 215 | out_fh.write(struct.pack(">I", 1)) 216 | out_fh.write(self.name) 217 | out_fh.write(struct.pack(">Q", self.size())) 218 | elif self.header_size == 8: 219 | out_fh.write(struct.pack(">I", self.size())) 220 | out_fh.write(self.name) 221 | 222 | if self.padding > 0: 223 | in_fh.seek(self.content_start()) 224 | box.tag_copy(in_fh, out_fh, self.padding) 225 | 226 | for element in self.contents: 227 | element.save(in_fh, out_fh, delta) 228 | -------------------------------------------------------------------------------- /spatialmedia/mpeg/mpeg4_container.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | """MPEG4 processing classes. 19 | 20 | Functions for loading MP4/MOV files and manipulating boxes. 21 | """ 22 | 23 | from spatialmedia.mpeg import box 24 | from spatialmedia.mpeg import constants 25 | from spatialmedia.mpeg import container 26 | 27 | 28 | def load(fh): 29 | """Load the mpeg4 file structure of a file. 30 | 31 | Args: 32 | fh: file handle, input file handle. 33 | position: int, current file position. 34 | size: int, maximum size. 
--------------------------------------------------------------------------------
/spatialmedia/mpeg/mpeg4_container.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright 2016 Google Inc. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | """MPEG4 processing classes.
19 |
20 | Functions for loading MP4/MOV files and manipulating boxes.
21 | """
22 |
23 | from spatialmedia.mpeg import box
24 | from spatialmedia.mpeg import constants
25 | from spatialmedia.mpeg import container
26 |
27 |
28 | def load(fh):
29 |     """Load the mpeg4 file structure of a file.
30 |
31 |     Args:
32 |         fh: file handle, input file handle. The overall file size is
33 |             determined from this handle and is used to ensure correct
34 |             box sizes.
35 |
36 |     Returns:
37 |         mpeg4, the loaded mpeg4 structure, or None on error.
38 |     """
39 |
40 |     fh.seek(0, 2)
41 |     size = fh.tell()
42 |     contents = container.load_multiple(fh, 0, size)
43 |
44 |     if contents is None:
45 |         print("Error, failed to load .mp4 file.")
46 |         return None
47 |     elif len(contents) == 0:
48 |         print("Error, no boxes found.")
49 |         return None
50 |
51 |     loaded_mpeg4 = Mpeg4Container()
52 |     loaded_mpeg4.contents = contents
53 |
54 |     for element in loaded_mpeg4.contents:
55 |         if (element.name == constants.TAG_MOOV):
56 |             loaded_mpeg4.moov_box = element
57 |         if (element.name == constants.TAG_FREE):
58 |             loaded_mpeg4.free_box = element
59 |         if (element.name == constants.TAG_MDAT
60 |                 and not loaded_mpeg4.first_mdat_box):
61 |             loaded_mpeg4.first_mdat_box = element
62 |         if (element.name == constants.TAG_FTYP):
63 |             loaded_mpeg4.ftyp_box = element
64 |
65 |     if not loaded_mpeg4.moov_box:
66 |         print("Error, file does not contain moov box.")
67 |         return None
68 |
69 |     if not loaded_mpeg4.first_mdat_box:
70 |         print("Error, file does not contain mdat box.")
71 |         return None
72 |
73 |     loaded_mpeg4.first_mdat_position = \
74 |         loaded_mpeg4.first_mdat_box.position
75 |     loaded_mpeg4.first_mdat_position += \
76 |         loaded_mpeg4.first_mdat_box.header_size
77 |
78 |     loaded_mpeg4.content_size = 0
79 |     for element in loaded_mpeg4.contents:
80 |         loaded_mpeg4.content_size += element.size()
81 |
82 |     return loaded_mpeg4
83 |
84 |
85 | class Mpeg4Container(container.Container):
86 |     """Specialized behaviour for the root mpeg4 container."""
87 |
88 |     def __init__(self):
89 |         self.contents = list()
90 |         self.content_size = 0
91 |         self.header_size = 0
92 |         self.moov_box = None
93 |         self.free_box = None
94 |         self.first_mdat_box = None
95 |         self.ftyp_box = None
96 |         self.first_mdat_position = None
97 |         self.padding = 0
98 |
99 |     def merge(self, element):
100 |         """Mpeg4 containers do not support merging."""
101 |         print("Cannot merge mpeg4 files")
102 |         exit(0)
103 |
104 |     def print_structure(self):
105 |         """Print mpeg4 file structure recursively."""
106 |         print("mpeg4 [{}]".format(self.content_size))
107 |
108 |         size = len(self.contents)
109 |         for i in range(size):
110 |             next_indent = " ├──"
111 |             if i == (size - 1):
112 |                 next_indent = " └──"
113 |
114 |             self.contents[i].print_structure(next_indent)
115 |
116 |     def save(self, in_fh, out_fh):
117 |         """Save mpeg4 file content to file.
118 |
119 |         Args:
120 |             in_fh: file handle, source file handle for uncached contents.
121 |             out_fh: file handle, destination file handle for saved file.
122 |         """
123 |         self.resize()
124 |         new_position = 0
125 |         for element in self.contents:
126 |             if element.name == constants.TAG_MDAT:
127 |                 new_position += element.header_size
128 |                 break
129 |             new_position += element.size()
130 |         delta = new_position - self.first_mdat_position
131 |
132 |         for element in self.contents:
133 |             element.save(in_fh, out_fh, delta)
134 |
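A round-trip sketch for the module above (not a file in this repository; the paths are placeholders). save() recomputes box sizes and derives the stco/co64 offset delta from the new mdat position, which is why the source handle must stay open while saving:

from spatialmedia.mpeg import mpeg4_container

with open("input.mp4", "rb") as in_fh:
    mp4 = mpeg4_container.load(in_fh)
    if mp4 is not None:
        mp4.print_structure()
        with open("output.mp4", "wb") as out_fh:
            mp4.save(in_fh, out_fh)   # rewrites headers and shifted chunk offsets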
--------------------------------------------------------------------------------
/spatialmedia/mpeg/sa3d.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright 2016 Google Inc. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | """MPEG SA3D box processing classes.
19 |
20 | Enables the injection of an SA3D box into an MPEG-4 file. The SA3D box
21 | specification conforms to that outlined in docs/spatial-audio-rfc.md.
22 | """
23 |
24 | import struct
25 |
26 | from spatialmedia.mpeg import box
27 | from spatialmedia.mpeg import constants
28 |
29 |
30 | def load(fh, position=None, end=None):
31 |     """ Loads the SA3D box located at position in an mp4 file.
32 |
33 |     Args:
34 |         fh: file handle, input file handle.
35 |         position: int or None, current file position.
36 |
37 |     Returns:
38 |         new_box: box, SA3D box loaded from the file location or None.
39 |     """
40 |     if position is None:
41 |         position = fh.tell()
42 |
43 |     fh.seek(position)
44 |     new_box = SA3DBox()
45 |     new_box.position = position
46 |     size = struct.unpack(">I", fh.read(4))[0]
47 |     name = fh.read(4)
48 |
49 |     if (name != constants.TAG_SA3D):
50 |         print("Error: box is not an SA3D box.")
51 |         return None
52 |
53 |     if (position + size > end):
54 |         print("Error: SA3D box size exceeds bounds.")
55 |         return None
56 |
57 |     new_box.content_size = size - new_box.header_size
58 |     new_box.version = struct.unpack(">B", fh.read(1))[0]
59 |     new_box.ambisonic_type = struct.unpack(">B", fh.read(1))[0]
60 |     new_box.head_locked_stereo = (new_box.ambisonic_type & int('10000000', 2) != 0)
61 |     new_box.ambisonic_type = new_box.ambisonic_type & int('01111111', 2)
62 |     new_box.ambisonic_order = struct.unpack(">I", fh.read(4))[0]
63 |     new_box.ambisonic_channel_ordering = struct.unpack(">B", fh.read(1))[0]
64 |     new_box.ambisonic_normalization = struct.unpack(">B", fh.read(1))[0]
65 |     new_box.num_channels = struct.unpack(">I", fh.read(4))[0]
66 |     for i in range(0, new_box.num_channels):
67 |         new_box.channel_map.append(
68 |             struct.unpack(">I", fh.read(4))[0])
69 |     return new_box
70 |
71 |
72 | class SA3DBox(box.Box):
73 |     ambisonic_types = {'periphonic': 0}
74 |     ambisonic_orderings = {'ACN': 0}
75 |     ambisonic_normalizations = {'SN3D': 0}
76 |
77 |     def __init__(self):
78 |         box.Box.__init__(self)
79 |         self.name = constants.TAG_SA3D
80 |         self.header_size = 8
81 |         self.version = 0
82 |         self.ambisonic_type = 0
83 |         self.head_locked_stereo = False
84 |         self.ambisonic_order = 0
85 |         self.ambisonic_channel_ordering = 0
86 |         self.ambisonic_normalization = 0
87 |         self.num_channels = 0
88 |         self.channel_map = list()
89 |
90 |     @staticmethod
91 |     def create(num_channels, audio_metadata):
92 |         new_box = SA3DBox()
93 |         new_box.header_size = 8
94 |         new_box.name = constants.TAG_SA3D
95 |         new_box.version = 0  # uint8
96 |         new_box.content_size += 1  # uint8
97 |         new_box.ambisonic_type = SA3DBox.ambisonic_types[
98 |             audio_metadata["ambisonic_type"]]
99 |         new_box.head_locked_stereo = audio_metadata["head_locked_stereo"]
100 |         new_box.content_size += 1  # uint8
101 |         new_box.ambisonic_order = audio_metadata["ambisonic_order"]
102 |         new_box.content_size += 4  # uint32
103 |         new_box.ambisonic_channel_ordering = SA3DBox.ambisonic_orderings[
104 |             audio_metadata["ambisonic_channel_ordering"]]
105 |         new_box.content_size += 1  # uint8
106 |         new_box.ambisonic_normalization = 
SA3DBox.ambisonic_normalizations[ 107 | audio_metadata["ambisonic_normalization"]] 108 | new_box.content_size += 1 # uint8 109 | new_box.num_channels = num_channels 110 | new_box.content_size += 4 # uint32 111 | 112 | channel_map = audio_metadata["channel_map"] 113 | for channel_element in channel_map: 114 | new_box.channel_map.append(channel_element) 115 | new_box.content_size += 4 # uint32 116 | return new_box 117 | 118 | def ambisonic_type_name(self): 119 | return next((key for key,value in SA3DBox.ambisonic_types.items() 120 | if value==self.ambisonic_type)) 121 | 122 | def ambisonic_channel_ordering_name(self): 123 | return next((key for key,value in SA3DBox.ambisonic_orderings.items() 124 | if value==self.ambisonic_channel_ordering)) 125 | 126 | def ambisonic_normalization_name(self): 127 | return next((key for key,value in SA3DBox.ambisonic_normalizations.items() 128 | if value==self.ambisonic_normalization)) 129 | 130 | def print_box(self, console): 131 | """ Prints the contents of this spatial audio (SA3D) box to the 132 | console. 133 | """ 134 | ambisonic_type = self.ambisonic_type_name() 135 | channel_ordering = self.ambisonic_channel_ordering_name() 136 | ambisonic_normalization = self.ambisonic_normalization_name() 137 | console("\t\tAmbisonic Type: %s" % ambisonic_type) 138 | console("\t\tContains Head-Locked Stereo: %r" % self.head_locked_stereo) 139 | console("\t\tAmbisonic Order: %d" % self.ambisonic_order) 140 | console("\t\tAmbisonic Channel Ordering: %s" % channel_ordering) 141 | console("\t\tAmbisonic Normalization: %s" % ambisonic_normalization) 142 | console("\t\tNumber of Channels: %d" % self.num_channels) 143 | console("\t\tChannel Map: %s" % str(self.channel_map)) 144 | 145 | def get_metadata_string(self): 146 | """ Outputs a concise single line audio metadata string. """ 147 | metadata = "%s, %s, %s, Order %d, %d Channel(s), Channel Map: %s" \ 148 | % (self.ambisonic_normalization_name(),\ 149 | self.ambisonic_channel_ordering_name(),\ 150 | self.ambisonic_type_name(),\ 151 | self.ambisonic_order,\ 152 | self.num_channels,\ 153 | str(self.channel_map)) 154 | return metadata 155 | 156 | def save(self, in_fh, out_fh, delta): 157 | if (self.header_size == 16): 158 | out_fh.write(struct.pack(">I", 1)) 159 | out_fh.write(struct.pack(">Q", self.size())) 160 | out_fh.write(self.name) 161 | elif(self.header_size == 8): 162 | out_fh.write(struct.pack(">I", self.size())) 163 | out_fh.write(self.name) 164 | 165 | ambisonic_type = ( 166 | self.ambisonic_type | int('10000000', 2) if 167 | self.head_locked_stereo else self.ambisonic_type & int('01111111', 2)) 168 | out_fh.write(struct.pack(">B", self.version)) 169 | out_fh.write(struct.pack(">B", ambisonic_type)) 170 | out_fh.write(struct.pack(">I", self.ambisonic_order)) 171 | out_fh.write(struct.pack(">B", self.ambisonic_channel_ordering)) 172 | out_fh.write(struct.pack(">B", self.ambisonic_normalization)) 173 | out_fh.write(struct.pack(">I", self.num_channels)) 174 | for i in self.channel_map: 175 | if (i != None): 176 | out_fh.write(struct.pack(">I", int(i))) 177 | -------------------------------------------------------------------------------- /spatialmedia/mpeg/sv3d.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright 2016 Google Inc. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at
9 | #
10 | #     http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | """MPEG SV3D box processing classes.
19 |
20 | Enables the injection of an SV3D box into an MPEG-4 file. The SV3D box
21 | specification conforms to that outlined in docs/spherical-video-v2-rfc.md.
22 | """
23 |
24 | import struct
25 |
26 | from spatialmedia.mpeg import box
27 | from spatialmedia.mpeg import constants
28 |
29 |
30 | def is_supported_box_name(name):
31 |     """Returns true if the box name is a supported sv3d box."""
32 |     return (name == constants.TAG_PRHD or
33 |             name == constants.TAG_EQUI or
34 |             name == constants.TAG_ST3D)
35 |
36 |
37 | def load(fh, position=None, end=None):
38 |     """ Loads the SV3D box located at position in an mp4 file.
39 |
40 |     Args:
41 |         fh: file handle, input file handle.
42 |         position: int or None, current file position.
43 |
44 |     Returns:
45 |         new_box: box, SV3D box loaded from the file location or None.
46 |     """
47 |     if position is None:
48 |         position = fh.tell()
49 |
50 |     fh.seek(position)
51 |     size = struct.unpack(">I", fh.read(4))[0]
52 |     name = fh.read(4)
53 |
54 |     if name == constants.TAG_PRHD:
55 |         new_box = PRHDBox()
56 |     elif name == constants.TAG_EQUI:
57 |         new_box = EQUIBox()
58 |     elif name == constants.TAG_ST3D:
59 |         new_box = ST3DBox()
60 |     else:
61 |         print("Error: box is not a supported SV3D sub-box.")
62 |         return None
63 |
64 |     new_box.position = position
65 |     new_box.content_size = size - new_box.header_size
66 |     new_box.load_content(fh)
67 |     return new_box
68 |
69 |
70 | class PRHDBox(box.Box):
71 |     def __init__(self):
72 |         box.Box.__init__(self)
73 |         self.name = constants.TAG_PRHD
74 |         self.header_size = 8
75 |         self.pose_yaw_degrees = 0
76 |         self.pose_pitch_degrees = 0
77 |         self.pose_roll_degrees = 0
78 |         self.content_size = 16
79 |
80 |     @staticmethod
81 |     def create():
82 |         return PRHDBox()
83 |
84 |     def print_box(self, console):
85 |         """ Prints the contents of this box to console."""
86 |         console("\t\t\tPRHD {")
87 |         console("\t\t\t\tPose Yaw Degrees: %d" % self.pose_yaw_degrees)
88 |         console("\t\t\t\tPose Pitch Degrees: %d" % self.pose_pitch_degrees)
89 |         console("\t\t\t\tPose Roll Degrees: %d" % self.pose_roll_degrees)
90 |         console("\t\t\t}")
91 |
92 |     def get_metadata_string(self):
93 |         """ Outputs a concise single line proj metadata string. """
""" 94 | return ("yaw:%d, pitch:%d, roll:%d" % 95 | (self.pose_yaw_degrees, self.pose_pitch_degrees, self.pose_roll_degrees)) 96 | 97 | def save(self, in_fh, out_fh, delta): 98 | if (self.header_size == 16): 99 | out_fh.write(struct.pack(">I", 1)) 100 | out_fh.write(struct.pack(">Q", self.size())) 101 | out_fh.write(self.name) 102 | elif(self.header_size == 8): 103 | out_fh.write(struct.pack(">I", self.size())) 104 | out_fh.write(self.name) 105 | out_fh.write(struct.pack(">I", 0)) # Version and flags 106 | out_fh.write(struct.pack(">I", self.pose_yaw_degrees)) 107 | out_fh.write(struct.pack(">I", self.pose_pitch_degrees)) 108 | out_fh.write(struct.pack(">I", self.pose_roll_degrees)) 109 | 110 | def load_content(self, in_fh): 111 | in_fh.read(4) # Version and flags 112 | self.pose_yaw_degress = struct.unpack(">I", in_fh.read(4))[0] 113 | self.pose_pitch_degrees = struct.unpack(">I", in_fh.read(4))[0] 114 | self.pose_roll_degrees = struct.unpack(">I", in_fh.read(4))[0] 115 | 116 | 117 | class EQUIBox(box.Box): 118 | def __init__(self): 119 | box.Box.__init__(self) 120 | self.name = constants.TAG_EQUI 121 | self.header_size = 8 122 | self.bounds_top = 0 123 | self.bounds_bottom = 0 124 | self.bounds_left = 0 125 | self.bounds_right = 0 126 | self.content_size = 20 127 | 128 | @staticmethod 129 | def create(): 130 | return EQUIBox() 131 | 132 | def print_box(self, console): 133 | """ Prints the contents of this box to console.""" 134 | console("\t\t\tEQUI {") 135 | console("\t\t\t\tBounds Top: %d" % self.bounds_top) 136 | console("\t\t\t\tBounds Bottom: %d" % self.bounds_bottom) 137 | console("\t\t\t\tBounds Left: %d" % self.bounds_left) 138 | console("\t\t\t\tBounds Right: %d" % self.bounds_right) 139 | console("\t\t\t}") 140 | 141 | def get_metadata_string(self): 142 | """ Outputs a concise single line proj metadata string. 
""" 143 | return ("Equi (top:%d, bottom:%d, left:%d, right:%d)" 144 | % (self.bounds_top, self.bounds_bottom, self.bounds_left, self.bounds_right)) 145 | 146 | def save(self, in_fh, out_fh, delta): 147 | if (self.header_size == 16): 148 | out_fh.write(struct.pack(">I", 1)) 149 | out_fh.write(struct.pack(">Q", self.size())) 150 | out_fh.write(self.name) 151 | elif(self.header_size == 8): 152 | out_fh.write(struct.pack(">I", self.size())) 153 | out_fh.write(self.name) 154 | out_fh.write(struct.pack(">I", 0)) # Version and flags 155 | out_fh.write(struct.pack(">I", self.bounds_top)) 156 | out_fh.write(struct.pack(">I", self.bounds_bottom)) 157 | out_fh.write(struct.pack(">I", self.bounds_left)) 158 | out_fh.write(struct.pack(">I", self.bounds_right)) 159 | 160 | def load_content(self, in_fh): 161 | in_fh.read(4) # Version and flags 162 | self.bounds_top = struct.unpack(">I", in_fh.read(4))[0] 163 | self.bounds_bottom = struct.unpack(">I", in_fh.read(4))[0] 164 | self.bounds_left = struct.unpack(">I", in_fh.read(4))[0] 165 | self.bounds_right = struct.unpack(">I", in_fh.read(4))[0] 166 | 167 | 168 | class ST3DBox(box.Box): 169 | def __init__(self): 170 | box.Box.__init__(self) 171 | self.name = constants.TAG_ST3D 172 | self.header_size = 8 173 | self.stereo_mode = 0 174 | self.content_size = 5 175 | 176 | @staticmethod 177 | def create(): 178 | return ST3DBox() 179 | 180 | def set_stereo_mode_from_string(self, stereo_mode): 181 | if stereo_mode == "mono": 182 | self.stereo_mode = 0 183 | elif stereo_mode == "top-bottom": 184 | self.stereo_mode = 1 185 | elif stereo_mode == "left-right": 186 | self.stereo_mode = 2 187 | else: 188 | print("Error: unknown stereo mode") 189 | 190 | def print_box(self, console): 191 | """ Prints the contents of this box to console.""" 192 | console("\t\t\tStereo Mode: %d" % self.stereo_mode) 193 | 194 | def get_metadata_string(self): 195 | """ Outputs a concise single line stereo metadata string. """ 196 | return "Stereo Mode: %d" % self.stereo_mode 197 | 198 | def save(self, in_fh, out_fh, delta): 199 | if (self.header_size == 16): 200 | out_fh.write(struct.pack(">I", 1)) 201 | out_fh.write(struct.pack(">Q", self.size())) 202 | out_fh.write(self.name) 203 | elif(self.header_size == 8): 204 | out_fh.write(struct.pack(">I", self.size())) 205 | out_fh.write(self.name) 206 | out_fh.write(struct.pack(">I", 0)) # Version and flags 207 | out_fh.write(struct.pack(">B", self.stereo_mode)) 208 | 209 | def load_content(self, in_fh): 210 | in_fh.read(4) # Version and flags 211 | self.stereo_mode = int(struct.unpack(">B", in_fh.read(1))[0]) 212 | -------------------------------------------------------------------------------- /spatialmedia/spatial_media_metadata_injector.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | # PyInstaller spec. 
3 | 4 | import sys 5 | 6 | block_cipher = None 7 | 8 | a = Analysis(['gui.py'], 9 | binaries=None, 10 | datas=None, 11 | hiddenimports=[], 12 | hookspath=[], 13 | runtime_hooks=[], 14 | excludes=[], 15 | win_no_prefer_redirects=False, 16 | win_private_assemblies=False, 17 | cipher=block_cipher) 18 | pyz = PYZ(a.pure, a.zipped_data, 19 | cipher=block_cipher) 20 | exe = EXE(pyz, 21 | a.scripts, 22 | a.binaries, 23 | a.zipfiles, 24 | a.datas, 25 | name='Spatial Media Metadata Injector', 26 | debug=False, 27 | strip=False, 28 | upx=True, 29 | console=False ) 30 | if sys.platform == 'darwin': 31 | app = BUNDLE(exe, 32 | name='Spatial Media Metadata Injector.app', 33 | icon=None, 34 | bundle_identifier=None, 35 | info_plist={'NSHighResolutionCapable': 'True'}) 36 | if sys.platform.startswith('linux'): 37 | exe = EXE(pyz, 38 | a.scripts, 39 | a.binaries, 40 | a.zipfiles, 41 | a.datas, 42 | name='Spatial Media Metadata Injector', 43 | debug=False, 44 | strip=False, 45 | upx=True, 46 | console=False) 47 | --------------------------------------------------------------------------------
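Finally, an end-to-end sketch of the SA3D metadata path (not a file in this repository): the dictionary mirrors the fields consumed by SA3DBox.create() for a 4-channel, first-order ambisonic track, with the usual ACN/SN3D values.

from spatialmedia.mpeg import sa3d

audio_metadata = {
    "ambisonic_type": "periphonic",
    "head_locked_stereo": False,
    "ambisonic_order": 1,
    "ambisonic_channel_ordering": "ACN",
    "ambisonic_normalization": "SN3D",
    "channel_map": [0, 1, 2, 3],
}

sa3d_box = sa3d.SA3DBox.create(4, audio_metadata)
print(sa3d_box.get_metadata_string())
# e.g. "SN3D, ACN, periphonic, Order 1, 4 Channel(s), Channel Map: [0, 1, 2, 3]"
sa3d_box.print_box(print)     # any callable that accepts a single string works as the console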