├── .gitignore ├── CHANGELOG ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── ego4d ├── __init__.py ├── cli │ ├── README.md │ ├── __init__.py │ ├── cli.py │ ├── config.py │ ├── config_test.py │ ├── datasets.csv │ ├── download.py │ ├── e2e_test.py │ ├── integrity.py │ ├── manifest.py │ ├── manifest_test.py │ ├── progressbar.py │ ├── s3path.py │ ├── s3path_test.py │ └── universities.py ├── egoexo │ ├── __init__.py │ ├── download │ │ ├── README.md │ │ ├── __init__.py │ │ └── cli.py │ ├── expert_commentary.py │ └── scripts │ │ └── extract_audio_transcribe.py ├── features │ ├── README.md │ ├── __init__.py │ ├── audio.py │ ├── config.py │ ├── configs │ │ ├── audio_mel_spectrogram.yaml │ │ ├── audio_speechbrain_asr.yaml │ │ ├── egoexo_maws_clip.yaml │ │ ├── egoexo_omnivore.yaml │ │ ├── mvit_imagenet.yaml │ │ ├── mvit_k400.yaml │ │ ├── omnivore_image.yaml │ │ ├── omnivore_video.yaml │ │ └── slowfast_r101_8x8.yaml │ ├── dataset.py │ ├── extract_features.py │ ├── inference.py │ ├── models │ │ ├── __init__.py │ │ ├── common.py │ │ ├── maws.py │ │ ├── mel_spectrogram.py │ │ ├── mvit.py │ │ ├── omnivore.py │ │ ├── slowfast.py │ │ └── speechbrain_asr.py │ ├── profile_extract.py │ ├── slurm.py │ └── visualize_dataloader.py ├── internal │ ├── __init__.py │ ├── colmap │ │ ├── README.md │ │ ├── __init__.py │ │ ├── configs │ │ │ ├── cmu_example.yaml │ │ │ ├── cmu_example_in_paths.yaml │ │ │ ├── cmu_frame_example.yaml │ │ │ └── paths_example.yaml │ │ ├── preprocess.py │ │ └── requirements.txt │ ├── download │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── manifest.py │ │ └── manifest_gen.py │ ├── downscale.py │ ├── expert_commentary │ │ ├── export.py │ │ ├── extract.py │ │ └── transcribe.py │ ├── human_pose │ │ ├── README.md │ │ ├── bbox_detector.py │ │ ├── camera.py │ │ ├── config.py │ │ ├── configs │ │ │ ├── cmu_soccer.yaml │ │ │ ├── cmu_soccer_rawal.yaml │ │ │ ├── dev_release_base.yaml │ │ │ ├── iiith_cooking01.yaml │ │ │ ├── iiith_cooking_01_1.yaml │ │ │ ├── iu_bike.yaml │ │ │ ├── iu_bike_rawal.yaml │ │ │ ├── iu_music.yaml │ │ │ ├── iu_music_rawal.yaml │ │ │ ├── unc_T1.yaml │ │ │ └── unc_T1_rawal.yaml │ │ ├── dataset.py │ │ ├── launch_main.py │ │ ├── main.py │ │ ├── main_single_camera.py │ │ ├── pose_estimator.py │ │ ├── pose_refiner.py │ │ ├── postprocess_pose3d.py │ │ ├── readers.py │ │ ├── requirements.txt │ │ ├── scripts │ │ │ ├── _install │ │ │ │ └── conda.sh │ │ │ ├── copy_s3_to_manifold.sh │ │ │ ├── process │ │ │ │ ├── 0_get_bbox.sh │ │ │ │ ├── 1_get_pose2d.sh │ │ │ │ ├── 2_get_pose3d.sh │ │ │ │ ├── 3_multi_view_vis.sh │ │ │ │ ├── 3_refine_pose3d.sh │ │ │ │ └── 4_multi_view_vis.sh │ │ │ └── upload_s3.sh │ │ ├── triangulator.py │ │ ├── triangulator_nonlinear.py │ │ ├── undistort_to_halo.py │ │ └── utils.py │ ├── notebooks │ │ └── EgoExo_Dev_Release_Example.ipynb │ ├── s3.py │ ├── utilities │ │ └── egoexo_metadata_generator.ipynb │ ├── utils │ │ └── launch_utils.py │ └── validation │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cli.py │ │ ├── config.py │ │ ├── credential_s3.py │ │ ├── ffmpeg_utils.py │ │ ├── manifest.py │ │ ├── standard_metadata │ │ ├── ego4d │ │ │ ├── component_type.csv │ │ │ ├── device.csv │ │ │ └── scenario.csv │ │ └── egoexo │ │ │ ├── device.csv │ │ │ └── scenario.csv │ │ ├── validate.py │ │ └── validate_test.py └── research │ ├── README.md │ ├── __init__.py │ ├── chunk.py │ ├── clep │ ├── README.md │ ├── __init__.py │ ├── config.py │ ├── configs │ │ └── omnivore_features.yaml │ ├── dataset.py │ ├── model.py │ ├── preprocess │ │ ├── __init__.py │ │ ├── cc.py │ │ 
├── charades.py │ │ ├── common.py │ │ ├── ego4d_data.py │ │ └── kinetics.py │ ├── run_preprocess.py │ ├── train.py │ ├── utils.py │ └── val.py │ ├── common.py │ ├── dataset.py │ ├── readers.py │ └── util │ ├── __init__.py │ ├── lzstring.py │ └── masks.py ├── notebooks ├── CLEP_Features_Tutorial_CVPR_Presentation.ipynb ├── COLMAP.ipynb ├── Feature_Visualization_with_TSNE.ipynb ├── annotation_verification.ipynb ├── annotation_visualization.ipynb ├── egoexo │ ├── Ego-Exo4D_EgoPose_Tutorial.ipynb │ ├── EgoExo_Aria_Data_Tutorial.ipynb │ ├── EgoExo_Atomic_Descriptions_Tutorial.ipynb │ ├── EgoExo_Expert_Commentary_Tutorial.ipynb │ ├── EgoExo_MAWS_Features.ipynb │ ├── EgoExo_Relations.ipynb │ ├── requirements.txt │ └── tutorials │ │ └── gaze_tutorial.ipynb ├── moments_cvpr │ ├── MomentsWorkshop.ipynb │ ├── conda-env.yaml │ ├── moments_label_ids.json │ ├── moments_mini_train_uids.csv │ └── moments_mini_val_uids.csv ├── nb_video_utils.py ├── requirements.txt └── transform_annotations.ipynb ├── pyproject.toml ├── requirements.txt ├── run_viz.sh ├── scripts └── run_pyre.sh ├── setup.cfg ├── setup.py └── viz └── narrations ├── README.md ├── recipes ├── .gitignore ├── 0_config.sh ├── 1_gather_ids.sh ├── 2_dl_videos.sh ├── 3_prepare_input.sh ├── 4_review.sh └── README.md └── review ├── .gitattributes ├── .gitignore ├── README.md ├── build ├── asset-manifest.json ├── favicon.ico ├── index.html ├── robots.txt └── static │ ├── css │ ├── 2.0e909511.chunk.css │ ├── 2.0e909511.chunk.css.map │ ├── main.b199b389.chunk.css │ └── main.b199b389.chunk.css.map │ ├── js │ ├── 2.d8ddf740.chunk.js │ ├── 2.d8ddf740.chunk.js.LICENSE.txt │ ├── 2.d8ddf740.chunk.js.map │ ├── main.9fedd964.chunk.js │ ├── main.9fedd964.chunk.js.map │ ├── runtime-main.23afa8cb.js │ └── runtime-main.23afa8cb.js.map │ └── media │ ├── icons-16.13933033.ttf │ ├── icons-16.1645f50f.woff │ ├── icons-16.2368f88a.eot │ ├── icons-20.1ef633d3.woff │ ├── icons-20.57b3e708.ttf │ └── icons-20.cde033c5.eot ├── package.json ├── public ├── favicon.ico ├── index.html └── robots.txt ├── sample-data.csv ├── sample-data.jsonl ├── src ├── components │ ├── CollectionView.jsx │ ├── ErrorPane.js │ ├── ItemView.jsx │ └── pagination │ │ ├── Pagination.css │ │ ├── Pagination.jsx │ │ └── index.js ├── config.js ├── custom │ ├── .gitkeep │ ├── NarrationsApp.css │ ├── NarrationsApp.js │ ├── NarrationsItem.js │ └── NarrationsThumbnail.js ├── index.css ├── index.js ├── renderers │ ├── GridCollection │ │ ├── GridCollection.css │ │ ├── GridCollection.jsx │ │ └── index.js │ ├── JSONItem │ │ ├── JSONItem.css │ │ ├── JSONItem.jsx │ │ └── index.js │ ├── ListCollection │ │ ├── ListCollection.css │ │ ├── ListCollection.jsx │ │ ├── ListItem.jsx │ │ └── index.js │ ├── WordCloudItem │ │ ├── WordCloud.css │ │ ├── WordCloud.jsx │ │ ├── WordCloudItem.jsx │ │ └── index.js │ └── index.js └── utils.js └── yarn.lock /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.log 3 | *.pyc 4 | build/ 5 | dist/ 6 | *.egg-info 7 | .idea/ 8 | lightning_logs/ 9 | venv/ 10 | .vscode/ 11 | **.pth 12 | tp/* 13 | *.mp4 14 | *.json 15 | **/outputs/** 16 | .ipynb_checkpoints 17 | ego4d/internal/human_pose/outputs/* 18 | expert_commentary_log/** 19 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | # Changelog 2 | This changelog represents notable changes to the Ego4D dataset primarily, and secondarily for significant changes 
to this repo. 3 | 4 | # [1.0.0] 2/17/22 The Ego4d dataset is released 5 | 6 | # [1.0.1] Incremental updates 7 | 8 | 2/24/22 AV/Social Updated To Reflect is_looking_at_me = True for all instances (as intended) 9 | 3/3/22 AV models updated 10 | 3/4/22 Ego4d.json metadata updated to include additional fields 11 | 3/14/22 Gaze added to the release 12 | 3/15/22 2 missing videos added to clips 13 | 3/15/22 Ego4d.json updated to include additional clip metadata 14 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | This Code of Conduct also applies outside the project spaces when there is a 56 | reasonable belief that an individual's behavior may have a negative impact on 57 | the project or its community. 
58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported by contacting the project team. All 63 | complaints will be reviewed and investigated and will result in a response that 64 | is deemed necessary and appropriate to the circumstances. The project team is 65 | obligated to maintain confidentiality with regard to the reporter of an incident. 66 | Further details of specific enforcement policies may be posted separately. 67 | 68 | Project maintainers who do not follow or enforce the Code of Conduct in good 69 | faith may face temporary or permanent repercussions as determined by other 70 | members of the project's leadership. 71 | 72 | ## Attribution 73 | 74 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 75 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 76 | 77 | [homepage]: https://www.contributor-covenant.org 78 | 79 | For answers to common questions about this code of conduct, see 80 | https://www.contributor-covenant.org/faq 81 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Ego4d 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `main`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: <https://code.facebook.com/cla> 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to Ego4d, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Meta Platforms, Inc. and affiliates. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software.
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ego4d/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /ego4d/cli/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /ego4d/cli/datasets.csv: -------------------------------------------------------------------------------- 1 | dataset,description 2 | annotations,The full set of annotations for the majority of benchmarks. 3 | full_scale,The full scale version of all videos. (Provide `benchmarks` or `video_uids` filters to reduce the 5TB download size.) 4 | clips,Clips available for benchmark training tasks. (Provide `benchmarks` or `video_uids` filters to reduce the download size.) 5 | video_540ss,The downscaled version of all videos - rescaled to 540px on the short side. (Provide `benchmarks` or `video_uids` filters to reduce the 5TB download size.) 6 | annotations_540ss,The annotations corresponding to the downscaled `video_540ss` videos - primarily differing only in spatial annotations (e.g. bounding boxes). 7 | 3d,Annotations for the 3D VQ benchmark. 8 | 3d_scans,3D location scans for the 3D VQ benchmark. 9 | 3d_scan_keypoints,3D location scan keypoints for the 3D VQ benchmark. 10 | imu,IMU data for the subset of videos available 11 | slowfast8x8_r101_k400,Precomputed [action features](https://ego4d-data.org/docs/data/features/) for the Slowfast 8x8 (R101) model 12 | omnivore_video_swinl,Precomputed [action features](https://ego4d-data.org/docs/data/features/) for the Omnivore Video model 13 | omnivore_image_swinl,Precomputed [action features](https://ego4d-data.org/docs/data/features/) for the Omnivore Image model 14 | fut_loc,Images and annotations for the future locomotion benchmark. 15 | av_models,Model checkpoints for the AV/Social benchmark. 16 | lta_models,Model checkpoints for the Long Term Anticipation benchmark. 17 | moments_models,Model checkpoints for the Moments benchmark. 18 | nlq_models,Model checkpoints for the NLQ benchmark. 19 | sta_models,Model checkpoints for the Short Term Anticipation benchmark. 20 | vq2d_models,Model checkpoints for the 2D VQ benchmark.
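These dataset names are the values accepted by the Ego4D CLI's `--datasets` flag. A minimal download sketch (the flag spellings are assumed from the filter hints in the descriptions above; the output path and video uids are placeholders):

```bash
# Download the annotations in full, plus full-scale video for two uids only.
ego4d --output_directory ~/ego4d_data \
      --datasets annotations full_scale \
      --video_uids <uid1> <uid2>
```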
-------------------------------------------------------------------------------- /ego4d/cli/e2e_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import boto3 4 | from moto import mock_aws 5 | 6 | 7 | @mock_aws 8 | def test_s3(): 9 | s3 = boto3.resource("s3") 10 | print(s3) 11 | -------------------------------------------------------------------------------- /ego4d/cli/integrity.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | """ 4 | Verifies integrity of downloaded files 5 | """ 6 | 7 | from typing import Collection, List 8 | 9 | from ego4d.cli.download import VideoOnDisk 10 | 11 | 12 | def list_corrupt_files(downloads: Collection[VideoOnDisk]) -> List[VideoOnDisk]: 13 | """ 14 | Returns a list of any downloaded files that appear corrupted. 15 | """ 16 | return [d for d in downloads if _file_is_corrupt(d)] 17 | 18 | 19 | def _file_is_corrupt(download: VideoOnDisk): 20 | if download.file_path.exists(): 21 | return download.file_path.stat().st_size != download.s3_content_size_bytes 22 | 23 | return True 24 | -------------------------------------------------------------------------------- /ego4d/cli/progressbar.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import threading 4 | from typing import Optional 5 | 6 | import tqdm 7 | 8 | 9 | class DownloadProgressBar: 10 | """ 11 | Thread-safe progress bar for tracking downloads based on bytes. 12 | """ 13 | 14 | def __init__(self, total_size_bytes: Optional[int]): 15 | self.__tqdm = tqdm.tqdm( 16 | total=total_size_bytes if total_size_bytes else None, 17 | unit="iB", 18 | unit_scale=True, 19 | unit_divisor=1024, 20 | ) 21 | self.__lock = threading.Lock() 22 | 23 | def update(self, num_bytes: int) -> None: 24 | with self.__lock: 25 | self.__tqdm.update(n=num_bytes) 26 | -------------------------------------------------------------------------------- /ego4d/cli/s3path.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | """ 4 | Functionality for parsing AWS S3 paths. 5 | 6 | References: 7 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html#accessing-a-bucket-using-S3-format 8 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html 9 | https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html 10 | 11 | """ 12 | 13 | import re 14 | from typing import Tuple 15 | 16 | 17 | __S3_PATH_REGEX = re.compile(r"^s3://(?P<bucket>[^/]*)/(?P<key>.*)$") 18 | 19 | 20 | def bucket_and_key_from_path(path: str) -> Tuple[str, str]: 21 | """ 22 | Takes an S3 path (i.e. s3://<bucket>/<key>) and returns the bucket and 23 | object key as a string tuple. 24 | 25 | Notes: 26 | This does not validate whether or not the bucket and key satisfy the naming and 27 | character requirements imposed by AWS (e.g character limits on bucket names). 28 | """ 29 | match = __S3_PATH_REGEX.match(path) 30 | return match.group("bucket"), match.group("key") 31 | -------------------------------------------------------------------------------- /ego4d/cli/s3path_test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | 3 | from .s3path import bucket_and_key_from_path 4 | 5 | 6 | def test_key_without_slashes(): 7 | b, k = bucket_and_key_from_path("s3://bucket/key") 8 | assert b == "bucket" 9 | assert k == "key" 10 | 11 | 12 | def test_key_with_slashes(): 13 | b, k = bucket_and_key_from_path("s3://bucket/object/key") 14 | assert b == "bucket" 15 | assert k == "object/key" 16 | 17 | 18 | def test_bucket_with_special_chars(): 19 | b, k = bucket_and_key_from_path("s3://bucket-with.chars/object/key") 20 | assert b == "bucket-with.chars" 21 | assert k == "object/key" 22 | 23 | 24 | def test_key_with_extension(): 25 | b, k = bucket_and_key_from_path("s3://bucket-name/object/key.json") 26 | assert b == "bucket-name" 27 | assert k == "object/key.json" 28 | 29 | 30 | def test_key_path_with_special_chars(): 31 | b, k = bucket_and_key_from_path("s3://bucket-name/object$-/data/folder/key.json") 32 | assert b == "bucket-name" 33 | assert k == "object$-/data/folder/key.json" 34 | -------------------------------------------------------------------------------- /ego4d/cli/universities.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | """ 4 | Information about contributing universities. 5 | """ 6 | 7 | UNIV_TO_BUCKET = { 8 | "bristol": "ego4d-bristol", 9 | "cmu": "ego4d-cmu", 10 | "frl_track_1_public": "ego4d-consortium-sharing", 11 | "georgiatech": "ego4d-georgiatech", 12 | "iiith": "ego4d-iiith", 13 | "indiana": "ego4d-indiana", 14 | "kaust": "ego4d-kaust", 15 | "minnesota": "ego4d-minnesota", 16 | "nus": "ego4d-speac", 17 | "sfu": "ego4d-sfu", 18 | "unc": "ego4d-unc", 19 | "unict": "ego4d-unict-milan", 20 | "utokyo": "ego4d-utokyo", 21 | "uniandes": "ego4d-university-sa", 22 | "cmu_africa": "ego4d-universityaf", 23 | "upenn": "ego4d-penn", 24 | "fair": "ego4d-fair", 25 | } 26 | 27 | BUCKET_TO_UNIV = {v: k for k, v in UNIV_TO_BUCKET.items()} 28 | -------------------------------------------------------------------------------- /ego4d/egoexo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/egoexo/__init__.py -------------------------------------------------------------------------------- /ego4d/egoexo/download/README.md: -------------------------------------------------------------------------------- 1 | # EgoExo Downloader 2 | 3 | > 4 | > [!IMPORTANT] 5 | > Please ensure you have installed the Ego4D python package. Follow the 6 | > [instructions at the root 7 | > README](https://github.com/facebookresearch/Ego4d/tree/main?tab=readme-ov-file#setup) 8 | > for details. 9 | 10 | ## Pre-Read 11 | 12 | Please see the [documentation](https://docs.ego-exo4d-data.org/download/) for 13 | an overview of how to download the data and how/why it is partitioned. 14 | 15 | ## Usage 16 | 17 | To use the downloader, please run `egoexo`. Typing 18 | 19 | ```bash 20 | egoexo --help 21 | ``` 22 | 23 | will show you a summary of the available options. You will need to supply an 24 | output directory specifying where to download the data. You can do so by 25 | supplying an argument to `-o`, for example: 26 | 27 | >[!WARNING] 28 | >**If confirmed:** this will attempt to **download 14 TiB**, which is the 29 | >*recommended set*. Please see the **[Filtering](#filtering)** section to 30 | >reduce the download size to only what you care to download.
31 | 32 | ```bash 33 | egoexo -o <out-dir> 34 | ``` 35 | 36 | By default, this will download the recommended set of data. This is equivalent 37 | to providing `--parts metadata annotations takes captures take_trajectory`. This is quite large 38 | (~14TiB), and as such the rest of this document describes how to filter down 39 | this set or include parts that are not in the "recommended" set. 40 | 41 | ### Basic Examples 42 | 43 | To only download annotations: 44 | ```bash 45 | egoexo -o <out-dir> --parts annotations 46 | ``` 47 | Feel free to include any other part you wish; you can include multiple, e.g. 48 | ```bash 49 | egoexo -o <out-dir> --parts annotations metadata 50 | ``` 51 | 52 | Use `--help` for more information: 53 | ```bash 54 | egoexo --help 55 | ``` 56 | 57 | ### Filtering 58 | 59 | The following flags are used for filtering: 60 | 61 | - `--benchmarks <b1> [b2] ...`: only include data from specific benchmarks. If a provided dataset `--part` includes data that is not relevant to a benchmark (i.e. general data): it will be downloaded. 62 | - `--splits <split1> [split2] ...`: only include data from the train, val (validation) or test set. If a provided dataset `--part` includes data that is not relevant to a split (i.e. general data): it will be downloaded. 63 | - `--views <view1> [view2] ...`: include data only from the provided views. If data 64 | is general and not specific to any view: it will be downloaded (similar to `--splits`, `--benchmarks`). Provided arguments must be one or more of `ego` or `exo`. 65 | - `-u <u1> [u2] ...`, `--universities`: filter data that comes from specific universities 66 | - `--uids <uid1> [uid2] ...`: filter by a specific `take_uid` or `capture_uid` 67 | 68 | ### Advanced options 69 | 70 | - `-y`, `--yes`: don't prompt a yes/no confirmation to download 71 | - `-d`, `--delete`: delete any auxiliary files from your file system that are not included 72 | in this download 73 | - `--num_workers <num>`: supply the number of workers (threads) to perform the 74 | download. Default is 15. 75 | - `--release <release>`: download a specific version of the dataset 76 | - `--force`: force a download of all the files. Please see "A Note On Dataset 77 | Updates" below for more information. 78 | 79 | ### A Note On Dataset Updates 80 | 81 | When an update occurs, the downloader heuristically checks whether each file 82 | you have already downloaded has changed by looking 83 | for a delta in file size. This is not as robust as a checksum, so you 84 | may supply `--force` to force a re-download of all files.
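Filters compose across flags. A sketch of a combined invocation (the university and split values here are illustrative; run `egoexo --help` for the accepted values):

```bash
# Ego-view annotations and takes for the validation split, from one university.
egoexo -o <out-dir> --parts annotations takes --splits val --views ego -u unc --num_workers 30 -y
```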
85 | -------------------------------------------------------------------------------- /ego4d/egoexo/download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/egoexo/download/__init__.py -------------------------------------------------------------------------------- /ego4d/egoexo/download/cli.py: -------------------------------------------------------------------------------- 1 | from ego4d.internal.download.cli import create_arg_parse, main as download_main 2 | 3 | 4 | def main() -> None: 5 | parser = create_arg_parse( 6 | script_name="egoexo", 7 | release_name="v2", 8 | base_dir="s3://ego4d-consortium-sharing/egoexo-public/", 9 | ) 10 | args = parser.parse_args() 11 | download_main(args) 12 | 13 | 14 | if __name__ == "__main__": 15 | main() 16 | -------------------------------------------------------------------------------- /ego4d/egoexo/expert_commentary.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | def get_paths_for_commentary_time(comm: dict, t_sec: float) -> List[dict]: 5 | t = t_sec * 1000 # convert to ms 6 | 7 | paths = [] 8 | comm_start_t = comm["start_global_time"] 9 | for event in comm["events"]: 10 | assert event["type"] == "path" 11 | event_t_rel = event["global_time"] - comm_start_t 12 | for path in event["paths"]: 13 | if event["action"] == "clear" and event_t_rel < t: 14 | # NOTE: this could be implemented more efficiently 15 | paths = [] # clear out the paths 16 | 17 | path_t_rel = path["to"]["t"] - comm["start_global_time"] 18 | if path_t_rel > t: 19 | break 20 | else: 21 | paths.append( 22 | { 23 | "from": { 24 | "x": path["from"]["x"], 25 | "y": path["from"]["y"], 26 | "t": (path["from"]["t"] - event["global_time"]) / 1000.0, 27 | }, 28 | "to": { 29 | "x": path["to"]["x"], 30 | "y": path["to"]["y"], 31 | "t": (path["to"]["t"] - event["global_time"]) / 1000.0, 32 | }, 33 | } 34 | ) 35 | return paths 36 |
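A minimal usage sketch of `get_paths_for_commentary_time` (the JSON path is hypothetical; the dict layout — `start_global_time` plus `events` carrying `global_time`, `type`, `action`, and `paths` — is the one the function above reads):

```python
import json

from ego4d.egoexo.expert_commentary import get_paths_for_commentary_time

# Hypothetical path to a single recorded expert commentary.
with open("commentary.json") as f:
    comm = json.load(f)

# Drawing paths visible 12.5 seconds into the commentary.
for p in get_paths_for_commentary_time(comm, t_sec=12.5):
    print(p["from"]["x"], p["from"]["y"], "->", p["to"]["x"], p["to"]["y"])
```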
-------------------------------------------------------------------------------- /ego4d/egoexo/scripts/extract_audio_transcribe.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import functools 3 | import json 4 | import math 5 | import os 6 | import subprocess 7 | import tempfile 8 | from concurrent.futures import ThreadPoolExecutor 9 | 10 | import submitit 11 | 12 | import whisper 13 | from ego4d.research.common import batch_it 14 | from tqdm.auto import tqdm 15 | 16 | 17 | ROOT_DIR = "/large_experiments/egoexo/v2/" 18 | OUT_TAKES_DIR = "/checkpoint/miguelmartin/egoexo/v2/audio/" 19 | PART_BY_UNI = True 20 | 21 | 22 | def extract_audio_and_run_whisper(x, model): 23 | # need this here - otherwise pickle error 24 | from projectaria_tools.core.vrs import extract_audio_track 25 | 26 | base_dir = ( 27 | os.path.join(OUT_TAKES_DIR, x["take_name"]) 28 | if not PART_BY_UNI 29 | else os.path.join(OUT_TAKES_DIR, x["uni_id"], x["take_name"]) 30 | ) 31 | os.makedirs(base_dir, exist_ok=True) 32 | audio_outpath = os.path.join(base_dir, "audio", f"{x['vrs_base_name']}.wav") 33 | transcribe_outpath = os.path.join( 34 | base_dir, 35 | "audio", 36 | f"{x['vrs_base_name']}_transcriptions.json", 37 | ) 38 | if not os.path.exists(audio_outpath): 39 | _ = extract_audio_track( 40 | x["vrs_path"], 41 | audio_outpath, 42 | ) 43 | os.remove(audio_outpath + ".json") 44 | 45 | if os.path.exists(audio_outpath) and not os.path.exists(transcribe_outpath): 46 | temp = model.transcribe(audio_outpath, word_timestamps=True) 47 | json.dump(temp, open(transcribe_outpath, "w"), indent=2) 48 | 49 | 50 | def process_all(xs): 51 | model_name = "large-v3" 52 | device = "cuda" 53 | model = whisper.load_model(model_name, device=device) 54 | map_fn = functools.partial(extract_audio_and_run_whisper, model=model) 55 | for x in tqdm(xs): 56 | map_fn(x) 57 | 58 | 59 | def main(): 60 | # TODO: argparse 61 | num_machines: int = 256 62 | root_dir: str = ROOT_DIR 63 | out_dir: str = OUT_TAKES_DIR 64 | 65 | takes_to_process = json.load(open(os.path.join(ROOT_DIR, "takes.json"))) 66 | 67 | map_values = [] 68 | completed = 0 69 | num_vids = 0 70 | for take in takes_to_process: 71 | td = os.path.join(root_dir, take["root_dir"]) 72 | fs = os.listdir(td) 73 | fs = [f for f in fs if "noimagestream" in f] 74 | if len(fs) == 0: 75 | continue 76 | vrs_f = fs[0] 77 | vrs_file_path = os.path.join(root_dir, take["root_dir"], vrs_f) 78 | map_values.append( 79 | { 80 | "vrs_path": vrs_file_path, 81 | "vrs_base_name": os.path.splitext(vrs_f)[0].split("_")[0], 82 | "take_dir": td, 83 | "take_name": take["take_name"], 84 | "uni_id": take["university_id"], 85 | } 86 | ) 87 | num_vids += 1 88 | 89 | print( 90 | f"# to process: {len(map_values)} / {num_vids} [{completed} / {1 - (len(map_values) / num_vids):.2%} completed]" 91 | ) 92 | os.makedirs(out_dir, exist_ok=True) 93 | job_inputs = batch_it( 94 | map_values, batch_size=math.ceil(len(map_values) / num_machines) 95 | ) 96 | num_machines = min(num_machines, len(job_inputs)) 97 | 98 | dt_now = datetime.datetime.now().strftime("%y%m%d_%H%M") 99 | log_dir = f"extract_audio_and_run_whisper/{dt_now}" 100 | print(f"Logging to: {log_dir}") 101 | os.makedirs(log_dir, exist_ok=True) 102 | executor = submitit.AutoExecutor(folder=log_dir) 103 | 104 | executor.update_parameters( 105 | timeout_min=3200, 106 | slurm_array_parallelism=num_machines, 107 | slurm_constraint="volta", 108 | slurm_partition="eht", 109 | gpus_per_node=1, 110 | cpus_per_task=10, 111 | ) 112 | jobs = executor.map_array(process_all, job_inputs) 113 | 114 | print("Waiting...") 115 | results = [] 116 | for job in tqdm(jobs): 117 | results.append(job.result()) 118 | print("Done") 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /ego4d/features/README.md: -------------------------------------------------------------------------------- 1 | # Ego4D Features 2 | 3 | This sub-directory contains code to extract features from the Ego4D dataset. 4 | It supports a set of pretrained models and can extract audio, video, or 5 | image features. 6 | 7 | For scheduling on a cluster with SLURM support, see `slurm.py`. Videos are 8 | assigned to jobs via a greedy bin-packing algorithm, and jobs are submitted 9 | as SLURM arrays. 10 | 11 | ## Requirements 12 | 13 | Please see requirements.txt at the base repository directory. 14 | 15 | submitit is only required if you want to schedule 16 | jobs on a SLURM cluster.
17 | 18 | ### conda 19 | 20 | ```sh 21 | conda create --name ego4d_public 22 | conda activate ego4d_public 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | ## Usage 27 | 28 | ### Profiling/Testing 29 | Run a test extraction to ensure you have everything set up right: 30 | 31 | ```sh 32 | python3 ego4d/features/profile_extract.py --config-name slowfast_r101_8x8 schedule_config.run_locally=1 33 | ``` 34 | 35 | This will benchmark the code to allow you to estimate/configure the scheduling 36 | parameters. Don't provide `schedule_config.run_locally=1` if you want to 37 | schedule it on the cluster. 38 | 39 | 40 | #### Inference 41 | 42 | MViT on Kinetics-400 example: 43 | ```sh 44 | python3 ego4d/features/inference.py --config-name mvit_k400 schedule_config.run_locally=1 \ 45 | +dataset_type="k400" \ 46 | +dataset_dir="/datasets01/Kinetics400_Frames/videos/" \ 47 | +set_to_use="val" \ 48 | +seed=1337 \ 49 | +top_k=2 \ 50 | +num_examples=4 51 | ``` 52 | 53 | Omnivore on ImageNet: 54 | ```sh 55 | python3 ego4d/features/inference.py --config-name omnivore_image schedule_config.run_locally=1 \ 56 | +dataset_type="imagenet" \ 57 | +dataset_dir="/datasets01/imagenet_full_size/061417/" \ 58 | +set_to_use="train" \ 59 | +seed=1337 \ 60 | +top_k=2 \ 61 | +num_examples=3 62 | ``` 63 | 64 | 65 | ### Schedule The Extraction 66 | 67 | ```sh 68 | python3 ego4d/features/slurm.py --config-name slowfast_r101_8x8 69 | ``` 70 | 71 | ### As an API 72 | 73 | Refer to `ego4d/features/extract_features.py` and the functions: 74 | - `extract_features` 75 | - `perform_feature_extraction` 76 | 77 | 78 | ## Configuring 79 | 80 | Hydra is used for configuration. You can override configuration options through 81 | CLI arguments or by modifying the YAML files in the configs directory. 82 | 83 | Pre-configured YAML files are in the subdirectory `ego4d/features/configs/`. 84 | 85 | Model configurations include: 86 | 1. SlowFast 8x8 ResNet101 pre-trained on Kinetics 400 (see [`slowfast_r101_8x8.yaml`](ego4d/features/configs/slowfast_r101_8x8.yaml)) 87 | 2. MViT 32x8 pre-trained on Kinetics 400 (see [`mvit_k400.yaml`](ego4d/features/configs/mvit_k400.yaml)) 88 | 89 | #### How to Run with a different config (model) 90 | 91 | Provide `--config-name <name>`, 92 | 93 | where `<name>` is the name of the configuration file without the `.yaml` extension. 94 | 95 | #### Run on a subset of videos 96 | 97 | Provide `io.uid_list` in the YAML (`InputOutputConfig.uid_list`) or as a list of arguments on the CLI. 98 | 99 | Example: 100 | 101 | ```bash 102 | python3 ego4d/features/slurm.py --config-name slowfast_r101_8x8 io.uid_list="[000a3525-6c98-4650-aaab-be7d2c7b9402]" 103 | ``` 104 | 105 | ## Adding a Model 106 | 107 | I'd recommend copy-pasting an existing model python file as a starting point. 108 | 109 | 1. Add a new python file to `ego4d/features/models` 110 | 2. Ensure you have the following (a minimal sketch follows this list): 111 | - ModelConfig, which must inherit from `ego4d.features.config.BaseModelConfig` 112 | - Additional configuration for your model 113 | - get_transform(inference_config: InferenceConfig, config: ModelConfig) 114 | - load_model(inference_config: InferenceConfig, config: ModelConfig, patch_final_layer: bool = True) 115 |
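To make step 2 concrete, here is a minimal sketch of a new model file (say `ego4d/features/models/my_model.py`; the hub repo `my_org/my_repo`, the `hub_path` field, and the `head` attribute are placeholders, not a real model — the import paths mirror the existing model files in this directory):

```python
from dataclasses import dataclass

import torch
from ego4d.features.config import BaseModelConfig, InferenceConfig
from ego4d.features.models.common import FeedVideoInput
from pytorchvideo.transforms import ApplyTransformToKey
from torch.nn import Identity, Module
from torchvision.transforms import Compose, Lambda


@dataclass
class ModelConfig(BaseModelConfig):
    # additional, model-specific configuration goes here
    hub_path: str = "my_model"  # placeholder


def load_model(
    inference_config: InferenceConfig,
    config: ModelConfig,
    patch_final_layer: bool = True,
) -> Module:
    # placeholder hub repo; load your checkpoint however is appropriate
    model = torch.hub.load("my_org/my_repo", config.hub_path)
    if patch_final_layer:
        # strip the classification head so forward() emits feature vectors
        model.head = Identity()
    model = FeedVideoInput(model)  # feeds x["video"] into the wrapped model
    return model.eval().to(inference_config.device)


def get_transform(inference_config: InferenceConfig, config: ModelConfig):
    # scale pixels to [0, 1]; add model-specific normalization/cropping here
    return ApplyTransformToKey(
        key="video",
        transform=Compose([Lambda(lambda x: x / 255.0)]),
    )
```

Then add a matching YAML under `ego4d/features/configs/` whose `model_module_str` points at the new module (e.g. `model_module_str: ego4d.features.models.my_model`), mirroring the existing configs.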
-------------------------------------------------------------------------------- /ego4d/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/features/__init__.py -------------------------------------------------------------------------------- /ego4d/features/audio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/features/audio.py -------------------------------------------------------------------------------- /ego4d/features/configs/audio_mel_spectrogram.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | ego4d_download_dir: /checkpoint/miguelmartin/ego4d/ 7 | uid_list: null 8 | video_limit: -1 9 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/audio_mel_spectrogram 10 | debug_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/audio_mel_spectrogram_debug 11 | debug_mode: false 12 | exclude_no_audio: true 13 | dataset_version: ego4d 14 | inference_config: 15 | device: cpu 16 | batch_size: 1 17 | num_workers: 0 18 | prefetch_factor: 2 19 | fps: 30 20 | frame_window: 100000000 21 | stride: 100000000 22 | include_audio: true 23 | include_video: false 24 | norm_config: 25 | normalize_audio: true 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder: slurm_log/%j 31 | timeout_min: 720 32 | constraint: volta 33 | slurm_partition: pixar 34 | slurm_array_parallelism: 10 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 1.0 38 | time_per_forward_pass: 100 39 | schedule_time_per_node: 10.0 40 | model_config: 41 | n_fft: 1024 42 | win_length: null 43 | hop_length: 160 44 | n_mels: 128 45 | model_module_str: ego4d.features.models.mel_spectrogram 46 | -------------------------------------------------------------------------------- /ego4d/features/configs/audio_speechbrain_asr.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | uid_list: null 7 | video_limit: 1 8 | ego4d_download_dir: /checkpoint/miguelmartin/ego4d/ 9 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/audio_speechbrain_transcriptions 10 | debug_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/audio_speechbrain_transcriptions_debug 11 | debug_mode: false 12 | exclude_no_audio: true 13 | dataset_version: ego4d 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 10 18 | prefetch_factor: 2 19 | fps: 30 20 | frame_window: 150 21 | stride: 150 22 | include_audio: true 23 | include_video: false 24 | norm_config: 25 | normalize_audio: false 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder:
slurm_log/%j 31 | timeout_min: 720 32 | constraint: volta 33 | slurm_partition: pixar 34 | slurm_array_parallelism: 10 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 1.0 38 | time_per_forward_pass: 0.1 39 | schedule_time_per_node: 10.0 40 | model_config: 41 | source: "speechbrain/asr-crdnn-transformerlm-librispeech" 42 | savedir: "pretrained_models/asr-crdnn-transformerlm-librispeech" 43 | model_module_str: ego4d.features.models.speechbrain_asr 44 | -------------------------------------------------------------------------------- /ego4d/features/configs/egoexo_maws_clip.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | egoexo_data_dir: "/large_experiments/egoexo/dev/" 6 | eligible_cam_prefixes: null 7 | dataset_version: egoexo 8 | uid_list: null 9 | video_limit: -1 10 | out_path: /checkpoint/miguelmartin/egoexo_features/maws_clip_2b 11 | debug_path: /checkpoint/miguelmartin/egoexo_features/maws_clip_2b_debug 12 | debug_mode: false 13 | exclude_no_audio: false 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 0 18 | prefetch_factor: null 19 | fps: 30 20 | frame_window: 1 21 | stride: 1 22 | include_audio: false 23 | include_video: true 24 | norm_config: 25 | normalize_audio: false 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder: slurm_log/%j 31 | timeout_min: 600 32 | constraint: volta 33 | slurm_partition: eht 34 | slurm_array_parallelism: 256 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 2.0 38 | time_per_forward_pass: 0.25 39 | schedule_time_per_node: 10.0 40 | model_config: 41 | model_name: "vit_2b14_xlmr_l" 42 | base_model: "maws_clip" 43 | input_type: "video" 44 | model_module_str: ego4d.features.models.maws 45 | check_fv_count: false # doing this due to variable frame rate 46 | -------------------------------------------------------------------------------- /ego4d/features/configs/egoexo_omnivore.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | egoexo_data_dir: "/large_experiments/egoexo/v2/" 6 | eligible_cam_prefixes: null 7 | dataset_version: egoexo 8 | uid_list: null 9 | video_limit: -1 10 | out_path: /checkpoint/miguelmartin/egoexo_features/omnivore_public 11 | debug_path: /checkpoint/miguelmartin/egoexo_features/omnivore_debug 12 | debug_mode: false 13 | exclude_no_audio: false 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 9 18 | prefetch_factor: null 19 | fps: 30 20 | frame_window: 32 21 | stride: 16 22 | include_audio: false 23 | include_video: true 24 | norm_config: 25 | normalize_audio: false 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder: slurm_log/%j 31 | timeout_min: 400 32 | constraint: volta 33 | slurm_partition: eht 34 | slurm_array_parallelism: 256 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 1.3 38 | time_per_forward_pass: 3.1 39 | schedule_time_per_node: 10.0 40 | model_config: 41 | model_name: "omnivore_swinL_imagenet21k" 42 | input_type: "video" 43 | side_size: 256 44 | crop_size: 224 45 | mean: 46 | - 0.485 47 | - 0.456 48 | - 0.406 49 | std: 50 | - 0.229 51 | - 0.224 52 | - 0.225 53 | model_module_str: ego4d.features.models.omnivore 54 | check_fv_count: false # doing 
this due to variable frame rate 55 | -------------------------------------------------------------------------------- /ego4d/features/configs/mvit_imagenet.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | uid_list: null 7 | video_limit: -1 8 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/mvit_imagenet/ 9 | exclude_no_audio: false 10 | dataset_version: ego4d 11 | inference_config: 12 | device: cuda 13 | batch_size: 1 14 | num_workers: 9 15 | prefetch_factor: 2 16 | fps: 30 17 | frame_window: 1 18 | stride: 5 19 | include_audio: false 20 | include_video: true 21 | norm_config: 22 | normalize_audio: false 23 | resample_audio_rate: 16000 24 | resampling_method: "sinc_interpolation" 25 | schedule_config: 26 | run_locally: false 27 | log_folder: slurm_log/%j 28 | timeout_min: 720 29 | constraint: volta 30 | slurm_partition: pixar 31 | slurm_array_parallelism: 256 32 | gpus_per_node: 1 33 | cpus_per_task: 10 34 | overhead: 2.0 35 | time_per_forward_pass: 0.8 36 | schedule_time_per_node: 10.0 37 | model_config: 38 | pretrained_dataset: "imagenet" 39 | side_size: 256 40 | crop_size: 224 41 | mean: 42 | - 0.485 43 | - 0.456 44 | - 0.406 45 | std: 46 | - 0.229 47 | - 0.224 48 | - 0.225 49 | model_module_str: ego4d.features.models.mvit 50 | -------------------------------------------------------------------------------- /ego4d/features/configs/mvit_k400.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | ego4d_download_dir: /checkpoint/miguelmartin/ego4d/ 7 | uid_list: null 8 | video_limit: -1 9 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/mvit_k400/ 10 | debug_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/mvit_k400_debug 11 | debug_mode: false 12 | exclude_no_audio: false 13 | dataset_version: ego4d 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 9 18 | prefetch_factor: 2 19 | fps: 30 20 | frame_window: 32 21 | stride: 16 22 | include_audio: false 23 | include_video: true 24 | schedule_config: 25 | run_locally: false 26 | log_folder: slurm_log/%j 27 | timeout_min: 720 28 | constraint: volta 29 | slurm_partition: pixar 30 | slurm_array_parallelism: 256 31 | gpus_per_node: 1 32 | cpus_per_task: 10 33 | overhead: 2.0 34 | time_per_forward_pass: 0.8 35 | schedule_time_per_node: 10.0 36 | model_config: 37 | pretrained_dataset: "k400" 38 | side_size: 256 39 | crop_size: 224 40 | mean: 41 | - 0.45 42 | - 0.45 43 | - 0.45 44 | std: 45 | - 0.225 46 | - 0.225 47 | - 0.225 48 | model_module_str: ego4d.features.models.mvit 49 | -------------------------------------------------------------------------------- /ego4d/features/configs/omnivore_image.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | uid_list: null 7 | video_limit: -1 8 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/omnivore_video 9 | dataset_version: ego4d 10 | inference_config: 11 | device: cuda 12 | batch_size: 1 13 | num_workers: 9 14 | prefetch_factor: 2 15 | fps: 30 16 | frame_window: 1 17 | stride: 5 18 | include_audio: false 19 | 
include_video: true 20 | schedule_config: 21 | run_locally: false 22 | log_folder: slurm_log/%j 23 | timeout_min: 720 24 | constraint: volta 25 | slurm_partition: pixar 26 | slurm_array_parallelism: 256 27 | gpus_per_node: 1 28 | cpus_per_task: 10 29 | overhead: 2.0 30 | time_per_forward_pass: 0.8 31 | schedule_time_per_node: 10.0 32 | model_config: 33 | model_name: "omnivore_swinB" 34 | input_type: "image" 35 | side_size: 256 36 | crop_size: 224 37 | mean: 38 | - 0.485 39 | - 0.456 40 | - 0.406 41 | std: 42 | - 0.229 43 | - 0.224 44 | - 0.225 45 | model_module_str: ego4d.features.models.omnivore 46 | -------------------------------------------------------------------------------- /ego4d/features/configs/omnivore_video.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | ego4d_download_dir: /checkpoint/miguelmartin/ego4d/ 7 | uid_list: null 8 | video_limit: -1 9 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/omnivore_video 10 | debug_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/omnivore_video_debug 11 | debug_mode: false 12 | exclude_no_audio: false 13 | dataset_version: ego4d 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 10 18 | prefetch_factor: 3 19 | fps: 30 20 | frame_window: 32 21 | stride: 16 22 | include_audio: false 23 | include_video: true 24 | norm_config: 25 | normalize_audio: false 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder: slurm_log/%j 31 | timeout_min: 3750 32 | constraint: volta 33 | slurm_partition: pixar 34 | slurm_array_parallelism: 256 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 1.1 38 | time_per_forward_pass: 2.5 39 | schedule_time_per_node: 10.0 40 | model_config: 41 | model_name: "omnivore_swinL_imagenet21k" 42 | input_type: "video" 43 | side_size: 256 44 | crop_size: 224 45 | mean: 46 | - 0.485 47 | - 0.456 48 | - 0.406 49 | std: 50 | - 0.229 51 | - 0.224 52 | - 0.225 53 | model_module_str: ego4d.features.models.omnivore 54 | -------------------------------------------------------------------------------- /ego4d/features/configs/slowfast_r101_8x8.yaml: -------------------------------------------------------------------------------- 1 | fps: 30 2 | force_yes: false 3 | io: 4 | filter_completed: true 5 | video_dir_path: /datasets01/ego4d_track2/v1/full_scale/ 6 | ego4d_download_dir: /checkpoint/miguelmartin/ego4d/ 7 | uid_list: null 8 | video_limit: -1 9 | out_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/slowfast 10 | debug_path: /checkpoint/miguelmartin/ego4d_track2_features/full_scale/action_features_debug 11 | debug_mode: false 12 | exclude_no_audio: false 13 | dataset_version: ego4d 14 | inference_config: 15 | device: cuda 16 | batch_size: 1 17 | num_workers: 10 18 | prefetch_factor: 2 19 | fps: 30 20 | frame_window: 32 21 | stride: 16 22 | include_audio: false 23 | include_video: true 24 | norm_config: 25 | normalize_audio: false 26 | resample_audio_rate: 16000 27 | resampling_method: "sinc_interpolation" 28 | schedule_config: 29 | run_locally: false 30 | log_folder: slurm_log/%j 31 | timeout_min: 3200 32 | constraint: volta 33 | slurm_partition: pixar 34 | slurm_array_parallelism: 128 35 | gpus_per_node: 1 36 | cpus_per_task: 10 37 | overhead: 1.1 38 | time_per_forward_pass: 2.4 39 | 
schedule_time_per_node: 10.0 40 | model_config: 41 | model_path: null 42 | hub_path: slowfast_r101 43 | slowfast_alpha: 4 44 | side_size: 256 45 | crop_size: 256 46 | mean: 47 | - 0.45 48 | - 0.45 49 | - 0.45 50 | std: 51 | - 0.225 52 | - 0.225 53 | - 0.225 54 | model_module_str: ego4d.features.models.slowfast 55 | -------------------------------------------------------------------------------- /ego4d/features/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/features/models/__init__.py -------------------------------------------------------------------------------- /ego4d/features/models/common.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module 3 | 4 | 5 | class FeedVideoInput(Module): 6 | def __init__(self, model: Module): 7 | super().__init__() 8 | self.model = model 9 | 10 | def forward(self, x) -> torch.Tensor: 11 | return self.model(x["video"]) 12 | -------------------------------------------------------------------------------- /ego4d/features/models/maws.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | from dataclasses import dataclass 4 | 5 | import torch 6 | from ego4d.features.config import BaseModelConfig, InferenceConfig 7 | from maws.model_builder import build_model 8 | from pytorchvideo.transforms import ApplyTransformToKey 9 | from torch.nn import Module 10 | from torchvision.transforms import CenterCrop, Compose, Lambda, Normalize, Resize 11 | 12 | 13 | @dataclass 14 | class ModelConfig(BaseModelConfig): 15 | model_name: str = "vit_2b14_xlmr_l" 16 | base_model: str = "maws_clip" 17 | input_type: str = "video" 18 | 19 | 20 | class WrapModel(Module): 21 | def __init__(self, model: Module): 22 | super().__init__() 23 | self.model = model 24 | 25 | def forward(self, x) -> torch.Tensor: 26 | imgs = x["video"].half() 27 | imgs = imgs.view(-1, 3, imgs.shape[-2], imgs.shape[-1]) 28 | return self.model.encode_images(imgs) 29 | 30 | 31 | def load_model( 32 | inference_config: InferenceConfig, 33 | config: ModelConfig, 34 | patch_final_layer: bool = True, 35 | ) -> Module: 36 | assert patch_final_layer, "maws does not provide a head" 37 | 38 | model = build_model(config.model_name, config.base_model) 39 | # model_name = f"{config.model_name}_{config.base_model}" 40 | # model = torch.hub.load("facebookresearch/maws", model=model_name) 41 | 42 | # Set to GPU or CPU 43 | model = WrapModel(model) 44 | model = model.to(inference_config.device) 45 | model = model.eval().half() 46 | return model 47 | 48 | 49 | def norm_pixels(x): 50 | return x / 255.0 51 | 52 | 53 | def video_to_image(x): 54 | x = x.permute(1, 0, 2, 3).squeeze(0) 55 | return x 56 | 57 | 58 | def get_transform(inference_config: InferenceConfig, config: ModelConfig): 59 | assert inference_config.frame_window == 1 60 | transforms = [ 61 | Lambda(norm_pixels), 62 | Lambda(video_to_image), 63 | Resize(size=224, interpolation=3), # pyre-ignore 64 | CenterCrop(size=224), 65 | Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 66 | ] 67 | 68 | return ApplyTransformToKey( 69 | key="video", 70 | transform=Compose(transforms), 71 | ) 72 | -------------------------------------------------------------------------------- /ego4d/features/models/mel_spectrogram.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | from dataclasses import dataclass 4 | from typing import Optional 5 | 6 | import torch 7 | from ego4d.features.config import BaseModelConfig, InferenceConfig 8 | from torch.nn import Module 9 | from torchaudio.transforms import MelSpectrogram 10 | from torchvision.transforms import Compose 11 | 12 | 13 | @dataclass 14 | class ModelConfig(BaseModelConfig): 15 | n_fft: int = 1024 16 | win_length: Optional[int] = None 17 | hop_length: int = 160 # 10ms 18 | n_mels: int = 128 19 | 20 | 21 | class MelSpectrogramModel(Module): 22 | def __init__(self, inference_config: InferenceConfig, model_config: ModelConfig): 23 | super().__init__() 24 | self.inference_config = inference_config 25 | self.config = model_config 26 | 27 | def get_mel_spectrogram_transform(self, freq): 28 | return MelSpectrogram( 29 | sample_rate=self.inference_config.norm_config.resample_audio_rate, 30 | n_fft=self.config.n_fft, 31 | win_length=self.config.win_length, 32 | hop_length=self.config.hop_length, 33 | n_mels=self.config.n_mels, 34 | ) 35 | 36 | def forward(self, x) -> torch.Tensor: 37 | assert len(x["audio_sample_rate"]) == 1 38 | f = self.get_mel_spectrogram_transform(x["audio_sample_rate"][0]) 39 | if "audio" not in x: 40 | return torch.empty(1, 1) 41 | return f(x["audio"]) 42 | 43 | 44 | def load_model( 45 | inference_config: InferenceConfig, 46 | config: ModelConfig, 47 | patch_final_layer: bool = True, 48 | ) -> Module: 49 | model = MelSpectrogramModel(inference_config, config) 50 | # don't need to set to GPU for a Mel spectrogram - but will do anyway 51 | model = model.eval() 52 | model = model.to(inference_config.device) 53 | return model 54 | 55 | 56 | def get_transform(inference_config: InferenceConfig, config: ModelConfig): 57 | # do nothing 58 | return Compose([]) 59 | -------------------------------------------------------------------------------- /ego4d/features/models/mvit.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
2 | 3 | from dataclasses import dataclass 4 | from typing import Tuple 5 | 6 | from ego4d.features.config import BaseModelConfig, InferenceConfig 7 | from ego4d.features.models.common import FeedVideoInput 8 | from pytorchvideo.models.hub.vision_transformers import mvit_base_16, mvit_base_32x3 9 | from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale 10 | from torch.nn import Identity, Module 11 | from torchvision.transforms import Compose, Lambda 12 | from torchvision.transforms._transforms_video import CenterCropVideo, NormalizeVideo 13 | 14 | 15 | @dataclass 16 | class ModelConfig(BaseModelConfig): 17 | pretrained_dataset: str = "k400" 18 | 19 | # transformation config 20 | side_size: int = 256 21 | crop_size: int = 224 22 | mean: Tuple[float] = (0.45, 0.45, 0.45) 23 | std: Tuple[float] = (0.225, 0.225, 0.225) 24 | 25 | 26 | def load_model( 27 | inference_config: InferenceConfig, 28 | config: ModelConfig, 29 | patch_final_layer: bool = True, 30 | ) -> Module: 31 | assert config.pretrained_dataset in ("k400", "imagenet") 32 | if config.pretrained_dataset == "k400": 33 | print("Loading K400 MViT") 34 | model = mvit_base_32x3(pretrained=True) 35 | else: 36 | print("Loading ImageNet MViT") 37 | model = mvit_base_16(pretrained=True) 38 | 39 | assert model is not None 40 | 41 | if patch_final_layer: 42 | model.head = Identity() 43 | 44 | # Set to GPU or CPU 45 | model = FeedVideoInput(model) 46 | model = model.eval() 47 | model = model.to(inference_config.device) 48 | return model 49 | 50 | 51 | def get_transform(inference_config: InferenceConfig, config: ModelConfig): 52 | transforms = [ 53 | Lambda(lambda x: x / 255.0), 54 | NormalizeVideo(config.mean, config.std), 55 | ShortSideScale(size=config.side_size), 56 | CenterCropVideo(config.crop_size), 57 | ] 58 | # image-based dataset 59 | if config.pretrained_dataset == "imagenet": 60 | # NOTE untested due to MViT imagenet not being available on torch hub 61 | transforms += [Lambda(lambda x: x.squeeze_(2))] 62 | return Compose( 63 | [ 64 | ApplyTransformToKey( 65 | key="video", 66 | transform=Compose(transforms), 67 | ) 68 | ] 69 | ) 70 | -------------------------------------------------------------------------------- /ego4d/features/models/omnivore.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
2 | 
3 | from dataclasses import dataclass
4 | from typing import Tuple
5 | 
6 | import torch
7 | from ego4d.features.config import BaseModelConfig, InferenceConfig
8 | from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale
9 | from torch.nn import Identity, Module
10 | from torchvision.transforms import Compose, Lambda
11 | from torchvision.transforms._transforms_video import CenterCropVideo, NormalizeVideo
12 | 
13 | 
14 | @dataclass
15 | class ModelConfig(BaseModelConfig):
16 |     model_name: str = "omnivore_swinB"
17 |     input_type: str = "video"
18 |     side_size: int = 256
19 |     crop_size: int = 224
20 |     mean: Tuple[float, float, float] = (0.485, 0.456, 0.406)
21 |     std: Tuple[float, float, float] = (0.229, 0.224, 0.225)
22 | 
23 | 
24 | class WrapModel(Module):
25 |     def __init__(self, model: Module, input_type: str):
26 |         super().__init__()
27 |         self.model = model
28 |         self.input_type = input_type
29 | 
30 |     def forward(self, x) -> torch.Tensor:
31 |         return self.model(x["video"], input_type=self.input_type)
32 | 
33 | 
34 | def load_model(
35 |     inference_config: InferenceConfig,
36 |     config: ModelConfig,
37 |     patch_final_layer: bool = True,
38 | ) -> Module:
39 |     model = torch.hub.load("facebookresearch/omnivore", model=config.model_name)
40 | 
41 |     if patch_final_layer:
42 |         model.heads.image = Identity()
43 |         model.heads.video = Identity()
44 |         model.heads.rgbd = Identity()
45 | 
46 |     # Set to GPU or CPU
47 |     model = WrapModel(model, config.input_type)
48 |     model = model.eval()
49 |     model = model.to(inference_config.device)
50 |     return model
51 | 
52 | 
53 | def norm_pixels(x):
54 |     return x / 255.0
55 | 
56 | 
57 | def get_transform(inference_config: InferenceConfig, config: ModelConfig):
58 |     if config.input_type != "video":
59 |         # image input consumes a single frame at a time
60 |         assert inference_config.frame_window == 1
61 |     # the same transforms apply to both video and image input
62 |     transforms = [
63 |         Lambda(norm_pixels),
64 |         NormalizeVideo(config.mean, config.std),
65 |         ShortSideScale(size=config.side_size),
66 |         CenterCropVideo(config.crop_size),
67 |     ]
68 | 
69 |     return ApplyTransformToKey(
70 |         key="video",
71 |         transform=Compose(transforms),
72 |     )
73 | 
--------------------------------------------------------------------------------
/ego4d/features/models/slowfast.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
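# SlowFast models take two temporally-sampled views of the same clip: a fast
# pathway at the full frame rate and a slow pathway subsampled by
# `slowfast_alpha`. As an illustration (numbers are only an example): for a
# clip tensor of shape (C, 32, H, W) with slowfast_alpha=4, the PackPathway
# transform below yields
#
#     [slow (C, 8, H, W), fast (C, 32, H, W)]
#
# i.e. the list-of-tensors input the torch hub SlowFast models expect.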
2 | 
3 | from dataclasses import dataclass
4 | from typing import Optional, Tuple
5 | 
6 | import torch
7 | from ego4d.features.config import BaseModelConfig, InferenceConfig
8 | from ego4d.features.models.common import FeedVideoInput
9 | from pytorchvideo.transforms import ApplyTransformToKey, ShortSideScale
10 | from torch.nn import Identity, Module
11 | from torchvision.transforms import Compose, Lambda
12 | from torchvision.transforms._transforms_video import CenterCropVideo, NormalizeVideo
13 | 
14 | 
15 | @dataclass
16 | class ModelConfig(BaseModelConfig):
17 |     model_path: Optional[str] = None
18 |     hub_path: Optional[str] = "slowfast_r101"
19 |     slowfast_alpha: int = 4
20 | 
21 |     # transformation config
22 |     side_size: int = 256
23 |     crop_size: int = 256
24 |     mean: Tuple[float, float, float] = (0.45, 0.45, 0.45)
25 |     std: Tuple[float, float, float] = (0.225, 0.225, 0.225)
26 | 
27 | 
28 | class GetFv(Module):
29 |     """
30 |     Replacement head: average-pools the remaining spatio-temporal
31 |     dimensions into a (batch_size, feature_dim) tensor.
32 |     """
33 | 
34 |     def __init__(self):
35 |         super().__init__()
36 | 
37 |     def forward(self, x: torch.Tensor) -> torch.Tensor:
38 |         bs = x.shape[0]
39 |         fv_s = x.shape[1]
40 |         return x.view(bs, fv_s, -1).mean(2)
41 | 
42 | 
43 | def load_model(
44 |     inference_config: InferenceConfig,
45 |     config: ModelConfig,
46 |     patch_final_layer: bool = True,
47 | ) -> Module:
48 |     if config.model_path is not None:
49 |         raise AssertionError("loading from a local model_path is not supported yet")
50 |     assert config.hub_path is not None
51 |     model = torch.hub.load(
52 |         "facebookresearch/pytorchvideo", config.hub_path, pretrained=True
53 |     )
54 | 
55 |     assert model is not None
56 | 
57 |     if patch_final_layer:
58 |         model.blocks[6] = GetFv()
59 | 
60 |     # Set to GPU or CPU
61 |     model = FeedVideoInput(model)
62 |     model = model.eval()
63 |     model = model.to(inference_config.device)
64 |     return model
65 | 
66 | 
67 | class PackPathway(torch.nn.Module):
68 |     """
69 |     Transform converting a video tensor into the list of pathway tensors
70 |     [slow_pathway, fast_pathway] expected by SlowFast models.
71 |     """
72 | 
73 |     def __init__(self, slow_fast_alpha):
74 |         super().__init__()
75 |         self.slow_fast_alpha = slow_fast_alpha
76 | 
77 |     def forward(self, frames: torch.Tensor):
78 |         fast_pathway = frames
79 |         # Perform temporal subsampling for the slow pathway.
80 |         slow_pathway = torch.index_select(
81 |             frames,
82 |             1,
83 |             torch.linspace(
84 |                 0, frames.shape[1] - 1, frames.shape[1] // self.slow_fast_alpha
85 |             ).long(),
86 |         )
87 |         frame_list = [slow_pathway, fast_pathway]
88 |         return frame_list
89 | 
90 | 
91 | def get_transform(inference_config: InferenceConfig, config: ModelConfig):
92 |     return ApplyTransformToKey(
93 |         key="video",
94 |         transform=Compose(
95 |             [
96 |                 Lambda(lambda x: x / 255.0),
97 |                 NormalizeVideo(config.mean, config.std),
98 |                 ShortSideScale(size=config.side_size),
99 |                 CenterCropVideo(config.crop_size),
100 |                 PackPathway(config.slowfast_alpha),
101 |             ]
102 |         ),
103 |     )
104 | 
--------------------------------------------------------------------------------
/ego4d/features/models/speechbrain_asr.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
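# Unlike the vision models in this package, this wrapper emits a dict
# ({"text": ..., "score": ...}) per audio window instead of a feature tensor.
# For reference, the equivalent high-level SpeechBrain call on a file
# (a sketch; the paths are placeholders):
#
#     asr = EncoderDecoderASR.from_hparams(
#         source="speechbrain/asr-crdnn-transformerlm-librispeech",
#         savedir="pretrained_models/asr-crdnn-transformerlm-librispeech",
#     )
#     text = asr.transcribe_file("audio.wav")
#
# The wrapper below re-implements those steps manually so it can consume
# pre-loaded audio tensors from the feature-extraction dataloader.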
2 | 
3 | from dataclasses import dataclass
4 | from typing import Any, Dict
5 | 
6 | import torch
7 | from ego4d.features.config import BaseModelConfig, InferenceConfig
8 | from speechbrain.pretrained import EncoderDecoderASR
9 | from torch.nn import Module
10 | from torchvision.transforms import Compose
11 | 
12 | 
13 | @dataclass
14 | class ModelConfig(BaseModelConfig):
15 |     source: str = "speechbrain/asr-crdnn-transformerlm-librispeech"
16 |     savedir: str = "pretrained_models/asr-crdnn-transformerlm-librispeech"
17 | 
18 | 
19 | class WrapAsrModel(Module):
20 |     def __init__(self, model):
21 |         super().__init__()
22 |         self.model = model
23 | 
24 |     def forward(self, x) -> Dict[str, Any]:
25 |         if "audio" not in x:
26 |             return {}
27 |         assert len(x["audio_sample_rate"]) == 1
28 | 
29 |         # see
30 |         # https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/pretrained/interfaces.py#L545
31 |         # the docstring is outdated: use `audio_normalizer`, not `normalizer`
32 |         inp = self.model.audio_normalizer(
33 |             x["audio"].permute(1, 0), x["audio_sample_rate"].item()
34 |         )
35 | 
36 |         wavs = inp.unsqueeze(0)
37 |         wav_lens = torch.tensor([1.0])
38 |         # https://github.com/speechbrain/speechbrain/blob/598f6eda70f9b0c9ad49b393114ff483add1fd25/speechbrain/pretrained/interfaces.py#L595
39 |         with torch.no_grad():
40 |             wav_lens = wav_lens.to(self.model.device)
41 |             encoder_out = self.model.encode_batch(wavs, wav_lens)
42 |             predicted_tokens, scores = self.model.mods.decoder(encoder_out, wav_lens)
43 |             pred_words = [
44 |                 self.model.tokenizer.decode_ids(token_seq)
45 |                 for token_seq in predicted_tokens
46 |             ]
47 |             # or...
48 |             # pred_words, pred_tokens = self.model(wavs, wav_lens)
49 | 
50 |         return {
51 |             "text": pred_words[0] if len(pred_words[0]) > 0 else None,
52 |             "score": scores[0].item(),
53 |         }
54 | 
55 | 
56 | def load_model(
57 |     inference_config: InferenceConfig,
58 |     config: ModelConfig,
59 |     patch_final_layer: bool = True,
60 | ) -> Module:
61 |     if patch_final_layer:
62 |         print("WARNING: this model outputs text; patching the final layer is not supported")
63 |     model = EncoderDecoderASR.from_hparams(
64 |         source=config.source,
65 |         savedir=config.savedir,
66 |         run_opts={"device": inference_config.device},
67 |     )
68 |     return WrapAsrModel(model)
69 | 
70 | 
71 | def get_transform(inference_config: InferenceConfig, config: ModelConfig):
72 |     # do nothing
73 |     return Compose([])
74 | 
--------------------------------------------------------------------------------
/ego4d/features/profile_extract.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
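# This script benchmarks feature extraction and prints one CSV row per
# (prefetch_factor, batch_size, num_workers) combination, e.g. (the values
# below are illustrative only):
#
#     prefetch_factor,batch_size,num_workers,total,mean,forward_pass,to_load,transfer_to_device
#     2,1,2,118.2,0.21,0.14,0.06,0.01
#
# The per-stage means are divided by batch_size so rows measured at
# different batch sizes remain comparable per example.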
2 | 3 | import functools 4 | import time 5 | from typing import List, Optional, Tuple 6 | 7 | import hydra 8 | import torch 9 | from ego4d.features.config import FeatureExtractConfig, get_videos, load_model, Video 10 | from ego4d.features.extract_features import extract_features, num_fvs 11 | from ego4d.features.slurm import create_executor 12 | 13 | 14 | def profile_extraction(config: FeatureExtractConfig): 15 | videos, all_videos = get_videos(config) 16 | if videos: 17 | print( 18 | f"Video 0/{len(videos)} ({videos[0].frame_count} | {videos[0].path}): {videos[0]}" 19 | ) 20 | # videos = [v for v in all_videos if v.frame_count <= 500] 21 | videos = [v for v in all_videos if v.frame_count > 2000 and v.frame_count <= 10000] 22 | # videos = [v for v in all_videos if "f1bdf9f3-4f65-4c70-b8ba-b3d4607c0cff" in v.uid] 23 | videos = videos[1:2] 24 | print(videos) 25 | 26 | assert len(videos) > 0, "No videos to process!" 27 | 28 | print(f"Got {len(videos)} videos") 29 | 30 | batch_sizes = [1] 31 | num_workers = [2] 32 | prefetch_factor = [2] 33 | model = load_model(config) 34 | 35 | num_examples = -1 36 | 37 | print( 38 | "prefetch_factor,batch_size,num_workers,total,mean,forward_pass,to_load,transfer_to_device" # noqa 39 | ) 40 | for batch_size, nw, pf in zip(batch_sizes, num_workers, prefetch_factor): 41 | config.inference_config.batch_size = batch_size 42 | config.inference_config.num_workers = nw 43 | config.inference_config.prefetch_factor = pf 44 | 45 | t1 = time.time() 46 | time_stats = extract_features( 47 | videos=videos, 48 | config=config, 49 | model=model, 50 | log_info=False, 51 | max_examples=num_examples, 52 | silent=False, 53 | ).time_stats 54 | t2 = time.time() 55 | 56 | total_time = t2 - t1 57 | 58 | forward_pass = torch.Tensor(time_stats.forward_pass) 59 | to_load = torch.Tensor(time_stats.to_load) 60 | transfer_time = torch.Tensor(time_stats.transfer_device) 61 | 62 | assert len(forward_pass.shape) == 1 63 | assert len(to_load.shape) == 1 64 | assert len(transfer_time.shape) == 1 65 | 66 | mean_sum = forward_pass.mean() + to_load.mean() + transfer_time.mean() 67 | mean_sum /= max(1, batch_size) 68 | 69 | if num_examples > 0: 70 | assert forward_pass.shape[0] * batch_size == num_examples 71 | 72 | if batch_size == 0: 73 | print( 74 | f"{pf},{batch_size},{nw},{total_time},{mean_sum},{forward_pass.mean()},{to_load.mean()},{transfer_time.mean()}" # noqa 75 | ) 76 | else: 77 | print( 78 | f"{pf},{batch_size},{nw},{total_time},{mean_sum},{forward_pass.mean()/batch_size},{to_load.mean()/batch_size},{transfer_time.mean()/batch_size}" # noqa 79 | ) 80 | 81 | 82 | @hydra.main(config_path="configs", config_name=None) 83 | def schedule_profile_extraction(config: FeatureExtractConfig): 84 | if config.schedule_config.run_locally: 85 | profile_extraction(config) 86 | else: 87 | executor = create_executor(config.schedule_config) 88 | job = executor.submit(functools.partial(profile_extraction, config=config)) 89 | print(f"{job}") 90 | 91 | # wait for the job 92 | job.result() 93 | 94 | 95 | if __name__ == "__main__": 96 | schedule_profile_extraction() # pyre-ignore 97 | -------------------------------------------------------------------------------- /ego4d/features/visualize_dataloader.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
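# A smoke-test utility rather than part of the extraction pipeline: it
# samples a handful of videos, runs the extractor with `io.debug_mode`
# enabled so the decoded clips coming out of the dataloader can be
# inspected, and for audio models prints the per-window ASR output instead.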
2 | 
3 | import functools
4 | 
5 | import random
6 | 
7 | import hydra
8 | from ego4d.features.config import FeatureExtractConfig, get_videos, load_model
9 | from ego4d.features.extract_features import extract_features
10 | from ego4d.features.slurm import create_executor
11 | 
12 | 
13 | def visualize_extraction(config: FeatureExtractConfig):
14 |     random.seed(1337)
15 | 
16 |     is_audio_model = config.inference_config.include_audio
17 | 
18 |     _, videos = get_videos(config)
19 |     if is_audio_model:
20 |         videos = [v for v in videos if v.uid == "046855d7-41d7-4f41-a6a7-fce921ea8133"]
21 |     else:
22 |         videos = [v for v in videos if v.frame_count > 1000 and v.frame_count <= 2000]
23 |     random.shuffle(videos)
24 |     videos = videos[0:5]
25 | 
26 |     assert len(videos) > 0, "No videos to visualize!"
27 |     print("Frame count=", videos[0].frame_count)
28 |     print(f"Got {len(videos)} videos")
29 | 
30 |     config.io.debug_mode = not is_audio_model
31 |     model = load_model(config)
32 | 
33 |     output_features = extract_features(
34 |         videos=videos,
35 |         config=config,
36 |         model=model,
37 |         log_info=False,
38 |         max_examples=-1,
39 |         silent=True,
40 |         assert_feature_size=not is_audio_model,
41 |     )
42 |     if is_audio_model:
43 |         for video_uid, xs in output_features.result.items():
44 |             print(video_uid)
45 |             for x in xs:
46 |                 print(x)
47 |             print()
48 | 
49 | 
50 | @hydra.main(config_path="configs", config_name=None)
51 | def schedule_visualize_extraction(config: FeatureExtractConfig):
52 |     if config.schedule_config.run_locally:
53 |         visualize_extraction(config)
54 |     else:
55 |         executor = create_executor(config.schedule_config)
56 |         job = executor.submit(functools.partial(visualize_extraction, config=config))
57 |         print(f"{job}")
58 | 
59 |         # wait for the job
60 |         job.result()
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     schedule_visualize_extraction()  # pyre-ignore
65 | 
--------------------------------------------------------------------------------
/ego4d/internal/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/internal/__init__.py
--------------------------------------------------------------------------------
/ego4d/internal/colmap/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/internal/colmap/__init__.py
--------------------------------------------------------------------------------
/ego4d/internal/colmap/configs/cmu_example.yaml:
--------------------------------------------------------------------------------
1 | in_metadata_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/unc/T1/metadata.json"
2 | in_videos: null
3 | output_dir: "/private/home/miguelmartin/ego4d/ego4d_public/colmap_outputs"
4 | name: null
5 | aria_fps: 30
6 | exo_fps: null
7 | mobile_fps: null
8 | include_aria: true
9 | sync_exo_views: false
10 | aria_use_sync_info: false
11 | aria_walkthrough_start_sec: null
12 | aria_walkthrough_end_sec: null
13 | use_gpu: true
14 | rot_mode: 1
15 | camera_model: "OPENCV_FISHEYE"
16 | exo_from_frame: 200
17 | exo_to_frame: 300
18 | frame_rate: 0.25
19 | exo_frames: null
20 | mobile_frames: null
21 | aria_frames: null
22 | colmap_bin: null
23 | vrs_bin: null
24 | run_colmap: false
25 | force_download: false
26 | download_video_files: true
27 | take_id: null
28 | video_source: null
29 | 
--------------------------------------------------------------------------------
/ego4d/internal/colmap/configs/cmu_example_in_paths.yaml: -------------------------------------------------------------------------------- 1 | in_metadata_path: null 2 | in_videos: { 3 | cam01: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/cam01.mp4", 4 | cam02: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/cam02.mp4", 5 | cam03: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/cam03.mp4", 6 | cam04: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/cam04.mp4", 7 | mobile: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/mobile.mp4", 8 | aria01: "/private/home/miguelmartin/ego4d/ego4d_public/unc_T1_data/aria01.vrs", 9 | } 10 | output_dir: "/private/home/miguelmartin/ego4d/ego4d_public/colmap_outputs" 11 | name: null 12 | aria_fps: 30 13 | exo_fps: null 14 | mobile_fps: null 15 | include_aria: true 16 | sync_exo_views: false 17 | aria_use_sync_info: false 18 | aria_walkthrough_start_sec: 50.0 19 | aria_walkthrough_end_sec: 600.0 20 | use_gpu: true 21 | rot_mode: 1 22 | camera_model: "OPENCV_FISHEYE" 23 | exo_from_frame: 200 24 | exo_to_frame: 300 25 | frame_rate: 0.25 26 | exo_frames: null 27 | mobile_frames: null 28 | aria_frames: null 29 | colmap_bin: null 30 | vrs_bin: null 31 | run_colmap: false 32 | force_download: false 33 | download_video_files: true 34 | take_id: "T1" 35 | video_source: "unc" 36 | -------------------------------------------------------------------------------- /ego4d/internal/colmap/configs/cmu_frame_example.yaml: -------------------------------------------------------------------------------- 1 | in_metadata_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/unc/T1/metadata.json" 2 | in_videos: null 3 | take_id: null 4 | video_source: null 5 | output_dir: "/private/home/miguelmartin/ego4d/ego4d_public/colmap_outputs" 6 | name: null 7 | aria_fps: 30 8 | exo_fps: null 9 | mobile_fps: null 10 | include_aria: true 11 | sync_exo_views: false 12 | aria_use_sync_info: false 13 | aria_walkthrough_start_sec: null 14 | aria_walkthrough_end_sec: null 15 | rot_mode: 1 16 | camera_model: "OPENCV_FISHEYE" 17 | frame_rate: 0.5 18 | exo_from_frame: null 19 | exo_to_frame: null 20 | exo_frames: { 21 | cam01: [100, 200], 22 | cam02: [30, 20], 23 | cam03: [5, 10], 24 | cam04: [11, 50], 25 | } 26 | mobile_frames: 27 | - 1 28 | - 2 29 | - 3 30 | - 4 31 | aria_frames: { 32 | aria01: [100, 500] 33 | } 34 | run_colmap: false 35 | colmap_bin: null 36 | vrs_bin: null 37 | force_download: false 38 | download_video_files: true 39 | -------------------------------------------------------------------------------- /ego4d/internal/colmap/configs/paths_example.yaml: -------------------------------------------------------------------------------- 1 | in_metadata_path: null 2 | in_videos: { 3 | aria01: "s3://ego4d-penn/data/0303_Violin_2/ego/aria/c2e4b041-4e68-4b75-8338-f8c625429e75.vrs", 4 | cam01: "s3://ego4d-penn/data/0303_Violin_2/exo/gp01/GX010190.MP4", 5 | cam02: "s3://ego4d-penn/data/0303_Violin_2/exo/gp02/GX010175.MP4", 6 | cam03: "s3://ego4d-penn/data/0303_Violin_2/exo/gp03/GX010012.MP4", 7 | cam04: "s3://ego4d-penn/data/0303_Violin_2/exo/gp04/GX010195.MP4", 8 | mobile: "s3://ego4d-penn/data/0303_Violin_2/exo/mobile/GX010020.MP4", 9 | } 10 | output_dir: "/private/home/miguelmartin/ego4d/ego4d_public/colmap_outputs" 11 | name: null 12 | aria_fps: 30 13 | exo_fps: null 14 | mobile_fps: null 15 | include_aria: true 16 | sync_exo_views: false 17 | aria_use_sync_info: false 18 | aria_walkthrough_start_sec: 30.0 19 | 
aria_walkthrough_end_sec: 500.0 20 | use_gpu: true 21 | rot_mode: 1 22 | camera_model: "OPENCV_FISHEYE" 23 | exo_from_frame: 200 24 | exo_to_frame: 300 25 | frame_rate: 0.25 26 | exo_frames: null 27 | mobile_frames: null 28 | aria_frames: null 29 | colmap_bin: null 30 | vrs_bin: null 31 | run_colmap: false 32 | force_download: false 33 | download_video_files: true 34 | take_id: "0303_Violin_2" 35 | video_source: "penn" 36 | -------------------------------------------------------------------------------- /ego4d/internal/colmap/requirements.txt: -------------------------------------------------------------------------------- 1 | av==10.0.0 2 | boto3==1.26.59 3 | hydra-core==1.3.1 4 | ipykernel==6.20.2 5 | ipython==8.8.0 6 | jupyter==1.0.0 7 | matplotlib==3.7.0 8 | numpy==1.24.1 9 | omegaconf==2.3.0 10 | opencv-python==4.7.0.68 11 | tqdm==4.64.1 12 | pandas 13 | pycolmap 14 | -------------------------------------------------------------------------------- /ego4d/internal/download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/internal/download/__init__.py -------------------------------------------------------------------------------- /ego4d/internal/download/manifest.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from typing import List, Optional 3 | 4 | from dataclasses_json import dataclass_json 5 | 6 | 7 | @dataclass_json 8 | @dataclass(frozen=True) 9 | class PathSpecification: 10 | source_path: str 11 | relative_path: str 12 | views: Optional[List[str]] = field( 13 | default_factory=lambda: None, compare=False, hash=False 14 | ) 15 | universities: Optional[List[str]] = field( 16 | default_factory=lambda: None, compare=False, hash=False 17 | ) 18 | file_type: Optional[str] = field( 19 | default_factory=lambda: None, compare=False, hash=False 20 | ) 21 | size: Optional[int] = field(default_factory=lambda: None, compare=False, hash=False) 22 | checksum: Optional[str] = field( 23 | default_factory=lambda: None, compare=False, hash=False 24 | ) 25 | 26 | 27 | @dataclass_json 28 | @dataclass(frozen=True) 29 | class ManifestEntry: 30 | uid: str 31 | paths: List[PathSpecification] 32 | splits: Optional[List[str]] = field( 33 | default_factory=lambda: None, compare=False, hash=False 34 | ) 35 | benchmarks: Optional[List[str]] = field( 36 | default_factory=lambda: None, compare=False, hash=False 37 | ) 38 | 39 | 40 | def manifest_dumps(xs: List[ManifestEntry]) -> str: 41 | return ManifestEntry.schema().dumps(xs, many=True) # pyre-ignore 42 | 43 | 44 | def manifest_loads(data: str) -> List[ManifestEntry]: 45 | return ManifestEntry.schema().loads(data, many=True) # pyre-ignore 46 | -------------------------------------------------------------------------------- /ego4d/internal/downscale.py: -------------------------------------------------------------------------------- 1 | """ 2 | Performs downscaling of takes videos with the SLURM cluster. 3 | 4 | Credits to Santhosh Kumar Ramakrishnan for providing the original snippet this 5 | code originates from. 
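The ffmpeg scale filter used below resizes each video so that its short side
becomes 448 pixels while preserving the aspect ratio; for example, a 1920x1080
video becomes 796x448 (the "-2" specifier rounds the derived side to an even
value, as required by most encoders).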
6 | """ 7 | 8 | import datetime 9 | import json 10 | import math 11 | import os 12 | import subprocess as sp 13 | from concurrent.futures import ThreadPoolExecutor 14 | 15 | import submitit 16 | from ego4d.research.common import batch_it 17 | from tqdm.auto import tqdm 18 | 19 | 20 | ROOT_DIR = "/large_experiments/egoexo/v2/" 21 | DS_TAKES_DIR = "/checkpoint/miguelmartin/egoexo/v2/downscaled_takes/takes_by_uni" 22 | 23 | 24 | def call_ffmpeg(paths): 25 | src_path, tgt_path = paths 26 | assert os.path.exists(src_path) 27 | # https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html 28 | cmd = [ 29 | "ffmpeg", 30 | "-hide_banner", 31 | "-loglevel", 32 | "error", 33 | # TODO: try with cuda encoding for faster processing 34 | # '-hwaccel', 'cuda', 35 | # '-hwaccel_output_format', 'cuda', 36 | "-i", 37 | src_path, 38 | # This sweet conditional is thanks to ChatGPT :) 39 | "-vf", 40 | "scale=w=if(lt(iw\,ih)\,448\,-2):h=if(lt(iw\,ih)\,-2\,448)", # noqa 41 | # '-c:a', 'copy', 42 | # '-c:v', 'h264_nvenc', 43 | # '-b:v', '5M', 44 | tgt_path, 45 | "-y", 46 | ] 47 | print(" ".join(cmd)) 48 | os.makedirs(os.path.dirname(tgt_path), exist_ok=True) 49 | sp.call(cmd) 50 | 51 | 52 | def process_all(paths): 53 | map_fn = call_ffmpeg 54 | with ThreadPoolExecutor(5) as pool: 55 | for _ in tqdm( 56 | pool.map(map_fn, paths), total=len(paths), desc="Processing takes" 57 | ): 58 | continue 59 | 60 | 61 | def main(): 62 | num_machines: int = 50 63 | root_dir: str = ROOT_DIR 64 | ds_take_dir: str = DS_TAKES_DIR 65 | 66 | takes_to_process = json.load(open(os.path.join(root_dir, "takes.json"))) 67 | 68 | map_values = [] 69 | completed = 0 70 | num_vids = 0 71 | for take in takes_to_process: 72 | for _, streams in take["frame_aligned_videos"].items(): 73 | for _, stream in streams.items(): 74 | rel_path = stream["relative_path"] 75 | if rel_path is None: 76 | continue 77 | if stream["is_collage"]: 78 | continue 79 | src_path = os.path.join(root_dir, take["root_dir"], rel_path) 80 | dst_path = os.path.join( 81 | ds_take_dir, take["university_id"], take["take_name"], rel_path 82 | ) 83 | assert os.path.exists(src_path), src_path 84 | num_vids += 1 85 | if os.path.exists(dst_path): 86 | completed += 1 87 | continue 88 | map_values.append((src_path, dst_path)) 89 | 90 | print( 91 | f"# videos to process: {len(map_values)} / {num_vids} [{completed} / {1 - (len(map_values) / num_vids):.2%} completed]" 92 | ) 93 | job_inputs = batch_it( 94 | map_values, batch_size=math.ceil(len(map_values) / num_machines) 95 | ) 96 | num_machines = min(num_machines, len(job_inputs)) 97 | 98 | dt_now = datetime.datetime.now().strftime("%y%m%d_%H%M") 99 | log_dir = f"downscale/{dt_now}" 100 | print(f"Logging to: {log_dir}") 101 | os.makedirs(log_dir, exist_ok=True) 102 | executor = submitit.AutoExecutor(folder=log_dir) 103 | 104 | executor.update_parameters( 105 | timeout_min=3200, 106 | slurm_array_parallelism=num_machines, 107 | slurm_constraint="volta", 108 | slurm_partition="eht", 109 | gpus_per_node=1, 110 | cpus_per_task=10, 111 | ) 112 | jobs = executor.map_array(process_all, job_inputs) 113 | 114 | print("Waiting...") 115 | results = [] 116 | for job in tqdm(jobs): 117 | results.append(job.result()) 118 | print("Done") 119 | 120 | 121 | if __name__ == "__main__": 122 | main() 123 | -------------------------------------------------------------------------------- /ego4d/internal/expert_commentary/extract.py: -------------------------------------------------------------------------------- 1 | import 
json
2 | import os
3 | import shutil
4 | import tempfile
5 | import zipfile
6 | from typing import List
7 | 
8 | from tqdm.auto import tqdm
9 | 
10 | RAW_COMMENTARY_ROOT = (
11 |     "/checkpoint/miguelmartin/expert_commentary/do_not_use/240207/raw_data_do_not_sync"
12 | )
13 | RAW_EXTRACTED_COMM_ROOT = (
14 |     "/checkpoint/miguelmartin/expert_commentary/do_not_use/240207/raw_data_extracted"
15 | )
16 | 
17 | 
18 | def extract_commentaries(
19 |     input_dir: str = RAW_COMMENTARY_ROOT,
20 |     output_dir: str = RAW_EXTRACTED_COMM_ROOT,
21 | ):
22 |     """
23 |     NOTE: INTERNAL USAGE ONLY
24 |     """
25 |     seen_dirs = {}
26 |     merge_dirs = 0
27 |     bad_dirs = []
28 |     for root, _, files in tqdm(os.walk(input_dir)):
29 |         if "merge" in root.lower():
30 |             merge_dirs += 1
31 |         for file in files:
32 |             with tempfile.TemporaryDirectory() as tempdir:
33 |                 if file == "data.json":  # already extracted
34 |                     extract_path = root
35 |                 elif file.endswith(".zip"):  # in expected zip form
36 |                     zip_path = os.path.join(root, file)
37 |                     extract_path = f"{tempdir}/temp"
38 |                     try:
39 |                         with zipfile.ZipFile(zip_path, "r") as zip_ref:
40 |                             zip_ref.extractall(extract_path)
41 |                     except zipfile.BadZipfile:
42 |                         print(f"file: {file} is not a zip")
43 |                         bad_dirs.append(file)
44 |                         continue
45 |                 else:
46 |                     continue
47 | 
48 |                 data = json.load(open(os.path.join(extract_path, "data.json")))
49 |                 copy_to_path = None
50 |                 for i in range(100):
51 |                     uid_path = f"{data['user_id']}_{data['video_name']}_{i}"
52 |                     copy_to_path = os.path.join(output_dir, uid_path)
53 |                     if copy_to_path not in seen_dirs:
54 |                         break
55 |                 assert copy_to_path and copy_to_path not in seen_dirs
56 |                 seen_dirs[copy_to_path] = True
57 |                 if os.path.exists(copy_to_path):
58 |                     shutil.rmtree(copy_to_path)
59 |                 shutil.copytree(extract_path, copy_to_path)
60 |     return bad_dirs, merge_dirs
61 | 
62 | 
63 | def load_all_raw_commentaries(raw_extracted_dir: str) -> List[str]:
64 |     result = [os.path.join(raw_extracted_dir, x) for x in os.listdir(raw_extracted_dir)]
65 |     return sorted(result)
66 | 
67 | 
68 | def load_uniq_commentaries(raw_extracted_dir: str) -> List[str]:
69 |     result = {}
70 |     for root in tqdm(os.listdir(raw_extracted_dir)):
71 |         data = json.load(open(os.path.join(raw_extracted_dir, root, "data.json")))
72 |         key = (data["user_id"], data["video_name"])
73 |         if key in result:
74 |             ds_curr = result[key]["data"]["ds"]
75 |             if ds_curr < data["ds"]:
76 |                 result[key] = {
77 |                     "dir": os.path.join(raw_extracted_dir, root),
78 |                     "data": data,
79 |                 }
80 |         else:
81 |             result[key] = {
82 |                 "dir": os.path.join(raw_extracted_dir, root),
83 |                 "data": data,
84 |             }
85 |     return [v["dir"] for v in result.values()]
86 | 
87 | 
88 | if __name__ == "__main__":
89 |     bad_dirs, merges = extract_commentaries()
90 |     print("Bad dirs= ", len(bad_dirs))
91 |     print("Merge dirs= ", merges)
92 | 
--------------------------------------------------------------------------------
/ego4d/internal/human_pose/README.md:
--------------------------------------------------------------------------------
1 | # Human Pose Estimation Pipeline
2 | 
3 | Steps:
4 | 1. Preprocess egoexo data
5 |     - download files
6 |     - frame extraction
7 |         - aria frame extraction
8 |         - exo camera frame extraction
9 |     - paths + camera calibrations to JSON
10 | 2. Obtain human bounding boxes
11 |     - Uses the Aria trajectory + FasterRCNN + heuristics
12 | 3. 2D Pose Detection
13 | 4. Triangulation
14 | 5. Smoothing
15 | 
16 | ## Setup
17 | 
18 | ### Dependencies
19 | 
20 | We depend on @rawalkhirodkar's fork of mmlab and related repositories.
This 21 | will be installed with the `requirements.txt` via pip. If you want to manually 22 | install it, you can follow the instructions below. 23 | 24 | ```bash 25 | mkdir tp 26 | pushd tp 27 | git clone git@github.com:rawalkhirodkar/mmlab.git 28 | popd 29 | 30 | pushd tp/mmlab/mmcv 31 | pip install -r requirements/optional.txt 32 | MMCV_WITH_OPS=1 pip install -e . -v 33 | popd 34 | 35 | pushd tp/mmlab/mmpose 36 | pip install . 37 | popd 38 | 39 | pushd tp/mmlab/mmdetection 40 | pip install . 41 | popd 42 | 43 | pip install "torch>=2.0.0" 44 | ``` 45 | NOTE: python3.9 needed due to pycolmap, see: https://github.com/colmap/pycolmap#getting-started 46 | 47 | ### Install 48 | 49 | ```bash 50 | cd ego4d/internal/human_pose/ 51 | conda create -n human_pose39 python=3.9 -y 52 | conda activate human_pose39 53 | pip install -r requirements.txt 54 | pip install --upgrade numpy 55 | ``` 56 | 57 | Alternatively there is an install script located under 58 | `ego4d/internal/human_pose/scripts/_install/conda.sh` maintained by 59 | @rawalkhirodkar. 60 | 61 | ### Notes for the FAIR cluster 62 | On the FAIR cluster you will have to load CUDA and CUDNN before installing, via: 63 | 64 | ```bash 65 | module load cuda/11.2 cudnn/v8.1.1.33-cuda.11.0 66 | ``` 67 | 68 | ## Usage 69 | 70 | Configurations are written with Hydra/OmegaConf as YAML files. New 71 | configurations must be added to `ego4d/internal/human_pose/configs`. 72 | 73 | ### Run Pipeline 74 | 75 | Please ensure you have run the setup step first. From the repository root: 76 | 77 | ```bash 78 | python3 ego4d/internal/human_pose/main.py --config-name unc_T1 mode=preprocess repo_root_dir=$PWD 79 | python3 ego4d/internal/human_pose/main.py --config-name unc_T1 mode=bbox repo_root_dir=$PWD 80 | python3 ego4d/internal/human_pose/main.py --config-name unc_T1 mode=pose2d repo_root_dir=$PWD 81 | ``` 82 | 83 | # TODOs 84 | - [ ] TODO: scale horizontally / run via SLURM 85 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/bbox_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | from mmdet.apis import inference_detector, init_detector 6 | 7 | from mmpose.apis import ( 8 | inference_top_down_pose_model, 9 | init_pose_model, 10 | process_mmdet_results, 11 | vis_pose_result, 12 | ) 13 | from mmpose.core.bbox.transforms import ( 14 | bbox_cs2xywh, 15 | bbox_xywh2cs, 16 | bbox_xywh2xyxy, 17 | bbox_xyxy2xywh, 18 | ) 19 | from mmpose.datasets import DatasetInfo 20 | from tqdm import tqdm 21 | 22 | 23 | ##------------------------------------------------------------------------------------ 24 | class DetectorModel: 25 | def __init__( 26 | self, detector_config=None, detector_checkpoint=None, min_bbox_score=0.7 27 | ): 28 | self.detector_config = detector_config 29 | self.detector_checkpoint = detector_checkpoint 30 | self.detector = init_detector( 31 | self.detector_config, self.detector_checkpoint, device="cuda:0".lower() 32 | ) 33 | self.min_bbox_score = min_bbox_score 34 | 35 | ## iou_threshold: the threshold to decide whether to use the offshelf bbox or not 36 | def get_bboxes(self, image_name, bboxes, iou_threshold=0.3): 37 | det_results = inference_detector(self.detector, image_name) 38 | person_results = process_mmdet_results( 39 | det_results, 1 40 | ) # keep the person class bounding boxes. 
41 |         person_results = [
42 |             bbox for bbox in person_results if bbox["bbox"][4] > self.min_bbox_score
43 |         ]
44 | 
45 |         refined_bboxes = bboxes.copy()
46 |         is_offshelf_valid = [True] * len(person_results)
47 | 
48 |         ## iterate over the aria bboxes
49 |         for i, bbox in enumerate(refined_bboxes):
50 |             max_iou = 0
51 |             max_iou_offshelf_bbox = None
52 |             max_iou_index = -1
53 | 
54 |             for j, offshelf_bbox in enumerate(person_results):
55 |                 if is_offshelf_valid[j]:
56 |                     iou = self.bb_intersection_over_union(
57 |                         boxA=bbox["bbox"], boxB=offshelf_bbox["bbox"]
58 |                     )
59 | 
60 |                     if iou > max_iou:
61 |                         max_iou = iou
62 |                         max_iou_offshelf_bbox = offshelf_bbox["bbox"]
63 |                         max_iou_index = j
64 |             if max_iou > iou_threshold:
65 |                 refined_bboxes[i]["bbox"] = max_iou_offshelf_bbox
66 |                 is_offshelf_valid[max_iou_index] = False
67 | 
68 |         return refined_bboxes
69 | 
70 |     def bb_intersection_over_union(self, boxA, boxB):
71 |         # determine the (x, y)-coordinates of the intersection rectangle
72 |         xA = max(boxA[0], boxB[0])
73 |         yA = max(boxA[1], boxB[1])
74 |         xB = min(boxA[2], boxB[2])
75 |         yB = min(boxA[3], boxB[3])
76 | 
77 |         # compute the area of intersection rectangle
78 |         interArea = max(xB - xA, 0) * max(yB - yA, 0)
79 |         if interArea == 0:
80 |             return 0
81 |         # compute the area of both the prediction and ground-truth
82 |         # rectangles
83 |         boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
84 |         boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
85 | 
86 |         # compute the intersection over union by taking the intersection
87 |         # area and dividing it by the sum of prediction + ground-truth
88 |         # areas - the intersection area
89 |         iou = interArea / float(boxAArea + boxBArea - interArea)
90 | 
91 |         # return the intersection over union value
92 |         return iou
--------------------------------------------------------------------------------
/ego4d/internal/human_pose/config.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass
2 | from typing import Dict, List, Optional
3 | 
4 | 
5 | @dataclass
6 | class Input:
7 |     from_frame_number: int
8 |     to_frame_number: int
9 |     sample_interval: int
10 |     subclip_json_dir: Optional[str]
11 |     # min_subclip_length:
12 |     # example: suppose min_subclip_length=120 (unit: frames) and subclip=[30, 80],
13 |     # it will extend the subclip to [30, 150]. This could be useful for some actions
14 |     # that take more than 2 seconds (60 frames) so that we sample sufficient frames
15 |     # for the action.
16 | # see function `calculate_frame_selection` for how it's used 17 | min_subclip_length: int 18 | take_name: Optional[str] 19 | take_uid: Optional[str] 20 | capture_root_dir: Optional[str] 21 | metadata_json_path: Optional[str] 22 | aria_trajectory_dir: Optional[str] 23 | exo_trajectory_dir: Optional[str] 24 | aria_streams: List[str] 25 | exo_timesync_name_to_calib_name: Optional[Dict[str, str]] 26 | 27 | 28 | @dataclass 29 | class Output: 30 | # storage_level: 31 | # 0 is minimum storage, meaning most stuff shall be cleaned up after the job is done 32 | # at the cost of losing debugging information; 33 | # the higher the level is, the more disk storage we are allow to use 34 | # (e.g., for debugging purpose) 35 | storage_level: int 36 | 37 | 38 | @dataclass 39 | class PreprocessFrameConfig: 40 | dataset_name: str 41 | vrs_bin_path: str 42 | extract_all_aria_frames: bool 43 | extract_frames: bool 44 | download_video_files: bool 45 | force_download: bool 46 | 47 | 48 | @dataclass 49 | class BBoxConfig: 50 | detector_model_config: Optional[str] 51 | detector_model_checkpoint: Optional[str] 52 | use_aria_trajectory: bool 53 | human_height: float 54 | human_radius: float 55 | min_bbox_score: float 56 | min_area_ratio: float 57 | 58 | 59 | @dataclass 60 | class PoseEstimationConfig: 61 | pose_config: str 62 | pose_checkpoint: str 63 | dummy_pose_config: str 64 | dummy_pose_checkpoint: str 65 | 66 | 67 | @dataclass 68 | class Pose3DConfig: 69 | min_body_kpt2d_conf: float 70 | 71 | 72 | @dataclass 73 | class TriangulationConfig: 74 | pass 75 | 76 | 77 | @dataclass 78 | class Config: 79 | legacy: bool 80 | data_dir: str 81 | cache_root_dir: str 82 | repo_root_dir: str 83 | gpu_id: int # use -1 for CPU 84 | mode: str 85 | inputs: Input 86 | outputs: Output 87 | mode_preprocess: PreprocessFrameConfig 88 | mode_bbox: BBoxConfig 89 | mode_pose2d: PoseEstimationConfig 90 | mode_pose3d: Pose3DConfig 91 | mode_triangulate: TriangulationConfig 92 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/cmu_soccer.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "colmap_experiments/" 3 | gpu_id: 0 4 | mode: "preprocess" 5 | inputs: 6 | from_frame_number: 12000 7 | to_frame_number: 12599 8 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/cmu/take_0/metadata.json" 9 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/cmu_soccer_nopointcloud/closed_loop_trajectory.csv" 10 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/cmu_soccer_nopointcloud/gopro_calibs.csv" 11 | aria_streams: 12 | - 214-1 13 | - 1201-1 14 | - 1201-2 15 | exo_timesync_name_to_calib_name: 16 | cam01: "cam01" 17 | cam02: "cam02" 18 | cam03: "cam03" 19 | cam04: "cam04" 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 | vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_config: "tp/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 28 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 29 | use_aria_trajectory: true 30 | human_height: 1.5 31 | mode_pose2d: 32 | pose_config: 
"tp/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 33 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 34 | dummy_pose_config: 'tp/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 35 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 36 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/cmu_soccer_rawal.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "/home/rawalk/Desktop/datasets/ego4d_data" 3 | 4 | gpu_id: 0 5 | 6 | mode: "preprocess" 7 | inputs: 8 | from_frame_number: 12000 9 | to_frame_number: 12599 10 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/cmu/take_0/metadata.json" 11 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/cmu_soccer_nopointcloud/closed_loop_trajectory.csv" 12 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/cmu_soccer_nopointcloud/gopro_calibs.csv" 13 | aria_streams: 14 | - 214-1 15 | - 1201-1 16 | - 1201-2 17 | exo_timesync_name_to_calib_name: 18 | cam01: "cam01" 19 | cam02: "cam02" 20 | cam03: "cam03" 21 | cam04: "cam04" 22 | 23 | mode_preprocess: 24 | dataset_name: "dataset" 25 | vrs_bin_path: "vrs" 26 | download_video_files: true 27 | force_download: false 28 | extract_all_aria_frames: false 29 | 30 | mode_bbox: 31 | detector_config: "ego4d/internal/human_pose/external/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 32 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 33 | use_aria_trajectory: true 34 | human_height: 1.5 35 | human_radius: 0.3 36 | min_bbox_score: 0.7 37 | min_area_ratio: 0.002 38 | 39 | mode_pose2d: 40 | pose_config: "ego4d/internal/human_pose/external/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 41 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 42 | dummy_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 43 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 44 | 45 | mode_pose3d: 46 | start_frame: 0 47 | end_frame: -1 48 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/dev_release_base.yaml: -------------------------------------------------------------------------------- 1 | legacy: false 2 | repo_root_dir: null 3 | data_dir: "/checkpoint/miguelmartin/egoexo_data/dev/" 4 | cache_root_dir: "/checkpoint/miguelmartin/egoexo_data/dev/" 5 | gpu_id: 0 6 | mode: "preprocess" 7 | inputs: 8 | from_frame_number: 0 9 | to_frame_number: null 10 | sample_interval: 1 11 | subclip_json_dir: null 12 | min_subclip_length: 120 13 | take_name: "uniandes_dance_007_3" 14 | take_uid: null 15 | metadata_json_path: null 16 | aria_trajectory_path: null 17 | exo_trajectory_path: 
null 18 | aria_streams: 19 | - rgb 20 | - slam-left 21 | - slam-right 22 | exo_timesync_name_to_calib_name: null 23 | outputs: 24 | storage_level: 30 25 | mode_preprocess: 26 | dataset_name: "dataset" 27 | vrs_bin_path: "vrs" 28 | download_video_files: true 29 | force_download: false 30 | extract_all_aria_frames: false 31 | extract_frames: true 32 | mode_bbox: 33 | detector_config: "tp/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 34 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 35 | use_aria_trajectory: true 36 | human_height: 1.5 37 | human_radius: 0.3 38 | min_bbox_score: 0.7 39 | min_area_ratio: 0.005 40 | mode_pose2d: 41 | pose_config: "tp/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 42 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 43 | dummy_pose_config: 'tp/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 44 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 45 | hand_pose_config: 'tp/mmlab/mmpose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py' 46 | hand_pose_ckpt: 'https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth' 47 | mode_pose3d: 48 | min_body_kpt2d_conf: 0.6 49 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iiith_cooking01.yaml: -------------------------------------------------------------------------------- 1 | root_dir: "/private/home/miguelmartin/ego4d/ego4d_public/colmap_experiments/" 2 | gpu_id: 0 3 | mode: "preprocess" 4 | inputs: 5 | from_frame_number: 5000 6 | to_frame_number: 5600 7 | capture_root_dir: "s3://ego4d-consortium-sharing/internal/egoexo_phaseone/iiith/cooking_01/" 8 | metadata_json_path: null 9 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iiith_cooking_01-0404/closed_loop_trajectory.csv" 10 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iiith_cooking_01-0404/gopro_calibrations.csv" 11 | aria_streams: 12 | - 214-1 13 | - 1201-1 14 | - 1201-2 15 | exo_timesync_name_to_calib_name: 16 | cam01: cam01 17 | cam02: cam02 18 | cam03: cam03 19 | cam04: cam04 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 | vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_model_config: "/private/home/miguelmartin/ego4d/ego4d_public/tp/mmdetection/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py" 28 | detector_model_checkpoint: "https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth" 29 | use_aria_trajectory: true 30 | mode_pose_estimation: 31 | pose_model_config: "/private/home/miguelmartin/ego4d/ego4d_public/tp/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 32 | pose_model_checkpoint: 
"https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 33 | mode_triangulate: 34 | todo: abc 35 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iiith_cooking_01_1.yaml: -------------------------------------------------------------------------------- 1 | legacy: false 2 | repo_root_dir: "/home/jinxu/code/Ego4d" 3 | data_dir: "/media/jinxu/New\ Volume/ego4dData" 4 | cache_root_dir: "/media/jinxu/New\ Volume/ego4dData" 5 | gpu_id: 0 6 | mode: "preprocess" 7 | inputs: 8 | from_frame_number: 5000 9 | to_frame_number: 5100 10 | take_name: "iiith_cooking_01_1" 11 | take_uid: null 12 | metadata_json_path: null 13 | aria_trajectory_path: null 14 | exo_trajectory_path: null 15 | aria_streams: 16 | - rgb 17 | - slam-left 18 | - slam-right 19 | exo_timesync_name_to_calib_name: null 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 | vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_config: "ego4d/internal/human_pose/external/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 28 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 29 | use_aria_trajectory: true 30 | human_height: 1.5 31 | human_radius: 0.3 32 | min_bbox_score: 0.7 33 | min_area_ratio: 0.005 34 | mode_pose2d: 35 | pose_config: "ego4d/internal/human_pose/external/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 36 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 37 | dummy_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 38 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 39 | hand_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/hand/2d_kpt_sview_rgb_img/topdown_heatmap/coco_wholebody_hand/hrnetv2_w18_coco_wholebody_hand_256x256_dark.py' 40 | hand_pose_ckpt: 'https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth' 41 | 42 | mode_pose3d: 43 | start_frame: 0 44 | end_frame: -1 45 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iu_bike.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "colmap_experiments/" 3 | gpu_id: 0 4 | mode: "preprocess" 5 | inputs: 6 | from_frame_number: 7000 7 | to_frame_number: 7599 8 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/iu/bike/metadata.json" 9 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_bike_nopointcloud/closed_loop_trajectory.csv" 10 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_bike_nopointcloud/gopro_calibs.csv" 11 | aria_streams: 12 | - 214-1 13 | - 1201-1 14 | - 1201-2 15 | exo_timesync_name_to_calib_name: 16 | cam01: "GX010017" 17 | cam02: "GX010005" 18 | cam03: "GX010007" 19 | cam04: "GX010009" 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 
| vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_config: "tp/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 28 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 29 | use_aria_trajectory: true 30 | human_height: 1.5 31 | mode_pose2d: 32 | pose_config: "tp/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 33 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 34 | dummy_pose_config: 'tp/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 35 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 36 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iu_bike_rawal.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "/home/rawalk/Desktop/datasets/ego4d_data" 3 | gpu_id: 0 4 | 5 | mode: "preprocess" 6 | exo_camera_name: "cam01" 7 | 8 | inputs: 9 | from_frame_number: 7000 10 | to_frame_number: 7599 11 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/iu/bike/metadata.json" 12 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_bike_nopointcloud/closed_loop_trajectory.csv" 13 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_bike_nopointcloud/gopro_calibs.csv" 14 | aria_streams: 15 | - 214-1 16 | - 1201-1 17 | - 1201-2 18 | exo_timesync_name_to_calib_name: 19 | cam01: "GX010017" 20 | cam02: "GX010005" 21 | cam03: "GX010007" 22 | cam04: "GX010009" 23 | 24 | mode_preprocess: 25 | dataset_name: "dataset" 26 | vrs_bin_path: "vrs" 27 | download_video_files: true 28 | force_download: false 29 | extract_all_aria_frames: false 30 | 31 | mode_bbox: 32 | detector_config: "ego4d/internal/human_pose/external/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 33 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 34 | use_aria_trajectory: true 35 | human_height: 1.5 36 | human_radius: 0.3 37 | min_bbox_score: 0.7 38 | min_area_ratio: 0.005 39 | 40 | mode_pose2d: 41 | pose_config: "ego4d/internal/human_pose/external/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 42 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 43 | 44 | dummy_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 45 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 46 | 47 | mode_pose3d: 48 | start_frame: 0 49 | end_frame: -1 50 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iu_music.yaml: 
-------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "colmap_experiments/" 3 | gpu_id: 0 4 | mode: "preprocess" 5 | inputs: 6 | from_frame_number: 7000 7 | to_frame_number: 7599 8 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/iu/music/metadata.json" 9 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_music_nopointcloud/closed_loop_trajectory.csv" 10 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_music_nopointcloud/gopro_calibs.csv" 11 | aria_streams: 12 | - 214-1 13 | - 1201-1 14 | - 1201-2 15 | exo_timesync_name_to_calib_name: 16 | cam01: "GX010020" 17 | cam02: "GX010006" 18 | cam03: "GX010008" 19 | cam04: null 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 | vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_config: "tp/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 28 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 29 | use_aria_trajectory: true 30 | human_height: 1.5 31 | mode_pose2d: 32 | pose_config: "tp/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 33 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 34 | dummy_pose_config: 'tp/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 35 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 36 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/iu_music_rawal.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "/home/rawalk/Desktop/datasets/ego4d_data" 3 | 4 | gpu_id: 0 5 | 6 | mode: "preprocess" 7 | inputs: 8 | from_frame_number: 7000 9 | to_frame_number: 7599 10 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/iu/music/metadata.json" 11 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_music_nopointcloud/closed_loop_trajectory.csv" 12 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/iu_music_nopointcloud/gopro_calibs.csv" 13 | aria_streams: 14 | - 214-1 15 | - 1201-1 16 | - 1201-2 17 | exo_timesync_name_to_calib_name: 18 | cam01: "GX010020" 19 | cam02: "GX010006" 20 | cam03: "GX010008" 21 | cam04: null 22 | 23 | mode_preprocess: 24 | dataset_name: "dataset" 25 | vrs_bin_path: "vrs" 26 | download_video_files: true 27 | force_download: false 28 | extract_all_aria_frames: false 29 | 30 | mode_bbox: 31 | detector_config: "ego4d/internal/human_pose/external/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 32 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 33 | use_aria_trajectory: true 34 | human_height: 0.6 35 | human_radius: 0.3 36 | min_bbox_score: 0.7 37 | min_area_ratio: 0.005 38 | 39 | mode_pose2d: 40 | pose_config: 
"ego4d/internal/human_pose/external/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 41 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 42 | dummy_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 43 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 44 | 45 | mode_pose3d: 46 | start_frame: 0 47 | end_frame: -1 48 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/unc_T1.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "colmap_experiments/" 3 | gpu_id: 0 4 | mode: "preprocess" 5 | inputs: 6 | from_frame_number: 7000 7 | to_frame_number: 7599 8 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/unc/T1/metadata.json" 9 | aria_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/unc_t1/multi_closed_loop_trajectory_framerate_0.csv" 10 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/unc_t1/gopro_calibs.csv" 11 | aria_streams: 12 | - 214-1 13 | - 1201-1 14 | - 1201-2 15 | exo_timesync_name_to_calib_name: 16 | cam01: "/home/jingdong/data/egoexo/unc_t1/raw/gopro1_%09d.jpg" 17 | cam02: "/home/jingdong/data/egoexo/unc_t1/raw/gopro2_%09d.jpg" 18 | cam03: "/home/jingdong/data/egoexo/unc_t1/raw/gopro3_%09d.jpg" 19 | cam04: "/home/jingdong/data/egoexo/unc_t1/raw/gopro4_%09d.jpg" 20 | mode_preprocess: 21 | dataset_name: "dataset" 22 | vrs_bin_path: "vrs" 23 | download_video_files: true 24 | force_download: false 25 | extract_all_aria_frames: false 26 | mode_bbox: 27 | detector_config: "tp/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 28 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 29 | use_aria_trajectory: true 30 | human_height: 1.5 31 | human_radius: 0.3 32 | min_bbox_score: 0.7 33 | mode_pose2d: 34 | pose_config: "tp/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 35 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 36 | dummy_pose_config: 'tp/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 37 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 38 | mode_pose3d: 39 | start_frame: 0 40 | end_frame: -1 41 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/configs/unc_T1_rawal.yaml: -------------------------------------------------------------------------------- 1 | repo_root_dir: null 2 | data_dir: "/home/rawalk/Desktop/datasets/ego4d_data" 3 | 4 | gpu_id: 0 5 | 6 | mode: "preprocess" 7 | inputs: 8 | from_frame_number: 7000 9 | to_frame_number: 7599 10 | metadata_json_path: "s3://ego4d-consortium-sharing/internal/egoexo_pilot/unc/T1/metadata.json" 11 | aria_trajectory_path: 
"s3://ego4d-consortium-sharing/internal/temple3_egoexo/unc_t1/multi_closed_loop_trajectory_framerate_0.csv" 12 | exo_trajectory_path: "s3://ego4d-consortium-sharing/internal/temple3_egoexo/unc_t1/gopro_calibs.csv" 13 | aria_streams: 14 | - 214-1 15 | - 1201-1 16 | - 1201-2 17 | exo_timesync_name_to_calib_name: 18 | cam01: "/home/jingdong/data/egoexo/unc_t1/raw/gopro1_%09d.jpg" 19 | cam02: "/home/jingdong/data/egoexo/unc_t1/raw/gopro2_%09d.jpg" 20 | cam03: "/home/jingdong/data/egoexo/unc_t1/raw/gopro3_%09d.jpg" 21 | cam04: "/home/jingdong/data/egoexo/unc_t1/raw/gopro4_%09d.jpg" 22 | 23 | 24 | mode_preprocess: 25 | dataset_name: "dataset" 26 | vrs_bin_path: "vrs" 27 | download_video_files: true 28 | force_download: false 29 | extract_all_aria_frames: false 30 | 31 | mode_bbox: 32 | detector_config: "ego4d/internal/human_pose/external/mmlab/mmpose/demo/mmdetection_cfg/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py" 33 | detector_checkpoint: 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth' 34 | use_aria_trajectory: true 35 | human_height: 1.5 36 | human_radius: 0.3 37 | min_bbox_score: 0.7 38 | min_area_ratio: 0.005 39 | 40 | mode_pose2d: 41 | pose_config: "ego4d/internal/human_pose/external/mmlab/mmpose/configs/wholebody/2d_kpt_sview_rgb_img/topdown_heatmap/coco-wholebody/hrnet_w48_coco_wholebody_384x288_dark_plus.py" 42 | pose_checkpoint: "https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth" 43 | dummy_pose_config: 'ego4d/internal/human_pose/external/mmlab/mmpose/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/hrnet_w32_coco_256x192.py' 44 | dummy_pose_checkpoint: 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth' 45 | 46 | mode_pose3d: 47 | start_frame: 0 48 | end_frame: -1 49 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Any, Dict, List, Optional 4 | 5 | import cv2 6 | import pandas as pd 7 | 8 | from iopath.common.file_io import PathManager 9 | from iopath.common.s3 import S3PathHandler 10 | 11 | pathmgr = PathManager() 12 | pathmgr.register_handler(S3PathHandler(profile="default")) 13 | 14 | 15 | def _get_synced_timesync_df(timesync_df): 16 | ks = [k for k in timesync_df.keys() if "_global_time" in k] 17 | start_indices = [timesync_df[k].first_valid_index() for k in ks] 18 | last_indices = [timesync_df[k].last_valid_index() for k in ks] 19 | first_idx = max(start_indices) 20 | last_idx = min(last_indices) 21 | return timesync_df.iloc[first_idx : last_idx + 1] 22 | 23 | 24 | def _get_accurate_timestamps(aria_path, stream_id): 25 | # NOTE if used with pyark datatools, pyvrs will cause a segfault 26 | from pyvrs import SyncVRSReader 27 | 28 | vrs_r = SyncVRSReader(aria_path, auto_read_configuration_records=True) 29 | rgb_stream = vrs_r.filtered_by_fields(stream_ids={stream_id}, record_types="data") 30 | 31 | return {idx: f.timestamp * 1e6 for idx, f in enumerate(rgb_stream)} 32 | 33 | 34 | def get_synced_timesync_df(metadata_json): 35 | timesync_df = pd.read_csv(pathmgr.open(metadata_json["timesync_csv_path"])) 36 | return _get_synced_timesync_df(timesync_df) 37 | 38 | 39 | # TODO: changeme to support a dynamic dataset, similar to what is 
present in notebook 40 | class SyncedEgoExoCaptureDset: 41 | def __init__( 42 | self, data_dir: str, dataset_json_path: str, read_frames: bool, legacy=False 43 | ): 44 | self.dataset_json = json.load(open(dataset_json_path)) 45 | self.read_frames = read_frames 46 | self.root_dir = data_dir 47 | if legacy: 48 | self.frame_dir = os.path.join( 49 | self.root_dir, self.dataset_json["dataset_dir"], "frames" 50 | ) 51 | else: 52 | self.frame_dir = os.path.join(self.root_dir, self.dataset_json["frame_dir"]) 53 | 54 | def __getitem__(self, idx): 55 | row = self.dataset_json["frames"][idx] 56 | for cam_id in row.keys(): 57 | # transform path to be absolute 58 | frame_path = os.path.join(self.frame_dir, row[cam_id]["frame_path"]) 59 | row[cam_id]["abs_frame_path"] = frame_path 60 | if self.read_frames: 61 | row[cam_id]["frame"] = cv2.imread(frame_path) 62 | 63 | return row 64 | 65 | def all_cam_ids(self): 66 | return self.dataset_json["frames"][0].keys() 67 | 68 | def __len__(self): 69 | return len(self.dataset_json["frames"]) 70 | 71 | 72 | class FrameDirDset: 73 | def __init__(self, directory: str, read_file: bool = True, ext: str = ".jpg"): 74 | frames = [f for f in os.listdir(directory) if f.endswith(ext)] 75 | frames = sorted(frames, key=lambda x: int(x.split(".")[0])) 76 | self.frame_paths = [os.path.join(directory, f) for f in frames] 77 | self.read_file = read_file 78 | self.directory = directory 79 | 80 | def __getitem__(self, idx): 81 | if self.read_file: 82 | return cv2.imread(self.frame_paths[idx]) 83 | return self.frame_paths[idx] 84 | 85 | def __len__(self): 86 | return len(self.frame_paths) 87 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/launch_main.py: -------------------------------------------------------------------------------- 1 | import ego4d.internal.human_pose.main as task 2 | from ego4d.internal.utils.launch_utils import run_task 3 | 4 | if __name__ == "__main__": 5 | run_task(task) 6 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/readers.py: -------------------------------------------------------------------------------- 1 | import av 2 | 3 | 4 | def get_video_meta(path): 5 | with av.open(path) as cont: 6 | n_frames = cont.streams[0].frames 7 | codec = cont.streams[0].codec.name 8 | tb = cont.streams[0].time_base 9 | 10 | all_pts = [] 11 | for x in cont.demux(video=0): 12 | if x.pts is None: 13 | continue 14 | all_pts.append(x.pts) 15 | 16 | if len(all_pts) >= 2: 17 | assert all_pts[-1] > all_pts[-2] 18 | 19 | assert len(all_pts) == n_frames 20 | return { 21 | "all_pts": all_pts, 22 | "codec": codec, 23 | "tb": tb, 24 | } 25 | 26 | 27 | def read_frame_idx_set(path, frame_indices, stream_id): 28 | meta = get_video_meta(path) 29 | with av.open(path) as cont: 30 | initial_pts = meta["all_pts"][frame_indices[0]] 31 | last_pts = meta["all_pts"][frame_indices[-1]] 32 | pts_to_idx = {meta["all_pts"][idx]: idx for idx in frame_indices} 33 | cont.seek(initial_pts, stream=cont.streams.video[stream_id], any_frame=False) 34 | seen = 0 35 | for f in cont.decode(video=stream_id): 36 | if f.pts > last_pts: 37 | break 38 | if f.pts not in pts_to_idx: 39 | # print("Skipping", f.pts) 40 | continue 41 | 42 | idx = pts_to_idx[f.pts] 43 | seen += 1 44 | yield idx, f.to_ndarray(format="rgb24") 45 | 46 | assert seen == len(pts_to_idx) 47 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/requirements.txt: 
-------------------------------------------------------------------------------- 1 | torch>=2.0.0 2 | boto3 3 | numpy==1.24.2 4 | opencv-python 5 | mmcv-full==1.7.0 6 | mmtrack==0.14.0 7 | pycolmap 8 | hydra-core 9 | av 10 | iopath 11 | torchaudio 12 | torchvision 13 | black==22.3.0 14 | ufmt==2.0.0 15 | usort==1.0.3 16 | requests 17 | smplx>=0.1.28 18 | trimesh 19 | timm==0.4.9 20 | yacs 21 | Rtree 22 | pyvista 23 | pyntcloud 24 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/_install/conda.sh: -------------------------------------------------------------------------------- 1 | source ~/anaconda3/etc/profile.d/conda.sh 2 | 3 | ## cd to root of the repository 4 | cd ../.. 5 | 6 | conda_env=${1:-human_pose} 7 | 8 | ##----------------------------------------------- 9 | conda create -n $conda_env python=3.9 -y ## pycolmap not supported for 3.10 10 | conda activate $conda_env 11 | 12 | ##----------------------------------------------- 13 | ## install pytorch 14 | conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia -y ## pytorch 2.0 15 | 16 | ##---------install dependencies--------------------------------- 17 | cd external 18 | 19 | git clone git@github.com:rawalkhirodkar/mmlab.git ## host of all forks 20 | 21 | cd mmlab/mmcv 22 | pip install -r requirements/optional.txt 23 | MMCV_WITH_OPS=1 pip install -e . -v 24 | cd ../.. 25 | 26 | cd mmlab/mmpose 27 | pip install -r requirements.txt 28 | pip install -v -e . 29 | pip install flask 30 | pip install timm==0.4.9 31 | cd ../.. 32 | 33 | cd mmlab/mmdetection 34 | pip install -v -e . 35 | cd ../.. 36 | 37 | cd .. 38 | 39 | ##----------------------------------------------- 40 | ## install other dependencies 41 | pip install yacs 42 | pip install Rtree 43 | pip install pyntcloud pyvista 44 | pip install python-fcl 45 | pip install hydra-core --upgrade 46 | pip install av iopath 47 | pip install pycolmap 48 | pip install projectaria_tools 49 | 50 | ##----------------------------------------------- 51 | ## install ego4d locally 52 | cd ../../.. 53 | pip install -e . 
54 | cd ego4d/internal/human_pose 55 | 56 | ##----------------------------------------------- 57 | echo "Done Installing" 58 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/copy_s3_to_manifold.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This script is supposed to run on devserver 3 | 4 | data=$1 5 | 6 | aws_date=$2 7 | # date=20230910 8 | date=$2 9 | 10 | # queue=pilot 11 | queue=production 12 | 13 | manifold mkdir "ego4d_fair/tree/egoexo/egopose/${queue}/${date}" 14 | manifold mkdir "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}" 15 | 16 | set -e 17 | 18 | export https_proxy=fwdproxy:8080 19 | aws s3 sync "s3://ego4d-fair/egopose/${queue}/${aws_date}/${data}" ~/egopose/"${queue}/${date}/${data}" 20 | 21 | manifold putr --ignoreExisting -j 10 --threads 1 ~/egopose/"${queue}/${date}/${data}/body" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/body" 22 | 23 | manifold putr --ignoreExisting -j 10 --threads 1 ~/egopose/"${queue}/${date}/${data}/hand" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/hand" 24 | 25 | rm -rf ~/egopose/"${queue}/${date}/${data}/body" 26 | rm -rf ~/egopose/"${queue}/${date}/${data}/hand" 27 | 28 | manifold put ~/egopose/"${queue}/${date}/${data}/body_bbox.mp4" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/body_bbox.mp4" 29 | manifold put ~/egopose/"${queue}/${date}/${data}/body_pose2d.mp4" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/body_pose2d.mp4" 30 | manifold put ~/egopose/"${queue}/${date}/${data}/body_pose3d.mp4" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/body_pose3d.mp4" 31 | manifold put ~/egopose/"${queue}/${date}/${data}/hand_pose2d.mp4" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/hand_pose2d.mp4" 32 | manifold put ~/egopose/"${queue}/${date}/${data}/hand_pose3d.mp4" "ego4d_fair/tree/egoexo/egopose/${queue}/${date}/${data}/hand_pose3d.mp4" 33 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/0_get_bbox.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 
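## NOTE: run this script from ego4d/internal/human_pose/scripts/process/ -- the `cd ../..` above lands in ego4d/internal/human_pose, where main.py and main_single_camera.py live.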
2 | 3 | ##-------------------------------------------------------------- 4 | RUN_FILE=ego4d/internal/human_pose/main.py 5 | 6 | CONFIG=unc_T1_rawal; DEVICES=0, 7 | # CONFIG=iu_bike_rawal; DEVICES=0, 8 | # CONFIG=iu_music_rawal; DEVICES=0, 9 | # CONFIG=cmu_soccer_rawal; DEVICES=0, 10 | 11 | # ##-------------------------------------------------------------- 12 | # CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=bbox 13 | 14 | # ##---------------------parallel process--------------------------- 15 | DEVICES=0,; CAMERA_NAME=cam01; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=bbox exo_camera_name=$CAMERA_NAME & 16 | DEVICES=0,; CAMERA_NAME=cam02; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=bbox exo_camera_name=$CAMERA_NAME & 17 | DEVICES=1,; CAMERA_NAME=cam03; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=bbox exo_camera_name=$CAMERA_NAME & 18 | DEVICES=1,; CAMERA_NAME=cam04; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=bbox exo_camera_name=$CAMERA_NAME & 19 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/1_get_pose2d.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 2 | 3 | ##-------------------------------------------------------------- 4 | RUN_FILE=ego4d/internal/human_pose/main.py 5 | 6 | CONFIG=unc_T1_rawal; DEVICES=0, 7 | # CONFIG=iu_bike_rawal; DEVICES=0, 8 | # CONFIG=iu_music_rawal; DEVICES=0, 9 | # CONFIG=cmu_soccer_rawal; DEVICES=0, 10 | 11 | # # ##-------------------------------------------------------------- 12 | # CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=pose2d 13 | 14 | 15 | # ##---------------------parallel process--------------------------- 16 | DEVICES=0,; CAMERA_NAME=cam01; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=pose2d exo_camera_name=$CAMERA_NAME & 17 | DEVICES=0,; CAMERA_NAME=cam02; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=pose2d exo_camera_name=$CAMERA_NAME & 18 | DEVICES=1,; CAMERA_NAME=cam03; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=pose2d exo_camera_name=$CAMERA_NAME & 19 | DEVICES=1,; CAMERA_NAME=cam04; CUDA_VISIBLE_DEVICES=${DEVICES} python main_single_camera.py --config-name $CONFIG mode=pose2d exo_camera_name=$CAMERA_NAME & 20 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/2_get_pose3d.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 
2 | 3 | ##-------------------------------------------------------------- 4 | RUN_FILE=ego4d/internal/human_pose/main.py 5 | 6 | CONFIG=unc_T1_rawal; DEVICES=0, 7 | # CONFIG=iu_bike_rawal; DEVICES=0, 8 | # CONFIG=iu_music_rawal; DEVICES=0, 9 | # CONFIG=cmu_soccer_rawal; DEVICES=0, 10 | 11 | # ##-------------------------------------------------------------- 12 | # CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=pose3d 13 | 14 | # ##-------------------------------------------------------------- 15 | ## read the yaml file and get the start and end frame 16 | start_frame=$(python -c "import yaml; file = open('configs/${CONFIG}.yaml', 'r'); config = yaml.safe_load(file); file.close(); print(config['inputs']['from_frame_number'])") 17 | end_frame=$(python -c "import yaml; file = open('configs/${CONFIG}.yaml', 'r'); config = yaml.safe_load(file); file.close(); print(config['inputs']['to_frame_number'])") 18 | TOTAL_FRAMES=$(($end_frame-$start_frame + 1)) 19 | 20 | NUM_JOBS=8 21 | 22 | ## start num_jobs, each job runs for 1/num_jobs of the total frames, set the start and end frame accordingly 23 | ## start from 0 to TOTAL_FRAMES 24 | 25 | for ((i=0; i<$NUM_JOBS; i++)) 26 | do 27 | start=$(($i*$TOTAL_FRAMES/$NUM_JOBS)) 28 | end=$(($(($i+1))*$TOTAL_FRAMES/$NUM_JOBS)) 29 | echo "start: $start, end: $end" 30 | CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=pose3d mode_pose3d.start_frame=$start mode_pose3d.end_frame=$end & 31 | done 32 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/3_multi_view_vis.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 2 | 3 | ##-------------------------------------------------------------- 4 | DEVICES=0, 5 | RUN_FILE=ego4d/internal/human_pose/main.py 6 | # CONFIG=unc_T1_rawal 7 | CONFIG=iu_bike_rawal 8 | # CONFIG=iu_music_rawal 9 | 10 | 11 | # ##-------------------------------------------------------------- 12 | CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=multi_view_vis -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/3_refine_pose3d.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 2 | 3 | ##-------------------------------------------------------------- 4 | RUN_FILE=ego4d/internal/human_pose/main.py 5 | 6 | # CONFIG=unc_T1_rawal; DEVICES=0, 7 | CONFIG=iu_bike_rawal; DEVICES=0, 8 | # CONFIG=iu_music_rawal; DEVICES=0, 9 | # CONFIG=cmu_soccer_rawal; DEVICES=0, 10 | 11 | # ##-------------------------------------------------------------- 12 | CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=refine_pose3d 13 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/process/4_multi_view_vis.sh: -------------------------------------------------------------------------------- 1 | cd ../.. 
2 | 3 | ##-------------------------------------------------------------- 4 | DEVICES=0, 5 | RUN_FILE=ego4d/internal/human_pose/main.py 6 | # CONFIG=unc_T1_rawal 7 | CONFIG=iu_bike_rawal 8 | # CONFIG=iu_music_rawal 9 | # CONFIG=cmu_soccer_rawal; DEVICES=0, 10 | 11 | 12 | # ##-------------------------------------------------------------- 13 | # CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=multi_view_vis_bbox 14 | # CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=multi_view_vis_pose2d 15 | CUDA_VISIBLE_DEVICES=${DEVICES} python main.py --config-name $CONFIG mode=multi_view_vis_pose3d 16 | -------------------------------------------------------------------------------- /ego4d/internal/human_pose/scripts/upload_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | data=$1 4 | 5 | # date=20230910 6 | date=$2 7 | 8 | # queue=pilot 9 | queue=production 10 | 11 | set -e 12 | 13 | # cache_root_dir="/checkpoint/${USER}/datasets/EgoExoPose/tmp_0_80000_1" 14 | cache_root_dir="/large_experiments/egoexo/egopose/${USER}/tmp_0_80000_1" 15 | 16 | aws s3 sync "${cache_root_dir}/cache/${data}/body/halo" "s3://ego4d-fair/egopose/${queue}/${date}/${data}/body" 17 | 18 | aws s3 sync "${cache_root_dir}/cache/${data}/hand/halo" "s3://ego4d-fair/egopose/${queue}/${date}/${data}/hand" 19 | 20 | aws s3 sync "${cache_root_dir}/cache/${data}/vis_pose3d" "s3://ego4d-fair/egopose/${queue}/${date}/${data}" 21 | -------------------------------------------------------------------------------- /ego4d/internal/validation/README.md: -------------------------------------------------------------------------------- 1 | # Validation 2 | 3 | This section of the repository validates input metadata files and associated 4 | data files located on S3 or on a local filesystem. 5 | 6 | ## Error Classification 7 | 8 | There are two classes of issues: errors and warnings (similar to a compiler). 9 | - Errors *must be fixed* before the data can be ingested 10 | - Warnings are flagged so that you are aware of them 11 | 12 | ## Usage 13 | ``` 14 | python ego4d/internal/validation/cli.py -i "<input_dir>" 15 | ``` 16 | 17 | - By default errors will be logged to S3. You can override this by providing an 18 | output directory via `--output` or `-o` 19 | - `<input_dir>` can be a folder on S3 or on the local filesystem 20 | - If it is on the local filesystem, you must give a university name so that 21 | the errors can be uploaded to S3 22 | - The metadata CSV files may reference local files; doing so will result in a 23 | set of errors. This is intentional: fix these references before uploading the files to S3. 24 | 25 | ## Example 26 | ``` 27 | python ego4d/internal/validation/cli.py -i s3://ego4d-penn/egoexo/metadata_v2 28 | python ego4d/internal/validation/cli.py -i s3://ego4d-utokyo/egoexo/metadata_v1 29 | ``` 30 | 31 | ## Debugging 32 | 33 | ``` 34 | ipython --pdb ego4d/internal/validation/cli.py -- -i s3://ego4d-utokyo/egoexo/metadata_v1 35 | ipython --pdb ego4d/internal/validation/cli.py -- -i s3://ego4d-iiith/egoexo/manifest_v1/ -o errors_temp 36 | ipython --pdb ego4d/internal/validation/cli.py -- -i ./iiith_manifest_v1 -u iiith -o errors_temp 37 | python ego4d/internal/validation/cli.py -i ./iiith_manifest_v1 -u iiith -o errors_temp 38 | ``` 39 | 40 | ## Skipping MP4 File Checks 41 | 42 | You may skip MP4 file checks with `--skip_mp4_check`, since this is the slowest 43 | step.
44 | 45 | ``` 46 | python ego4d/internal/validation/cli.py -i s3://ego4d-penn/egoexo/metadata_v2 --skip_mp4_check 47 | ``` 48 | -------------------------------------------------------------------------------- /ego4d/internal/validation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/internal/validation/__init__.py -------------------------------------------------------------------------------- /ego4d/internal/validation/cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """ 5 | Command line tool to validate Ego4D metadata files and their associated data files. 6 | 7 | Examples: 8 | python ego4d/internal/validation/cli.py \ 9 | -i "s3://ego4d-unict/metadata_v7" \ 10 | -mf ego4d/internal/validation/standard_metadata/ego4d 11 | 12 | python ego4d/internal/validation/cli.py \ 13 | -i "s3://ego4d-georgiatech/metadata_v5" \ 14 | -mf ego4d/internal/validation/standard_metadata/ego4d \ 15 | -u georgiatech \ 16 | -o errors 17 | """ 18 | 19 | from ego4d.internal.validation.config import Config, config_from_args, validate_config 20 | from ego4d.internal.validation.validate import run_validation 21 | 22 | 23 | def main_cfg(cfg: Config) -> None: 24 | validated_cfg = validate_config(cfg) 25 | run_validation( 26 | manifest_dir=validated_cfg.input_directory, 27 | input_university=validated_cfg.input_university, 28 | released_video_path=validated_cfg.released_video_path, 29 | standard_metadata_folder=validated_cfg.metadata_folder, 30 | output_dir=validated_cfg.output_dir, 31 | num_workers=validated_cfg.num_workers, 32 | expiry_time_sec=validated_cfg.expiry_time_sec, 33 | version=validated_cfg.version, 34 | skip_mp4_check=validated_cfg.skip_mp4_check, 35 | ) 36 | 37 | 38 | def main() -> None: 39 | config = config_from_args() 40 | main_cfg(config) 41 | 42 | 43 | if __name__ == "__main__": 44 | main() 45 | -------------------------------------------------------------------------------- /ego4d/internal/validation/credential_s3.py: -------------------------------------------------------------------------------- 1 | # pyre-unsafe 2 | import logging 3 | import os 4 | import re 5 | from dataclasses import dataclass 6 | from typing import List, Optional, Tuple 7 | 8 | import botocore 9 | 10 | 11 | @dataclass 12 | class FileInfo: 13 | key: str 14 | size: int 15 | 16 | 17 | def is_file_readable(file_path): 18 | return os.path.isfile(file_path) and os.access(file_path, os.R_OK) 19 | 20 | 21 | def parse_bucket_key(key: str) -> Tuple[str, str]: 22 | # Determine from the key 23 | assert key and key.startswith( 24 | "s3://" 25 | ), f"Invalid key without bucket supplied: {key}" 26 | m = re.match("^s3://([^/]*)/(.*)$", key) 27 | assert m, f"Invalid s3:// search key: {key}" 28 | grp = m.groups() 29 | assert len(grp) == 2 30 | return grp[0], grp[1] 31 | 32 | 33 | class S3Helper: 34 | def __init__(self, s3, bucket_name): 35 | self._s3 = s3 36 | self._bucket_name = bucket_name 37 | 38 | @property 39 | def bucket(self): 40 | return self._bucket_name 41 | 42 | def ls(self, prefix: str, max_results=-1, **kwargs) -> Tuple[bool, List[FileInfo]]: 43 | if max_results < 0: 44 | # No cap requested: paginate through every key under the prefix 45 | paginator = self._s3.get_paginator("list_objects_v2") 46 | pages = paginator.paginate(Bucket=self._bucket_name, Prefix=prefix) 47 | results = [] 48 | for page in pages: 49 | for f in page.get("Contents", []): 50 | results.append(FileInfo(f["Key"], f["Size"])) 51 | return False, results 52 | else: 53 | # legacy single-call path, capped at max_results keys 54 | ls_result = self._s3.list_objects_v2( 55 | Bucket=self._bucket_name, Prefix=prefix, MaxKeys=max_results, **kwargs 56 | ) 57 | return ( 58 | ls_result["IsTruncated"], 59 | [FileInfo(f["Key"], f["Size"]) for f in ls_result.get("Contents", [])], 60 | ) 61 | 62 | def get_file(self, key: str, local_path: str, **kwargs) -> Optional[str]: 63 | print(f"Downloading: {key} to {local_path}") 64 | try: 65 | self._s3.download_file( 66 | Bucket=self._bucket_name, Key=key, Filename=local_path, **kwargs 67 | ) 68 | except botocore.exceptions.ClientError as e: 69 | # If a client error is thrown, then check that it was a 404 error. 70 | # If it was a 404 error, then the file does not exist. 71 | error_code = int(e.response["Error"]["Code"]) 72 | if error_code == 404: 73 | return None 74 | else: 75 | raise e 76 | return local_path 77 | 78 | def exists(self, key: str, bucket: Optional[str] = None, **kwargs) -> bool: 79 | if not bucket: 80 | bucket, key = parse_bucket_key(key) 81 | assert key and bucket 82 | try: 83 | # head_object checks existence without fetching the object body 84 | self._s3.head_object(Bucket=bucket, Key=key) 85 | return True 86 | except botocore.exceptions.ClientError as e: 87 | if e.response["Error"]["Code"] in ("404", "NoSuchKey", "NotFound"): 88 | return False 89 | logging.exception("AWS exists exception: %s", e) 90 | raise e -------------------------------------------------------------------------------- /ego4d/internal/validation/standard_metadata/ego4d/component_type.csv: -------------------------------------------------------------------------------- 1 | component_type_id,name 2 | 1,imu_data 3 | 2,3d_registration 4 | 3,gps 5 | 4,eye_tracking 6 | 5,burned_in_gaze_video 7 | 6,binaural_audio 8 | 7,3rd_person_video -------------------------------------------------------------------------------- /ego4d/internal/validation/standard_metadata/ego4d/device.csv: -------------------------------------------------------------------------------- 1 | device_id,name 2 | 0,GoPro Hero Black 8 3 | 1,GoPro Hero Black 7 4 | 2,Vuzix 5 | 3,Zshade 6 | 4,Weeview 7 | 5,GoPro Max 8 | 6,GoPro Hero 4 9 | 7,GoPro Hero Silver 7 10 | 8,Pupil Labs Invisible 11 | 9,iVue Rincon 1080P 12 | 10,Ordro EP6 4K 13 | 11,GoPro Hero Black 9 14 | 12,LeaPower 15 | 13,GoPro - Unknown Model 16 | -------------------------------------------------------------------------------- /ego4d/internal/validation/standard_metadata/egoexo/device.csv: -------------------------------------------------------------------------------- 1 | device_id,name 2 | aria,Aria Glasses 3 | hero8,GoPro Hero Black 8 4 | hero9,GoPro Hero Black 9 5 | hero10,GoPro Hero Black 10 6 | hero11,GoPro Hero Black 11 -------------------------------------------------------------------------------- /ego4d/internal/validation/standard_metadata/egoexo/scenario.csv: -------------------------------------------------------------------------------- 1 | scenario_id,name 2 | 0,Unknown 3 | 1000,Cooking 4 | 1001,Cooking an Omelet 5 | 1002,Cooking Scrambled Eggs 6 | 1003,Cooking Tomato & Eggs 7 | 1004,Cooking Noodles 8 | 1005,Cooking Dumplings 9 | 1006,Cooking Noodles 10 | 1007,Cooking Pasta 11 | 1008,Cooking Sushi Rolls 12 | 1009,Cooking Samosas 13 | 1010,Making Cucumber & Tomato Salad 14 | 1011,Making Sesame-Ginger Asian Salad 15 | 1012,Making Greek Salad 16 | 1013,Making Coffee latte 17 | 1014,Making Chai Tea 18 | 1015,Making
Milk Tea 19 | 1016,Cooking Cookies 20 | 1017,Cooking Brownies 21 | 1018,Making White Radish & Lettuce & Tomato & Cucumber Salad 22 | 2000,Health 23 | 2001,Covid-19 Rapid Antigen Test 24 | 2002,Medical Checkup - Blood Pressure Monitor 25 | 2003,Medical Checkup - Oximeter 26 | 2004,Health Checkup - Thermometer 27 | 2005,First Aid - CPR 28 | 3000,Campsite 29 | 3001,Set up a Tent 30 | 3002,Disassemble a Tent 31 | 4000,Bike Repair 32 | 4001,Remove a Wheel 33 | 4002,Install a Wheel 34 | 4003,Fix a Flat Tire - Replace a Bike Tube 35 | 4004,Clean and Lubricate the Chain 36 | 4005,Overhaul the Headset 37 | 4006,Adjust a Rear Derailueur 38 | 5000,Music 39 | 5001,Playing Guitar 40 | 5002,Playing Guitar - Scales and Arpeggios 41 | 5003,Playing Guitar - Suzuki Books 42 | 5004,Playing Guitar - Freeplaying 43 | 5005,Playing Violin 44 | 5006,Playing Violin - Scales and Arpeggios 45 | 5007,Playing Violin - Suzuki Books 46 | 5008,Playing Violin - Freeplaying 47 | 5009,Playing Piano 48 | 5010,Playing Piano - Scales and Arpeggios 49 | 5011,Playing Piano - Suzuki Books 50 | 5012,Playing Piano - Freeplaying 51 | 5013,Playing Trombone 52 | 5014,Playing Trombone - Scales 53 | 5015,Playing Trombone - Freeplaying 54 | 5016,Playing Trumpet 55 | 5017,Playing Trumpet - Scales 56 | 5018,Playing Trumpet - Freeplaying 57 | 5019,Playing Alto-Saxophone 58 | 5020,Playing Alto-Saxophone - Scales 59 | 5021,Playing Alto-Saxophone - Freeplaying 60 | 6000,Basketball 61 | 6001,Basketball Drills - Mikan Layup 62 | 6002,Basketball Drills - Reverse Layup 63 | 6003,Basketball Drills - Mid-Range Jump Shooting 64 | 7000,Rock Climbing 65 | 7001,Los_Andes_FPC5_women 66 | 7002,Los_Andes_FPC1_women 67 | 7003,Los_Andes_FPC2_women 68 | 7004,Los_Andes_FPC10_men 69 | 7005,Los_Andes_FPC11_men 70 | 7006,Los_Andes_FPC12_men 71 | 7007,Los_Andes_FPC13_women 72 | 7008,Los_Andes_FPC14_women 73 | 7009,Los_Andes_FPC15_women 74 | 7010,Los_Andes_FPC16_women 75 | 7011,Los_Andes_FPC17_women 76 | 7012,Los_Andes_FPC14_men 77 | 7013,Los_Andes_FPC15_men 78 | 7014,Los_Andes_FPC16_men 79 | 7015,Los_Andes_FPC18_men 80 | 7016,Los_Andes_FPC17_men 81 | 7017,Los_Andes_Super_Final_men 82 | 7300,Minnesota_RW_Casual 83 | 7018,LosAndes_V2_R1 84 | 7019,LosAndes_V1_R1 85 | 7020,LosAndes_V2_R2 86 | 7021,LosAndes_V4_R1 87 | 7022,LosAndes_V3_R1 88 | 7023,LosAndes_V3_R2 89 | 7024,LosAndes_V4_R2 90 | 7025,LosAndes_V3_R3 91 | 7026,LosAndes_V1_R2 92 | 7027,LosAndes_V4_R3 93 | 7028,LosAndes_V4_R4 94 | 7029,LosAndes_V6_R1 95 | 7030,LosAndes_V2_R3 96 | 7031,LosAndes_V3_R4 97 | 7032,LosAndes_V5_R1 98 | 7033,LosAndes_V5_R2 99 | 7301,Minnesota_VE_Casual 100 | 8000,Soccer 101 | 8001,Soccer Drills - Inside Trap and Outside Play 102 | 8002,Soccer Drills - Outside Trap and Outside Play 103 | 8003,Soccer Drills - Dribbling 104 | 8004,Soccer Drills - Juggling 105 | 8005,Soccer Drills - Penalty Kick 106 | 9000,Dance 107 | 9001,Performing the basic choreography 108 | 9002,Performing the advanced choreography 109 | 9003,Teaching the basic choreography 110 | 9004,Teaching the advanced choreography 111 | 9005,LosAndes_Basic_Salsa 112 | 9006,LosAndes_Intermediate_Salsa 113 | 9007,LosAndes_Advanced_Salsa 114 | -------------------------------------------------------------------------------- /ego4d/research/README.md: -------------------------------------------------------------------------------- 1 | # Research for Ego4D 2 | 3 | This subsection of the repository is dedicated to research and usability of the 4 | Ego4D dataset. 
5 | 6 | ## Dataset 7 | 8 | - `LabelledFeatureDset` 9 | - A simple utility class to load features associated with labels 10 | - Use `save_ego4d_features_to_hdf5` to preprocess video features before usage 11 | -------------------------------------------------------------------------------- /ego4d/research/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/research/__init__.py -------------------------------------------------------------------------------- /ego4d/research/clep/README.md: -------------------------------------------------------------------------------- 1 | # CLEP: Contrastive Language Egocentric-video Pre-training 2 | 3 | NOTE: this is very experimental code. You may have to read and modify the code 4 | in order to run it on your machine. Depending on interest, this may change. 5 | 6 | This repository contains the code for contrastive learning with Ego4D's 7 | narrations. This serves as an example of using the features for 8 | training/prototyping ideas. It includes: 9 | 10 | - A dataset for supervised learning with the features. Refer to `LabelledFeatureDset` in [research/README.md](../README.md) 11 | - Pre-processing scripts to convert the features to HDF5, which is a more efficient format for training. 12 | - Zero-shot evaluation on Kinetics and Ego-Charades 13 | 14 | ## CVPR Presentation 15 | 16 | The code in this repository was presented at CVPR 2022 in New Orleans. You can 17 | find the presentation code in `notebooks/` under the root directory of this 18 | repository. 19 | 20 | ## Preprocessing Data 21 | 22 | To pre-process the data please use the script under 23 | `ego4d/research/clep/run_preprocess.py`. Provide `pre_config.mode` to change what 24 | is being pre-processed. 25 | 26 | 27 | ``` 28 | python3 ego4d/research/clep/run_preprocess.py --config-name omnivore_features pre_config.mode="k400" 29 | python3 ego4d/research/clep/run_preprocess.py --config-name omnivore_features pre_config.mode="ego_charade" 30 | python3 ego4d/research/clep/run_preprocess.py --config-name omnivore_features pre_config.mode="ego4d_narr" 31 | python3 ego4d/research/clep/run_preprocess.py --config-name omnivore_features pre_config.mode="ego4d_features" 32 | python3 ego4d/research/clep/run_preprocess.py --config-name omnivore_features pre_config.mode="cc" 33 | ``` 34 | 35 | Also add `pre_config.slurm_config.run_locally=1` to run the preprocessing locally. 36 | 37 | Valid preprocessing options are: 38 | - `ego4d_narr` 39 | - Extracts narration embeddings and saves via `torch.save` 40 | - `ego4d_features` 41 | - Converts the features to HDF5 42 | - `k400` 43 | - Extracts features from Kinetics-400 44 | - `ego_charade` 45 | - Extracts features from Ego-Charades 46 | - `cc` 47 | - Extracts features from Conceptual Captions 48 | - Requires you to download the data with `open_clip` beforehand 49 | 50 | You will likely have to modify the paths in `configs/omnivore_features.yaml` 51 | 52 | ## Datasets 53 | 54 | Please refer to the code `ego4d/research/clep/dataset.py`. 55 | 56 | - `create_ego_charades_dset`: creates the Ego-Charades dataset (previously 57 | pre-processed) 58 | - `create_kinetics`: creates the K400 dataset (previously pre-processed) 59 | - `Ego4DCLEP`: The dataset for narrations/video Ego4D data 60 | - `CCDset`: A Conceptual Captions dataset for Ego4D 61 | 62 | ## Training 63 | 64 | Run training with `ego4d/research/clep/train.py`.
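For example, with the same Hydra config used for preprocessing above (a hypothetical invocation; check `train.py` for its exact entry point and options):

```
python3 ego4d/research/clep/train.py --config-name omnivore_features
```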
65 | 66 | Pass `run_locally=1` if you want to run the training process locally rather than on the cluster. 67 | -------------------------------------------------------------------------------- /ego4d/research/clep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/research/clep/__init__.py -------------------------------------------------------------------------------- /ego4d/research/clep/config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Optional 3 | 4 | from ego4d.research.common import SlurmConfig 5 | 6 | 7 | @dataclass 8 | class InputConfig: 9 | feature_path: str 10 | metadata_path: str 11 | features_per_second: float 12 | narration_width_sample_sec: float 13 | max_num_feature_vec_video_uids: int 14 | dsets_to_use: List[str] 15 | 16 | 17 | @dataclass 18 | class ModelConfig: 19 | nlp_feature_size: int 20 | visual_feature_size: int 21 | final_proj_size: int 22 | 23 | 24 | @dataclass 25 | class K400PreprocessConfig: 26 | dataset_dir: str 27 | set_to_use: str 28 | root_dir: str 29 | viz_feature_path: str 30 | metadata_out_path: str 31 | feature_extract_config_path: str 32 | num_labels_per_machine: int 33 | 34 | 35 | @dataclass 36 | class EgoPreprocessFeatureConfig: 37 | hdf5_path: str 38 | 39 | 40 | @dataclass 41 | class EgoPreprocessNarrConfig: 42 | narration_json_path: str 43 | num_workers: int 44 | st_model_name: str 45 | accelerator: str 46 | root_dir: str 47 | metadata_out_path: str 48 | narration_out_dir: str 49 | limit: Optional[int] 50 | num_narrs_per_machine: int 51 | 52 | 53 | @dataclass 54 | class EgoCharadePreprocessConfig: 55 | set_path: str 56 | video_root_path: str 57 | class_desc_path: str 58 | out_path: str 59 | out_label_path: str 60 | num_vids_per_machine: int 61 | 62 | 63 | @dataclass 64 | class CCPreprocessConfig: 65 | in_path: str 66 | helper_workers: int 67 | 68 | hdf5_viz_path: str 69 | hdf5_sent_path: str 70 | meta_path: str 71 | batch_size: int 72 | num_workers: int 73 | prefetch_factor: int 74 | imgs_per_gpu: int 75 | 76 | 77 | @dataclass 78 | class PreprocessConfig: 79 | slurm_config: SlurmConfig 80 | 81 | mode: str 82 | root_dir: str 83 | ego4d_narr: EgoPreprocessNarrConfig 84 | ego4d_features: EgoPreprocessFeatureConfig 85 | k400: K400PreprocessConfig 86 | ego_charade: EgoCharadePreprocessConfig 87 | cc: CCPreprocessConfig 88 | 89 | 90 | @dataclass 91 | class TrainConfig: 92 | input_config: InputConfig 93 | model_config: ModelConfig 94 | pre_config: PreprocessConfig 95 | 96 | checkpoint_dir: str 97 | checkpoint_metric: str 98 | batch_size: int 99 | num_workers: int 100 | prefetch_factor: int 101 | 102 | num_epochs: int 103 | accelerator: str 104 | devices: int 105 | 106 | run_locally: bool 107 | tb_log_dir: str 108 | tb_log_name: str 109 | 110 | lr: float 111 | beta1: float 112 | beta2: float 113 | wd: float 114 | eps: float 115 | 116 | eval_per_iter: int 117 | eval_init: bool 118 | 119 | use_soft_loss: Optional[bool] 120 | soft_loss_threshold: float 121 | use_bce: bool 122 | use_logit_scale: bool 123 | norm_logits: bool 124 | -------------------------------------------------------------------------------- /ego4d/research/clep/configs/omnivore_features.yaml: -------------------------------------------------------------------------------- 1 | input_config: 2 | metadata_path: "/checkpoint/miguelmartin/ego4d_data/ego4d.json" 3
| feature_path: "/checkpoint/miguelmartin/ego4d_track2_features/full_scale/omnivore_video_swinL" 4 | feature_extract_config_path: "/private/home/miguelmartin/ego4d/ego4d_public/ego4d/features/configs/omnivore_video.yaml" 5 | features_per_second: 1.875 6 | narration_width_sample_sec: 0.1 7 | max_num_feature_vec_video_uids: 30 8 | dsets_to_use: [ego4d] 9 | model_config: 10 | nlp_feature_size: 768 11 | visual_feature_size: 1536 12 | final_proj_size: 1536 13 | pre_config: 14 | mode: "k400" 15 | root_dir: "/checkpoint/miguelmartin/clep/" 16 | ego_charade: 17 | set_path: "/datasets01/Charades-ego-v1/101320/charades-ego-v1/CharadesEgo/CharadesEgo_v1_test.csv" 18 | video_root_path: "/datasets01/Charades-ego-v1/101320/charades-ego-v1/CharadesEgo_v1_480/" 19 | class_desc_path: "/datasets01/Charades-ego-v1/101320/charades-ego-v1/CharadesEgo/Charades_v1_classes.txt" 20 | out_path: "charades.hdf5" 21 | out_label_path: "labels.pt" 22 | num_vids_per_machine: 50 23 | k400: 24 | csv_dir: "/datasets01/kinetics/092121/400/lists" 25 | dataset_dir: "/datasets01/kinetics/092121/400/val_288px/" 26 | set_to_use: "val" 27 | root_dir: "k400_hdf5_all" 28 | viz_feature_path: "viz.hdf5" 29 | metadata_out_path: "meta.pth" 30 | num_labels_per_machine: 400 31 | ego4d_features: 32 | hdf5_path: "features.hdf5" 33 | ego4d_narr: 34 | narration_json_path: "/datasets01/ego4d_track2/v1/annotations/narration.json" 35 | st_model_name: "all-mpnet-base-v2" 36 | accelerator: "cuda" 37 | metadata_out_path: "narrs_meta.pt" 38 | narration_out_dir: "narrs" 39 | min_words: 5 40 | batch_size: 50 41 | num_narrs_per_machine: 100000 42 | limit: null 43 | cc: 44 | in_path: "/checkpoint/miguelmartin/conceptial_captions/Train_GCC-training_output.csv" 45 | hdf5_viz_path: "cc_viz.hdf5" 46 | hdf5_sent_path: "cc_sent.hdf5" 47 | meta_path: "cc.pt" 48 | helper_workers: 32 49 | batch_size: 200 50 | num_workers: 10 51 | prefetch_factor: 2 52 | imgs_per_gpu: 50000 53 | num_narrs_per_machine: 50000 54 | slurm_config: 55 | slurm_log_folder: "slurm_log" 56 | timeout_min: 240 57 | constraint: "volta" 58 | slurm_partition: "pixar" 59 | slurm_array_parallelism: 100 60 | gpus_per_node: 1 61 | cpus_per_task: 10 62 | run_locally: false 63 | batch_size: 128 64 | num_workers: 10 65 | prefetch_factor: 4 66 | num_epochs: 200 67 | accelerator: "gpu" 68 | devices: 1 69 | tb_log_dir: "/private/home/miguelmartin/ego4d/ego4d_public/runs" 70 | tb_log_name: "clep" 71 | lr: 0.001 72 | beta1: 0.9 73 | beta2: 0.98 74 | wd: 0.1 75 | eps: 1.0e-6 76 | run_locally: false 77 | eval_per_iter: 500 78 | eval_init: true 79 | use_soft_loss: null 80 | use_bce: false 81 | norm_logits: false 82 | soft_loss_threshold: 0.95 83 | use_logit_scale: true 84 | checkpoint_dir: "/checkpoint/miguelmartin/clep/checkpoints" 85 | checkpoint_metric: "Val/Char_1Ego_Labels/mAP" 86 | -------------------------------------------------------------------------------- /ego4d/research/clep/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ego4d.research.clep.config import ModelConfig 6 | 7 | 8 | def _get_layers(initial_dim, config): 9 | return [ 10 | nn.Linear(initial_dim, config.final_proj_size), 11 | nn.ReLU(True), 12 | nn.Linear(config.final_proj_size, config.final_proj_size), 13 | ] 14 | 15 | 16 | class EgoLangaugeAssociation(nn.Module): 17 | def __init__(self, config: ModelConfig): 18 | super().__init__() 19 | 20 | self.config = config 21 | txt_layers = 
_get_layers(config.nlp_feature_size, config) 22 | viz_layers = _get_layers(config.visual_feature_size, config) 23 | self.text_proj = nn.Sequential(*tuple(txt_layers)) 24 | self.visual_proj = nn.Sequential(*tuple(viz_layers)) 25 | 26 | self.apply(self.init_weights) 27 | 28 | # don't want to init this with 0 29 | self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) 30 | 31 | def forward(self, x): 32 | ve = self.visual_proj(x["video"]) 33 | te = self.text_proj(x["text"]) 34 | return ve, te, self.logit_scale 35 | 36 | def init_weights(self, module): 37 | if isinstance(module, nn.Linear): 38 | torch.nn.init.xavier_uniform_( 39 | module.weight.data, gain=torch.nn.init.calculate_gain("relu") 40 | ) 41 | module.bias.data.zero_() 42 | -------------------------------------------------------------------------------- /ego4d/research/clep/preprocess/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/research/clep/preprocess/__init__.py -------------------------------------------------------------------------------- /ego4d/research/clep/preprocess/charades.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import os 3 | 4 | import h5py 5 | import pandas as pd 6 | import torch 7 | from ego4d.features.config import load_model 8 | from ego4d.research.clep.config import EgoCharadePreprocessConfig, TrainConfig 9 | from ego4d.research.clep.preprocess.common import ( 10 | get_language_model, 11 | run_feature_extraction, 12 | ) 13 | 14 | from ego4d.research.common import batch_it, create_executor 15 | from omegaconf import OmegaConf 16 | from tqdm.auto import tqdm 17 | 18 | 19 | def preprocess_ego_charade( 20 | config: TrainConfig, char_config: EgoCharadePreprocessConfig 21 | ): 22 | out_dir = config.pre_config.root_dir 23 | os.makedirs(out_dir, exist_ok=True) 24 | 25 | df = pd.read_csv(char_config.set_path) 26 | 27 | root_path = char_config.video_root_path 28 | feature_extract_config = OmegaConf.load( 29 | config.input_config.feature_extract_config_path 30 | ) 31 | 32 | class_desc_path = char_config.class_desc_path 33 | class_name_df = pd.read_csv(class_desc_path, header=None) 34 | class_names = [" ".join(x[1].split(" ")[1:]) for x in class_name_df.itertuples()] 35 | 36 | def get_label_name(x): 37 | # strip the leading "Someone is" from the class description before lowercasing 38 | x = x.replace("Someone is", "").strip() 39 | return x.lower() 40 | 41 | sentences_ego = [ 42 | f"Camera wearer is {get_label_name(clazz)}" for clazz in class_names 43 | ] 44 | 45 | sentences_non_ego = [ 46 | f"The person in this video is {get_label_name(clazz)}" for clazz in class_names 47 | ] 48 | model = get_language_model(config) 49 | # pyre-ignore 50 | label_name_fv = model.encode( 51 | class_names, 52 | device="cuda", 53 | show_progress_bar=True, 54 | ) 55 | # pyre-ignore 56 | sent_ego_fv = model.encode( 57 | sentences_ego, 58 | device="cuda", 59 | show_progress_bar=True, 60 | ) 61 | # pyre-ignore 62 | sent_non_ego = model.encode( 63 | sentences_non_ego, 64 | device="cuda", 65 | show_progress_bar=True, 66 | ) 67 | torch.save( 68 | { 69 | "labels": label_name_fv, 70 | "sent_ego_fv": sent_ego_fv, 71 | "sent_non_ego_fv": sent_non_ego, 72 | }, 73 | os.path.join(out_dir, char_config.out_label_path), 74 | ) 75 | video_path_ids = [ 76 | (os.path.join(root_path, f"{row.id}.mp4"), row.id) for row in df.itertuples() 77 | ] 78 | video_path_ids = [vp for vp in video_path_ids if
os.path.exists(vp[0])] 79 | 80 | batches = batch_it(video_path_ids, char_config.num_vids_per_machine) 81 | executor = create_executor(config.pre_config.slurm_config, len(batches)) 82 | map_fn = functools.partial( 83 | _preprocess_ego_charade, 84 | feature_extract_config=feature_extract_config, 85 | ) 86 | 87 | jobs = executor.map_array(map_fn, batches) 88 | 89 | out_path = os.path.join(out_dir, char_config.out_path) 90 | with h5py.File(out_path, "w") as out_f: 91 | for j in tqdm(jobs): 92 | feat = j.result() 93 | for uid, ret in feat.items(): 94 | out_f.create_dataset(uid, data=ret["features"].numpy()) 95 | 96 | 97 | def _preprocess_ego_charade(video_path_ids, feature_extract_config): 98 | model = load_model(feature_extract_config, patch_final_layer=True) 99 | 100 | ret = {} 101 | for path, uid in tqdm(video_path_ids): 102 | predictions = run_feature_extraction(path, model, feature_extract_config) 103 | assert predictions is not None 104 | ret[uid] = { 105 | "features": predictions.result[path], 106 | } 107 | return ret 108 | -------------------------------------------------------------------------------- /ego4d/research/clep/preprocess/common.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | 5 | import torch.nn as nn 6 | from ego4d.features.config import FeatureExtractConfig, Video 7 | from ego4d.features.extract_features import extract_features 8 | from ego4d.features.inference import _video_info as video_info 9 | from ego4d.research.clep.config import TrainConfig 10 | from sentence_transformers import SentenceTransformer 11 | 12 | 13 | def run_feature_extraction( 14 | path: str, model: nn.Module, feature_extract_config: FeatureExtractConfig 15 | ): 16 | v_info = video_info(path) 17 | # pyre-ignore 18 | vid = Video( 19 | path, 20 | path, 21 | v_info["num_frames"], 22 | w=None, 23 | h=None, 24 | has_audio=False, 25 | is_stereo=False, 26 | ) 27 | if vid.frame_count is None: 28 | return None 29 | 30 | feature_extract_config = copy.deepcopy(feature_extract_config) 31 | fps = v_info["fps"] 32 | assert fps is not None 33 | feature_extract_config.inference_config.fps = int(np.round(float(fps))) 34 | feature_extract_config.inference_config.stride = int( 35 | np.round(float(fps * (16 / 30))) 36 | ) 37 | feature_extract_config.inference_config.frame_window = int( 38 | np.round(float(fps * (32 / 30))) 39 | ) 40 | 41 | return extract_features( 42 | videos=[vid], 43 | config=feature_extract_config, 44 | model=model, 45 | log_info=False, 46 | silent=True, 47 | assert_feature_size=False, 48 | ) 49 | 50 | 51 | def get_language_model(config: TrainConfig) -> nn.Module: 52 | return SentenceTransformer(config.pre_config.ego4d_narr.st_model_name) 53 | -------------------------------------------------------------------------------- /ego4d/research/clep/run_preprocess.py: -------------------------------------------------------------------------------- 1 | import hydra 2 | from ego4d.features.config import FeatureExtractConfig 3 | from ego4d.research.clep.config import TrainConfig 4 | from ego4d.research.clep.preprocess.cc import preprocess_cc 5 | from ego4d.research.clep.preprocess.charades import preprocess_ego_charade 6 | from ego4d.research.clep.preprocess.ego4d_data import ( 7 | preprocess_ego_features, 8 | preprocess_ego_narrations, 9 | ) 10 | from ego4d.research.clep.preprocess.kinetics import preprocess_k400_data 11 | 12 | 13 | @hydra.main(config_path="configs", config_name=None) 14 | def preprocess(config: TrainConfig): 
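# Dispatch to the preprocessing routine selected by pre_config.mode (see the CLEP README above for the valid modes).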
15 | if config.pre_config.mode == "ego4d_narr": 16 | preprocess_ego_narrations(config, config.pre_config.ego4d_narr) 17 | elif config.pre_config.mode == "ego4d_features": 18 | preprocess_ego_features( 19 | config.input_config.feature_path, 20 | config, 21 | config.pre_config.ego4d_features, 22 | ) 23 | elif config.pre_config.mode == "k400": 24 | preprocess_k400_data(config, config.pre_config.k400) 25 | elif config.pre_config.mode == "ego_charade": 26 | preprocess_ego_charade(config, config.pre_config.ego_charade) 27 | elif config.pre_config.mode == "cc": 28 | preprocess_cc(config, config.pre_config.cc) 29 | else: 30 | raise AssertionError(f"{config.pre_config.mode} not supported") 31 | 32 | 33 | if __name__ == "__main__": 34 | preprocess() # pyre-ignore 35 | -------------------------------------------------------------------------------- /ego4d/research/clep/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # Taken from: https://raw.githubusercontent.com/wykang/Charades/master/utils/map.py 5 | def mAP(submission_array, gt_array): 6 | """Returns mAP, weighted mAP, and AP array""" 7 | m_aps = [] 8 | n_classes = submission_array.shape[1] 9 | for oc_i in range(n_classes): 10 | sorted_idxs = np.argsort(-submission_array[:, oc_i]) 11 | tp = gt_array[:, oc_i][sorted_idxs] == 1 12 | fp = np.invert(tp) 13 | n_pos = tp.sum() 14 | if n_pos < 0.1: 15 | m_aps.append(float("nan")) 16 | continue 17 | f_pcs = np.cumsum(fp) 18 | t_pcs = np.cumsum(tp) 19 | prec = t_pcs / (f_pcs + t_pcs).astype(float) 20 | avg_prec = 0 21 | for i in range(submission_array.shape[0]): 22 | if tp[i]: 23 | avg_prec += prec[i] 24 | m_aps.append(avg_prec / n_pos.astype(float)) 25 | m_aps = np.array(m_aps) 26 | m_ap = np.mean(m_aps) 27 | w_ap = m_aps * gt_array.sum(axis=0) / gt_array.sum().sum().astype(float) 28 | return m_ap, w_ap, m_aps 29 | 30 | 31 | def nanmap(submission_array, gt_array): 32 | """Returns mAP, weighted mAP, and AP array, ignoring NaN classes when averaging""" 33 | m_aps = [] 34 | n_classes = submission_array.shape[1] 35 | for oc_i in range(n_classes): 36 | sorted_idxs = np.argsort(-submission_array[:, oc_i]) 37 | tp = gt_array[:, oc_i][sorted_idxs] == 1 38 | fp = np.invert(tp) 39 | n_pos = tp.sum() 40 | if n_pos < 0.1: 41 | m_aps.append(float("nan")) 42 | continue 43 | f_pcs = np.cumsum(fp) 44 | t_pcs = np.cumsum(tp) 45 | prec = t_pcs / (f_pcs + t_pcs).astype(float) 46 | avg_prec = 0 47 | for i in range(submission_array.shape[0]): 48 | if tp[i]: 49 | avg_prec += prec[i] 50 | m_aps.append(avg_prec / n_pos.astype(float)) 51 | m_aps = np.array(m_aps) 52 | m_ap = np.nanmean(m_aps) 53 | w_ap = m_aps * np.nansum(gt_array, axis=0) / gt_array.sum().sum().astype(float) 54 | return m_ap, w_ap, m_aps 55 | 56 | 57 | def charades_map(submission_array, gt_array): 58 | """ 59 | Approximate version of the charades evaluation function 60 | For precise numbers, use the submission file with the official matlab script 61 | """ 62 | fix = submission_array.copy() 63 | empty = np.sum(gt_array, axis=1) == 0 64 | fix[empty, :] = np.NINF 65 | return mAP(fix, gt_array) 66 | 67 | 68 | def charades_nanmap(submission_array, gt_array): 69 | """ 70 | Approximate version of the charades evaluation function 71 | For precise numbers, use the submission file with the official matlab script 72 | """ 73 | fix = submission_array.copy() 74 | empty = np.sum(gt_array, axis=1) == 0 75 | fix[empty, :] = np.NINF 76 | return nanmap(fix, gt_array) 77 |
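To sanity-check `charades_map` above, a small worked example (the scores and labels are made up for illustration):

```python
import numpy as np

from ego4d.research.clep.utils import charades_map

# Two samples, three classes; every row has at least one positive label,
# so the NINF masking of empty rows does not trigger here.
scores = np.array([[0.9, 0.2, 0.3], [0.1, 0.8, 0.4]])
labels = np.array([[1, 0, 1], [0, 1, 0]])

m_ap, w_ap, per_class_ap = charades_map(scores, labels)
# Classes 0 and 1 rank their positive sample first (AP = 1.0); class 2 ranks
# its positive second (AP = 0.5), so m_ap = (1.0 + 1.0 + 0.5) / 3 ≈ 0.83.
print(m_ap, per_class_ap)
```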
-------------------------------------------------------------------------------- /ego4d/research/common.py: -------------------------------------------------------------------------------- 1 | import math 2 | from dataclasses import dataclass 3 | from typing import Any, List 4 | 5 | import submitit 6 | 7 | 8 | @dataclass 9 | class SlurmConfig: 10 | slurm_log_folder: str 11 | timeout_min: int 12 | constraint: str 13 | slurm_partition: str 14 | slurm_array_parallelism: int 15 | gpus_per_node: int 16 | cpus_per_task: int 17 | run_locally: bool = False 18 | 19 | 20 | def batch_it(things: List[Any], batch_size: int) -> List[List[Any]]: 21 | num_batches: int = math.ceil(len(things) / batch_size) 22 | 23 | result = [] 24 | for i in range(num_batches): 25 | result.append(things[i * batch_size : (i + 1) * batch_size]) 26 | return result 27 | 28 | 29 | def create_executor(config: SlurmConfig, num_batches: int): 30 | if config.run_locally: 31 | executor = submitit.LocalExecutor(folder=config.slurm_log_folder) 32 | else: 33 | executor = submitit.AutoExecutor(folder=config.slurm_log_folder) 34 | 35 | executor.update_parameters( 36 | timeout_min=config.timeout_min, 37 | slurm_constraint=config.constraint, 38 | slurm_partition=config.slurm_partition, 39 | slurm_array_parallelism=min(config.slurm_array_parallelism, num_batches), 40 | gpus_per_node=config.gpus_per_node, 41 | cpus_per_task=config.cpus_per_task, 42 | ) 43 | return executor 44 | -------------------------------------------------------------------------------- /ego4d/research/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/ego4d/research/util/__init__.py -------------------------------------------------------------------------------- /ego4d/research/util/masks.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import numpy as np 4 | 5 | from ego4d.research.util.lzstring import decompress_from_encoded_uri 6 | from pycocotools import mask as mask_utils 7 | 8 | 9 | @dataclass 10 | class Mask: 11 | width: int 12 | height: int 13 | encoded_mask: str 14 | 15 | 16 | def decode_mask(mask: dict) -> np.ndarray: 17 | w = mask["width"] 18 | h = mask["height"] 19 | encoded_mask = mask["encodedMask"] 20 | return decode_mask_obj(Mask(width=w, height=h, encoded_mask=encoded_mask)) 21 | 22 | 23 | def decode_mask_obj(mask: Mask) -> np.ndarray: 24 | decomp_string = decompress_from_encoded_uri(mask.encoded_mask) 25 | decomp_encoded = decomp_string.encode() # pyre-ignore 26 | rle_obj = { 27 | "size": [mask.height, mask.width], 28 | "counts": decomp_encoded, 29 | } 30 | 31 | output = mask_utils.decode(rle_obj) 32 | return output 33 | 34 | 35 | def blend_mask( 36 | input_img: np.ndarray, binary_mask: np.ndarray, alpha: float = 0.5 37 | ) -> np.ndarray: 38 | if input_img.ndim == 2: 39 | return input_img 40 | 41 | mask_image = np.zeros(input_img.shape, np.uint8) 42 | mask_image[:, :, 1] = 255 43 | mask_image = mask_image * np.repeat(binary_mask[:, :, np.newaxis], 3, axis=2) 44 | 45 | blend_image = input_img.copy()  # copy so the caller's input frame is not mutated in place 46 | pos_idx = binary_mask > 0 47 | for ind in range(input_img.ndim): 48 | ch_img1 = input_img[:, :, ind] 49 | ch_img2 = mask_image[:, :, ind] 50 | ch_img3 = blend_image[:, :, ind] 51 | ch_img3[pos_idx] = alpha * ch_img1[pos_idx] + (1 - alpha) * ch_img2[pos_idx] 52 | blend_image[:, :, ind] = ch_img3 53 | return blend_image 54 |
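`batch_it` and `create_executor` from `ego4d/research/common.py` above are the fan-out pattern the preprocessing code uses (e.g. in `charades.py`). A minimal sketch of that pattern, with a placeholder worker and SLURM values borrowed from `omnivore_features.yaml`:

```python
from ego4d.research.common import SlurmConfig, batch_it, create_executor


def process_batch(batch):
    # Placeholder worker: a real job would, e.g., extract features for `batch`.
    return len(batch)


items = list(range(1000))
batches = batch_it(items, batch_size=100)  # 10 batches of 100 items

config = SlurmConfig(
    slurm_log_folder="slurm_log",
    timeout_min=240,
    constraint="volta",
    slurm_partition="pixar",
    slurm_array_parallelism=100,
    gpus_per_node=1,
    cpus_per_task=10,
    run_locally=True,  # submitit.LocalExecutor; set False to submit to SLURM
)
executor = create_executor(config, len(batches))
jobs = executor.map_array(process_batch, batches)
results = [j.result() for j in jobs]  # blocks until all jobs complete
```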
-------------------------------------------------------------------------------- /notebooks/egoexo/requirements.txt: -------------------------------------------------------------------------------- 1 | av==8.1.0 2 | projectaria_tools==1.5.0 3 | torchvision==0.12.0 4 | 5 | -------------------------------------------------------------------------------- /notebooks/moments_cvpr/conda-env.yaml: -------------------------------------------------------------------------------- 1 | name: moments_cvpr 2 | channels: 3 | - pytorch 4 | - conda-forge 5 | - defaults 6 | dependencies: 7 | - python=3.8 8 | - pytorch::pytorch=1.11.0 9 | - pytorch::torchvision=0.12.0 10 | - pytorch::torchaudio=0.11.0 11 | - conda-forge::ffmpeg 12 | - pip 13 | - pip: 14 | - git+https://github.com/facebookresearch/pytorchvideo.git 15 | -------------------------------------------------------------------------------- /notebooks/moments_cvpr/moments_label_ids.json: -------------------------------------------------------------------------------- 1 | { 2 | "serve_food_onto_a_plate": 0, 3 | "converse_/_interact_with_someone": 1, 4 | "use_phone": 2, 5 | "clean_/_wipe_a_table_or_kitchen_counter": 3, 6 | "plant_seeds_/_plants_/_flowers_into_ground": 4, 7 | "tie_up_branches_/_plants_with_string": 5, 8 | "cut_tree_branch": 6, 9 | "harvest_vegetables_/_fruits_/_crops_from_trees": 7, 10 | "remove_weeds_from_ground": 8, 11 | "cut_other_item_using_tool": 9, 12 | "throw_away_trash_/_put_trash_in_trash_can": 10, 13 | "water_soil_/_plants_/_crops": 11, 14 | "wash_hands": 12, 15 | "turn-on_/_light_the_stove_burner": 13, 16 | "trim_hedges_or_branches": 14, 17 | "harvest_vegetables_/_fruits_/_crops_from_plants_on_the_ground": 15, 18 | "put_away_(or_take_out)_food_items_in_the_fridge": 16, 19 | "put_food_into_the_oven_to_bake": 17, 20 | "remove_food_from_the_oven": 18, 21 | "load/unload_the_dishwasher": 19, 22 | "wash_vegetable_/_fruit_/_food_item": 20, 23 | "stir_/_mix_food_while_cooking": 21, 24 | "use_the_microwave_/_warm_something_in_the_microwave": 22, 25 | "fill_a_pot_/_bottle_/_container_with_water": 23, 26 | "\"cut_/_chop_/_slice_a_vegetable,_fruit,_or_meat\"": 24, 27 | "taste_food_while_cooking": 25, 28 | "clean_/_wipe_other_surface_or_object": 26, 29 | "knead_/_shape_/_roll-out_dough": 27, 30 | "put_away_(or_take_out)_dishes_/_utensils_in_storage": 28, 31 | "put_away_(or_take_out)_ingredients_in_storage": 29, 32 | "browse_through_groceries_or_food_items_on_rack_/_shelf": 30, 33 | "peel_a_fruit_or_vegetable": 31, 34 | "cut_dough": 32, 35 | "cut_open_a_package_(e.g._with_scissors)": 33, 36 | "fold_clothes_/_sheets": 34, 37 | "iron_clothes_or_sheets": 35, 38 | "drink_beverage": 36, 39 | "load_/_unload_a_washing_machine_or_dryer": 37, 40 | "arrange_/_organize_other_items": 38, 41 | "arrange_/_organize_clothes_in_closet/dresser": 39, 42 | "drill_into_wall_/_wood_/_floor_/_metal": 40, 43 | "hang_clothes_in_closet_/_on_hangers": 41, 44 | "clean_/_sweep_floor_with_broom": 42, 45 | "use_a_laptop_/_computer": 43, 46 | "walk_down_stairs_/_walk_up_stairs": 44, 47 | "read_a_book_/_magazine_/_shopping_list_etc.": 45, 48 | "watch_television": 46, 49 | "wash_dishes_/_utensils_/_bakeware_etc.": 47, 50 | "\"put_on_safety_equipment_(e.g._gloves,_helmet,_safety_goggles)\"": 48, 51 | "paint_using_paint_brush_/_roller": 49, 52 | "cut_thread_/_paper_/_cardboard_using_scissors_/_knife_/_cutter": 50, 53 | "drive_a_vehicle": 51, 54 | "eat_a_snack": 52, 55 | "fix_wiring": 53, 56 | "fix_other_item": 54, 57 | "operate_a_dough_mixing_machine": 55, 58 | 
"use_a_vacuum_cleaner_to_clean": 56, 59 | "cut_/_trim_grass_with_other_tools": 57, 60 | "cut_/_trim_grass_with_a_lawnmower": 58, 61 | "pack_food_items_/_groceries_into_bags_/_boxes": 59, 62 | "arrange_/_organize_items_in_fridge": 60, 63 | "clean_/_wipe_kitchen_appliance": 61, 64 | "stir_/_mix_ingredients_in_a_bowl_or_pan_(before_cooking)": 62, 65 | "hang_clothes_to_dry": 63, 66 | "dig_or_till_the_soil_with_a_hoe_or_other_tool": 64, 67 | "exit_a_supermarket_/_shop": 65, 68 | "write_notes_in_a_paper_/_book": 66, 69 | "enter_a_supermarket_/_shop": 67, 70 | "interact_or_play_with_pet_/_animal": 68, 71 | "collect_/_rake_dry_leaves_on_ground": 69, 72 | "weigh_food_/_ingredient_using_a_weighing_scale": 70, 73 | "play_a_video_game": 71, 74 | "pack_soil_into_the_ground_or_a_pot_/_container": 72, 75 | "pay_at_billing_counter": 73, 76 | "place_items_in_shopping_cart": 74 77 | } -------------------------------------------------------------------------------- /notebooks/moments_cvpr/moments_mini_train_uids.csv: -------------------------------------------------------------------------------- 1 | 2560bb74-cad3-4ea6-b034-bea360d9507c 2 | f161ecef-4337-4d50-8d71-41ed1ea1982d 3 | e4259a99-fd9f-437b-b887-3d671e32d4f0 4 | 10217e82-0c41-4ee5-af7d-2e2ecf67b7d1 5 | b1ac8c7e-6b29-4196-a28b-e4e21b872ede 6 | 5af8e5f9-2a28-485c-9604-6f5f9f290c82 7 | 1ce16a97-f614-4660-b21c-4205b33c8bab 8 | 6fd90f8d-7a4d-425d-a812-3268db0b0342 9 | 2970b84a-d6cf-4593-a487-53f853f0418f 10 | 06456897-960d-4d0c-8ce2-cd50a5a57bc3 11 | b0429a48-6f89-4544-8128-a24fd63d3149 12 | fc9824ed-63c1-4844-a80a-2c223d4ff7c1 13 | 8746eb41-4b1b-4ce7-a0af-d84570f75a24 14 | 04fe8f4d-081e-437e-a56a-2d53b6233fc9 15 | af8dda66-0025-4ee4-9888-b568b28986e6 16 | 115cb95f-7030-4eab-b5de-120b63b3f21c 17 | a0705b91-51b7-489d-8b7d-09282f85db6e 18 | 200a8f32-c44a-463d-ba39-0190a0bddf7a 19 | 555ff1a5-c30b-434c-a251-b5105cdc1fb9 20 | -------------------------------------------------------------------------------- /notebooks/moments_cvpr/moments_mini_val_uids.csv: -------------------------------------------------------------------------------- 1 | 3b0ce903-543e-4a4b-a17b-ccc3739f60f0 2 | 2a2ff7db-5460-4296-a8a7-946ba628226d 3 | 9930414f-b998-4217-b6b5-aa686236fdf8 4 | 1c7426ad-b6ba-4494-897c-537dd4a201e9 5 | 6e0c294c-d634-4804-8d41-cd6e9d79f0b9 6 | 6f93978d-a3d0-467d-b401-e23b3c92f3bf 7 | 0793bbe0-b8d5-4d46-9f02-c71d1bd4fad2 8 | 91e3e6ce-bc01-4720-a490-e319dd380509 9 | e303a071-7d0b-40e7-b8df-dea6e79ad7e8 10 | 9954bb90-a2fd-444f-bc5b-437058a7db07 11 | -------------------------------------------------------------------------------- /notebooks/requirements.txt: -------------------------------------------------------------------------------- 1 | av==8.1.0 2 | boto3==1.20.35 3 | botocore==1.23.35 4 | celluloid==0.2.0 5 | iopath==0.1.9 6 | ipython==8.0.1 7 | ipywidgets==7.6.5 8 | matplotlib==3.5.1 9 | numpy==1.22.0 10 | opencv_python==4.5.5.62 11 | pandas==1.3.5 12 | progressbar33==2.4 13 | deepdiff==5.8.1 14 | 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.usort] 2 | first_party_detection = false 3 | 4 | [tool.black] 5 | target-version = ["py38"] 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | pytorchvideo 3 | boto3 4 | regex 5 | iopath 6 | dataclasses_json 7 | pycocotools 8 | 
av==9.0.2
9 | einops==0.4.1
10 | numpy==1.22.3
11 | pandas==1.4.1
12 | speechbrain==0.5.11
13 | submitit==1.4.1
14 | timm==0.5.4
15 | torch==1.11.0
16 | torchaudio==0.11.0
17 | torchvision==0.12.0
18 | black==22.3.0
19 | ufmt==2.0.0
20 | usort==1.0.3
21 |
-------------------------------------------------------------------------------- /run_viz.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 | set -e
4 |
5 | # Help Screen
6 | HelpScreen()
7 | {
8 |     # Display Help
9 |     echo "Downloads and Starts Mephisto Visualization of Ego4D Data"
10 |     echo
11 |     echo "usage: ./run_viz.sh [options]"
12 |     echo "  options:"
13 |     echo "  -h  Print help screen"
14 |     echo "  -o  Overrides video root dir (default ~/ego4d_data/)"
15 |     echo "  -r  Overrides review build dir (default ./viz/narrations/review/build/)"
16 |     echo "  -p  Run server on a different port (default 3030)"
17 |     echo
18 | }
19 |
20 | # Handle CMD Options
21 | while getopts ":ho:r:p:" option; do
22 |     case $option in
23 |         h) # display help
24 |             HelpScreen
25 |             exit;;
26 |         o) # Enter a vid root dir
27 |             VID_ROOT=$OPTARG;;
28 |         r) # Enter a review build dir
29 |             REVIEW_DIR=$OPTARG;;
30 |         p) # Pick an alternate port
31 |             PORT=$OPTARG;;
32 |         \?) # Invalid option
33 |             echo "Error: Invalid option, please check the usage:"
34 |             HelpScreen
35 |             exit;;
36 |     esac
37 | done
38 |
39 | # Configuration options:
40 | VID_ROOT=${VID_ROOT:-~/ego4d_data/}
41 | REVIEW_DIR=${REVIEW_DIR:-./viz/narrations/review/build/}
42 | PORT=${PORT:-3030}
43 |
44 | # If there's a ~/ starting any paths, they need to be expanded for file checks to work
45 | VID_ROOT=${VID_ROOT/#~\//$HOME\/}
46 | REVIEW_DIR=${REVIEW_DIR/#~\//$HOME\/}
47 |
48 | # This is the preprocessed data that will drive the review interface:
49 | INPUT_FILE=${INPUT_FILE:-$VID_ROOT/v1/viz/preprocessed_narrations_input.jsonl}
50 |
51 | # Check that Mephisto is installed:
52 | if ! command -v mephisto &> /dev/null
53 | then
54 |     echo "Mephisto could not be found. Install with: pip install mephisto";
55 |     exit 1
56 | fi
57 |
58 | # Check that the preprocessed data exists, if not attempt to download it:
59 | if [ -f "$INPUT_FILE" ]; then
60 |     echo "Preprocessed file found, using $INPUT_FILE"
61 |     true
62 | else
63 |     # If the file cannot be found within the ego4d viz dataset location,
64 |     # we will need to download the dataset first:
65 |     echo "Preprocessed file not found, downloading the 'viz' dataset using the Ego4D CLI to $VID_ROOT..."
66 |     python -m ego4d.cli.cli --yes --datasets viz --output_directory "$VID_ROOT"
67 | fi
68 |
69 | if [ -f "$INPUT_FILE" ]; then
70 |     cat "$INPUT_FILE" | VID_ROOT=$VID_ROOT REVIEW_DIR=$REVIEW_DIR PORT=$PORT ./viz/narrations/recipes/4_review.sh
71 | else
72 |     # If the file still cannot be found, it's an error
73 |     echo "Error: $INPUT_FILE does not exist."
74 |     exit 1
75 | fi
76 |
-------------------------------------------------------------------------------- /scripts/run_pyre.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | pyre --source-directory ego4d
4 |
-------------------------------------------------------------------------------- /setup.cfg: --------------------------------------------------------------------------------
1 | [metadata]
2 | description-file=README.md
3 | license_files=LICENSE
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | from setuptools import find_packages, setup
5 |
6 | setup(
7 |     name="ego4d",
8 |     version="1.7.3",
9 |     author="FAIR",
10 |     author_email="info@ego4d-data.org",
11 |     description="Ego4D Dataset CLI",
12 |     url="https://github.com/facebookresearch/Ego4d/",
13 |     install_requires=[
14 |         "boto3",
15 |         "tqdm",
16 |         "regex",
17 |         "dataclasses_json",
18 |         "iopath",
19 |     ],
20 |     tests_require=[
21 |         "pytest",
22 |         "moto",
23 |     ],
24 |     packages=find_packages(exclude=("tests", "tests.*")),
25 |     entry_points={
26 |         "console_scripts": [
27 |             "ego4d=ego4d.cli.cli:main",
28 |             "egoexo=ego4d.egoexo.download.cli:main",
29 |             "egoexo_internal=ego4d.internal.download.cli:internal_main",
30 |             "ego4d_validation = ego4d.internal.validation.cli:main",
31 |         ],
32 |     },
33 | )
34 |
-------------------------------------------------------------------------------- /viz/narrations/README.md: --------------------------------------------------------------------------------
1 | This folder consists of:
2 |
3 | 1. a review interface in the `review/` directory
4 | 2. various recipes in the `recipes/` directory
5 |
6 | ## Start Script
7 |
8 | The easiest way to run the interface is to use the `run_viz.sh` script.
9 | Once you have Mephisto installed ([`pip install mephisto`](https://github.com/facebookresearch/mephisto/blob/main/docs/quickstart.md)) and the Ego4D CLI installed, you can run:
10 |
11 | ```
12 | ./run_viz.sh
13 | ```
14 |
15 | from this repo's root directory.
16 |
17 | This will download the ego4d `viz` dataset and launch the interface. You can use `-h` or examine the script to modify configuration options.
18 |
19 | In particular, `VID_ROOT` will default to `~/ego4d_data/` and should be updated if you've already used the CLI to download the videos (and/or the `viz` dataset, which is used here).
20 |
21 | ## Review Interface
22 |
23 | The `review/` folder was created via a create-react-app template.
24 |
25 | ```bash
26 | $ npx create-react-app review --template mephisto-review
27 | ```
28 |
29 | Most of the custom code added to the template can be found in either the `review/src/custom/` folder or `review/src/index.js`.
30 |
31 | Therefore, if you'd like to update the version of the template to integrate upstream changes, you can easily do so by invoking `create-react-app` again as shown above and copying over the folder and file mentioned above.
32 |
33 | ## Recipes
34 |
35 | See the [README](recipes/README.md) in that directory for more information on installation and prerequisites.
36 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/.gitignore: --------------------------------------------------------------------------------
1 | narrations*.json
2 | final.json
3 | checkpoint.*
4 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/0_config.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | INPUT_FILE=${INPUT_FILE:-./narrations_v2_7-27-21.json}
5 | VID_ROOT=${VID_ROOT:-~/ego4d}
6 |
7 | export INPUT_FILE
8 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/1_gather_ids.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | INPUT_FILE=${INPUT_FILE:-./narrations_v2_7-27-21.json}
5 | FIRST_X="${1:-2}"
6 |
7 | if [[ $FIRST_X = "ALL" ]]
8 | then
9 |     jq -r 'keys | @csv' "$INPUT_FILE" | tr -d '"' | tr , ' '
10 | else
11 |     jq -r --arg FIRST_X "$FIRST_X" 'keys | .[0:($FIRST_X|tonumber)] | @csv' "$INPUT_FILE" | tr -d '"' | tr , ' '
12 | fi
13 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/2_dl_videos.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | VID_ROOT=${VID_ROOT:-~/ego4d}
5 | OUTPUT=${1:-LOG}
6 |
7 | if [[ $OUTPUT = "LOG" ]]
8 | then
9 |     xargs -n 1 -I {} python -m ego4d.cli.cli --yes --datasets full_scale viz --output_directory $VID_ROOT --video_uids {}
10 | else
11 |     cat
12 |     xargs -n 1 -I {} python -m ego4d.cli.cli --yes --datasets full_scale viz --output_directory $VID_ROOT --video_uids {} > /dev/null
13 | fi
14 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/3_prepare_input.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | INPUT_FILE=${INPUT_FILE:-./narrations_v2_7-27-21.json}
5 | VID_ROOT=${VID_ROOT:-~/ego4d}
6 | MODE="${1:-ALL}"
7 | BASE_PATH=/assets/
8 | # Each output record: {file, img, uid, info: {type: TIME_SEGMENTATION, role: RESULT, payload: [{start_time, end_time, label, id}]}}
9 | if [[ $MODE = "ALL" ]] # ALL: one JSONL record per video in the narrations export
10 | then
11 |     jq -c --arg BASE_PATH "$BASE_PATH" 'to_entries[] | {file: ($BASE_PATH+"full_scale/"+.key+".mp4"), img: ($BASE_PATH+"viz/"+.key+"_small.jpg"), uid: .key, info: { type: "TIME_SEGMENTATION", role: "RESULT", payload: .value.narration_pass_1.narrations | map({start_time: .timestamp_sec, end_time: .timestamp_sec, label: .narration_text, id: .annotation_uid}) } }' "$INPUT_FILE"
12 | else # otherwise: read video uids from stdin and emit a record for each
13 |     xargs -n 1 -I {} jq --arg BASE_PATH "$BASE_PATH" --arg ID "{}" '{info: { payload: .[$ID].narration_pass_1.narrations , type: "TIME_SEGMENTATION", role: "RESULT" }, file: ($BASE_PATH+"full_scale/"+$ID+".mp4"), img: ($BASE_PATH+"viz/"+$ID+"_small.jpg"), uid: $ID }' "$INPUT_FILE" | jq -c '.info.payload |= map({start_time: .timestamp_sec, end_time: .timestamp_sec, label: .narration_text, id: .annotation_uid})'
14 | fi
15 |
16 | # jq -c "to_entries[]" ../narrations_v2_7-27-21.json
17 | # jq -c '{file: ("/assets"+.key+".mp4"), uid: .key, info: { type: "TIME_SEGMENTATION", role: "RESULT", payload: .value.narration_pass_1.narrations | map({start_time: .timestamp_sec, end_time: .timestamp_sec, label: .narration_text, id: .annotation_uid}) } }' ./to_entries.json > final.json
18 |
19 | # jq -c 'to_entries[] | {file: ("/assets/"+.key+".mp4"), uid: .key, info: { type: "TIME_SEGMENTATION", role: "RESULT", payload: .value.narration_pass_1.narrations | map({start_time: .timestamp_sec, end_time: .timestamp_sec, label: .narration_text, id: .annotation_uid}) } }' $INPUT_FILE
20 |
-------------------------------------------------------------------------------- /viz/narrations/recipes/4_review.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
3 |
4 | VID_ROOT=${VID_ROOT:-~/ego4d}
5 | REVIEW_DIR=${REVIEW_DIR:-../review/build/}
6 | PORT=${PORT:-3030}
7 |
8 | mephisto review $REVIEW_DIR --json --stdout --assets $VID_ROOT/v1/ --all --port $PORT
9 |
-------------------------------------------------------------------------------- /viz/narrations/review/.gitattributes: --------------------------------------------------------------------------------
1 | **/build/**/* binary
2 |
-------------------------------------------------------------------------------- /viz/narrations/review/.gitignore: --------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # production
12 | # build
13 |
14 | # misc
15 | .DS_Store
16 | .env.local
17 | .env.development.local
18 | .env.test.local
19 | .env.production.local
20 |
21 | npm-debug.log*
22 | yarn-debug.log*
23 | yarn-error.log*
24 |
-------------------------------------------------------------------------------- /viz/narrations/review/build/asset-manifest.json: --------------------------------------------------------------------------------
1 | {
2 |   "files": {
3 |     "main.css": "/static/css/main.b199b389.chunk.css",
4 |     "main.js": "/static/js/main.9fedd964.chunk.js",
5 |     "main.js.map": "/static/js/main.9fedd964.chunk.js.map",
6 |     "runtime-main.js": "/static/js/runtime-main.23afa8cb.js",
7 |     "runtime-main.js.map": "/static/js/runtime-main.23afa8cb.js.map",
8 |     "static/css/2.0e909511.chunk.css": "/static/css/2.0e909511.chunk.css",
9 |     "static/js/2.d8ddf740.chunk.js": "/static/js/2.d8ddf740.chunk.js",
10 |     "static/js/2.d8ddf740.chunk.js.map": "/static/js/2.d8ddf740.chunk.js.map",
11 |     "index.html": "/index.html",
12 |     "static/css/2.0e909511.chunk.css.map": "/static/css/2.0e909511.chunk.css.map",
13 |     "static/css/main.b199b389.chunk.css.map": "/static/css/main.b199b389.chunk.css.map",
14 |     "static/js/2.d8ddf740.chunk.js.LICENSE.txt": "/static/js/2.d8ddf740.chunk.js.LICENSE.txt",
15 |     "static/media/blueprint-icons.css": "/static/media/icons-20.cde033c5.eot"
16 |   },
17 |   "entrypoints": [
18 |     "static/js/runtime-main.23afa8cb.js",
19 |     "static/css/2.0e909511.chunk.css",
20 |     "static/js/2.d8ddf740.chunk.js",
21 |     "static/css/main.b199b389.chunk.css",
22 |     "static/js/main.9fedd964.chunk.js"
23 |   ]
24 | }
-------------------------------------------------------------------------------- /viz/narrations/review/build/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/viz/narrations/review/build/favicon.ico
-------------------------------------------------------------------------------- /viz/narrations/review/build/index.html: --------------------------------------------------------------------------------
1 | [single-line minified CRA build page — markup stripped in this dump; only the page title "Mephisto Review" survives as text]
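Stepping back to the recipes above: the jq one-liner in `3_prepare_input.sh` is dense. A rough Python equivalent of its "ALL" branch may be easier to follow (a sketch only; file names are illustrative, matching the script's defaults):

```python
import json

BASE_PATH = "/assets/"

# Narrations export: {video_uid: {"narration_pass_1": {"narrations": [...]}, ...}}
with open("narrations_v2_7-27-21.json") as f:
    narrations = json.load(f)

with open("preprocessed_narrations_input.jsonl", "w") as out:
    for uid, video in narrations.items():
        payload = [
            {
                "start_time": n["timestamp_sec"],
                "end_time": n["timestamp_sec"],  # point events: zero-length segments
                "label": n["narration_text"],
                "id": n["annotation_uid"],
            }
            for n in video["narration_pass_1"]["narrations"]
        ]
        record = {
            "file": f"{BASE_PATH}full_scale/{uid}.mp4",
            "img": f"{BASE_PATH}viz/{uid}_small.jpg",
            "uid": uid,
            "info": {"type": "TIME_SEGMENTATION", "role": "RESULT", "payload": payload},
        }
        out.write(json.dumps(record) + "\n")
```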
-------------------------------------------------------------------------------- /viz/narrations/review/build/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /viz/narrations/review/build/static/css/main.b199b389.chunk.css: -------------------------------------------------------------------------------- 1 | .json-item-renderer{margin:auto 0;height:100%;width:100%}.json-item-card{height:100%}.json-item-renderer-pre{text-align:left;white-space:pre-wrap;max-width:75vw;max-height:75vh;overflow:auto;color:#555}.json-item-renderer-pre.small{height:16vh}.default-collection-renderer-container{display:grid;grid-template-rows:auto;grid-template-columns:50% 50%;grid-row-gap:12px;grid-column-gap:12px;padding:12px 24px 0;min-width:75vw;max-width:100vw;justify-content:center}@media only screen and (min-width:1400px){.default-collection-renderer-container{grid-template-columns:33.33% 33.33% 33.33%}}.word-cloud{text-align:center;justify-content:center}.word-cloud p{text-align:center;vertical-align:middle;width:100%}.word-cloud-item-renderer{margin:auto 0}.word-cloud-item-renderer-card.small{width:100%;height:27vh;overflow:auto}.word-cloud-item-renderer-card{height:100%;max-width:75vw;max-height:75vh;overflow:auto}.list-view-renderer-container{width:75vw;margin:12px 0 0;justify-content:center}.list-view-renderer-item{text-align:left;padding:0 4px;width:100%;margin:auto 0}.list-view-renderer-item.divider{margin-top:18px}.list-view-renderer-item>pre{text-align:left;white-space:pre-wrap;max-width:75vw;max-height:75vh;overflow:auto;color:#555;height:16vh}.pagination{padding:12px 0}body{margin:0;font-family:-apple-system,BlinkMacSystemFont,"Segoe UI","Roboto","Oxygen","Ubuntu","Cantarell","Fira Sans","Droid Sans","Helvetica Neue",sans-serif;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;background-color:#ededed}code{font-family:source-code-pro,Menlo,Monaco,Consolas,"Courier New",monospace}.error{color:#e3242b}.navbar-wrapper{margin:0 auto}.all-item-view-search-bar{width:60vw}@media only screen and (max-width:1200px){.all-item-view-search-bar{width:75vw}}.all-item-view-message{margin:auto 0}.all-item-view-error{margin-top:36px;max-width:50%}.all-item-view-no-data{max-width:50%}.all-item-view,.item-view{padding-top:50px;width:100vw;overflow:auto}.item-dynamic{display:flex;flex-direction:column;align-items:center;font-size:calc(10px + 1vmin);height:calc(100vh - 50px)}.item-view-message{margin:auto 0}.item-view-error{position:absolute;bottom:0;left:0;right:0;background:red;color:#fff;padding:10px;font-size:14px}.item-view-no-data{max-width:50%}.btn{margin:0 6px}code{background-color:#ddd;padding:3px 5px}.highlight{background-color:hsla(60,83%,68%,.3)}.segment-wrapper{display:block;margin:3px 0 10px;background-color:#eee;padding:10px;border-radius:10px;border:1px solid #dedede;position:relative;cursor:pointer;transition:transform .15s ease-out}.segment-wrapper:hover{box-shadow:0 5px 10px 3px rgba(56,55,53,.1);transform:translateY(-4px);border-color:#c1c1c1}.segment-wrapper.active{background-color:#fdf7dc}.bar{background-color:#ff4500;height:5px}.track{background-color:#ddd;border-radius:3px;height:5px;margin:4px 0 10px}.app-container{display:flex;background-color:#fff;min-height:calc(100vh - 50px)}.segment-viewer{margin-left:50%;padding:10px 20px 30px 30px;flex:1 
1}.video-viewer{position:fixed;width:50%;padding:10px 30px 30px 20px}.duration{font-size:10px}.button{color:#000!important;font-weight:700;text-transform:uppercase;text-decoration:none;background:#eee;padding:10px;border-radius:5px;display:inline-block;border:none;transition:all .3s ease 0s;cursor:pointer;margin:0 auto}.button:hover{background:#ccc;letter-spacing:1px;box-shadow:5px 40px -10px rgba(0,0,0,.57);transition:all .4s ease 0s}
2 | /*# sourceMappingURL=main.b199b389.chunk.css.map */
-------------------------------------------------------------------------------- /viz/narrations/review/build/static/js/runtime-main.23afa8cb.js: --------------------------------------------------------------------------------
1 | [minified webpack runtime chunk — truncated mid-line in this dump]
-------------------------------------------------------------------------------- /viz/narrations/review/package.json: --------------------------------------------------------------------------------
[header reconstructed; the leading portion of this file (name, dependencies, scripts, and the opening of the "browserslist" block) was lost in the same truncation — only the tail survives]
33 |       ">0.2%",
34 |       "not dead",
35 |       "not op_mini all"
36 |     ],
37 |     "development": [
38 |       "last 1 chrome version",
39 |       "last 1 firefox version",
40 |       "last 1 safari version"
41 |     ]
42 |   }
43 | }
44 |
-------------------------------------------------------------------------------- /viz/narrations/review/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/viz/narrations/review/public/favicon.ico
-------------------------------------------------------------------------------- /viz/narrations/review/public/index.html: --------------------------------------------------------------------------------
1 | [CRA public/index.html template — markup stripped in this dump; the only surviving text is the page title "Mephisto Review" (line 17)]
-------------------------------------------------------------------------------- /viz/narrations/review/public/robots.txt: --------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 |
-------------------------------------------------------------------------------- /viz/narrations/review/sample-data.csv: --------------------------------------------------------------------------------
1 | 1,ball,bouncy
2 | 2,car,shiny
3 |
-------------------------------------------------------------------------------- /viz/narrations/review/sample-data.jsonl: --------------------------------------------------------------------------------
1 | {"id": 1, "name": "ball", "description": "bouncy"}
2 | {"id": 2, "name": "car", "description": "shiny"}
3 |
-------------------------------------------------------------------------------- /viz/narrations/review/src/components/ErrorPane.js: --------------------------------------------------------------------------------
1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
2 |
3 | import React from "react";
4 | import { Button, Icon } from "@blueprintjs/core";
5 |
6 | function ErrorPane({ error }) {
7 |   const [hide, setHide] = React.useState(false);
8 |   const errorType = error && error.type;
9 |   React.useEffect(() => {
10 |     if (!error) return;
11 |     console.error(error);
12 |   }, [error, errorType]);
13 |   return (
14 |     error &&
15 |     !hide && (
16 | [lines 16–23: JSX markup stripped in this dump — the error pane markup, including a dismiss Button/Icon control wired to setHide(true)]
24 | ) 25 | ); 26 | } 27 | 28 | export default ErrorPane; 29 | -------------------------------------------------------------------------------- /viz/narrations/review/src/components/pagination/Pagination.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */ 2 | 3 | .pagination { 4 | padding: 12px 0px; 5 | } 6 | -------------------------------------------------------------------------------- /viz/narrations/review/src/components/pagination/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import Pagination from "./Pagination"; 4 | 5 | export { Pagination }; 6 | -------------------------------------------------------------------------------- /viz/narrations/review/src/config.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | const config = { 4 | /* 5 | The port that useMephistoReview() in the browser will connect to the 6 | server on. 7 | 8 | Tip: This is useful when developing on the review interface locally so UI changes 9 | don't require you to kill and relaunch the server each time. 10 | You can launch `$ mephisto review --port 9000` once to launch 11 | the review server fed in with the appropriate data source, and then instead of using 12 | the interface at , you can run `npm start` to get a 13 | live-reloading developer build server. 14 | */ 15 | // port: 3030, 16 | }; 17 | 18 | export default config; 19 | -------------------------------------------------------------------------------- /viz/narrations/review/src/custom/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/facebookresearch/Ego4d/077fcfc4705779fc815ff109713a930db72fd22f/viz/narrations/review/src/custom/.gitkeep -------------------------------------------------------------------------------- /viz/narrations/review/src/custom/NarrationsApp.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
*/ 2 | 3 | .segment { 4 | } 5 | .segment-wrapper { 6 | display: block; 7 | margin: 3px 0px 10px 0px; 8 | background-color: #eee; 9 | padding: 10px; 10 | border-radius: 10px; 11 | border: 1px solid #dedede; 12 | position: relative; 13 | cursor: pointer; 14 | transition: transform 0.15s ease-out; 15 | } 16 | 17 | .segment-wrapper:hover { 18 | box-shadow: 0px 5px 10px 3px rgba(56, 55, 53, 0.1); 19 | transform: translateY(-4px); 20 | border-color: #c1c1c1; 21 | } 22 | 23 | .segment-wrapper.active { 24 | background-color: rgb(253, 247, 220); 25 | } 26 | 27 | .bar { 28 | background-color: orangered; 29 | height: 5px; 30 | } 31 | 32 | .track { 33 | background-color: #ddd; 34 | border-radius: 3px; 35 | height: 5px; 36 | margin: 4px 0 10px; 37 | } 38 | 39 | .app-container { 40 | display: flex; 41 | background-color: white; 42 | min-height: calc(100vh - 50px); 43 | } 44 | 45 | .segment-viewer { 46 | margin-left: 50%; 47 | padding: 10px 20px 30px 30px; 48 | flex: 1; 49 | } 50 | 51 | .video-viewer { 52 | position: fixed; 53 | width: 50%; 54 | padding: 10px 30px 30px 20px; 55 | } 56 | 57 | .duration { 58 | font-size: 10px; 59 | } 60 | 61 | .button { 62 | color: black !important; 63 | font-weight: bold; 64 | text-transform: uppercase; 65 | text-decoration: none; 66 | background: #eee; 67 | padding: 10px; 68 | border-radius: 5px; 69 | display: inline-block; 70 | border: none; 71 | transition: all 0.3s ease 0s; 72 | cursor: pointer; 73 | margin: 0 auto; 74 | } 75 | 76 | .button:hover { 77 | background: #ccc; 78 | letter-spacing: 1px; 79 | -webkit-box-shadow: 0px 5px 40px -10px rgba(0, 0, 0, 0.57); 80 | -moz-box-shadow: 0px 5px 40px -10px rgba(0, 0, 0, 0.57); 81 | box-shadow: 5px 40px -10px rgba(0, 0, 0, 0.57); 82 | transition: all 0.4s ease 0s; 83 | } 84 | -------------------------------------------------------------------------------- /viz/narrations/review/src/custom/NarrationsItem.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React from "react"; 4 | import NarrationsApp from "./NarrationsApp"; 5 | 6 | function NarrationsItem({ item }) { 7 | const data = item.data; 8 | const payload = data.info.payload; 9 | 10 | return ( 11 |
[lines 11–13: JSX markup stripped in this dump — renders the imported NarrationsApp with this item's data and payload; exact markup not recoverable]
14 | ); 15 | } 16 | 17 | export default NarrationsItem; 18 | -------------------------------------------------------------------------------- /viz/narrations/review/src/custom/NarrationsThumbnail.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React from "react"; 4 | import { H6, Card, Elevation } from "@blueprintjs/core"; 5 | import { getHostname } from "../utils"; 6 | import { getAllObjectValWordCounts } from "../renderers/WordCloudItem/WordCloud"; 7 | 8 | function NarrationsThumbnail({ item }) { 9 | const data = item.data; 10 | const payload = data.info.payload; 11 | 12 | const [isError, setError] = React.useState(false); 13 | 14 | return ( 15 |
16 | [lines 16–56: JSX markup stripped in this dump. The surviving fragments show a Card-based thumbnail that renders: a heading "{data.uid} — {payload.length} entries"; an <img className="thumbnail"> whose onError handler sets e.target.onerror = null and setError(true), whose src is getHostname() + data.img unless isError (falling back to an inline 1x1 GIF data URI), with alt="Thumbnail" and style={{ width: data.img ? "100%" : "1px" }}; and a keyword strip {getKeyWords(payload).map((word) => ( … #{word} … ))}.]
57 | ); 58 | } 59 | 60 | function getKeyWords(payload) { 61 | const counts = getAllObjectValWordCounts( 62 | payload, 63 | [ 64 | "C", 65 | "the", 66 | "be", 67 | "of", 68 | "from", 69 | "to", 70 | "and", 71 | "a", 72 | "in", 73 | "that", 74 | "have", 75 | "it", 76 | "for", 77 | "not", 78 | "on", 79 | "with", 80 | "by", 81 | "his", 82 | "her", 83 | "up", 84 | "down", 85 | ], 86 | ["id"] 87 | ); 88 | const mostCommonWords = Object.entries(counts).sort( 89 | ([firstKey, firstValue], [secondKey, secondValue]) => 90 | secondValue - firstValue 91 | ); 92 | return mostCommonWords.slice(0, 10).map(([word, _count]) => word); 93 | } 94 | 95 | export default NarrationsThumbnail; 96 | -------------------------------------------------------------------------------- /viz/narrations/review/src/index.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */ 2 | 3 | body { 4 | margin: 0; 5 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen", 6 | "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue", 7 | sans-serif; 8 | -webkit-font-smoothing: antialiased; 9 | -moz-osx-font-smoothing: grayscale; 10 | background-color: #ededed; 11 | } 12 | 13 | code { 14 | font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New", 15 | monospace; 16 | } 17 | 18 | .error { 19 | color: #e3242b; 20 | } 21 | 22 | .navbar-wrapper { 23 | /* width: 75vw; */ 24 | margin: 0 auto; 25 | } 26 | 27 | .all-item-view-search-bar { 28 | width: 60vw; 29 | } 30 | 31 | @media only screen and (max-width: 1200px) { 32 | .navbar-header { 33 | /* display: none !important; */ 34 | } 35 | 36 | .all-item-view-search-bar { 37 | width: 75vw; 38 | } 39 | } 40 | 41 | .all-item-view-message { 42 | margin: auto 0px; 43 | } 44 | 45 | .all-item-view-error { 46 | margin-top: 36px; 47 | max-width: 50%; 48 | } 49 | 50 | .all-item-view-no-data { 51 | max-width: 50%; 52 | } 53 | 54 | .all-item-view, 55 | .item-view { 56 | padding-top: 50px; 57 | width: 100vw; 58 | overflow: auto; 59 | } 60 | 61 | .item-dynamic { 62 | display: flex; 63 | flex-direction: column; 64 | align-items: center; 65 | font-size: calc(10px + 1vmin); 66 | height: calc(100vh - 50px); 67 | } 68 | 69 | .item-view-message { 70 | margin: auto 0px; 71 | } 72 | 73 | .item-view-error { 74 | position: absolute; 75 | bottom: 0px; 76 | left: 0; 77 | right: 0; 78 | background: red; 79 | color: white; 80 | padding: 10px; 81 | font-size: 14px; 82 | } 83 | 84 | .item-view-no-data { 85 | max-width: 50%; 86 | } 87 | 88 | .btn { 89 | margin: 0px 6px; 90 | } 91 | 92 | code { 93 | background-color: #ddd; 94 | padding: 3px 5px; 95 | } 96 | 97 | .highlight { 98 | background-color: hsla(60, 83%, 68%, 0.3); 99 | } 100 | -------------------------------------------------------------------------------- /viz/narrations/review/src/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 
2 |
3 | import React from "react";
4 | import ReactDOM from "react-dom";
5 | import { BrowserRouter as Router, Switch, Route } from "react-router-dom";
6 | import CollectionView from "./components/CollectionView";
7 | import ItemView from "./components/ItemView";
8 | import "normalize.css/normalize.css";
9 | import "@blueprintjs/icons/lib/css/blueprint-icons.css";
10 | import "@blueprintjs/core/lib/css/blueprint.css";
11 | import "./index.css";
12 |
13 | import { GridCollection, JSONItem, WordCloudItem } from "./renderers";
14 | import NarrationsThumbnail from "./custom/NarrationsThumbnail";
15 | import NarrationsItem from "./custom/NarrationsItem";
16 |
17 | ReactDOM.render(
18 | [lines 18–38: JSX markup stripped in this dump — evidently mounts Router/Switch routes wiring ItemView and CollectionView to the custom NarrationsItem / NarrationsThumbnail renderers and GridCollection; the surviving {/* For more information see the 'Customization' section of the README.md file. */} comments remain],
39 |   document.getElementById("root")
40 | );
-------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/GridCollection/GridCollection.css: --------------------------------------------------------------------------------
1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */
2 |
3 | .default-collection-renderer-container {
4 |   display: grid;
5 |   grid-template-rows: auto;
6 |   grid-template-columns: 50% 50%;
7 |   grid-row-gap: 12px;
8 |   grid-column-gap: 12px;
9 |   padding: 12px 24px 0px 24px;
10 |   min-width: 75vw;
11 |   max-width: 100vw;
12 |   justify-content: center;
13 | }
14 |
15 | @media only screen and (min-width: 1400px) {
16 |   .default-collection-renderer-container {
17 |     grid-template-columns: 33.33% 33.33% 33.33%;
18 |   }
19 | }
20 |
-------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/GridCollection/GridCollection.jsx: --------------------------------------------------------------------------------
1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved.
2 |
3 | import React from "react";
4 | import { Link } from "react-router-dom";
5 | import { JSONItem } from "../JSONItem";
6 | import "./GridCollection.css";
7 |
8 | function GridCollection({ items, itemRenderer: ItemRenderer = JSONItem }) {
9 |   return items && items.length > 0 ? (
10 | [lines 10–23: JSX markup stripped in this dump — a grid container that maps items to Link-wrapped ItemRenderer entries keyed by item.id; surviving fragment: {items.map((item) => { return ( … ); })}]
24 | ) : null; 25 | } 26 | 27 | export { GridCollection }; 28 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/GridCollection/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import { GridCollection } from "./GridCollection"; 4 | 5 | export { GridCollection }; 6 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/JSONItem/JSONItem.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */ 2 | 3 | .json-item-renderer { 4 | margin: auto 0px; 5 | height: 100%; 6 | width: 100%; 7 | } 8 | 9 | .json-item-card { 10 | height: 100%; 11 | } 12 | 13 | .json-item-renderer-pre { 14 | text-align: left; 15 | white-space: pre-wrap; 16 | max-width: 75vw; 17 | max-height: 75vh; 18 | overflow: auto; 19 | color: #555555; 20 | } 21 | 22 | .json-item-renderer-pre.small { 23 | height: 16vh; 24 | } 25 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/JSONItem/JSONItem.jsx: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React, { useRef, useEffect, useState } from "react"; 4 | import { H6, Card, Elevation } from "@blueprintjs/core"; 5 | import "./JSONItem.css"; 6 | 7 | function JSONItem({ item }) { 8 | const SMALL_CARD_WIDTH_LIMIT = 1000; 9 | const [cardWidth, setCardWidth] = useState(0); 10 | const card = useRef(); 11 | 12 | useEffect(() => { 13 | setCardWidth(card.current.offsetWidth); 14 | }, []); 15 | 16 | const smallCard = cardWidth < SMALL_CARD_WIDTH_LIMIT; 17 | 18 | return ( 19 |
[lines 19–42: JSX markup stripped in this dump — a measured wrapper div (ref={card}) and Card, rendered compact when cardWidth < SMALL_CARD_WIDTH_LIMIT, holding a pre of JSON.stringify(item && item.data) and an "ID: {item && item.id}" footer]
43 | ); 44 | } 45 | 46 | export { JSONItem }; 47 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/JSONItem/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import { JSONItem } from "./JSONItem"; 4 | 5 | export { JSONItem }; 6 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/ListCollection/ListCollection.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */ 2 | 3 | .list-view-renderer-container { 4 | width: 75vw; 5 | margin: 12px 0px 0px 0px; 6 | justify-content: center; 7 | } 8 | 9 | .list-view-renderer-item { 10 | text-align: left; 11 | padding: 0px 4px; 12 | width: 100%; 13 | margin: auto 0px; 14 | } 15 | 16 | .list-view-renderer-item.divider { 17 | margin-top: 18px; 18 | } 19 | 20 | .list-view-renderer-item > pre { 21 | text-align: left; 22 | white-space: pre-wrap; 23 | max-width: 75vw; 24 | max-height: 75vh; 25 | overflow: auto; 26 | color: #555555; 27 | height: 16vh; 28 | } 29 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/ListCollection/ListCollection.jsx: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React from "react"; 4 | import { Link } from "react-router-dom"; 5 | import { Card, Divider } from "@blueprintjs/core"; 6 | import ListItem from "./ListItem"; 7 | import "./ListCollection.css"; 8 | 9 | /* 10 | EXAMPLE PLUGIN ALL DATA RENDERER 11 | Displays all mephisto review data as a list 12 | */ 13 | function ListCollection({ items, itemRenderer: Renderer = ListItem }) { 14 | return items && items.length > 0 ? ( 15 | 16 | {items.map((item, index) => ( 17 | <> 18 | {index !== 0 ? : null} 19 | 24 |
31 | 32 |
33 | 34 | 35 | ))} 36 |
37 | ) : null; 38 | } 39 | 40 | export default ListCollection; 41 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/ListCollection/ListItem.jsx: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React from "react"; 4 | import { H6 } from "@blueprintjs/core"; 5 | 6 | function ListItem({ item }) { 7 | return ( 8 | <> 9 |
{JSON.stringify(item && item.data)}
10 |
11 | ID: {item && item.id} 12 |
13 | 14 | ); 15 | } 16 | 17 | export default ListItem; 18 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/ListCollection/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import ListItem from "./ListItem"; 4 | import ListCollection from "./ListCollection"; 5 | 6 | export { ListItem, ListCollection }; 7 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/WordCloudItem/WordCloud.css: -------------------------------------------------------------------------------- 1 | /* Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. */ 2 | 3 | .word-cloud { 4 | text-align: center; 5 | justify-content: center; 6 | } 7 | 8 | .word-cloud p { 9 | text-align: center; 10 | vertical-align: middle; 11 | width: 100%; 12 | } 13 | 14 | .word-cloud-item-renderer { 15 | margin: auto 0px; 16 | } 17 | 18 | .word-cloud-item-renderer-card.small { 19 | width: 100%; 20 | height: 27vh; 21 | overflow: auto; 22 | } 23 | 24 | .word-cloud-item-renderer-card { 25 | height: 100%; 26 | max-width: 75vw; 27 | max-height: 75vh; 28 | overflow: auto; 29 | } 30 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/WordCloudItem/WordCloudItem.jsx: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import React, { useRef, useEffect, useState } from "react"; 4 | import { H6, Card, Elevation } from "@blueprintjs/core"; 5 | import WordCloud from "./WordCloud"; 6 | import "./WordCloud.css"; 7 | 8 | /* 9 | EXAMPLE PLUGIN ITEM RENDERER 10 | Renders mephisto review data items as word clouds of the most common words in the object 11 | For use inside an ItemListRenderer or AllItemView as an itemRenderer prop 12 | */ 13 | 14 | function WordCloudItem({ item }) { 15 | const SMALL_CARD_WIDTH_LIMIT = 1000; 16 | const data = item && item.data; 17 | const id = item && item.id; 18 | 19 | const [cardWidth, setCardWidth] = useState(0); 20 | const card = useRef(); 21 | 22 | useEffect(() => { 23 | setCardWidth(card.current.offsetWidth); 24 | }, []); 25 | 26 | const smallCard = cardWidth < SMALL_CARD_WIDTH_LIMIT; 27 | 28 | const normalWordCloudProps = { 29 | data: data, 30 | excludedKeys: ["URL"], 31 | excludedWords: ["true", "false", "the", "with", "on", "in", "of", "and"], 32 | minFontEmSize: 1, 33 | maxFontEmSize: 2.5, 34 | minFontWeight: 100, 35 | maxFontWeight: 700, 36 | }; 37 | 38 | const smallWordCloudProps = { 39 | data: data, 40 | excludedKeys: ["URL"], 41 | excludedWords: ["true", "false", "the", "with", "on", "in", "of", "and"], 42 | minFontEmSize: 0.4, 43 | maxFontEmSize: 1.25, 44 | minFontWeight: 200, 45 | maxFontWeight: 600, 46 | }; 47 | 48 | const wordCloudProps = smallCard ? smallWordCloudProps : normalWordCloudProps; 49 | 50 | if (!item) return

No Data Available

; 51 | return ( 52 | <> 53 |
54 | 63 |
64 | ID: {id} 65 |
66 |
Data keywords:
67 | {/*example WordCloud with example excluded keys and words*/} 68 | 69 |
70 |
71 | 72 | ); 73 | } 74 | 75 | export default WordCloudItem; 76 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/WordCloudItem/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import WordCloud from "./WordCloud"; 4 | import WordCloudItem from "./WordCloudItem"; 5 | 6 | export { WordCloud, WordCloudItem }; 7 | -------------------------------------------------------------------------------- /viz/narrations/review/src/renderers/index.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import { GridCollection } from "./GridCollection"; 4 | import { JSONItem } from "./JSONItem"; 5 | import { WordCloudItem } from "./WordCloudItem"; 6 | import { ListCollection } from "./ListCollection"; 7 | 8 | export { GridCollection, ListCollection, JSONItem, WordCloudItem }; 9 | -------------------------------------------------------------------------------- /viz/narrations/review/src/utils.js: -------------------------------------------------------------------------------- 1 | // Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved. 2 | 3 | import config from "./config"; 4 | 5 | function getHostname() { 6 | return config.port 7 | ? `${window.location.protocol}//${window.location.hostname}:${config.port}` 8 | : window.location.origin; 9 | } 10 | 11 | export { getHostname }; 12 | --------------------------------------------------------------------------------
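As a final note on the renderers above: the keyword strip in `NarrationsThumbnail.js` is a small ranking routine — count words across the item's payload via `getAllObjectValWordCounts`, drop stop words, sort by count, and keep the top ten. A simplified Python rendition of that logic (a sketch restricted to the `label` field; the stop-word list is the one hard-coded in the component):

```python
from collections import Counter

STOP_WORDS = {"C", "the", "be", "of", "from", "to", "and", "a", "in", "that",
              "have", "it", "for", "not", "on", "with", "by", "his", "her",
              "up", "down"}

def get_key_words(payload, top_k=10):
    """Mirror of getKeyWords(): count words across all narration labels,
    drop stop words, and keep the top_k most frequent."""
    counts = Counter()
    for segment in payload:
        for word in segment["label"].split():
            if word not in STOP_WORDS:
                counts[word] += 1
    return [word for word, _ in counts.most_common(top_k)]

print(get_key_words([{"label": "C opens the fridge"}, {"label": "C closes the fridge"}]))
# -> ['fridge', 'opens', 'closes']
```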