├── test ├── test_results │ └── readme.md ├── ckpts │ └── readme.md ├── util │ ├── __pycache__ │ │ ├── misc.cpython-38.pyc │ │ └── lr_sched.cpython-38.pyc │ └── lr_sched.py ├── robots │ ├── __pycache__ │ │ └── panda_robot.cpython-38.pyc │ ├── franka_description │ │ └── meshes │ │ │ └── collision │ │ │ ├── hand.stl │ │ │ ├── finger.stl │ │ │ ├── link0.stl │ │ │ ├── link1.stl │ │ │ ├── link2.stl │ │ │ ├── link3.stl │ │ │ ├── link4.stl │ │ │ ├── link5.stl │ │ │ ├── link6.stl │ │ │ ├── link7.stl │ │ │ └── finger.stl.convex.stl │ ├── panda_gripper.urdf │ └── panda_robot.py ├── llama │ ├── __init__.py │ ├── tokenizer.py │ ├── utils.py │ └── llama_adapter.py ├── colors.py ├── test.sh ├── clean_data.py ├── test_entireprocess_in_sapien.py ├── test_llama.py ├── processgen.py ├── cal_test_mani_succ_rate.py ├── camera.py ├── utils.py └── test_one_stick_clean.py ├── data_collection ├── data │ └── readme.md ├── asset │ └── readme.md ├── code │ ├── robots │ │ ├── misc │ │ │ ├── table_map.jpg │ │ │ └── cube.obj │ │ ├── franka_description │ │ │ └── meshes │ │ │ │ └── collision │ │ │ │ ├── finger.stl │ │ │ │ ├── hand.stl │ │ │ │ ├── link0.stl │ │ │ │ ├── link1.stl │ │ │ │ ├── link2.stl │ │ │ │ ├── link3.stl │ │ │ │ ├── link4.stl │ │ │ │ ├── link5.stl │ │ │ │ ├── link6.stl │ │ │ │ ├── link7.stl │ │ │ │ └── finger.stl.convex.stl │ │ ├── panda_gripper.urdf │ │ ├── panda_robot.py │ │ └── panda.urdf │ ├── colors.py │ ├── check_cat_balance.py │ ├── test_data_collect.py │ ├── scripts │ │ └── run_gen_offline_data.sh │ ├── transfer_dataset.py │ ├── train_test_split.py │ ├── gen_offline_data.py │ ├── datagen.py │ ├── camera.py │ └── utils.py └── stats │ ├── ins_cnt_46cats.txt │ └── test_id.txt ├── train ├── ckpts │ └── readme.md ├── exp │ └── readme.md ├── llama │ ├── __init__.py │ ├── tokenizer.py │ ├── utils.py │ └── llama_adapter.py ├── output │ └── events.out.tfevents.1718348142.di-20240614115853-6ppx6.1005317.0 ├── finetune.sh ├── util │ └── lr_sched.py ├── engine_finetune.py ├── data │ ├── dataset.py │ └── create_dataset_aff.py ├── main_finetune.py └── utils.py ├── requirements.txt └── README.md /test/test_results/readme.md: -------------------------------------------------------------------------------- 1 | The test results will be shown here 2 | -------------------------------------------------------------------------------- /data_collection/data/readme.md: -------------------------------------------------------------------------------- 1 | The generated train/test data will be shown here 2 | -------------------------------------------------------------------------------- /test/ckpts/readme.md: -------------------------------------------------------------------------------- 1 | Please place the downloaded checkpoints (CLIP, LlaMa, Llama-Adapter) in this folder 2 | -------------------------------------------------------------------------------- /train/ckpts/readme.md: -------------------------------------------------------------------------------- 1 | Please place the downloaded checkpoints here, including CLIP, LlaMa, and LlaMa-Adapter 2 | -------------------------------------------------------------------------------- /data_collection/asset/readme.md: -------------------------------------------------------------------------------- 1 | Please place the urdf from the official partnet-mobility website in this folder 2 | -------------------------------------------------------------------------------- /test/util/__pycache__/misc.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/util/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /data_collection/code/robots/misc/table_map.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/misc/table_map.jpg -------------------------------------------------------------------------------- /test/util/__pycache__/lr_sched.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/util/__pycache__/lr_sched.cpython-38.pyc -------------------------------------------------------------------------------- /test/robots/__pycache__/panda_robot.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/__pycache__/panda_robot.cpython-38.pyc -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/hand.stl -------------------------------------------------------------------------------- /train/exp/readme.md: -------------------------------------------------------------------------------- 1 | The training checkpoint will be saved here. 2 | Or if you want to test the checkpoint of ManipLLM, place the ManipLLM checkpoint in this folder. 
3 | -------------------------------------------------------------------------------- /test/llama/__init__.py: -------------------------------------------------------------------------------- 1 | from .llama import ModelArgs, Transformer 2 | from .tokenizer import Tokenizer 3 | from .llama_adapter import * 4 | from .utils import format_prompt -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/finger.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link0.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link1.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link2.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link3.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link4.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link5.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link6.stl -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/test/robots/franka_description/meshes/collision/link7.stl -------------------------------------------------------------------------------- /train/llama/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .llama import ModelArgs, Transformer 2 | from .tokenizer import Tokenizer 3 | from .llama_adapter import * 4 | from .utils import format_prompt -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/finger.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/hand.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link0.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link1.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link2.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link3.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link4.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link5.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link6.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link6.stl -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/data_collection/code/robots/franka_description/meshes/collision/link7.stl -------------------------------------------------------------------------------- /train/output/events.out.tfevents.1718348142.di-20240614115853-6ppx6.1005317.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/clorislili/ManipLLM/HEAD/train/output/events.out.tfevents.1718348142.di-20240614115853-6ppx6.1005317.0 -------------------------------------------------------------------------------- /test/colors.py: -------------------------------------------------------------------------------- 1 | colors = [[0.5, 0.5, 0.5], [0.8, 0, 0], [0, 0.8, 0], [0, 0, 0.8], \ 2 | [0.5, 0.5, 0], [0.5, 0, 0.5], [0, 0.5, 0.5], \ 3 | [0.3, 0.6, 0], [0.6, 0, 0.3], [0.3, 0, 0.6], \ 4 | [0.6, 0.3, 0], [0.3, 0, 0.6], [0.6, 0, 0.3], \ 5 | [0.8, 0.2, 0.5]] 6 | 7 | -------------------------------------------------------------------------------- /data_collection/code/colors.py: -------------------------------------------------------------------------------- 1 | colors = [[0.5, 0.5, 0.5], [0.8, 0, 0], [0, 0.8, 0], [0, 0, 0.8], \ 2 | [0.5, 0.5, 0], [0.5, 0, 0.5], [0, 0.5, 0.5], \ 3 | [0.3, 0.6, 0], [0.6, 0, 0.3], [0.3, 0, 0.6], \ 4 | [0.6, 0.3, 0], [0.3, 0, 0.6], [0.6, 0, 0.3], \ 5 | [0.8, 0.2, 0.5]] 6 | 7 | -------------------------------------------------------------------------------- /data_collection/code/check_cat_balance.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | data_dir = '../data/train_data' 5 | data_list = os.listdir(data_dir) 6 | cat_cal = dict() 7 | for data_name in data_list: 8 | cat = data_name.split('_')[1] 9 | if cat not in list(cat_cal.keys()): 10 | cat_cal[cat] = 1 11 | else: 12 | cat_cal[cat] += 1 13 | print(cat_cal) -------------------------------------------------------------------------------- /data_collection/code/test_data_collect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import shutil 4 | cat_dict = dict() 5 | data_dir = '../data/train_data' 6 | data_list = os.listdir(data_dir) 7 | 8 | 9 | for data_name in data_list: 10 | cat = data_name.split('_')[1] 11 | 12 | if cat in list(cat_dict.keys()): 13 | 14 | cat_dict[cat] += 1 15 | else: 16 | cat_dict[cat] = 1 17 | print(cat_dict) 18 | -------------------------------------------------------------------------------- /test/test.sh: -------------------------------------------------------------------------------- 1 | #step1: model inference 2 | OUPUT_DIR='./test_results/result_ori' 3 | CUDA_VISIBLE_DEVICES=0 python test_llama.py \ 4 | --llama_dir ./ckpts/llama_model_weights \ 5 | --adapter_dir /PATH/TO/MANIPLLM/MODEL \ 6 | --data_dir ../data_collection/data/test_data \ 7 | --out_dir "$OUPUT_DIR" \ 8 | --action pulling 9 | 10 | #step2: test in simulator 11 | python test_entireprocess_in_sapien.py \ 12 | --data_dir ../data_collection/data/test_data \ 13 | --num_processes 10 \ 14 | --out_dir "$OUPUT_DIR" \ 15 | --no_gui \ 16 | --use_mask 
True 17 | 18 | # #step3: calculate success rate 19 | python cal_test_mani_succ_rate.py \ 20 | --primact_type pulling \ 21 | --data_dir "$OUPUT_DIR" 22 | -------------------------------------------------------------------------------- /data_collection/stats/ins_cnt_46cats.txt: -------------------------------------------------------------------------------- 1 | Safe 30 11 2 | Door 36 9 3 | Display 37 9 4 | Refrigerator 44 7 5 | Laptop 55 6 6 | Lighter 28 12 7 | Microwave 16 21 8 | Mouse 14 25 9 | Box 28 12 10 | TrashCan 70 5 11 | KitchenPot 25 14 12 | Suitcase 24 14 13 | Pliers 25 14 14 | StorageFurniture 346 1 15 | Remote 49 7 16 | Bottle 57 6 17 | FoldingChair 26 13 18 | Toaster 25 14 19 | Lamp 45 7 20 | Dispenser 57 6 21 | Toilet 69 5 22 | Scissors 47 7 23 | Table 101 3 24 | Stapler 23 15 25 | Kettle 29 12 26 | USB 51 6 27 | Switch 70 5 28 | WashingMachine 17 20 29 | Faucet 84 4 30 | Phone 18 19 31 | Bucket 36 12 32 | Dishwaher 48 7 33 | Window 58 6 34 | Oven 30 12 35 | Knife 44 8 36 | Fan 81 4 37 | Keyboard 37 10 38 | Printer 29 12 39 | Eyeglasses 65 5 40 | Globe 61 6 41 | Cart 61 6 42 | Pen 48 8 -------------------------------------------------------------------------------- /train/finetune.sh: -------------------------------------------------------------------------------- 1 | #step1: generate training json 2 | JSON_DIR='./data/train_json' 3 | python ./data/create_dataset_aff.py --folder_dir ../data_collection/data/train_data --output_dir "$JSON_DIR" --num_point 20 4 | 5 | #step2: train model 6 | OUTPUT_DIR='./exp/train_ckpts' 7 | mkdir -p "$OUTPUT_DIR" 8 | CUDA_VISIBLE_DEVICES=0 python -u -m torch.distributed.launch --master_port=11710 --nproc_per_node=1 --use_env main_finetune.py --batch_size 1 \ 9 | --epochs 10 --warmup_epochs 1 --blr 1e-3 --weight_decay 0.02 \ 10 | --output_dir "$OUTPUT_DIR" \ 11 | --pretrained_path ./ckpts/BIAS_LORA_NORM-336-Chinese-7B.pth \ 12 | --llama_path ./ckpts/llama_model_weights \ 13 | --bins True \ 14 | --mlm True\ 15 | --aff_prior \ 16 | --data_config "$JSON_DIR" 17 | -------------------------------------------------------------------------------- /data_collection/code/scripts/run_gen_offline_data.sh: -------------------------------------------------------------------------------- 1 | # generate around 20,000 training samples, then stop it manually 2 | python gen_offline_data.py \ 3 | --data_dir ../data/train_data\ 4 | --data_fn ../stats/train_id.txt\ 5 | --primact_types pulling \ 6 | --num_processes 40 \ 7 | --num_epochs 100 \ 8 | --starting_epoch 0 \ 9 | --ins_cnt_fn ../stats/ins_cnt_46cats.txt \ 10 | --mode train 11 | 12 | # delete the extra testing dataset, and remain around 1,500 testing samples. Make sure that each category has as least 50 samples. 
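# A quick way to verify the per-category sample counts after pruning (a minimal sketch; it assumes
# the "<shape-id>_<Category>_..." folder naming that check_cat_balance.py also relies on, and the
# ../data/test_data output directory used below):
# python -c "import os, collections; print(collections.Counter(n.split('_')[1] for n in os.listdir('../data/test_data')))"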
13 | python gen_offline_data.py \ 14 | --data_dir ../data/test_data\ 15 | --data_fn ../stats/test_id.txt\ 16 | --primact_types pulling \ 17 | --num_processes 10 \ 18 | --num_epochs 20 \ 19 | --starting_epoch 0 \ 20 | --ins_cnt_fn ../stats/ins_cnt_46cats.txt \ 21 | --mode test 22 | -------------------------------------------------------------------------------- /test/clean_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | data_dir = '/home/jiyao/mingxu/where2act-main/data/highreso996_1119' 4 | data_tar = '/home/jiyao/mingxu/where2act-main/data/highreso996_1119_rgnonly' 5 | data_list = os.listdir(data_dir) 6 | for data_id in data_list: 7 | # file_list = os.listdir(os.path.join(data_dir,data_id)) 8 | # for file_dir in file_list: 9 | # if file_dir != 'rgb.png': 10 | # os.remove(os.path.join(data_dir,data_id,file_dir)) 11 | # if not os.path.exists(os.path.join(data_dir,data_id,'result.json')): 12 | # shutil.rmtree(os.path.join(data_dir,data_id)) 13 | source_file = os.path.join(data_dir,data_id,'rgb.png') 14 | destination_directory = os.path.join(data_tar,data_id) 15 | if not os.path.exists(destination_directory): 16 | os.makedirs(destination_directory) 17 | shutil.copy(source_file, destination_directory) -------------------------------------------------------------------------------- /test/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, epoch, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if epoch < args.warmup_epochs: 12 | lr = args.lr * epoch / args.warmup_epochs 13 | else: 14 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 15 | (1. + math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 16 | for param_group in optimizer.param_groups: 17 | if "lr_scale" in param_group: 18 | param_group["lr"] = lr * param_group["lr_scale"] 19 | else: 20 | param_group["lr"] = lr 21 | return lr 22 | -------------------------------------------------------------------------------- /train/util/lr_sched.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | 9 | def adjust_learning_rate(optimizer, epoch, args): 10 | """Decay the learning rate with half-cycle cosine after warmup""" 11 | if epoch < args.warmup_epochs: 12 | lr = args.lr * epoch / args.warmup_epochs 13 | else: 14 | lr = args.min_lr + (args.lr - args.min_lr) * 0.5 * \ 15 | (1. 
+ math.cos(math.pi * (epoch - args.warmup_epochs) / (args.epochs - args.warmup_epochs))) 16 | for param_group in optimizer.param_groups: 17 | if "lr_scale" in param_group: 18 | param_group["lr"] = lr * param_group["lr_scale"] 19 | else: 20 | param_group["lr"] = lr 21 | return lr 22 | -------------------------------------------------------------------------------- /test/test_entireprocess_in_sapien.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from argparse import ArgumentParser 4 | 5 | from processgen import Processgen 6 | import json 7 | parser = ArgumentParser() 8 | parser.add_argument('--no_gui', action='store_true', default=False, help='no_gui [default: False]') 9 | parser.add_argument('--data_dir', type=str, help='data directory') 10 | parser.add_argument('--num_processes', type=int, default=40, help='number of CPU cores to use') 11 | parser.add_argument('--out_dir', type=str, help='outdata directory') 12 | parser.add_argument('--use_mask', type=str, default=False, help='whether use movable mask') 13 | conf = parser.parse_args() 14 | 15 | if os.path.exists(conf.out_dir): 16 | pass 17 | else: 18 | print('NO infer directory') 19 | exit() 20 | 21 | processgen = Processgen(conf.num_processes) 22 | record_names = os.listdir(conf.out_dir) 23 | for record_name in record_names: 24 | processgen.add_one_test_job(record_name,conf) 25 | processgen.start_all() 26 | data_tuple_list = processgen.join_all() 27 | 28 | 29 | -------------------------------------------------------------------------------- /data_collection/code/transfer_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | file_path = '../stats/train_30cats_train_data_list.txt' 4 | 5 | # Open the file in read mode 6 | lines = [] 7 | with open(file_path, 'r') as file: 8 | # Iterate over each line in the file 9 | for line in file: 10 | # Process the line (for example, print it) 11 | # print(line.strip()) # .strip() removes leading/trailing whitespace, including the newline character 12 | lines.append(line.strip()) 13 | data_dir = '../data/train_data' 14 | target_dir = '../data/train_data0606' 15 | data_list = os.listdir(data_dir) 16 | cat_cal = dict() 17 | for data_name in data_list: 18 | data_id = data_name.split('_')[0] 19 | data_cat = data_name.split('_')[1] 20 | source_dir = os.path.join(data_dir,data_name) 21 | destination_dir = os.path.join(target_dir,data_name) 22 | try: 23 | for line in lines: 24 | if data_id in line and data_cat in line: 25 | shutil.copytree(source_dir, destination_dir) 26 | break 27 | except: 28 | continue 29 | -------------------------------------------------------------------------------- /data_collection/code/robots/misc/cube.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.93.4 OBJ File: '' 2 | # www.blender.org 3 | mtllib untitled.mtl 4 | o Cube 5 | v 1.000000 1.000000 -1.000000 6 | v 1.000000 -1.000000 -1.000000 7 | v 1.000000 1.000000 1.000000 8 | v 1.000000 -1.000000 1.000000 9 | v -1.000000 1.000000 -1.000000 10 | v -1.000000 -1.000000 -1.000000 11 | v -1.000000 1.000000 1.000000 12 | v -1.000000 -1.000000 1.000000 13 | vt 1.000000 1.000000 14 | vt 0.000000 1.000000 15 | vt 0.000000 0.000000 16 | vt 1.000000 0.000000 17 | vt 1.000000 0.000000 18 | vt 1.000000 1.000000 19 | vt 0.000000 1.000000 20 | vt 0.000000 0.000000 21 | vt 1.000000 1.000000 22 | vt 1.000000 0.000000 23 | vt 0.000000 1.000000 24 | vt 
1.000000 1.000000 25 | vt 1.000000 0.000000 26 | vt 0.000000 1.000000 27 | vt 0.000000 0.000000 28 | vt 0.000000 0.000000 29 | vn 0.0000 1.0000 0.0000 30 | vn 0.0000 0.0000 1.0000 31 | vn -1.0000 0.0000 0.0000 32 | vn 0.0000 -1.0000 0.0000 33 | vn 1.0000 0.0000 0.0000 34 | vn 0.0000 0.0000 -1.0000 35 | usemtl Material 36 | s off 37 | f 1/1/1 5/2/1 7/3/1 3/4/1 38 | f 4/5/2 3/6/2 7/7/2 8/8/2 39 | f 8/8/3 7/7/3 5/9/3 6/10/3 40 | f 6/11/4 2/12/4 4/5/4 8/8/4 41 | f 2/13/5 1/1/5 3/14/5 4/15/5 42 | f 6/16/6 5/2/6 1/1/6 2/13/6 43 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==8.1.7 2 | cmake==3.28.1 3 | comm==0.2.1 4 | dash==2.17.0 5 | ffmpy==0.3.1 6 | ftfy==6.1.3 7 | future==0.18.3 8 | h5py==3.10.0 9 | huggingface-hub==0.23.0 10 | imageio==2.33.1 11 | Jinja2==3.1.3 12 | jupyter-client==7.3.4 13 | matplotlib==3.7.4 14 | matplotlib-inline==0.1.6 15 | mpmath==1.3.0 16 | multidict==6.0.5 17 | networkx==3.1 18 | numpy==1.24.4 19 | oauthlib==3.2.2 20 | open3d==0.18.0 21 | openai==0.28.0 22 | openai-clip==1.0.1 23 | opencv-python==4.9.0.80 24 | packaging==23.2 25 | pandas==2.0.3 26 | parso==0.8.3 27 | pexpect==4.9.0 28 | pickleshare==0.7.5 29 | pillow==10.2.0 30 | pip==24.0 31 | pkgutil_resolve_name==1.3.10 32 | platformdirs==4.2.0 33 | plotly==5.22.0 34 | prompt-toolkit==3.0.42 35 | protobuf==4.25.2 36 | PyYAML==6.0.1 37 | requests==2.31.0 38 | safetensors==0.4.2 39 | sapien==0.7.0.dev0 40 | scikit-learn==1.2.2 41 | scipy==1.10.1 42 | segment-anything==1.0 43 | semantic-version==2.10.0 44 | sentencepiece==0.1.99 45 | sentry-sdk==1.42.0 46 | service-identity==24.1.0 47 | setproctitle==1.3.3 48 | setuptools==68.2.2 49 | shapely==2.0.3 50 | shellingham==1.5.4 51 | shortuuid==1.0.13 52 | six==1.16.0 53 | tensorboard==2.14.0 54 | timm==0.6.13 55 | tensorboard-data-server==0.7.2 56 | tensorboardX==2.6.2.2 57 | tokenizers==0.13.3 58 | torch==2.0.1 59 | torchvision==0.15.2 60 | tornado==6.1 61 | tqdm==4.66.1 62 | transformers==4.31.0 63 | transforms3d==0.4.1 64 | urllib3==2.1.0 65 | wandb==0.16.4 66 | wheel==0.41.2 67 | -------------------------------------------------------------------------------- /test/llama/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 
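# Example usage (a minimal sketch; it assumes the SentencePiece model shipped with the LLaMA
# weights, i.e. ./ckpts/llama_model_weights/tokenizer.model as laid out in the README):
#   tok = Tokenizer(model_path='./ckpts/llama_model_weights/tokenizer.model')
#   ids = tok.encode('Specify the contact point and gripper direction of manipulating the object.', bos=True, eos=False)
#   text = tok.decode(ids)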
3 | 4 | from sentencepiece import SentencePieceProcessor 5 | from logging import getLogger 6 | from typing import List 7 | import os 8 | 9 | 10 | logger = getLogger() 11 | 12 | 13 | class Tokenizer: 14 | def __init__(self, model_path: str): 15 | # reload tokenizer 16 | assert os.path.isfile(model_path), model_path 17 | self.sp_model = SentencePieceProcessor(model_file=model_path) 18 | logger.info(f"Reloaded SentencePiece model from {model_path}") 19 | 20 | # BOS / EOS token IDs 21 | self.n_words: int = self.sp_model.vocab_size() 22 | self.bos_id: int = self.sp_model.bos_id() 23 | self.eos_id: int = self.sp_model.eos_id() 24 | self.pad_id: int = self.sp_model.pad_id() 25 | logger.info( 26 | f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}" 27 | ) 28 | assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() 29 | 30 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]: 31 | assert type(s) is str 32 | t = self.sp_model.encode(s) 33 | if bos: 34 | t = [self.bos_id] + t 35 | if eos: 36 | t = t + [self.eos_id] 37 | return t 38 | 39 | def decode(self, t: List[int]) -> str: 40 | return self.sp_model.decode(t) 41 | -------------------------------------------------------------------------------- /train/llama/tokenizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # This software may be used and distributed according to the terms of the GNU General Public License version 3. 3 | 4 | from sentencepiece import SentencePieceProcessor 5 | from logging import getLogger 6 | from typing import List 7 | import os 8 | 9 | 10 | logger = getLogger() 11 | 12 | 13 | class Tokenizer: 14 | def __init__(self, model_path: str): 15 | # reload tokenizer 16 | assert os.path.isfile(model_path), model_path 17 | self.sp_model = SentencePieceProcessor(model_file=model_path) 18 | logger.info(f"Reloaded SentencePiece model from {model_path}") 19 | 20 | # BOS / EOS token IDs 21 | self.n_words: int = self.sp_model.vocab_size() 22 | self.bos_id: int = self.sp_model.bos_id() 23 | self.eos_id: int = self.sp_model.eos_id() 24 | self.pad_id: int = self.sp_model.pad_id() 25 | logger.info( 26 | f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}" 27 | ) 28 | assert self.sp_model.vocab_size() == self.sp_model.get_piece_size() 29 | 30 | def encode(self, s: str, bos: bool, eos: bool) -> List[int]: 31 | assert type(s) is str 32 | t = self.sp_model.encode(s) 33 | if bos: 34 | t = [self.bos_id] + t 35 | if eos: 36 | t = t + [self.eos_id] 37 | return t 38 | 39 | def decode(self, t: List[int]) -> str: 40 | return self.sp_model.decode(t) 41 | -------------------------------------------------------------------------------- /data_collection/code/train_test_split.py: -------------------------------------------------------------------------------- 1 | import random 2 | cat_test = ['Toilet', 'Scissors','Table', 'Stapler','USB','WashingMachine', 'Oven','Faucet', 'Phone','Kettle','Window'] 3 | cat_train = ['Safe', 'Door','Display','Refrigerator' ,'Laptop','Lighter','Microwave','Mouse','Box','TrashCan','KitchenPot','Suitcase','Pliers','StorageFurniture','Remote','Bottle' 4 | , 'FoldingChair','Toaster','Lamp','Dispenser', 'Cart', 'Globe','Eyeglasses','Pen','Switch','Printer','Keyboard','Fan','Knife','Dishwaher'] 5 | file_path = '../stats/train_46cats_all_data_list.txt' 6 | 7 | 8 | test_lines = [] 9 | with open(file_path, 'r') as file: 10 | for line in file: 11 | cleaned_line = line.strip() 
12 | for cat in cat_test: 13 | if cat in cleaned_line: 14 | test_lines.append(cleaned_line) 15 | break 16 | train_lines = [] 17 | with open(file_path, 'r') as file: 18 | for line in file: 19 | cleaned_line = line.strip() 20 | for cat in cat_train: 21 | if cat in cleaned_line: 22 | train_lines.append(cleaned_line) 23 | break 24 | random.shuffle(train_lines) 25 | length = len(train_lines) 26 | 27 | 28 | train_lines_output = train_lines[:int((4*length)/5)] 29 | test_lines.extend(train_lines[int((4*length)/5):]) 30 | 31 | file_path1 = '../stats/test_id.txt' 32 | 33 | with open(file_path1, 'w') as file: 34 | for item in test_lines: 35 | file.write(f"{item}\n") 36 | 37 | file_path2 = '../stats/train_id.txt' 38 | with open(file_path2, 'w') as file: 39 | for item in train_lines_output: 40 | file.write(f"{item}\n") 41 | 42 | -------------------------------------------------------------------------------- /test/test_llama.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import torch 3 | import llama 4 | import os 5 | from PIL import Image, ImageDraw 6 | import cv2 7 | import json 8 | from tqdm import tqdm 9 | import numpy as np 10 | import torch.nn as nn 11 | parser = ArgumentParser() 12 | parser.add_argument('--llama_dir', type=str, help='llama directory') 13 | parser.add_argument('--adapter_dir', type=str, help='adapter directory') 14 | parser.add_argument('--data_dir', type=str) 15 | parser.add_argument('--out_dir', type=str) 16 | parser.add_argument('--action', type=str, help='llama directory') 17 | conf = parser.parse_args() 18 | device = 'cuda' if torch.cuda.is_available() else "cpu" 19 | llama_dir = conf.llama_dir 20 | # print(conf.adapter_dir, llama_dir, device) 21 | model, preprocess = llama.load(conf.adapter_dir, llama_dir, device) 22 | model.to(device) 23 | model.eval() 24 | if '-ori' in conf.adapter_dir: 25 | prompt = llama.format_prompt('Specify the contact point and orientation of pushing the object.') # though it is called pushing, but the prediction is the same as manipulating. 
It is just an old version of prompt naming during training 26 | else: 27 | prompt = llama.format_prompt('Specify the contact point and gripper direction of manipulating the object.') 28 | record_names = os.listdir(conf.data_dir) 29 | for record_name in tqdm(record_names): 30 | out_dir = os.path.join(conf.out_dir,record_name) 31 | 32 | if not os.path.exists(out_dir): 33 | os.makedirs(out_dir) 34 | 35 | record_dir = os.path.join(conf.data_dir,record_name) 36 | rgb_dir = os.path.join(record_dir,'original_rgb.png') 37 | if not os.path.exists(rgb_dir): 38 | continue 39 | start_pixel = 0 40 | size=336 41 | img_1 = Image.fromarray(np.array(Image.open(rgb_dir).convert('RGB'))[start_pixel:start_pixel+336,start_pixel:start_pixel+336,:]) 42 | img = preprocess(img_1).unsqueeze(0).to(device) 43 | with torch.no_grad(): 44 | result = model.generate(img, [prompt])[0] 45 | # print(result) 46 | with open(os.path.join(out_dir, 'prediction.json'), 'w') as fout: 47 | json.dump(result, fout) 48 | -------------------------------------------------------------------------------- /test/processgen.py: -------------------------------------------------------------------------------- 1 | """ 2 | to control multiprocess test in sapien 3 | """ 4 | 5 | import os 6 | import numpy as np 7 | import multiprocessing as mp 8 | from subprocess import call 9 | import time 10 | 11 | def printout(flog, strout): 12 | print(strout) 13 | if flog is not None: 14 | flog.write(strout + '\n') 15 | class Processgen(object): 16 | 17 | def __init__(self, num_processes, flog=None): 18 | self.num_processes = num_processes 19 | self.flog = flog 20 | 21 | self.todos = [] 22 | self.processes = [] 23 | self.is_running = False 24 | self.Q = mp.Queue() 25 | 26 | def __len__(self): 27 | return len(self.todos) 28 | 29 | def add_one_test_job(self,record_name,conf): 30 | if self.is_running: 31 | printout(self.flog, 'ERROR: cannot add a new job while Processgen is running!') 32 | exit(1) 33 | todo = (conf.data_dir,record_name,conf.out_dir,conf.use_mask) 34 | self.todos.append(todo) 35 | 36 | 37 | @staticmethod 38 | def job_func(pid, todos, Q): 39 | succ_todos = [] 40 | # print(todos) 41 | for todo in todos: 42 | cmd = 'xvfb-run -a python test_one_stick_clean.py --data_dir {} --record_name {} --out_dir {} --no_gui --use_mask {}' \ 43 | .format(todo[0], todo[1], todo[2], todo[-1]) 44 | folder_name_withjob = os.path.join(todo[2],todo[1]) 45 | # print(cmd) 46 | # exit() 47 | 48 | ret = call(cmd, shell=True) 49 | 50 | if ret == 0: 51 | succ_todos.append(folder_name_withjob) 52 | if ret == 2: 53 | succ_todos.append(None) 54 | Q.put(succ_todos) 55 | 56 | def start_all(self): 57 | if self.is_running: 58 | printout(self.flog, 'ERROR: cannot start all while Processgen is running!') 59 | exit(1) 60 | 61 | total_todos = len(self) 62 | num_todos_per_process = int(np.ceil(total_todos / self.num_processes)) 63 | np.random.shuffle(self.todos) 64 | for i in range(self.num_processes): 65 | todos = self.todos[i*num_todos_per_process: min(total_todos, (i+1)*num_todos_per_process)] 66 | p = mp.Process(target=self.job_func, args=(i, todos, self.Q)) 67 | p.start() 68 | self.processes.append(p) 69 | 70 | self.is_running = True 71 | 72 | def join_all(self): 73 | if not self.is_running: 74 | printout(self.flog, 'ERROR: cannot join all while Processgen is idle!') 75 | exit(1) 76 | 77 | ret = [] 78 | for p in self.processes: 79 | ret += self.Q.get() 80 | 81 | for p in self.processes: 82 | p.join() 83 | 84 | self.todos = [] 85 | self.processes = [] 86 | self.Q = mp.Queue() 87 | 
self.is_running=False 88 | return ret 89 | 90 | 91 | -------------------------------------------------------------------------------- /data_collection/code/gen_offline_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from argparse import ArgumentParser 4 | 5 | from datagen import DataGen 6 | 7 | parser = ArgumentParser() 8 | parser.add_argument('--data_dir', type=str, help='data directory') 9 | parser.add_argument('--data_fn', type=str, help='data file that indexs all shape-ids') 10 | parser.add_argument('--primact_types', type=str, help='list all primacts [separated by comma], default: None, meaning all', default=None) 11 | parser.add_argument('--num_processes', type=int, default=40, help='number of CPU cores to use') 12 | parser.add_argument('--num_epochs', type=int, default=160, help='control the data amount') 13 | parser.add_argument('--starting_epoch', type=int, default=0, help='help to resume. If previous generating does not generate the expected amount of data, when resuming, set this term to the previous epoch number to prevent from overlapping') 14 | parser.add_argument('--ins_cnt_fn', type=str, help='a file listing all category instance count, which is used to balance the interaction data amount to make sure that all categories have roughly same amount of data interaction, regardless of different shape counts in these categories') 15 | parser.add_argument('--mode', type=str, help='train or test; control the categories') 16 | conf = parser.parse_args() 17 | 18 | 19 | 20 | if conf.mode == 'train' and conf.primact_types == 'pulling': 21 | #set train categories 22 | conf.category_types = ['Safe', 'Door','Display','Refrigerator' ,'Laptop','Lighter','Microwave','Mouse','Box','TrashCan','KitchenPot','Suitcase','Pliers','StorageFurniture','Remote','Bottle' 23 | , 'FoldingChair','Toaster','Lamp','Dispenser', 'Cart', 'Globe','Eyeglasses','Pen','Switch','Printer','Keyboard','Fan','Knife','Dishwaher'] 24 | elif conf.mode == 'test' and conf.primact_types == 'pulling': 25 | #set test categories 26 | conf.category_types = ['Safe', 'Door','Display','Refrigerator' ,'Laptop','Lighter','Microwave','Mouse','Box','TrashCan','KitchenPot','Suitcase','Pliers','StorageFurniture','Remote','Bottle' 27 | , 'FoldingChair','Toaster','Lamp','Dispenser', 'Cart', 'Globe','Eyeglasses','Pen','Switch','Printer','Keyboard','Fan','Knife','Dishwaher','Toilet', 'Scissors','Table', 'Stapler','USB', 28 | 'WashingMachine', 'Oven','Faucet', 'Phone','Kettle','Window'] 29 | 30 | hard_train_cat = ['Dispenser','Globe','Remote','Cart','Fan','Knife'] 31 | easy_train_cat = ['StorageFurniture','Pen','Laptop','Microwave','Refrigerator','Safe'] 32 | 33 | cat2freq = dict() 34 | with open(conf.ins_cnt_fn, 'r') as fin: 35 | for l in fin.readlines(): 36 | cat, _, freq = l.rstrip().split() 37 | #hard categories are harder to collect success samples, therefore, increase the frequency of interacting with these categories to keep the category balance 38 | if cat in hard_train_cat: 39 | freq *= 2 40 | cat2freq[cat] = freq 41 | elif cat in easy_train_cat: 42 | freq = int(float(freq) / 1.2) 43 | cat2freq[cat] = freq 44 | cat2freq[cat] = int(freq) 45 | 46 | datagen = DataGen(conf.num_processes) 47 | primact_type = conf.primact_types 48 | with open(conf.data_fn, 'r') as fin: 49 | for l in fin.readlines(): 50 | shape_id, cat = l.rstrip().split() 51 | if cat in conf.category_types: 52 | for epoch in range(conf.starting_epoch, conf.starting_epoch+conf.num_epochs): 53 | for 
cnt_id in range(cat2freq[cat]): 54 | datagen.add_one_collect_job(conf.data_dir, shape_id, cat, cnt_id, primact_type, epoch) 55 | 56 | datagen.start_all() 57 | 58 | print('start generating data') 59 | -------------------------------------------------------------------------------- /train/engine_finetune.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | from typing import Iterable 4 | 5 | import torch 6 | 7 | import util.misc as misc 8 | import util.lr_sched as lr_sched 9 | 10 | from llama import LLaMA_adapter 11 | 12 | def train_one_epoch(model: LLaMA_adapter, 13 | data_loader: Iterable, optimizer: torch.optim.Optimizer, 14 | device: torch.device, epoch: int, loss_scaler, 15 | log_writer=None, 16 | args=None): 17 | model.train(True) 18 | # model.module.set_default_trainability() 19 | 20 | metric_logger = misc.MetricLogger(delimiter=" ") 21 | metric_logger.add_meter('lr', misc.SmoothedValue(window_size=1, fmt='{value:.6f}')) 22 | header = 'Epoch: [{}]'.format(epoch) 23 | print_freq = 10 24 | 25 | accum_iter = args.accum_iter 26 | 27 | optimizer.zero_grad() 28 | 29 | if log_writer is not None: 30 | print('log_dir: {}'.format(log_writer.log_dir)) 31 | for data_iter_step, (examples, labels, example_mask, imgs) in enumerate(metric_logger.log_every(data_loader, print_freq, header)): 32 | # we use a per iteration (instead of per epoch) lr scheduler 33 | if data_iter_step % accum_iter == 0: 34 | lr_sched.adjust_learning_rate(optimizer, data_iter_step / len(data_loader) + epoch, args) 35 | 36 | #把张量移到同一设备上 37 | 38 | examples = examples.to(device) 39 | labels = labels.to(device) 40 | imgs = imgs.to(device, non_blocking=True) 41 | imgs = imgs.to(device, non_blocking=True) 42 | 43 | # print("________---------------66666") 44 | #with torch.no_grad():#不计算梯度减少内存占用 45 | with torch.cuda.amp.autocast(): 46 | c_loss, m_loss = model(examples, labels, imgs) 47 | loss = c_loss + m_loss * 0 48 | loss_value = loss.item() 49 | c_loss_value = c_loss.item() 50 | m_loss_value = m_loss 51 | if not math.isfinite(loss_value): 52 | print("Loss is {}, stopping training".format(loss_value)) 53 | sys.exit(1) 54 | 55 | loss /= accum_iter 56 | loss_scaler(loss, optimizer, parameters=model.parameters(), 57 | update_grad=(data_iter_step + 1) % accum_iter == 0) 58 | if (data_iter_step + 1) % accum_iter == 0: 59 | optimizer.zero_grad() 60 | 61 | torch.cuda.synchronize() 62 | 63 | metric_logger.update(closs=c_loss_value) 64 | metric_logger.update(mloss=m_loss_value) 65 | 66 | lr = optimizer.param_groups[0]["lr"] 67 | metric_logger.update(lr=lr) 68 | 69 | loss_value_reduce = misc.all_reduce_mean(loss_value) 70 | c_loss_value_reduce = misc.all_reduce_mean(c_loss_value) 71 | m_loss_value_reduce = misc.all_reduce_mean(m_loss_value) 72 | if log_writer is not None and (data_iter_step + 1) % accum_iter == 0: 73 | """ We use epoch_1000x as the x-axis in tensorboard. 74 | This calibrates different curves when batch size changes. 
75 | """ 76 | epoch_1000x = int((data_iter_step / len(data_loader) + epoch) * 1000) 77 | log_writer.add_scalar('c_train_loss', c_loss_value_reduce, epoch_1000x) 78 | log_writer.add_scalar('m_train_loss', m_loss_value_reduce, epoch_1000x) 79 | log_writer.add_scalar('lr', lr, epoch_1000x) 80 | # break 81 | 82 | 83 | # gather the stats from all processes 84 | metric_logger.synchronize_between_processes() 85 | print("Averaged stats:", metric_logger) 86 | return {k: meter.global_avg for k, meter in metric_logger.meters.items()} 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ManipLLM 2 | The official codebase for ManipLLM: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation (CVPR 2024) 3 | 4 | ## Acknowledgement 5 | This repo benefits from [LLama_Adapter](https://github.com/OpenGVLab/LLaMA-Adapter) and [Where2act](https://github.com/daerduoCarey/where2act). Thanks for their wonderful works. 6 | 7 | ## Setup 8 | 1) conda create --name manipllm python=3.8 9 | 10 | 2) conda activate manipllm 11 | 12 | 3) pip install -r requirements.txt 13 | 14 | 15 | ## Data Collection 16 | 17 | 18 | - Collect data by your own: Download [partnet mobility](https://sapien.ucsd.edu/downloads) urdf from its official website and place under ./ManipLLM/data_collection/asset. 19 | ```bash 20 | ./asset/original_sapien_dataset 21 | ├── 148 22 | | └── mobility.urdf 23 | ├── 149 24 | | └── mobility.urdf 25 | ├── ... 26 | │ ... 27 | └── ... 28 | 29 | cd ./ManipLLM/data_collection/code 30 | 31 | bash scripts/run_gen_offline_data.sh 32 | 33 | This command will first generate training dataset and then generate the testing dataset. 34 | 35 | ## Model Training 36 | - Preparation: 37 | 38 | Download checkpoints for [CLIP](https://disk.pku.edu.cn/link/AA93FF7210CF0D4F428850C0F520C81453), [LLaMa-Adapter](https://disk.pku.edu.cn/link/AA682A19DB7FDA4028B112449D24BBC308). The downloaded checkpoints should be placed under /ManipLLM/train/ckpts. Obtain the LLaMA backbone weights using this [form](https://docs.google.com/forms/d/e/1FAIpQLSfqNECQnMkycAp2jP4Z9TFX0cGR4uf7b_fBxjY_OjhJILlKGA/viewform). Please note that checkpoints from unofficial sources (e.g., BitTorrent) may contain malicious code and should be used with care. Organize the downloaded checkpoints in the following structure: 39 | ```plaintext 40 | ./ckpts/llama_model_weights 41 | ├── 7B 42 | │ ├── checklist.chk 43 | │ ├── consolidated.00.pth 44 | │ └── params.json 45 | └── tokenizer.model 46 | ./ckpts/BIAS_LORA_NORM-336-Chinese-7B.pth 47 | ./ckpts/ViT-L-14-336px.pt 48 | - Model training: The training requires the server to has a least 40g memory. The command will first generate the training json, then start training 49 | 50 | 51 | ```bash 52 | cd ./ManipLLM/train 53 | 54 | bash finetune.sh 55 | 56 | ## Model Testing 57 | - The public code only infers on the final prompt without chain-of-thought, predicting the pose directly. 58 | 59 | - Remember to add the checkpoints of [CLIP](https://disk.pku.edu.cn/link/AA93FF7210CF0D4F428850C0F520C81453), [LLaMa](same with the process in training), and [LLaMa-Adapter](https://disk.pku.edu.cn/link/AA682A19DB7FDA4028B112449D24BBC308) under /ManipLLM/test/ckpts as well. 60 | 61 | - We release the checkpoint: checkpoint-9-ori.pth. 
Note that, due to the randomness in data collection, the provided testing dataset is different from the ones in paper, so you may result in slightly different but comparable results compared with the results in paper. Download the released [checkpoint-9-ori](https://pan.baidu.com/s/1kh_LO7W7TnnrpPzI4khw0Q?pwd=cipc) or use your own trained checkpoint. The link we provide is baiduyun downloading link. If you need a google drive download link, send your google account via email to xl3062@columbia.edu, then we will share the link with you. Remember to change the line5 in test.sh to the dir you placed the ckpts. 62 | 63 | - Download OUR [test data](https://disk.pku.edu.cn/link/AA103C5B00398E4E4089903CB06AC09D8C) or collect the test data by your own. The downloaded 'test_data' folder should be unziped under /ManipLLM/data_collection/data. Download [partnet mobility](https://sapien.ucsd.edu/downloads) urdf from its official website and place under /ManipLLM/data_collection/asset. 64 | 65 | - The testing requires the server to has a least 40g memory. This command will first use the model to infer on all the test samples, and then interact with object in the simulator (SAPIEN). 66 | 67 | ```bash 68 | cd ./ManipLLM/test 69 | 70 | bash test.sh 71 | 72 | -------------------------------------------------------------------------------- /test/cal_test_mani_succ_rate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | from argparse import ArgumentParser 4 | import utils 5 | import os 6 | def calculate_succ_ration(data_list_for_cat,conf,out_dir): 7 | out_info={} 8 | for cat in conf.category_types: 9 | if cat in data_list_for_cat.keys(): 10 | succ_ration_list=[] 11 | for i in data_list_for_cat[cat]: 12 | try: 13 | with open(os.path.join(i, 'result.json'), 'r') as fin: 14 | result_data = json.load(fin) 15 | succ_ration_list.append(result_data['mani_succ']) 16 | except: 17 | continue 18 | 19 | succ_ration_list = np.array(succ_ration_list) 20 | out_info['number_of_%s'%cat]= len(succ_ration_list) 21 | mean_value = np.mean(succ_ration_list.astype(float)) 22 | out_info['mani_succ_ration_for_%s'%cat]= mean_value 23 | else: 24 | # print("there is no '%s' data "% cat) 25 | continue 26 | train_cat = ['Safe', 'Door','Display','Refrigerator' ,'Laptop','Lighter','Microwave','Mouse','Box','TrashCan','KitchenPot','Suitcase','Pliers','StorageFurniture','Remote','Bottle' 27 | , 'FoldingChair','Toaster','Lamp','Dispenser','Eyeglasses','Pen','Printer','Keyboard','Fan','Knife','Dishwaher'] 28 | 29 | count_train = 1e-6 30 | count_test = 1e-6 31 | osum_train = 0 32 | osum_test = 0 33 | print(out_info) 34 | for i in range(0,len(out_info.keys()),2): 35 | if list(out_info.keys())[i].split('_')[-1] in train_cat: 36 | if 0.0 <= out_info[list(out_info.keys())[i+1]] and out_info[list(out_info.keys())[i+1]] <= 1.0: 37 | osum_train += out_info[list(out_info.keys())[i]] * out_info[list(out_info.keys())[i+1]] 38 | # print(out_info[list(out_info.keys())[i]],out_info[list(out_info.keys())[i+1]]) 39 | count_train += out_info[list(out_info.keys())[i]] 40 | else: 41 | if 0.0 <= out_info[list(out_info.keys())[i+1]] and out_info[list(out_info.keys())[i+1]] <= 1.0: 42 | osum_test += out_info[list(out_info.keys())[i]] * out_info[list(out_info.keys())[i+1]] 43 | count_test += out_info[list(out_info.keys())[i]] 44 | 45 | print(f'test seen acc on {count_train} samples is {osum_train/count_train}, test unseen acc on {count_test} samples is {osum_test/count_test}') 46 | with 
open(os.path.join(out_dir, 'mani_succ_ration_for_cats.json'), 'w') as fout: 47 | json.dump(out_info, fout) 48 | 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = ArgumentParser() 53 | parser.add_argument('--primact_type', type=str, help='primact_type:pushing,pulling,pushing left,pulling left') 54 | parser.add_argument('--data_dir', type=str, help='data_dir for whole test data') 55 | parser.add_argument('--out_dir', type=str, help='out_dir for calculate_info') 56 | conf = parser.parse_args() 57 | 58 | 59 | conf.category_types = ['Safe', 'Door','Display','Refrigerator' ,'Laptop','Lighter','Microwave','Mouse','Box','TrashCan','KitchenPot','Suitcase','Pliers','StorageFurniture','Remote','Bottle' 60 | , 'FoldingChair','Toaster','Lamp','Dispenser','Toilet', 'Scissors','Table','USB', 61 | 'WashingMachine', 'Oven','Faucet'] 62 | conf.out_dir = os.path.join(conf.data_dir,'calculate_info') 63 | if not os.path.exists(conf.out_dir): 64 | os.makedirs(conf.out_dir) 65 | 66 | data_list_for_cat={} 67 | record_names = os.listdir(conf.data_dir) 68 | 69 | for record_name in record_names: 70 | 71 | if '.png' in record_name or '.json' in record_name: 72 | continue 73 | else: 74 | 75 | category= record_name.rstrip().split('_')[1] 76 | data_list_for_cat.setdefault(category,[]).append(os.path.join(conf.data_dir, record_name.rstrip())) 77 | 78 | calculate_succ_ration(data_list_for_cat,conf,conf.out_dir) 79 | -------------------------------------------------------------------------------- /test/llama/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import hashlib 4 | import warnings 5 | 6 | from tqdm import tqdm 7 | import torch 8 | 9 | 10 | def sample_top_p(probs, p): 11 | probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True) 12 | probs_sum = torch.cumsum(probs_sort, dim=-1) 13 | mask = probs_sum - probs_sort > p 14 | probs_sort[mask] = 0.0 15 | probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True)) 16 | next_token = torch.multinomial(probs_sort, num_samples=1) 17 | next_token = torch.gather(probs_idx, -1, next_token) 18 | return next_token 19 | 20 | 21 | def format_prompt(instruction, input=None, lang_type='EN'): 22 | 23 | PROMPT_DICT = { 24 | "prompt_input": ( 25 | "Below is an instruction that describes a task, paired with an input that provides further context. " 26 | "Write a response that appropriately completes the request.\n\n" 27 | "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" 28 | ), 29 | "prompt_no_input": ( 30 | "Below is an instruction that describes a task. " 31 | "Write a response that appropriately completes the request.\n\n" 32 | "### Instruction:\n{instruction}\n\n### Response:" 33 | ), 34 | } 35 | CH_PROMPT_DICT = { 36 | "prompt_input": ( 37 | "Below is a chinese instruction that describes a task, paired with a chinese input that provides further context. " 38 | "Write a chinese response that appropriately completes the request.\n\n" 39 | "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" 40 | ), 41 | "prompt_no_input": ( 42 | "Below is a chinese instruction that describes a task. 
" 43 | "Write a chinese response that appropriately completes the request.\n\n" 44 | "### Instruction:\n{instruction}\n\n### Response:" 45 | ) 46 | } 47 | if input is None or input == '': 48 | if lang_type == 'EN': 49 | return PROMPT_DICT['prompt_no_input'].format_map({'instruction': instruction}) 50 | else: 51 | return CH_PROMPT_DICT['prompt_no_input'].format_map({'instruction': instruction}) 52 | else: 53 | if lang_type == 'EN': 54 | return PROMPT_DICT["prompt_input"].format_map({'instruction': instruction, 'input': input}) 55 | else: 56 | return CH_PROMPT_DICT["prompt_input"].format_map({'instruction': instruction, 'input': input}) 57 | 58 | 59 | def _download(url: str, root: str): 60 | os.makedirs(root, exist_ok=True) 61 | filename = os.path.basename(url) 62 | # assume the url is https://some/path/sha256_model.pth 63 | expected_sha256 = url.split("/")[-1].split('_')[0] 64 | # expected_sha256 = url.split("/")[-2] 65 | download_target = os.path.join(root, filename) 66 | 67 | if os.path.exists(download_target) and not os.path.isfile(download_target): 68 | raise RuntimeError(f"{download_target} exists and is not a regular file") 69 | 70 | if os.path.isfile(download_target): 71 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: 72 | return download_target 73 | else: 74 | warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") 75 | 76 | with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: 77 | with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop: 78 | while True: 79 | buffer = source.read(8192) 80 | if not buffer: 81 | break 82 | 83 | output.write(buffer) 84 | loop.update(len(buffer)) 85 | 86 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256: 87 | raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match") 88 | 89 | return download_target 90 | -------------------------------------------------------------------------------- /train/llama/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import urllib 3 | import hashlib 4 | import warnings 5 | 6 | from tqdm import tqdm 7 | import torch 8 | 9 | 10 | def sample_top_p(probs, p): 11 | probs_sort, probs_idx = torch.sort(probs, dim=-1, descending=True) 12 | probs_sum = torch.cumsum(probs_sort, dim=-1) 13 | mask = probs_sum - probs_sort > p 14 | probs_sort[mask] = 0.0 15 | probs_sort.div_(probs_sort.sum(dim=-1, keepdim=True)) 16 | next_token = torch.multinomial(probs_sort, num_samples=1) 17 | next_token = torch.gather(probs_idx, -1, next_token) 18 | return next_token 19 | 20 | 21 | def format_prompt(instruction, input=None, lang_type='EN'): 22 | 23 | PROMPT_DICT = { 24 | "prompt_input": ( 25 | "Below is an instruction that describes a task, paired with an input that provides further context. " 26 | "Write a response that appropriately completes the request.\n\n" 27 | "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" 28 | ), 29 | "prompt_no_input": ( 30 | "Below is an instruction that describes a task. " 31 | "Write a response that appropriately completes the request.\n\n" 32 | "### Instruction:\n{instruction}\n\n### Response:" 33 | ), 34 | } 35 | CH_PROMPT_DICT = { 36 | "prompt_input": ( 37 | "Below is a chinese instruction that describes a task, paired with a chinese input that provides further context. 
" 38 | "Write a chinese response that appropriately completes the request.\n\n" 39 | "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:" 40 | ), 41 | "prompt_no_input": ( 42 | "Below is a chinese instruction that describes a task. " 43 | "Write a chinese response that appropriately completes the request.\n\n" 44 | "### Instruction:\n{instruction}\n\n### Response:" 45 | ) 46 | } 47 | if input is None or input == '': 48 | if lang_type == 'EN': 49 | return PROMPT_DICT['prompt_no_input'].format_map({'instruction': instruction}) 50 | else: 51 | return CH_PROMPT_DICT['prompt_no_input'].format_map({'instruction': instruction}) 52 | else: 53 | if lang_type == 'EN': 54 | return PROMPT_DICT["prompt_input"].format_map({'instruction': instruction, 'input': input}) 55 | else: 56 | return CH_PROMPT_DICT["prompt_input"].format_map({'instruction': instruction, 'input': input}) 57 | 58 | 59 | def _download(url: str, root: str): 60 | os.makedirs(root, exist_ok=True) 61 | filename = os.path.basename(url) 62 | # assume the url is https://some/path/sha256_model.pth 63 | expected_sha256 = url.split("/")[-1].split('_')[0] 64 | # expected_sha256 = url.split("/")[-2] 65 | download_target = os.path.join(root, filename) 66 | 67 | if os.path.exists(download_target) and not os.path.isfile(download_target): 68 | raise RuntimeError(f"{download_target} exists and is not a regular file") 69 | 70 | if os.path.isfile(download_target): 71 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() == expected_sha256: 72 | return download_target 73 | else: 74 | warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") 75 | 76 | with urllib.request.urlopen(url) as source, open(download_target, "wb") as output: 77 | with tqdm(total=int(source.info().get("Content-Length")), ncols=80, unit='iB', unit_scale=True, unit_divisor=1024) as loop: 78 | while True: 79 | buffer = source.read(8192) 80 | if not buffer: 81 | break 82 | 83 | output.write(buffer) 84 | loop.update(len(buffer)) 85 | 86 | if hashlib.sha256(open(download_target, "rb").read()).hexdigest() != expected_sha256: 87 | raise RuntimeError("Model has been downloaded but the SHA256 checksum does not not match") 88 | 89 | return download_target 90 | -------------------------------------------------------------------------------- /data_collection/code/datagen.py: -------------------------------------------------------------------------------- 1 | """ 2 | Batch-generate data 3 | """ 4 | 5 | import os 6 | import numpy as np 7 | import multiprocessing as mp 8 | from subprocess import call 9 | from utils import printout 10 | import time 11 | 12 | 13 | class DataGen(object): 14 | 15 | def __init__(self, num_processes, flog=None): 16 | self.num_processes = num_processes 17 | self.flog = flog 18 | 19 | self.todos = [] 20 | self.processes = [] 21 | self.is_running = False 22 | self.Q = mp.Queue() 23 | 24 | def __len__(self): 25 | return len(self.todos) 26 | 27 | def add_one_collect_job(self, data_dir, shape_id, category, cnt_id, primact_type, trial_id): 28 | if self.is_running: 29 | printout(self.flog, 'ERROR: cannot add a new job while DataGen is running!') 30 | exit(1) 31 | 32 | todo = ('COLLECT', shape_id, category, cnt_id, primact_type, data_dir, trial_id, np.random.randint(10000000)) 33 | self.todos.append(todo) 34 | 35 | def add_one_recollect_job(self, src_data_dir, dir1, dir2, recollect_record_name, tar_data_dir, x, y): 36 | if self.is_running: 37 | printout(self.flog, 'ERROR: cannot add a new job 
while DataGen is running!') 38 | exit(1) 39 | 40 | todo = ('RECOLLECT', src_data_dir, recollect_record_name, tar_data_dir, np.random.randint(10000000), x, y, dir1, dir2) 41 | self.todos.append(todo) 42 | 43 | def add_one_checkcollect_job(self, src_data_dir, dir1, dir2, recollect_record_name, tar_data_dir, x, y): 44 | if self.is_running: 45 | printout(self.flog, 'ERROR: cannot add a new job while DataGen is running!') 46 | exit(1) 47 | 48 | todo = ('CHECKCOLLECT', src_data_dir, recollect_record_name, tar_data_dir, np.random.randint(10000000), x, y, dir1, dir2) 49 | self.todos.append(todo) 50 | 51 | @staticmethod 52 | def job_func(pid, todos, Q): 53 | succ_todos = [] 54 | for todo in todos: 55 | if todo[0] == 'COLLECT': 56 | 57 | # the code is runned without gui 58 | cmd = 'xvfb-run -a python collect_data.py %s %s %d %s --out_dir %s --trial_id %d --random_seed %d --no_gui' \ 59 | % (todo[1], todo[2], todo[3], todo[4], todo[5], todo[6], todo[7]) 60 | # print(cmd) 61 | # assert(0) 62 | folder_name = todo[5] 63 | job_name = '%s_%s_%d_%s_%s' % (todo[1], todo[2], todo[3], todo[4], todo[6]) 64 | ret = call(cmd, shell=True) 65 | if ret == 0: 66 | succ_todos.append(os.path.join(folder_name, job_name)) 67 | if ret == 2: 68 | succ_todos.append(None) 69 | Q.put(succ_todos) 70 | 71 | def start_all(self): 72 | if self.is_running: 73 | printout(self.flog, 'ERROR: cannot start all while DataGen is running!') 74 | exit(1) 75 | 76 | total_todos = len(self) 77 | num_todos_per_process = int(np.ceil(total_todos / self.num_processes)) 78 | np.random.shuffle(self.todos) 79 | for i in range(self.num_processes): 80 | todos = self.todos[i*num_todos_per_process: min(total_todos, (i+1)*num_todos_per_process)] 81 | p = mp.Process(target=self.job_func, args=(i, todos, self.Q)) 82 | p.start() 83 | self.processes.append(p) 84 | 85 | self.is_running = True 86 | 87 | def join_all(self): 88 | if not self.is_running: 89 | printout(self.flog, 'ERROR: cannot join all while DataGen is idle!') 90 | exit(1) 91 | 92 | ret = [] 93 | for p in self.processes: 94 | ret += self.Q.get() 95 | 96 | for p in self.processes: 97 | p.join() 98 | 99 | self.todos = [] 100 | self.processes = [] 101 | self.Q = mp.Queue() 102 | self.is_running=False 103 | return ret 104 | 105 | 106 | -------------------------------------------------------------------------------- /test/robots/panda_gripper.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /data_collection/code/robots/panda_gripper.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 
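Note (added sketch, not a file from the repository): the DataGen class in data_collection/code/datagen.py above is the multiprocessing driver used by gen_offline_data.py. Below is a minimal, hedged usage sketch; the shape id, category, output folder and trial count are hypothetical placeholders, and each queued job shells out to collect_data.py, so it only runs inside the repo with the PartNet-Mobility assets in place.

from datagen import DataGen

datagen = DataGen(num_processes=4)          # spawn up to 4 worker processes
for trial_id in range(8):                   # queue a few COLLECT jobs
    datagen.add_one_collect_job(
        data_dir='../data/example_out',     # hypothetical output folder
        shape_id='7310',                    # hypothetical PartNet-Mobility shape id
        category='Microwave',
        cnt_id=0,
        primact_type='pulling',
        trial_id=trial_id)
datagen.start_all()                         # shuffles the todo list and forks the workers
succ_records = datagen.join_all()           # blocks until every worker finishes
print(len(succ_records), 'job results collected (None entries correspond to jobs that returned code 2)')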
44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /test/robots/franka_description/meshes/collision/finger.stl.convex.stl: -------------------------------------------------------------------------------- 1 | solid AssimpScene 2 | facet normal -nan -nan -nan 3 | outer loop 4 | vertex 0.00869277 -0.000132643 0.0501662 5 | vertex 0.0104486 0.00258331 0.000146801 6 | vertex 0.01036 0.0264034 0.000154629 7 | endloop 8 | endfacet 9 | 10 | facet normal -nan -nan -nan 11 | outer loop 12 | vertex 0.00869277 -0.000132643 0.0501662 13 | vertex 0.01036 0.0264034 0.000154629 14 | vertex 0.0104005 0.0252534 0.0190366 15 | endloop 16 | endfacet 17 | 18 | facet normal -nan -nan -nan 19 | outer loop 20 | vertex 0.0104005 0.0252534 0.0190366 21 | vertex 0.00861608 0.0139887 0.0513279 22 | vertex 0.00869277 -0.000132643 0.0501662 23 | endloop 24 | endfacet 25 | 26 | facet normal -nan -nan -nan 27 | outer loop 28 | vertex 0.0104486 0.00258331 0.000146801 29 | vertex -0.0103872 0.00253418 0.000131696 30 | vertex -0.0104013 0.0263094 0.00016651 31 | endloop 32 | endfacet 33 | 34 | facet normal -nan -nan -nan 35 | outer loop 36 | vertex 0.0104486 0.00258331 0.000146801 37 | vertex -0.0104013 0.0263094 0.00016651 38 | vertex 0.01036 0.0264034 0.000154629 39 | endloop 40 | endfacet 41 | 42 | facet normal -nan -nan -nan 43 | outer loop 44 | vertex -0.0103889 0.0252203 0.0191876 45 | vertex -0.00527792 0.0142931 0.053849 46 | vertex 0.00583983 0.0142743 0.0538034 47 | endloop 48 | endfacet 49 | 50 | facet normal -nan -nan -nan 51 | outer loop 52 | vertex -0.0103889 0.0252203 0.0191876 53 | vertex 0.00583983 0.0142743 0.0538034 54 | vertex 0.0104005 0.0252534 0.0190366 55 | endloop 56 | endfacet 57 | 58 | facet normal -nan -nan -nan 59 | outer loop 60 | vertex -0.0103889 0.0252203 0.0191876 61 | vertex 0.0104005 0.0252534 0.0190366 62 | vertex 0.01036 0.0264034 0.000154629 63 | endloop 64 | endfacet 65 | 66 | facet normal -nan -nan -nan 67 | outer loop 68 | vertex -0.0103889 0.0252203 0.0191876 69 | vertex 0.01036 0.0264034 0.000154629 70 | vertex -0.0104013 0.0263094 0.00016651 71 | endloop 72 | endfacet 73 | 74 | facet normal -nan -nan -nan 75 | outer loop 76 | vertex -0.0103872 0.00253418 0.000131696 77 | vertex -0.00862294 -5.68019e-05 0.0509528 78 | vertex -0.00884117 0.0139176 0.0505894 79 | endloop 80 | endfacet 81 | 82 | facet normal -nan -nan -nan 83 | outer loop 84 | vertex -0.0103872 0.00253418 0.000131696 85 | vertex -0.00884117 0.0139176 0.0505894 86 | vertex -0.0103889 0.0252203 0.0191876 87 | endloop 88 | endfacet 89 | 90 | facet normal -nan -nan -nan 91 | outer loop 92 | vertex -0.0103889 0.0252203 0.0191876 93 | vertex -0.0104013 0.0263094 0.00016651 94 | vertex -0.0103872 0.00253418 0.000131696 95 | endloop 96 | endfacet 97 | 98 | facet normal -nan -nan -nan 99 | outer loop 100 | vertex 0.00613802 -2.06026e-05 0.0535776 101 | vertex 0.00869277 -0.000132643 0.0501662 102 | vertex 0.00861608 0.0139887 0.0513279 103 | endloop 104 | endfacet 105 | 106 | facet normal -nan -nan -nan 107 | outer loop 108 | vertex 
-0.00884117 0.0139176 0.0505894 109 | vertex -0.00527792 0.0142931 0.053849 110 | vertex -0.0103889 0.0252203 0.0191876 111 | endloop 112 | endfacet 113 | 114 | facet normal -nan -nan -nan 115 | outer loop 116 | vertex -0.00884117 0.0139176 0.0505894 117 | vertex -0.00862294 -5.68019e-05 0.0509528 118 | vertex -0.00548142 -9.11208e-05 0.0537247 119 | endloop 120 | endfacet 121 | 122 | facet normal -nan -nan -nan 123 | outer loop 124 | vertex -0.00884117 0.0139176 0.0505894 125 | vertex -0.00548142 -9.11208e-05 0.0537247 126 | vertex -0.00527792 0.0142931 0.053849 127 | endloop 128 | endfacet 129 | 130 | facet normal -nan -nan -nan 131 | outer loop 132 | vertex 0.00583983 0.0142743 0.0538034 133 | vertex -0.00527792 0.0142931 0.053849 134 | vertex -0.00548142 -9.11208e-05 0.0537247 135 | endloop 136 | endfacet 137 | 138 | facet normal -nan -nan -nan 139 | outer loop 140 | vertex 0.00583983 0.0142743 0.0538034 141 | vertex -0.00548142 -9.11208e-05 0.0537247 142 | vertex 0.00613802 -2.06026e-05 0.0535776 143 | endloop 144 | endfacet 145 | 146 | facet normal -nan -nan -nan 147 | outer loop 148 | vertex 0.00583983 0.0142743 0.0538034 149 | vertex 0.00613802 -2.06026e-05 0.0535776 150 | vertex 0.00861608 0.0139887 0.0513279 151 | endloop 152 | endfacet 153 | 154 | facet normal -nan -nan -nan 155 | outer loop 156 | vertex 0.00583983 0.0142743 0.0538034 157 | vertex 0.00861608 0.0139887 0.0513279 158 | vertex 0.0104005 0.0252534 0.0190366 159 | endloop 160 | endfacet 161 | 162 | facet normal -nan -nan -nan 163 | outer loop 164 | vertex -0.00873039 -2.35252e-05 0.0361648 165 | vertex 0.00869277 -0.000132643 0.0501662 166 | vertex 0.00613802 -2.06026e-05 0.0535776 167 | endloop 168 | endfacet 169 | 170 | facet normal -nan -nan -nan 171 | outer loop 172 | vertex -0.00873039 -2.35252e-05 0.0361648 173 | vertex 0.00613802 -2.06026e-05 0.0535776 174 | vertex -0.00548142 -9.11208e-05 0.0537247 175 | endloop 176 | endfacet 177 | 178 | facet normal -nan -nan -nan 179 | outer loop 180 | vertex -0.00548142 -9.11208e-05 0.0537247 181 | vertex -0.00862294 -5.68019e-05 0.0509528 182 | vertex -0.00873039 -2.35252e-05 0.0361648 183 | endloop 184 | endfacet 185 | 186 | facet normal -nan -nan -nan 187 | outer loop 188 | vertex -0.00873039 -2.35252e-05 0.0361648 189 | vertex -0.00862294 -5.68019e-05 0.0509528 190 | vertex -0.0103872 0.00253418 0.000131696 191 | endloop 192 | endfacet 193 | 194 | facet normal -nan -nan -nan 195 | outer loop 196 | vertex -0.00873039 -2.35252e-05 0.0361648 197 | vertex -0.0103872 0.00253418 0.000131696 198 | vertex 0.0104486 0.00258331 0.000146801 199 | endloop 200 | endfacet 201 | 202 | facet normal -nan -nan -nan 203 | outer loop 204 | vertex -0.00873039 -2.35252e-05 0.0361648 205 | vertex 0.0104486 0.00258331 0.000146801 206 | vertex 0.00869277 -0.000132643 0.0501662 207 | endloop 208 | endfacet 209 | 210 | endsolid AssimpScene 211 | -------------------------------------------------------------------------------- /data_collection/code/robots/franka_description/meshes/collision/finger.stl.convex.stl: -------------------------------------------------------------------------------- 1 | solid AssimpScene 2 | facet normal -nan -nan -nan 3 | outer loop 4 | vertex 0.00869277 -0.000132643 0.0501662 5 | vertex 0.0104486 0.00258331 0.000146801 6 | vertex 0.01036 0.0264034 0.000154629 7 | endloop 8 | endfacet 9 | 10 | facet normal -nan -nan -nan 11 | outer loop 12 | vertex 0.00869277 -0.000132643 0.0501662 13 | vertex 0.01036 0.0264034 0.000154629 14 | vertex 0.0104005 0.0252534 0.0190366 15 | 
endloop 16 | endfacet 17 | 18 | facet normal -nan -nan -nan 19 | outer loop 20 | vertex 0.0104005 0.0252534 0.0190366 21 | vertex 0.00861608 0.0139887 0.0513279 22 | vertex 0.00869277 -0.000132643 0.0501662 23 | endloop 24 | endfacet 25 | 26 | facet normal -nan -nan -nan 27 | outer loop 28 | vertex 0.0104486 0.00258331 0.000146801 29 | vertex -0.0103872 0.00253418 0.000131696 30 | vertex -0.0104013 0.0263094 0.00016651 31 | endloop 32 | endfacet 33 | 34 | facet normal -nan -nan -nan 35 | outer loop 36 | vertex 0.0104486 0.00258331 0.000146801 37 | vertex -0.0104013 0.0263094 0.00016651 38 | vertex 0.01036 0.0264034 0.000154629 39 | endloop 40 | endfacet 41 | 42 | facet normal -nan -nan -nan 43 | outer loop 44 | vertex -0.0103889 0.0252203 0.0191876 45 | vertex -0.00527792 0.0142931 0.053849 46 | vertex 0.00583983 0.0142743 0.0538034 47 | endloop 48 | endfacet 49 | 50 | facet normal -nan -nan -nan 51 | outer loop 52 | vertex -0.0103889 0.0252203 0.0191876 53 | vertex 0.00583983 0.0142743 0.0538034 54 | vertex 0.0104005 0.0252534 0.0190366 55 | endloop 56 | endfacet 57 | 58 | facet normal -nan -nan -nan 59 | outer loop 60 | vertex -0.0103889 0.0252203 0.0191876 61 | vertex 0.0104005 0.0252534 0.0190366 62 | vertex 0.01036 0.0264034 0.000154629 63 | endloop 64 | endfacet 65 | 66 | facet normal -nan -nan -nan 67 | outer loop 68 | vertex -0.0103889 0.0252203 0.0191876 69 | vertex 0.01036 0.0264034 0.000154629 70 | vertex -0.0104013 0.0263094 0.00016651 71 | endloop 72 | endfacet 73 | 74 | facet normal -nan -nan -nan 75 | outer loop 76 | vertex -0.0103872 0.00253418 0.000131696 77 | vertex -0.00862294 -5.68019e-05 0.0509528 78 | vertex -0.00884117 0.0139176 0.0505894 79 | endloop 80 | endfacet 81 | 82 | facet normal -nan -nan -nan 83 | outer loop 84 | vertex -0.0103872 0.00253418 0.000131696 85 | vertex -0.00884117 0.0139176 0.0505894 86 | vertex -0.0103889 0.0252203 0.0191876 87 | endloop 88 | endfacet 89 | 90 | facet normal -nan -nan -nan 91 | outer loop 92 | vertex -0.0103889 0.0252203 0.0191876 93 | vertex -0.0104013 0.0263094 0.00016651 94 | vertex -0.0103872 0.00253418 0.000131696 95 | endloop 96 | endfacet 97 | 98 | facet normal -nan -nan -nan 99 | outer loop 100 | vertex 0.00613802 -2.06026e-05 0.0535776 101 | vertex 0.00869277 -0.000132643 0.0501662 102 | vertex 0.00861608 0.0139887 0.0513279 103 | endloop 104 | endfacet 105 | 106 | facet normal -nan -nan -nan 107 | outer loop 108 | vertex -0.00884117 0.0139176 0.0505894 109 | vertex -0.00527792 0.0142931 0.053849 110 | vertex -0.0103889 0.0252203 0.0191876 111 | endloop 112 | endfacet 113 | 114 | facet normal -nan -nan -nan 115 | outer loop 116 | vertex -0.00884117 0.0139176 0.0505894 117 | vertex -0.00862294 -5.68019e-05 0.0509528 118 | vertex -0.00548142 -9.11208e-05 0.0537247 119 | endloop 120 | endfacet 121 | 122 | facet normal -nan -nan -nan 123 | outer loop 124 | vertex -0.00884117 0.0139176 0.0505894 125 | vertex -0.00548142 -9.11208e-05 0.0537247 126 | vertex -0.00527792 0.0142931 0.053849 127 | endloop 128 | endfacet 129 | 130 | facet normal -nan -nan -nan 131 | outer loop 132 | vertex 0.00583983 0.0142743 0.0538034 133 | vertex -0.00527792 0.0142931 0.053849 134 | vertex -0.00548142 -9.11208e-05 0.0537247 135 | endloop 136 | endfacet 137 | 138 | facet normal -nan -nan -nan 139 | outer loop 140 | vertex 0.00583983 0.0142743 0.0538034 141 | vertex -0.00548142 -9.11208e-05 0.0537247 142 | vertex 0.00613802 -2.06026e-05 0.0535776 143 | endloop 144 | endfacet 145 | 146 | facet normal -nan -nan -nan 147 | outer loop 148 | vertex 
0.00583983 0.0142743 0.0538034 149 | vertex 0.00613802 -2.06026e-05 0.0535776 150 | vertex 0.00861608 0.0139887 0.0513279 151 | endloop 152 | endfacet 153 | 154 | facet normal -nan -nan -nan 155 | outer loop 156 | vertex 0.00583983 0.0142743 0.0538034 157 | vertex 0.00861608 0.0139887 0.0513279 158 | vertex 0.0104005 0.0252534 0.0190366 159 | endloop 160 | endfacet 161 | 162 | facet normal -nan -nan -nan 163 | outer loop 164 | vertex -0.00873039 -2.35252e-05 0.0361648 165 | vertex 0.00869277 -0.000132643 0.0501662 166 | vertex 0.00613802 -2.06026e-05 0.0535776 167 | endloop 168 | endfacet 169 | 170 | facet normal -nan -nan -nan 171 | outer loop 172 | vertex -0.00873039 -2.35252e-05 0.0361648 173 | vertex 0.00613802 -2.06026e-05 0.0535776 174 | vertex -0.00548142 -9.11208e-05 0.0537247 175 | endloop 176 | endfacet 177 | 178 | facet normal -nan -nan -nan 179 | outer loop 180 | vertex -0.00548142 -9.11208e-05 0.0537247 181 | vertex -0.00862294 -5.68019e-05 0.0509528 182 | vertex -0.00873039 -2.35252e-05 0.0361648 183 | endloop 184 | endfacet 185 | 186 | facet normal -nan -nan -nan 187 | outer loop 188 | vertex -0.00873039 -2.35252e-05 0.0361648 189 | vertex -0.00862294 -5.68019e-05 0.0509528 190 | vertex -0.0103872 0.00253418 0.000131696 191 | endloop 192 | endfacet 193 | 194 | facet normal -nan -nan -nan 195 | outer loop 196 | vertex -0.00873039 -2.35252e-05 0.0361648 197 | vertex -0.0103872 0.00253418 0.000131696 198 | vertex 0.0104486 0.00258331 0.000146801 199 | endloop 200 | endfacet 201 | 202 | facet normal -nan -nan -nan 203 | outer loop 204 | vertex -0.00873039 -2.35252e-05 0.0361648 205 | vertex 0.0104486 0.00258331 0.000146801 206 | vertex 0.00869277 -0.000132643 0.0501662 207 | endloop 208 | endfacet 209 | 210 | endsolid AssimpScene 211 | -------------------------------------------------------------------------------- /data_collection/code/camera.py: -------------------------------------------------------------------------------- 1 | """ 2 | an RGB-D camera 3 | """ 4 | import numpy as np 5 | from sapien.core import Pose 6 | 7 | 8 | class Camera(object): 9 | 10 | def __init__(self, env, near=0.1, far=100.0, image_size=336, dist=5.0, \ 11 | phi=np.pi/5, theta=np.pi, fov=35, random_position=False, fixed_position=False): 12 | builder = env.scene.create_actor_builder() 13 | camera_mount_actor = builder.build(is_kinematic=True) 14 | self.env = env 15 | 16 | # set camera intrinsics 17 | self.camera = env.scene.add_mounted_camera('camera', camera_mount_actor, Pose(), \ 18 | image_size, image_size, 0, np.deg2rad(fov), near, far) 19 | 20 | # set camera extrinsics 21 | if random_position: 22 | phi = (np.random.random()+1) * np.pi/6 23 | theta = np.random.uniform(low=0.7, high=1.3) * np.pi 24 | dist = 4.5 + np.random.random() 25 | if fixed_position: 26 | theta = np.pi 27 | phi = np.pi/10 28 | pos = np.array([dist*np.cos(phi)*np.cos(theta), \ 29 | dist*np.cos(phi)*np.sin(theta), \ 30 | dist*np.sin(phi)]) 31 | forward = -pos / np.linalg.norm(pos) 32 | left = np.cross([0, 0, 1], forward) 33 | left = left / np.linalg.norm(left) 34 | up = np.cross(forward, left) 35 | mat44 = np.eye(4) 36 | mat44[:3, :3] = np.vstack([forward, left, up]).T 37 | mat44[:3, 3] = pos # mat44 is cam2world 38 | mat44[0, 3] += env.object_position_offset 39 | self.mat44 = mat44 40 | camera_mount_actor.set_pose(Pose.from_transformation_matrix(mat44)) 41 | 42 | # log parameters 43 | self.near = near 44 | self.far = far 45 | self.dist = dist 46 | self.theta = theta 47 | self.phi = phi 48 | self.pos = pos 49 | 50 | def 
get_observation(self): 51 | self.camera.take_picture() 52 | rgba = self.camera.get_color_rgba() 53 | rgba = (rgba * 255).clip(0, 255).astype(np.float32) / 255 54 | white = np.ones((rgba.shape[0], rgba.shape[1], 3), dtype=np.float32) 55 | mask = np.tile(rgba[:, :, 3:4], [1, 1, 3]) 56 | rgb = rgba[:, :, :3] * mask + white * (1 - mask) 57 | depth = self.camera.get_depth().astype(np.float32) 58 | return rgb, depth 59 | 60 | def compute_camera_XYZA(self, depth): 61 | camera_matrix = self.camera.get_camera_matrix()[:3, :3] 62 | y, x = np.where(depth < 1) 63 | z = self.near * self.far / (self.far + depth * (self.near - self.far)) 64 | permutation = np.array([[0, 0, 1], [-1, 0, 0], [0, -1, 0]]) 65 | points = (permutation @ np.dot(np.linalg.inv(camera_matrix), \ 66 | np.stack([x, y, np.ones_like(x)] * z[y, x], 0))).T 67 | return y, x, points 68 | 69 | @staticmethod 70 | def compute_XYZA_matrix(id1, id2, pts, size1, size2): 71 | out = np.zeros((size1, size2, 4), dtype=np.float32) 72 | out[id1, id2, :3] = pts 73 | out[id1, id2, 3] = 1 74 | return out 75 | 76 | def get_normal_map(self): 77 | nor = self.camera.get_normal_rgba() 78 | # convert from PartNet-space (x-right, y-up, z-backward) to SAPIEN-space (x-front, y-left, z-up) 79 | new_nor = np.array(nor, dtype=np.float32) 80 | new_nor[:, :, 0] = -nor[:, :, 2] 81 | new_nor[:, :, 1] = -nor[:, :, 0] 82 | new_nor[:, :, 2] = nor[:, :, 1] 83 | return new_nor 84 | 85 | def get_movable_link_mask(self, link_ids): 86 | link_seg = self.camera.get_segmentation() 87 | link_mask = np.zeros((link_seg.shape[0], link_seg.shape[1])).astype(np.uint8) 88 | for idx, lid in enumerate(link_ids): 89 | cur_link_pixels = int(np.sum(link_seg==lid)) 90 | if cur_link_pixels > 0: 91 | link_mask[link_seg == lid] = idx+1 92 | return link_mask 93 | 94 | def get_handle_mask(self): 95 | # read part seg partid2renderids 96 | partid2renderids = dict() 97 | for k in self.env.scene.render_id_to_visual_name: 98 | if self.env.scene.render_id_to_visual_name[k].split('-')[0] == 'handle': 99 | part_id = int(self.env.scene.render_id_to_visual_name[k].split('-')[-1]) 100 | if part_id not in partid2renderids: 101 | partid2renderids[part_id] = [] 102 | partid2renderids[part_id].append(k) 103 | # generate 0/1 handle mask 104 | part_seg = self.camera.get_obj_segmentation() 105 | handle_mask = np.zeros((part_seg.shape[0], part_seg.shape[1])).astype(np.uint8) 106 | for partid in partid2renderids: 107 | cur_part_mask = np.isin(part_seg, partid2renderids[partid]) 108 | cur_part_mask_pixels = int(np.sum(cur_part_mask)) 109 | if cur_part_mask_pixels > 0: 110 | handle_mask[cur_part_mask] = 1 111 | return handle_mask 112 | 113 | def get_object_mask(self): 114 | rgba = self.camera.get_albedo_rgba() 115 | return rgba[:, :, 3] > 0.5 116 | 117 | # return camera parameters 118 | def get_metadata(self): 119 | return { 120 | 'pose': self.camera.get_pose(), 121 | 'near': self.camera.get_near(), 122 | 'far': self.camera.get_far(), 123 | 'width': self.camera.get_width(), 124 | 'height': self.camera.get_height(), 125 | 'fov': self.camera.get_fovy(), 126 | 'camera_matrix': self.camera.get_camera_matrix(), 127 | 'projection_matrix': self.camera.get_projection_matrix(), 128 | 'model_matrix': self.camera.get_model_matrix(), 129 | 'mat44': self.mat44, 130 | } 131 | 132 | # return camera parameters 133 | def get_metadata_json(self): 134 | return { 135 | 'dist': self.dist, 136 | 'theta': self.theta, 137 | 'phi': self.phi, 138 | 'near': self.camera.get_near(), 139 | 'far': self.camera.get_far(), 140 | 'width': 
self.camera.get_width(), 141 | 'height': self.camera.get_height(), 142 | 'fov': self.camera.get_fovy(), 143 | 'camera_matrix': self.camera.get_camera_matrix().tolist(), 144 | 'projection_matrix': self.camera.get_projection_matrix().tolist(), 145 | 'model_matrix': self.camera.get_model_matrix().tolist(), 146 | 'mat44': self.mat44.tolist(), 147 | } 148 | 149 | -------------------------------------------------------------------------------- /test/robots/panda_robot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Franka Panda Robot Arm 3 | support panda.urdf, panda_gripper.urdf 4 | """ 5 | 6 | from __future__ import division 7 | import sapien.core as sapien 8 | from sapien.core import Pose, SceneConfig #,PxrMaterial, 9 | from transforms3d.quaternions import axangle2quat, qmult 10 | import numpy as np 11 | from utils import pose2exp_coordinate, adjoint_matrix 12 | 13 | 14 | class Robot(object): 15 | def __init__(self, env, urdf, material, open_gripper=False): 16 | self.env = env 17 | self.timestep = env.scene.get_timestep() 18 | 19 | # load robot 20 | loader = env.scene.create_urdf_loader() 21 | loader.scale = 1.2 22 | loader.fix_root_link = True 23 | self.robot = loader.load(urdf, {"material": material}) 24 | #self.robot = loader.load(urdf, material) 25 | self.robot.name = "robot" 26 | 27 | # hand (EE), two grippers, the rest arm joints (if any) 28 | self.end_effector_index, self.end_effector = \ 29 | [(i, l) for i, l in enumerate(self.robot.get_links()) if l.name == 'panda_hand'][0] 30 | self.hand_actor_id = self.end_effector.get_id() 31 | self.gripper_joints = [joint for joint in self.robot.get_joints() if 32 | joint.get_name().startswith("panda_finger_joint")] 33 | self.gripper_actor_ids = [joint.get_child_link().get_id() for joint in self.gripper_joints] 34 | self.arm_joints = [joint for joint in self.robot.get_joints() if 35 | joint.get_dof() > 0 and not joint.get_name().startswith("panda_finger")] 36 | 37 | # set drive joint property 38 | for joint in self.arm_joints: 39 | joint.set_drive_property(1000, 400) 40 | for joint in self.gripper_joints: 41 | joint.set_drive_property(200, 60) 42 | 43 | # open/close the gripper at start 44 | if open_gripper: 45 | joint_angles = [] 46 | for j in self.robot.get_joints(): 47 | if j.get_dof() == 1: 48 | if j.get_name().startswith("panda_finger_joint"): 49 | joint_angles.append(0.04) 50 | else: 51 | joint_angles.append(0) 52 | self.robot.set_qpos(joint_angles) 53 | 54 | def compute_joint_velocity_from_twist(self, twist: np.ndarray) -> np.ndarray: 55 | """ 56 | This function is a kinematic-level calculation which do not consider dynamics. 
57 | Pay attention to the frame of twist, is it spatial twist or body twist 58 | 59 | Jacobian is provided for your, so no need to compute the velocity kinematics 60 | ee_jacobian is the geometric Jacobian on account of only the joint of robot arm, not gripper 61 | Jacobian in SAPIEN is defined as the derivative of spatial twist with respect to joint velocity 62 | 63 | Args: 64 | twist: (6,) vector to represent the twist 65 | 66 | Returns: 67 | (7, ) vector for the velocity of arm joints (not include gripper) 68 | 69 | """ 70 | assert twist.size == 6 71 | # Jacobian define in SAPIEN use twist (v, \omega) which is different from the definition in the slides 72 | # So we perform the matrix block operation below 73 | dense_jacobian = self.robot.compute_spatial_twist_jacobian() # (num_link * 6, dof()) 74 | ee_jacobian = np.zeros([6, self.robot.dof - 2]) 75 | ee_jacobian[:3, :] = dense_jacobian[self.end_effector_index * 6 - 3: self.end_effector_index * 6, :self.robot.dof - 2] 76 | ee_jacobian[3:6, :] = dense_jacobian[(self.end_effector_index - 1) * 6: self.end_effector_index * 6 - 3, :self.robot.dof - 2] 77 | 78 | #numerical_small_bool = ee_jacobian < 1e-1 79 | #ee_jacobian[numerical_small_bool] = 0 80 | #inverse_jacobian = np.linalg.pinv(ee_jacobian) 81 | inverse_jacobian = np.linalg.pinv(ee_jacobian, rcond=1e-2) 82 | #inverse_jacobian[np.abs(inverse_jacobian) > 5] = 0 83 | #print(inverse_jacobian) 84 | return inverse_jacobian @ twist 85 | 86 | def internal_controller(self, qvel: np.ndarray) -> None: 87 | """Control the robot dynamically to execute the given twist for one time step 88 | 89 | This method will try to execute the joint velocity using the internal dynamics function in SAPIEN. 90 | 91 | Note that this function is only used for one time step, so you may need to call it multiple times in your code 92 | Also this controller is not perfect, it will still have some small movement even after you have finishing using 93 | it. 
Thus try to wait for some steps using self.wait_n_steps(n) like in the hw2.py after you call it multiple 94 | time to allow it to reach the target position 95 | 96 | Args: 97 | qvel: (7,) vector to represent the joint velocity 98 | 99 | """ 100 | assert qvel.size == len(self.arm_joints) 101 | target_qpos = qvel * self.timestep + self.robot.get_drive_target()[:-2] 102 | for i, joint in enumerate(self.arm_joints): 103 | joint.set_drive_velocity_target(qvel[i]) 104 | joint.set_drive_target(target_qpos[i]) 105 | passive_force = self.robot.compute_passive_force() 106 | self.robot.set_qf(passive_force) 107 | 108 | def calculate_twist(self, time_to_target, target_ee_pose): 109 | relative_transform = self.end_effector.get_pose().inv().to_transformation_matrix() @ target_ee_pose 110 | unit_twist, theta = pose2exp_coordinate(relative_transform) 111 | velocity = theta / time_to_target 112 | body_twist = unit_twist * velocity 113 | current_ee_pose = self.end_effector.get_pose().to_transformation_matrix() 114 | return adjoint_matrix(current_ee_pose) @ body_twist 115 | 116 | def move_to_target_pose(self, target_ee_pose: np.ndarray, num_steps: int) -> None: 117 | """ 118 | Move the robot hand dynamically to a given target pose 119 | Args: 120 | target_ee_pose: (4, 4) transformation of robot hand in robot base frame (ee2base) 121 | num_steps: how much steps to reach to target pose, 122 | each step correspond to self.scene.get_timestep() seconds 123 | in physical simulation 124 | """ 125 | executed_time = num_steps * self.timestep 126 | 127 | spatial_twist = self.calculate_twist(executed_time, target_ee_pose) 128 | for i in range(num_steps): 129 | if i % 100 == 0: 130 | spatial_twist = self.calculate_twist((num_steps - i) * self.timestep, target_ee_pose) 131 | qvel = self.compute_joint_velocity_from_twist(spatial_twist) 132 | self.internal_controller(qvel) 133 | self.env.step() 134 | self.env.render() 135 | return 136 | 137 | def close_gripper(self): 138 | for joint in self.gripper_joints: 139 | joint.set_drive_target(0.0) 140 | 141 | def open_gripper(self): 142 | for joint in self.gripper_joints: 143 | joint.set_drive_target(0.04) 144 | 145 | def clear_velocity_command(self): 146 | for joint in self.arm_joints: 147 | joint.set_drive_velocity_target(0) 148 | 149 | def wait_n_steps(self, n: int): 150 | self.clear_velocity_command() 151 | for i in range(n): 152 | passive_force = self.robot.compute_passive_force() 153 | self.robot.set_qf(passive_force) 154 | self.env.step() 155 | self.env.render() 156 | self.robot.set_qf([0] * self.robot.dof) 157 | 158 | -------------------------------------------------------------------------------- /data_collection/code/robots/panda_robot.py: -------------------------------------------------------------------------------- 1 | """ 2 | Franka Panda Robot Arm 3 | support panda.urdf, panda_gripper.urdf 4 | """ 5 | 6 | from __future__ import division 7 | import sapien.core as sapien 8 | from sapien.core import Pose, SceneConfig #,PxrMaterial, 9 | from transforms3d.quaternions import axangle2quat, qmult 10 | import numpy as np 11 | from utils import pose2exp_coordinate, adjoint_matrix 12 | 13 | 14 | class Robot(object): 15 | def __init__(self, env, urdf, material, open_gripper=False): 16 | self.env = env 17 | self.timestep = env.scene.get_timestep() 18 | 19 | # load robot 20 | loader = env.scene.create_urdf_loader() 21 | loader.scale = 1.3 22 | loader.fix_root_link = True 23 | self.robot = loader.load(urdf, {"material": material}) 24 | #self.robot = loader.load(urdf, material) 25 
| self.robot.name = "robot" 26 | 27 | # hand (EE), two grippers, the rest arm joints (if any) 28 | self.end_effector_index, self.end_effector = \ 29 | [(i, l) for i, l in enumerate(self.robot.get_links()) if l.name == 'panda_hand'][0] 30 | self.hand_actor_id = self.end_effector.get_id() 31 | self.gripper_joints = [joint for joint in self.robot.get_joints() if 32 | joint.get_name().startswith("panda_finger_joint")] 33 | self.gripper_actor_ids = [joint.get_child_link().get_id() for joint in self.gripper_joints] 34 | self.arm_joints = [joint for joint in self.robot.get_joints() if 35 | joint.get_dof() > 0 and not joint.get_name().startswith("panda_finger")] 36 | 37 | # set drive joint property 38 | for joint in self.arm_joints: 39 | joint.set_drive_property(1000, 400) 40 | for joint in self.gripper_joints: 41 | joint.set_drive_property(200, 60) 42 | 43 | # open/close the gripper at start 44 | if open_gripper: 45 | joint_angles = [] 46 | for j in self.robot.get_joints(): 47 | if j.get_dof() == 1: 48 | if j.get_name().startswith("panda_finger_joint"): 49 | joint_angles.append(0.04) 50 | else: 51 | joint_angles.append(0) 52 | self.robot.set_qpos(joint_angles) 53 | 54 | def compute_joint_velocity_from_twist(self, twist: np.ndarray) -> np.ndarray: 55 | """ 56 | This function is a kinematic-level calculation which do not consider dynamics. 57 | Pay attention to the frame of twist, is it spatial twist or body twist 58 | 59 | Jacobian is provided for your, so no need to compute the velocity kinematics 60 | ee_jacobian is the geometric Jacobian on account of only the joint of robot arm, not gripper 61 | Jacobian in SAPIEN is defined as the derivative of spatial twist with respect to joint velocity 62 | 63 | Args: 64 | twist: (6,) vector to represent the twist 65 | 66 | Returns: 67 | (7, ) vector for the velocity of arm joints (not include gripper) 68 | 69 | """ 70 | assert twist.size == 6 71 | # Jacobian define in SAPIEN use twist (v, \omega) which is different from the definition in the slides 72 | # So we perform the matrix block operation below 73 | dense_jacobian = self.robot.compute_spatial_twist_jacobian() # (num_link * 6, dof()) 74 | ee_jacobian = np.zeros([6, self.robot.dof - 2]) 75 | ee_jacobian[:3, :] = dense_jacobian[self.end_effector_index * 6 - 3: self.end_effector_index * 6, :self.robot.dof - 2] 76 | ee_jacobian[3:6, :] = dense_jacobian[(self.end_effector_index - 1) * 6: self.end_effector_index * 6 - 3, :self.robot.dof - 2] 77 | 78 | #numerical_small_bool = ee_jacobian < 1e-1 79 | #ee_jacobian[numerical_small_bool] = 0 80 | #inverse_jacobian = np.linalg.pinv(ee_jacobian) 81 | inverse_jacobian = np.linalg.pinv(ee_jacobian, rcond=1e-2) 82 | #inverse_jacobian[np.abs(inverse_jacobian) > 5] = 0 83 | #print(inverse_jacobian) 84 | return inverse_jacobian @ twist 85 | 86 | def internal_controller(self, qvel: np.ndarray) -> None: 87 | """Control the robot dynamically to execute the given twist for one time step 88 | 89 | This method will try to execute the joint velocity using the internal dynamics function in SAPIEN. 90 | 91 | Note that this function is only used for one time step, so you may need to call it multiple times in your code 92 | Also this controller is not perfect, it will still have some small movement even after you have finishing using 93 | it. 
Thus try to wait for some steps using self.wait_n_steps(n) like in the hw2.py after you call it multiple 94 | time to allow it to reach the target position 95 | 96 | Args: 97 | qvel: (7,) vector to represent the joint velocity 98 | 99 | """ 100 | assert qvel.size == len(self.arm_joints) 101 | target_qpos = qvel * self.timestep + self.robot.get_drive_target()[:-2] 102 | for i, joint in enumerate(self.arm_joints): 103 | joint.set_drive_velocity_target(qvel[i]) 104 | joint.set_drive_target(target_qpos[i]) 105 | passive_force = self.robot.compute_passive_force() 106 | self.robot.set_qf(passive_force) 107 | 108 | def calculate_twist(self, time_to_target, target_ee_pose): 109 | relative_transform = self.end_effector.get_pose().inv().to_transformation_matrix() @ target_ee_pose 110 | unit_twist, theta = pose2exp_coordinate(relative_transform) 111 | velocity = theta / time_to_target 112 | body_twist = unit_twist * velocity 113 | current_ee_pose = self.end_effector.get_pose().to_transformation_matrix() 114 | return adjoint_matrix(current_ee_pose) @ body_twist 115 | 116 | def move_to_target_pose(self, target_ee_pose: np.ndarray, num_steps: int) -> None: 117 | """ 118 | Move the robot hand dynamically to a given target pose 119 | Args: 120 | target_ee_pose: (4, 4) transformation of robot hand in robot base frame (ee2base) 121 | num_steps: how much steps to reach to target pose, 122 | each step correspond to self.scene.get_timestep() seconds 123 | in physical simulation 124 | """ 125 | executed_time = num_steps * self.timestep 126 | 127 | spatial_twist = self.calculate_twist(executed_time, target_ee_pose) 128 | for i in range(num_steps): 129 | if i % 100 == 0: 130 | spatial_twist = self.calculate_twist((num_steps - i) * self.timestep, target_ee_pose) 131 | qvel = self.compute_joint_velocity_from_twist(spatial_twist) 132 | self.internal_controller(qvel) 133 | self.env.step() 134 | self.env.render() 135 | return 136 | 137 | def close_gripper(self): 138 | for joint in self.gripper_joints: 139 | joint.set_drive_target(0.0) 140 | 141 | def open_gripper(self): 142 | for joint in self.gripper_joints: 143 | joint.set_drive_target(0.04) 144 | 145 | def clear_velocity_command(self): 146 | for joint in self.arm_joints: 147 | joint.set_drive_velocity_target(0) 148 | 149 | def wait_n_steps(self, n: int): 150 | self.clear_velocity_command() 151 | for i in range(n): 152 | passive_force = self.robot.compute_passive_force() 153 | self.robot.set_qf(passive_force) 154 | self.env.step() 155 | self.env.render() 156 | self.robot.set_qf([0] * self.robot.dof) 157 | 158 | -------------------------------------------------------------------------------- /test/camera.py: -------------------------------------------------------------------------------- 1 | """ 2 | an RGB-D camera 3 | """ 4 | import numpy as np 5 | from sapien.core import Pose 6 | import utils 7 | # import wandb 8 | # wandb.init(project="multi-view-0110") 9 | class Camera(object): 10 | 11 | def __init__(self, env, near=0.1, far=100.0, image_size=336, dist=5.0, \ 12 | phi=np.pi/5, theta=np.pi, fov=35, random_position=False, fixed_position=False): 13 | builder = env.scene.create_actor_builder() 14 | self.camera_mount_actor = builder.build(is_kinematic=True) 15 | self.env = env 16 | 17 | # set camera intrinsics 18 | self.camera = env.scene.add_mounted_camera('camera', self.camera_mount_actor, Pose(), \ 19 | image_size, image_size, 0, np.deg2rad(fov), near, far) 20 | 21 | # set camera extrinsics 22 | # if random_position: 23 | 24 | 25 | # theta = 
np.random.uniform(low=0.9, high=1.1) * np.pi 26 | # phi = phi 27 | # # dist = 4.8 + np.random.random() 28 | # pos = np.array([dist*np.cos(phi)*np.cos(theta), \ 29 | # dist*np.cos(phi)*np.sin(theta), \ 30 | # dist*np.sin(phi)]) 31 | # # print(pos) 32 | 33 | # b = pos[1] + np.random.random()*0.6 - 0.3 34 | 35 | # pos[1] = b 36 | # c = pos[2] + np.random.random()*0.4 - 0.2 37 | 38 | # pos[2] = c 39 | # # print(pos) 40 | # else: 41 | 42 | # #theta = -np.pi/10 43 | # #theta = -np.pi/8 44 | # theta = theta 45 | # phi = phi 46 | 47 | # pos = np.array([dist*np.cos(phi)*np.cos(theta), \ 48 | # dist*np.cos(phi)*np.sin(theta), \ 49 | # dist*np.sin(phi)]) 50 | if random_position: 51 | # theta = np.random.random() * np.pi*2 52 | # theta = np.random.uniform(low=0.9, high=1.1) * np.pi 53 | # phi = (np.random.random()+1) * np.pi/6 54 | theta = np.random.uniform(low=0.9, high=1.1) * np.pi 55 | phi = phi 56 | if fixed_position: 57 | #theta = -np.pi/10 58 | #theta = -np.pi/8 59 | theta = np.pi 60 | phi = np.pi/10 61 | pos = np.array([dist*np.cos(phi)*np.cos(theta), \ 62 | dist*np.cos(phi)*np.sin(theta), \ 63 | dist*np.sin(phi)]) 64 | # print(print('1',pos)pos) 65 | # print('2',pos) 66 | # pos = np.array([-3.54468498,-0.36440411,3.52577091]) 67 | 68 | # assert(0) 69 | forward = -pos / np.linalg.norm(pos) 70 | left = np.cross([0, 0, 1], forward) 71 | left = left / np.linalg.norm(left) 72 | up = np.cross(forward, left) 73 | mat44 = np.eye(4) 74 | mat44[:3, :3] = np.vstack([forward, left, up]).T 75 | mat44[:3, 3] = pos # mat44 is cam2world 76 | mat44[0, 3] += env.object_position_offset 77 | self.mat44 = mat44 78 | self.camera_mount_actor.set_pose(Pose.from_transformation_matrix(mat44)) 79 | 80 | # log parameters 81 | self.near = near 82 | self.far = far 83 | self.dist = dist 84 | self.theta = theta 85 | self.phi = phi 86 | self.pos = pos 87 | 88 | def change_pose_by_mat(self, mat44): 89 | self.mat44 = mat44 90 | self.camera_mount_actor.set_pose(Pose.from_transformation_matrix(mat44)) 91 | self.pos = mat44[:3, 3] 92 | self.dist = None 93 | self.theta = None 94 | self.phi = None 95 | 96 | def get_observation(self): 97 | self.camera.take_picture() 98 | rgba = self.camera.get_color_rgba() 99 | rgba = (rgba * 255).clip(0, 255).astype(np.float32) / 255 100 | white = np.ones((rgba.shape[0], rgba.shape[1], 3), dtype=np.float32) 101 | mask = np.tile(rgba[:, :, 3:4], [1, 1, 3]) 102 | rgb = rgba[:, :, :3] * mask + white * (1 - mask) 103 | depth = self.camera.get_depth().astype(np.float32) 104 | # depth = self.camera.get_depth() 105 | return rgb, depth 106 | 107 | def compute_camera_XYZA(self, depth): 108 | camera_matrix = self.camera.get_camera_matrix()[:3, :3] 109 | y, x = np.where(depth < 1) 110 | z = self.near * self.far / (self.far + depth * (self.near - self.far)) 111 | # z = depth 112 | permutation = np.array([[0, 0, 1], [-1, 0, 0], [0, -1, 0]]) 113 | points = (permutation @ np.dot(np.linalg.inv(camera_matrix), \ 114 | np.stack([x, y, np.ones_like(x)] * z[y, x], 0))).T 115 | # wandb.log({"point_cloud":wandb.Object3D(points)}) 116 | 117 | out = np.zeros((996, 996, 4), dtype=np.float32) 118 | out[y, x, :3] = points 119 | out[y, x, 3] = 1 120 | return y, x, points, out 121 | 122 | @staticmethod 123 | def compute_XYZA_matrix(id1, id2, pts, size1, size2): 124 | out = np.zeros((size1, size2, 4), dtype=np.float32) 125 | out[id1, id2, :3] = pts 126 | out[id1, id2, 3] = 1 127 | return out 128 | 129 | def get_normal_map(self): 130 | nor = self.camera.get_normal_rgba() 131 | # convert from PartNet-space (x-right, y-up, 
z-backward) to SAPIEN-space (x-front, y-left, z-up) 132 | new_nor = np.array(nor, dtype=np.float32) 133 | new_nor[:, :, 0] = -nor[:, :, 2] 134 | new_nor[:, :, 1] = -nor[:, :, 0] 135 | new_nor[:, :, 2] = nor[:, :, 1] 136 | return new_nor 137 | 138 | def get_movable_link_mask(self, link_ids): 139 | link_seg = self.camera.get_segmentation() 140 | link_mask = np.zeros((link_seg.shape[0], link_seg.shape[1])).astype(np.uint8) 141 | for idx, lid in enumerate(link_ids): 142 | cur_link_pixels = int(np.sum(link_seg==lid)) 143 | if cur_link_pixels > 0: 144 | 145 | link_mask[link_seg == lid] = idx+1 146 | return link_mask 147 | 148 | def get_handle_mask(self): 149 | # read part seg partid2renderids 150 | partid2renderids = dict() 151 | for k in self.env.scene.render_id_to_visual_name: 152 | if self.env.scene.render_id_to_visual_name[k].split('-')[0] == 'handle': 153 | part_id = int(self.env.scene.render_id_to_visual_name[k].split('-')[-1]) 154 | if part_id not in partid2renderids: 155 | partid2renderids[part_id] = [] 156 | partid2renderids[part_id].append(k) 157 | # generate 0/1 handle mask 158 | part_seg = self.camera.get_obj_segmentation() 159 | handle_mask = np.zeros((part_seg.shape[0], part_seg.shape[1])).astype(np.uint8) 160 | for partid in partid2renderids: 161 | cur_part_mask = np.isin(part_seg, partid2renderids[partid]) 162 | cur_part_mask_pixels = int(np.sum(cur_part_mask)) 163 | if cur_part_mask_pixels > 0: 164 | handle_mask[cur_part_mask] = 1 165 | return handle_mask 166 | 167 | def get_object_mask(self): 168 | rgba = self.camera.get_albedo_rgba() 169 | return rgba[:, :, 3] > 0.5 170 | 171 | # return camera parameters 172 | def get_metadata(self): 173 | return { 174 | 'pose': self.camera.get_pose(), 175 | 'near': self.camera.get_near(), 176 | 'far': self.camera.get_far(), 177 | 'width': self.camera.get_width(), 178 | 'height': self.camera.get_height(), 179 | 'fov': self.camera.get_fovy(), 180 | 'camera_matrix': self.camera.get_camera_matrix(), 181 | 'projection_matrix': self.camera.get_projection_matrix(), 182 | 'model_matrix': self.camera.get_model_matrix(), 183 | 'mat44': self.mat44, 184 | } 185 | 186 | # return camera parameters 187 | def get_metadata_json(self): 188 | return { 189 | 'dist': self.dist, 190 | 'theta': self.theta, 191 | 'phi': self.phi, 192 | 'near': self.camera.get_near(), 193 | 'far': self.camera.get_far(), 194 | 'width': self.camera.get_width(), 195 | 'height': self.camera.get_height(), 196 | 'fov': self.camera.get_fovy(), 197 | 'camera_matrix': self.camera.get_camera_matrix().tolist(), 198 | 'projection_matrix': self.camera.get_projection_matrix().tolist(), 199 | 'model_matrix': self.camera.get_model_matrix().tolist(), 200 | 'mat44': self.mat44.tolist(), 201 | } 202 | 203 | -------------------------------------------------------------------------------- /train/data/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import yaml 3 | from torch.utils.data import Dataset 4 | from PIL import Image 5 | import json 6 | import llama.utils 7 | from llama import Tokenizer 8 | import copy 9 | import torchvision.transforms as transforms 10 | import pandas as pd 11 | import random 12 | from random import randrange 13 | import os 14 | import numpy as np 15 | 16 | try: 17 | from torchvision.transforms import InterpolationMode 18 | BICUBIC = InterpolationMode.BICUBIC 19 | except ImportError: 20 | BICUBIC = Image.BICUBIC 21 | 22 | 23 | # create data 24 | transform_train = transforms.Compose([ 25 | 
transforms.RandomResizedCrop(size=(336, 336), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=BICUBIC), # 3 is bicubic 26 | transforms.ToTensor(), 27 | transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711])]) 28 | 29 | class FinetuneDataset(Dataset): 30 | def __init__(self, config_path, args, max_words=30, tokenizer_path=None): 31 | print(f"read dataset config from {config_path}") 32 | 33 | self.mlm = args.mlm 34 | self.bins = args.bins 35 | self.config = config_path 36 | self.aff_prior = args.aff_prior 37 | 38 | ann = [] 39 | for meta_name in os.listdir(self.config): 40 | 41 | meta_path = os.path.join(self.config, meta_name) 42 | 43 | ann.append(meta_path) 44 | with open(meta_path, 'r') as f: 45 | meta_data = json.load(f) 46 | 47 | self.ann = ann 48 | print(f"total length: {len(self)}") 49 | 50 | self.transform = transform_train 51 | self.max_words = max_words 52 | self.tokenizer = Tokenizer(model_path=tokenizer_path) 53 | 54 | 55 | 56 | def __len__(self): 57 | return len(self.ann) 58 | 59 | def __getitem__(self, index): 60 | 61 | with open(self.ann[index], 'r') as f: 62 | data_item = json.load(f) 63 | filename = data_item['input'] 64 | answer = data_item['conversations'][1]['gt']#value 65 | start_pixel = 0 66 | loc_tokens = [] 67 | 68 | if self.bins == 'True' and self.mlm == 'True' and self.aff_prior: 69 | words = answer.split(' ') 70 | for idx, word in enumerate(words): 71 | if '.' in word: 72 | if '[' in word: 73 | # print(word[1:-2]) 74 | words[idx] = '['+str(int(float(word[1:-2])//0.02)) + ',' 75 | elif ']' in word: 76 | words[idx] = str(int(float(word[:-2])//0.02)) + ']' 77 | else: 78 | words[idx] = str(int(float(word[:-2])//0.02)) + ',' 79 | loc_tokens.append(idx) 80 | elif '(' in word: 81 | loc_tokens.append(idx) 82 | words[idx] = '('+str(int(word[1:-1])-start_pixel)+ ',' 83 | elif ')' in word: 84 | loc_tokens.append(idx) 85 | words[idx] = str(int(word[:-2])-start_pixel)+ '),' 86 | answer = ' '.join([str(elem) for elem in words]) 87 | 88 | i = random.randint(0, 3) 89 | 90 | #mlm and aff 91 | if i % 4 == 0: 92 | #finetune 93 | question = data_item['conversations'][0]['prompt'] 94 | answer = answer 95 | elif i % 4 == 1: 96 | #mlm 97 | question_ori = answer.split(' ') 98 | i = random.sample(range(0, len(question_ori)-1), int(len(question_ori)*0.15)) 99 | mask_loc = [loc_tokens[random.randint(0, len(loc_tokens)-1)],loc_tokens[random.randint(0, len(loc_tokens)-1)],loc_tokens[random.randint(0, len(loc_tokens)-1)]] 100 | question_mask = [word if idx not in mask_loc else "" for idx, word in enumerate(question_ori)] 101 | question = ' '.join([str(elem) for elem in question_mask]) 102 | answer = answer 103 | elif i % 4 == 2: 104 | #affordance 105 | question = data_item['aff_question'] 106 | answer = data_item['aff_gt'] 107 | elif i % 4 == 3: 108 | #cat 109 | question = data_item['conversations'][0]['prompt'] 110 | answer = answer 111 | # question = data_item['cat_prompt'] 112 | # answer = data_item['cat_ans'] 113 | 114 | image = Image.fromarray(np.array(Image.open(filename).convert('RGB'))[start_pixel:start_pixel+336,start_pixel:start_pixel+336,:]) 115 | 116 | image = self.transform(image) 117 | format_instruction = question 118 | format_input = None 119 | 120 | input1 = llama.utils.format_prompt(format_instruction, format_input) 121 | input2 = input1 + answer 122 | 123 | input1 = torch.tensor(self.tokenizer.encode(input1, bos=True, eos=False), dtype=torch.int64) 124 | input2 = torch.tensor(self.tokenizer.encode(input2, bos=True, 
eos=True), dtype=torch.int64) 125 | padding = self.max_words - input2.shape[0] 126 | if padding > 0: 127 | input2 = torch.cat((input2, torch.zeros(padding, dtype=torch.int64) - 1)) 128 | elif padding < 0: 129 | input2 = input2[:self.max_words] 130 | labels = copy.deepcopy(input2) 131 | labels[:len(input1)] = -1 132 | input2_mask = input2.ge(0) 133 | label_mask = labels.ge(0) 134 | input2[~input2_mask] = 0 135 | labels[~label_mask] = 0 136 | input2_mask = input2_mask.float() 137 | label_mask = label_mask.float() 138 | 139 | return input2, labels, input2_mask, image 140 | 141 | 142 | 143 | 144 | 145 | class PretrainDataset(Dataset): 146 | def __init__(self, config_path, transform, max_words=30, tokenizer_path=None): 147 | print(f"read dataset config from {config_path}") 148 | with open(config_path, 'r') as f: 149 | self.config = yaml.load(f, Loader=yaml.FullLoader) 150 | print("DATASET CONFIG:") 151 | print(self.config) 152 | images, captions = [], [] 153 | for meta_path in self.config['META']: 154 | images_this_meta, captions_this_meta = [], [] 155 | for chunk in pd.read_csv(meta_path, sep='\t', lineterminator='\n', chunksize=10 ** 6): 156 | images_this_meta.extend(chunk['url'].tolist()) 157 | captions_this_meta.extend(chunk['caption'].tolist()) 158 | print(f"{meta_path}: len {len(images_this_meta)}") 159 | images.extend(images_this_meta) 160 | captions.extend(captions_this_meta) 161 | 162 | self.data_list = [] 163 | for x, y in zip(images, captions): 164 | self.data_list.append({'url': x, 'caption': y}) 165 | print(f"total length: {len(self)}") 166 | self.transform = transform 167 | self.max_words = max_words 168 | self.tokenizer = Tokenizer(model_path=tokenizer_path) 169 | 170 | def __len__(self): 171 | return len(self.data_list) 172 | 173 | def __getitem__(self, index): 174 | sample = self.data_list[index] 175 | image_path, caption = sample['url'], sample['caption'] 176 | if isinstance(caption, list): 177 | caption = random.choice(caption) 178 | caption = str(caption) 179 | 180 | image = Image.open(image_path).convert('RGB') 181 | image = self.transform(image) 182 | 183 | format_instruction = "Generate caption of this image" 184 | input1 = llama.utils.format_prompt(format_instruction, None) 185 | input2 = input1 + caption 186 | 187 | input1 = torch.tensor(self.tokenizer.encode(input1, bos=True, eos=False), dtype=torch.int64) 188 | input2 = torch.tensor(self.tokenizer.encode(input2, bos=True, eos=True), dtype=torch.int64) 189 | padding = self.max_words - input2.shape[0] 190 | if padding > 0: 191 | input2 = torch.cat((input2, torch.zeros(padding, dtype=torch.int64) - 1)) 192 | elif padding < 0: 193 | input2 = input2[:self.max_words] 194 | labels = copy.deepcopy(input2) 195 | labels[:len(input1)] = -1 196 | input2_mask = input2.ge(0) 197 | label_mask = labels.ge(0) 198 | input2[~input2_mask] = 0 199 | labels[~label_mask] = 0 200 | input2_mask = input2_mask.float() 201 | label_mask = label_mask.float() 202 | return input2, labels, input2_mask, image -------------------------------------------------------------------------------- /data_collection/code/robots/panda.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 
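Note (added sketch, not a file from the repository): a minimal, hedged example of constructing the FinetuneDataset defined above in train/data/dataset.py and wrapping it in a PyTorch DataLoader, roughly mirroring what main_finetune.py below does. The paths and the argparse-style namespace are hypothetical placeholders; it assumes the per-sample JSON metas and the LLaMA tokenizer model are already in place.

import argparse
import torch
from data.dataset import FinetuneDataset

args = argparse.Namespace(mlm='True', bins='True', aff_prior=True)    # flags read by FinetuneDataset
dataset = FinetuneDataset(
    config_path='./data/train_json',                                  # folder of per-sample JSON files (hypothetical path)
    args=args,
    max_words=512,
    tokenizer_path='./ckpts/llama_model_weights/tokenizer.model')     # hypothetical checkpoint path

loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
tokens, labels, token_mask, image = next(iter(loader))                # tensors ready for LLaMA_adapter
print(tokens.shape, image.shape)                                      # e.g. (4, 512) and (4, 3, 336, 336)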
| 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | -------------------------------------------------------------------------------- /train/main_finetune.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | from torch.utils.tensorboard import SummaryWriter 4 | from torch.utils.data import Dataset 5 | import torch.nn as nn 6 | import util.misc as misc 7 | from util.misc import NativeScalerWithGradNormCount as NativeScaler 8 | from llama.llama_adapter import LLaMA_adapter 9 | 10 | from data.dataset import FinetuneDataset, transform_train 11 | 12 | import argparse 13 | import datetime 14 | import json 15 | import numpy as np 16 | import os 17 | import time 18 | from pathlib import Path 19 | from engine_finetune import train_one_epoch 20 | 21 | #torch.cuda.set_device(4) 22 | def get_args_parser(): 23 | parser = argparse.ArgumentParser('imagebind-llm pre-training', add_help=False) 24 | parser.add_argument('--batch_size', default=32, type=int, 25 | help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus') 26 | parser.add_argument('--epochs', default=4, type=int) 27 | parser.add_argument('--accum_iter', default=1, type=int, 28 | help='Accumulate gradient iterations (for increasing the effective batch size under memory constraints)') 29 | 30 | # Model parameters 31 | parser.add_argument('--llama_type', default='7B', type=str, 32 | help='Type of LLaMA model') # 33 | parser.add_argument('--llama_path', default='./ckpts/llama_model_weights', type=str, 34 | help='path to LLaMA pretrained checkpoint') 35 | parser.add_argument('--pretrained_path', default='./ckpts/BIAS_LORA_NORM-336-Chinese-7B.pth ', type=str, 36 | help='path to checkpoint from pretrain stage') 37 | parser.add_argument('--max_words', default=512, type=int, 38 | help='max number of input words') 39 | 40 | # Optimizer parameters 41 | parser.add_argument('--weight_decay', type=float, default=0.05, 42 | help='weight decay (default: 0.05)') 43 | 44 | parser.add_argument('--lr', type=float, default=None, metavar='LR', 45 | help='learning rate (absolute lr)') 46 | parser.add_argument('--blr', type=float, default=1e-3, metavar='LR', 47 | help='base learning rate: absolute_lr = base_lr * total_batch_size / 256') 48 | parser.add_argument('--min_lr', type=float, default=0., metavar='LR', 49 | help='lower lr bound for cyclic schedulers that hit 0') 50 | 51 | parser.add_argument('--warmup_epochs', type=int, default=40, metavar='N', 52 | help='epochs to warmup LR') 53 | 54 | # Dataset parameters 55 | parser.add_argument('--data_config', default='./data/train_json', type=str, 56 | help='dataset config 
path') 57 | parser.add_argument('--num_workers', default=16, type=int) 58 | parser.add_argument('--pin_mem', action='store_true', 59 | help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') 60 | parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem') 61 | parser.set_defaults(pin_mem=True) 62 | parser.add_argument('--mlm', default='False', type=str, help='if use mask language model') 63 | parser.add_argument('--bins', default='False', type=str, help='if use bin in orientation') 64 | parser.add_argument('--aff_prior', action='store_true', help='if learn from affordance') 65 | 66 | 67 | parser.add_argument('--output_dir', default='./exp/train_model', 68 | help='path where to save, empty for no saving') 69 | parser.add_argument('--log_dir', default='./output', 70 | help='path where to tensorboard log') 71 | parser.add_argument('--device', default='cuda', 72 | help='device to use for training / testing') 73 | parser.add_argument('--seed', default=0, type=int) 74 | 75 | 76 | parser.add_argument('--start_epoch', default=0, type=int, metavar='N', 77 | help='start epoch') 78 | 79 | # distributed training parameters 80 | parser.add_argument('--world_size', default=1, type=int, 81 | help='number of distributed processes') 82 | parser.add_argument('--local_rank', default=-1, type=int) 83 | parser.add_argument('--dist_on_itp', action='store_true') 84 | parser.add_argument('--dist_url', default='env://', 85 | help='url used to set up distributed training') 86 | 87 | return parser 88 | 89 | 90 | def main(args): 91 | 92 | print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__)))) 93 | print("{}".format(args).replace(', ', ',\n')) 94 | 95 | device = torch.device(args.device) 96 | 97 | # fix the seed for reproducibility 98 | seed = args.seed + misc.get_rank() 99 | torch.manual_seed(seed) 100 | np.random.seed(seed) 101 | cudnn.benchmark = True 102 | 103 | # define the model 104 | llama_type = args.llama_type 105 | llama_ckpt_dir = os.path.join(args.llama_path) 106 | llama_tokenzier_path = os.path.join(args.llama_path, 'tokenizer.model') 107 | 108 | model = LLaMA_adapter(llama_ckpt_dir, llama_tokenzier_path) 109 | 110 | 111 | model.to(device) 112 | 113 | model_without_ddp = model 114 | 115 | print("Trainable Params:") 116 | print([(key, val.shape) for key, val in model.named_parameters() if val.requires_grad]) 117 | 118 | 119 | 120 | # training detail 121 | eff_batch_size = args.batch_size * args.accum_iter * misc.get_world_size() 122 | 123 | if args.lr is None: # only base_lr is specified 124 | args.lr = args.blr * eff_batch_size / 256 125 | 126 | print("base lr: %.2e" % (args.lr * 256 / eff_batch_size)) 127 | print("actual lr: %.2e" % args.lr) 128 | 129 | print("accumulate grad iterations: %d" % args.accum_iter) 130 | print("effective batch size: %d" % eff_batch_size) 131 | 132 | # following timm: set wd as 0 for bias and norm layers 133 | param_groups = misc.add_weight_decay(model_without_ddp, args.weight_decay) 134 | optimizer = torch.optim.AdamW(param_groups, lr=args.lr, betas=(0.9, 0.95)) 135 | 136 | loss_scaler = NativeScaler() 137 | 138 | if args.pretrained_path != 'none': 139 | misc.load_model(model_without_ddp, args.pretrained_path) 140 | # print(args.mlm) 141 | dataset_train = FinetuneDataset(args.data_config, args, 142 | max_words=args.max_words, tokenizer_path=llama_tokenzier_path) 143 | 144 | num_tasks = misc.get_world_size() 145 | global_rank = misc.get_rank() 146 | sampler_train = torch.utils.data.DistributedSampler( 147 | 
dataset_train, num_replicas=num_tasks, rank=global_rank, shuffle=True 148 | ) 149 | print("Sampler_train = %s" % str(sampler_train)) 150 | 151 | data_loader_train = torch.utils.data.DataLoader( 152 | dataset_train, sampler=sampler_train, 153 | batch_size=args.batch_size, 154 | num_workers=args.num_workers, 155 | pin_memory=args.pin_mem, 156 | drop_last=True, 157 | ) 158 | 159 | # SummaryWrite 160 | if global_rank == 0 and args.log_dir is not None: 161 | os.makedirs(args.log_dir, exist_ok=True) 162 | log_writer = SummaryWriter(log_dir=args.log_dir) 163 | else: 164 | log_writer = None 165 | 166 | 167 | print(f"Start training for {args.epochs} epochs") 168 | start_time = time.time() 169 | for epoch in range(args.start_epoch, args.epochs): 170 | # if args.distributed:#分布式训练 171 | # data_loader_train.sampler.set_epoch(epoch) 172 | 173 | train_stats = train_one_epoch( 174 | model, data_loader_train, 175 | optimizer, device, epoch, loss_scaler, 176 | log_writer=log_writer, 177 | args=args 178 | ) 179 | 180 | if args.output_dir and (epoch + 1 == args.epochs): 181 | misc.save_model( 182 | args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer, 183 | loss_scaler=loss_scaler, epoch=epoch) 184 | 185 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, 186 | 'epoch': epoch} 187 | 188 | if args.output_dir and misc.is_main_process(): 189 | if log_writer is not None: 190 | log_writer.flush() 191 | with open(os.path.join(args.output_dir, "log.txt"), mode="a", encoding="utf-8") as f: 192 | f.write(json.dumps(log_stats) + "\n") 193 | 194 | total_time = time.time() - start_time 195 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 196 | print('Training time {}'.format(total_time_str)) 197 | print('Training over!!!') 198 | 199 | if __name__ == '__main__': 200 | args = get_args_parser() 201 | args = args.parse_args() 202 | if args.output_dir: 203 | Path(args.output_dir).mkdir(parents=True, exist_ok=True) 204 | main(args) 205 | -------------------------------------------------------------------------------- /test/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import torch 5 | import numpy as np 6 | import importlib 7 | import random 8 | import shutil 9 | from PIL import Image 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(os.path.join(BASE_DIR, '../utils')) 12 | from colors import colors 13 | colors = np.array(colors, dtype=np.float32) 14 | import matplotlib.pylab as plt 15 | from mpl_toolkits.mplot3d import Axes3D 16 | from subprocess import call 17 | 18 | 19 | def force_mkdir(folder): 20 | if os.path.exists(folder): 21 | shutil.rmtree(folder) 22 | os.mkdir(folder) 23 | 24 | def printout(flog, strout): 25 | print(strout) 26 | if flog is not None: 27 | flog.write(strout + '\n') 28 | 29 | def optimizer_to_device(optimizer, device): 30 | for state in optimizer.state.values(): 31 | for k, v in state.items(): 32 | if torch.is_tensor(v): 33 | state[k] = v.to(device) 34 | 35 | def get_model_module(model_version): 36 | importlib.invalidate_caches() 37 | return importlib.import_module('models.' + model_version) 38 | 39 | def collate_feats(b): 40 | return list(zip(*b)) 41 | 42 | def collate_feats_pass(b): 43 | return b 44 | 45 | def collate_feats_with_none(b): 46 | b = filter (lambda x:x is not None, b) 47 | return list(zip(*b)) 48 | 49 | def worker_init_fn(worker_id): 50 | """ The function is designed for pytorch multi-process dataloader. 
51 | Note that we use the pytorch random generator to generate a base_seed. 52 | Please try to be consistent. 53 | References: 54 | https://pytorch.org/docs/stable/notes/faq.html#dataloader-workers-random-seed 55 | """ 56 | base_seed = torch.IntTensor(1).random_().item() 57 | #print(worker_id, base_seed) 58 | np.random.seed(base_seed + worker_id) 59 | 60 | def viz_mask(ids): 61 | return colors[ids] 62 | 63 | def draw_dot(img, xy): 64 | out = np.array(img, dtype=np.uint8) 65 | x, y = xy[0], xy[1] 66 | neighbors = np.array([[0, 0, 0, 1, 1, 1, -1, -1, 1], \ 67 | [0, 1, -1, 0, 1, -1, 0, 1, -1]], dtype=np.int32) 68 | for i in range(neighbors.shape[1]): 69 | nx = x + neighbors[0, i] 70 | ny = y + neighbors[1, i] 71 | if nx >= 0 and nx < img.shape[0] and ny >= 0 and ny < img.shape[1]: 72 | out[nx, ny, 0] = 0 73 | out[nx, ny, 1] = 0 74 | out[nx, ny, 2] = 255 75 | 76 | return out 77 | 78 | def print_true_false(d): 79 | d = int(d) 80 | if d > 0.5: 81 | return 'True' 82 | return 'False' 83 | 84 | def img_resize(data): 85 | data = np.array(data, dtype=np.float32) 86 | mini, maxi = np.min(data), np.max(data) 87 | data -= mini 88 | data /= maxi - mini 89 | data = np.array(Image.fromarray((data*255).astype(np.uint8)).resize((224, 224)), dtype=np.float32) / 255 90 | data *= maxi - mini 91 | data += mini 92 | return data 93 | 94 | def export_pts(out, v): 95 | with open(out, 'w') as fout: 96 | for i in range(v.shape[0]): 97 | fout.write('%f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2])) 98 | 99 | def export_label(out, l): 100 | with open(out, 'w') as fout: 101 | for i in range(l.shape[0]): 102 | fout.write('%f\n' % (l[i])) 103 | 104 | def export_pts_label(out, v, l): 105 | with open(out, 'w') as fout: 106 | for i in range(l.shape[0]): 107 | fout.write('%f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], l[i])) 108 | 109 | def render_pts_label_png(out, v, l): 110 | export_pts(out+'.pts', v) 111 | export_label(out+'.label', l) 112 | export_pts_label(out+'.feats', v, l) 113 | cmd = 'xvfb-run -a ~/thea/TheaDepsUnix/Source/TheaPrefix/bin/Thea/RenderShape %s.pts -f %s.feats %s.png 448 448 -v 1,0,0,-5,0,0,0,0,1 >> /dev/null' % (out, out, out) 114 | 115 | call(cmd, shell=True) 116 | print('save png') 117 | 118 | def export_pts_color_obj(out, v, c): 119 | with open(out+'.obj', 'w') as fout: 120 | for i in range(v.shape[0]): 121 | fout.write('v %f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 122 | 123 | def export_pts_color_pts(out, v, c): 124 | with open(out+'.pts', 'w') as fout: 125 | for i in range(v.shape[0]): 126 | fout.write('%f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 127 | 128 | def load_checkpoint(models, model_names, dirname, epoch=None, optimizers=None, optimizer_names=None, strict=True): 129 | if len(models) != len(model_names) or (optimizers is not None and len(optimizers) != len(optimizer_names)): 130 | raise ValueError('Number of models, model names, or optimizers does not match.') 131 | 132 | for model, model_name in zip(models, model_names): 133 | filename = f'net_{model_name}.pth' 134 | if epoch is not None: 135 | filename = f'{epoch}_' + filename 136 | model.load_state_dict(torch.load(os.path.join(dirname, filename)), strict=strict) 137 | 138 | start_epoch = 0 139 | if optimizers is not None: 140 | filename = os.path.join(dirname, 'checkpt.pth') 141 | if epoch is not None: 142 | filename = f'{epoch}_' + filename 143 | if os.path.exists(filename): 144 | checkpt = torch.load(filename) 145 | start_epoch = checkpt['epoch'] 146 | for opt, 
optimizer_name in zip(optimizers, optimizer_names): 147 | opt.load_state_dict(checkpt[f'opt_{optimizer_name}']) 148 | print(f'resuming from checkpoint {filename}') 149 | else: 150 | response = input(f'Checkpoint {filename} not found for resuming, refine saved models instead? (y/n) ') 151 | if response != 'y': 152 | sys.exit() 153 | 154 | return start_epoch 155 | 156 | def get_global_position_from_camera(camera, depth, x, y): 157 | """ 158 | This function is provided only to show how to convert camera observation to world space coordinates. 159 | It can be removed if not needed. 160 | 161 | camera: an camera agent 162 | depth: the depth obsrevation 163 | x, y: the horizontal, vertical index for a pixel, you would access the images by image[y, x] 164 | """ 165 | cm = camera.get_metadata() 166 | proj, model = cm['projection_matrix'], cm['model_matrix'] 167 | print('proj:', proj) 168 | print('model:', model) 169 | w, h = cm['width'], cm['height'] 170 | 171 | # get 0 to 1 coordinate for (x, y) coordinates 172 | xf, yf = (x + 0.5) / w, 1 - (y + 0.5) / h 173 | 174 | # get 0 to 1 depth value at (x,y) 175 | zf = depth[int(y), int(x)] 176 | 177 | # get the -1 to 1 (x,y,z) coordinate 178 | ndc = np.array([xf, yf, zf, 1]) * 2 - 1 179 | 180 | # transform from image space to view space 181 | v = np.linalg.inv(proj) @ ndc 182 | v /= v[3] 183 | 184 | # transform from view space to world space 185 | v = model @ v 186 | 187 | return v 188 | 189 | def rot2so3(rotation): 190 | assert rotation.shape == (3, 3) 191 | if np.isclose(rotation.trace(), 3): 192 | return np.zeros(3), 1 193 | if np.isclose(rotation.trace(), -1): 194 | raise RuntimeError 195 | theta = np.arccos((rotation.trace() - 1) / 2) 196 | omega = 1 / 2 / np.sin(theta) * np.array( 197 | [rotation[2, 1] - rotation[1, 2], rotation[0, 2] - rotation[2, 0], rotation[1, 0] - rotation[0, 1]]).T 198 | return omega, theta 199 | 200 | def skew(vec): 201 | return np.array([[0, -vec[2], vec[1]], 202 | [vec[2], 0, -vec[0]], 203 | [-vec[1], vec[0], 0]]) 204 | 205 | def adjoint_matrix(pose): 206 | adjoint = np.zeros([6, 6]) 207 | adjoint[:3, :3] = pose[:3, :3] 208 | adjoint[3:6, 3:6] = pose[:3, :3] 209 | adjoint[3:6, 0:3] = skew(pose[:3, 3]) @ pose[:3, :3] 210 | return adjoint 211 | 212 | def pose2exp_coordinate(pose): 213 | """ 214 | Compute the exponential coordinate corresponding to the given SE(3) matrix 215 | Note: unit twist is not a unit vector 216 | 217 | Args: 218 | pose: (4, 4) transformation matrix 219 | 220 | Returns: 221 | Unit twist: (6, ) vector represent the unit twist 222 | Theta: scalar represent the quantity of exponential coordinate 223 | """ 224 | 225 | omega, theta = rot2so3(pose[:3, :3]) 226 | ss = skew(omega) 227 | inv_left_jacobian = np.eye(3, dtype=np.float32) / theta - 0.5 * ss + ( 228 | 1.0 / theta - 0.5 / np.tan(theta / 2)) * ss @ ss 229 | v = inv_left_jacobian @ pose[:3, 3] 230 | return np.concatenate([omega, v]), theta 231 | 232 | def viz_mask(ids): 233 | return colors[ids] 234 | 235 | def process_angle_limit(x): 236 | if np.isneginf(x): 237 | x = -10 238 | if np.isinf(x): 239 | x = 10 240 | return x 241 | 242 | def get_random_number(l, r): 243 | return np.random.rand() * (r - l) + l 244 | 245 | def save_h5(fn, data): 246 | fout = h5py.File(fn, 'w') 247 | for d, n, t in data: 248 | fout.create_dataset(n, data=d, compression='gzip', compression_opts=4, dtype=t) 249 | fout.close() 250 | -------------------------------------------------------------------------------- /train/utils.py: 
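# --- Illustrative sketch (not from the original repository) ---
# The utils modules shipped with test/ (above) and train/ (below) both expose
# get_global_position_from_camera, which lifts a pixel (x, y) and its depth value into
# world coordinates. A self-contained restatement of that unprojection, assuming a 4x4
# projection matrix and a 4x4 camera-to-world (model) matrix as in the original helper:
import numpy as np

def pixel_depth_to_world(proj, model, depth, x, y):
    """Unproject pixel (x, y) of a depth buffer into homogeneous world coordinates."""
    h, w = depth.shape
    xf, yf = (x + 0.5) / w, 1.0 - (y + 0.5) / h    # pixel centre -> [0, 1] image coords
    zf = depth[int(y), int(x)]                     # depth buffer value in [0, 1]
    ndc = np.array([xf, yf, zf, 1.0]) * 2.0 - 1.0  # -> normalized device coordinates
    v = np.linalg.inv(proj) @ ndc                  # image space -> view space
    v /= v[3]
    return model @ v                               # view space -> world space (x, y, z, 1)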
-------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | import torch 5 | import numpy as np 6 | import importlib 7 | import random 8 | import shutil 9 | from PIL import Image 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(os.path.join(BASE_DIR, '../utils')) 12 | from colors import colors 13 | colors = np.array(colors, dtype=np.float32) 14 | import matplotlib.pylab as plt 15 | from mpl_toolkits.mplot3d import Axes3D 16 | from subprocess import call 17 | 18 | 19 | def force_mkdir(folder): 20 | if os.path.exists(folder): 21 | shutil.rmtree(folder) 22 | os.mkdir(folder) 23 | 24 | def printout(flog, strout): 25 | print(strout) 26 | if flog is not None: 27 | flog.write(strout + '\n') 28 | 29 | def optimizer_to_device(optimizer, device): 30 | for state in optimizer.state.values(): 31 | for k, v in state.items(): 32 | if torch.is_tensor(v): 33 | state[k] = v.to(device) 34 | 35 | def get_model_module(model_version): 36 | importlib.invalidate_caches() 37 | return importlib.import_module('models.' + model_version) 38 | 39 | def collate_feats(b): 40 | return list(zip(*b)) 41 | 42 | def collate_feats_pass(b): 43 | return b 44 | 45 | def collate_feats_with_none(b): 46 | b = filter (lambda x:x is not None, b) 47 | return list(zip(*b)) 48 | 49 | def worker_init_fn(worker_id): 50 | """ The function is designed for pytorch multi-process dataloader. 51 | Note that we use the pytorch random generator to generate a base_seed. 52 | Please try to be consistent. 53 | References: 54 | https://pytorch.org/docs/stable/notes/faq.html#dataloader-workers-random-seed 55 | """ 56 | base_seed = torch.IntTensor(1).random_().item() 57 | #print(worker_id, base_seed) 58 | np.random.seed(base_seed + worker_id) 59 | 60 | def viz_mask(ids): 61 | return colors[ids] 62 | 63 | def draw_dot(img, xy): 64 | out = np.array(img, dtype=np.uint8) 65 | x, y = xy[0], xy[1] 66 | neighbors = np.array([[0, 0, 0, 1, 1, 1, -1, -1, 1], \ 67 | [0, 1, -1, 0, 1, -1, 0, 1, -1]], dtype=np.int32) 68 | for i in range(neighbors.shape[1]): 69 | nx = x + neighbors[0, i] 70 | ny = y + neighbors[1, i] 71 | if nx >= 0 and nx < img.shape[0] and ny >= 0 and ny < img.shape[1]: 72 | out[nx, ny, 0] = 0 73 | out[nx, ny, 1] = 0 74 | out[nx, ny, 2] = 255 75 | 76 | return out 77 | 78 | def print_true_false(d): 79 | d = int(d) 80 | if d > 0.5: 81 | return 'True' 82 | return 'False' 83 | 84 | def img_resize(data): 85 | data = np.array(data, dtype=np.float32) 86 | mini, maxi = np.min(data), np.max(data) 87 | data -= mini 88 | data /= maxi - mini 89 | data = np.array(Image.fromarray((data*255).astype(np.uint8)).resize((224, 224)), dtype=np.float32) / 255 90 | data *= maxi - mini 91 | data += mini 92 | return data 93 | 94 | def export_pts(out, v): 95 | with open(out, 'w') as fout: 96 | for i in range(v.shape[0]): 97 | fout.write('%f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2])) 98 | 99 | def export_label(out, l): 100 | with open(out, 'w') as fout: 101 | for i in range(l.shape[0]): 102 | fout.write('%f\n' % (l[i])) 103 | 104 | def export_pts_label(out, v, l): 105 | with open(out, 'w') as fout: 106 | for i in range(l.shape[0]): 107 | fout.write('%f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], l[i])) 108 | 109 | def render_pts_label_png(out, v, l): 110 | export_pts(out+'.pts', v) 111 | export_label(out+'.label', l) 112 | export_pts_label(out+'.feats', v, l) 113 | cmd = 'xvfb-run -a ~/thea/TheaDepsUnix/Source/TheaPrefix/bin/Thea/RenderShape %s.pts -f %s.feats %s.png 448 448 -v 
1,0,0,-5,0,0,0,0,1 >> /dev/null' % (out, out, out) 114 | 115 | call(cmd, shell=True) 116 | print('save png') 117 | 118 | def export_pts_color_obj(out, v, c): 119 | with open(out+'.obj', 'w') as fout: 120 | for i in range(v.shape[0]): 121 | fout.write('v %f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 122 | 123 | def export_pts_color_pts(out, v, c): 124 | with open(out+'.pts', 'w') as fout: 125 | for i in range(v.shape[0]): 126 | fout.write('%f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 127 | 128 | def load_checkpoint(models, model_names, dirname, epoch=None, optimizers=None, optimizer_names=None, strict=True): 129 | if len(models) != len(model_names) or (optimizers is not None and len(optimizers) != len(optimizer_names)): 130 | raise ValueError('Number of models, model names, or optimizers does not match.') 131 | 132 | for model, model_name in zip(models, model_names): 133 | filename = f'net_{model_name}.pth' 134 | if epoch is not None: 135 | filename = f'{epoch}_' + filename 136 | model.load_state_dict(torch.load(os.path.join(dirname, filename)), strict=strict) 137 | 138 | start_epoch = 0 139 | if optimizers is not None: 140 | filename = os.path.join(dirname, 'checkpt.pth') 141 | if epoch is not None: 142 | filename = f'{epoch}_' + filename 143 | if os.path.exists(filename): 144 | checkpt = torch.load(filename) 145 | start_epoch = checkpt['epoch'] 146 | for opt, optimizer_name in zip(optimizers, optimizer_names): 147 | opt.load_state_dict(checkpt[f'opt_{optimizer_name}']) 148 | print(f'resuming from checkpoint {filename}') 149 | else: 150 | response = input(f'Checkpoint {filename} not found for resuming, refine saved models instead? (y/n) ') 151 | if response != 'y': 152 | sys.exit() 153 | 154 | return start_epoch 155 | 156 | def get_global_position_from_camera(camera, depth, x, y): 157 | """ 158 | This function is provided only to show how to convert camera observation to world space coordinates. 159 | It can be removed if not needed. 
160 | 161 | camera: an camera agent 162 | depth: the depth obsrevation 163 | x, y: the horizontal, vertical index for a pixel, you would access the images by image[y, x] 164 | """ 165 | cm = camera.get_metadata() 166 | proj, model = cm['projection_matrix'], cm['model_matrix'] 167 | print('proj:', proj) 168 | print('model:', model) 169 | w, h = cm['width'], cm['height'] 170 | 171 | # get 0 to 1 coordinate for (x, y) coordinates 172 | xf, yf = (x + 0.5) / w, 1 - (y + 0.5) / h 173 | 174 | # get 0 to 1 depth value at (x,y) 175 | zf = depth[int(y), int(x)] 176 | 177 | # get the -1 to 1 (x,y,z) coordinate 178 | ndc = np.array([xf, yf, zf, 1]) * 2 - 1 179 | 180 | # transform from image space to view space 181 | v = np.linalg.inv(proj) @ ndc 182 | v /= v[3] 183 | 184 | # transform from view space to world space 185 | v = model @ v 186 | 187 | return v 188 | 189 | def rot2so3(rotation): 190 | assert rotation.shape == (3, 3) 191 | if np.isclose(rotation.trace(), 3): 192 | return np.zeros(3), 1 193 | if np.isclose(rotation.trace(), -1): 194 | raise RuntimeError 195 | theta = np.arccos((rotation.trace() - 1) / 2) 196 | omega = 1 / 2 / np.sin(theta) * np.array( 197 | [rotation[2, 1] - rotation[1, 2], rotation[0, 2] - rotation[2, 0], rotation[1, 0] - rotation[0, 1]]).T 198 | return omega, theta 199 | 200 | def skew(vec): 201 | return np.array([[0, -vec[2], vec[1]], 202 | [vec[2], 0, -vec[0]], 203 | [-vec[1], vec[0], 0]]) 204 | 205 | def adjoint_matrix(pose): 206 | adjoint = np.zeros([6, 6]) 207 | adjoint[:3, :3] = pose[:3, :3] 208 | adjoint[3:6, 3:6] = pose[:3, :3] 209 | adjoint[3:6, 0:3] = skew(pose[:3, 3]) @ pose[:3, :3] 210 | return adjoint 211 | 212 | def pose2exp_coordinate(pose): 213 | """ 214 | Compute the exponential coordinate corresponding to the given SE(3) matrix 215 | Note: unit twist is not a unit vector 216 | 217 | Args: 218 | pose: (4, 4) transformation matrix 219 | 220 | Returns: 221 | Unit twist: (6, ) vector represent the unit twist 222 | Theta: scalar represent the quantity of exponential coordinate 223 | """ 224 | 225 | omega, theta = rot2so3(pose[:3, :3]) 226 | ss = skew(omega) 227 | inv_left_jacobian = np.eye(3, dtype=np.float32) / theta - 0.5 * ss + ( 228 | 1.0 / theta - 0.5 / np.tan(theta / 2)) * ss @ ss 229 | v = inv_left_jacobian @ pose[:3, 3] 230 | return np.concatenate([omega, v]), theta 231 | 232 | def viz_mask(ids): 233 | return colors[ids] 234 | 235 | def process_angle_limit(x): 236 | if np.isneginf(x): 237 | x = -10 238 | if np.isinf(x): 239 | x = 10 240 | return x 241 | 242 | def get_random_number(l, r): 243 | return np.random.rand() * (r - l) + l 244 | 245 | def save_h5(fn, data): 246 | fout = h5py.File(fn, 'w') 247 | for d, n, t in data: 248 | fout.create_dataset(n, data=d, compression='gzip', compression_opts=4, dtype=t) 249 | fout.close() 250 | -------------------------------------------------------------------------------- /data_collection/code/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import h5py 4 | #import torch 5 | import numpy as np 6 | import importlib 7 | import random 8 | import shutil 9 | from PIL import Image 10 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | sys.path.append(os.path.join(BASE_DIR, '../utils')) 12 | from colors import colors 13 | colors = np.array(colors, dtype=np.float32) 14 | import matplotlib.pylab as plt 15 | from mpl_toolkits.mplot3d import Axes3D 16 | from subprocess import call 17 | 18 | 19 | def force_mkdir(folder): 20 | if 
os.path.exists(folder): 21 | shutil.rmtree(folder) 22 | os.mkdir(folder) 23 | 24 | def printout(flog, strout): 25 | print(strout) 26 | if flog is not None: 27 | flog.write(strout + '\n') 28 | 29 | def optimizer_to_device(optimizer, device): 30 | for state in optimizer.state.values(): 31 | for k, v in state.items(): 32 | if torch.is_tensor(v): 33 | state[k] = v.to(device) 34 | 35 | def get_model_module(model_version): 36 | importlib.invalidate_caches() 37 | return importlib.import_module('models.' + model_version) 38 | 39 | def collate_feats(b): 40 | return list(zip(*b)) 41 | 42 | def collate_feats_pass(b): 43 | return b 44 | 45 | def collate_feats_with_none(b): 46 | b = filter (lambda x:x is not None, b) 47 | return list(zip(*b)) 48 | 49 | def worker_init_fn(worker_id): 50 | """ The function is designed for pytorch multi-process dataloader. 51 | Note that we use the pytorch random generator to generate a base_seed. 52 | Please try to be consistent. 53 | References: 54 | https://pytorch.org/docs/stable/notes/faq.html#dataloader-workers-random-seed 55 | """ 56 | base_seed = torch.IntTensor(1).random_().item() 57 | #print(worker_id, base_seed) 58 | np.random.seed(base_seed + worker_id) 59 | 60 | def viz_mask(ids): 61 | return colors[ids] 62 | 63 | def draw_dot(img, xy): 64 | out = np.array(img, dtype=np.uint8) 65 | x, y = xy[0], xy[1] 66 | neighbors = np.array([[0, 0, 0, 1, 1, 1, -1, -1, 1], \ 67 | [0, 1, -1, 0, 1, -1, 0, 1, -1]], dtype=np.int32) 68 | for i in range(neighbors.shape[1]): 69 | nx = x + neighbors[0, i] 70 | ny = y + neighbors[1, i] 71 | if nx >= 0 and nx < img.shape[0] and ny >= 0 and ny < img.shape[1]: 72 | out[nx, ny, 0] = 0 73 | out[nx, ny, 1] = 0 74 | out[nx, ny, 2] = 255 75 | 76 | return out 77 | 78 | def print_true_false(d): 79 | d = int(d) 80 | if d > 0.5: 81 | return 'True' 82 | return 'False' 83 | 84 | def img_resize(data): 85 | data = np.array(data, dtype=np.float32) 86 | mini, maxi = np.min(data), np.max(data) 87 | data -= mini 88 | data /= maxi - mini 89 | data = np.array(Image.fromarray((data*255).astype(np.uint8)).resize((224, 224)), dtype=np.float32) / 255 90 | data *= maxi - mini 91 | data += mini 92 | return data 93 | 94 | def export_pts(out, v): 95 | with open(out, 'w') as fout: 96 | for i in range(v.shape[0]): 97 | fout.write('%f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2])) 98 | 99 | def export_label(out, l): 100 | with open(out, 'w') as fout: 101 | for i in range(l.shape[0]): 102 | fout.write('%f\n' % (l[i])) 103 | 104 | def export_pts_label(out, v, l): 105 | with open(out, 'w') as fout: 106 | for i in range(l.shape[0]): 107 | fout.write('%f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], l[i])) 108 | 109 | def render_pts_label_png(out, v, l): 110 | export_pts(out+'.pts', v) 111 | export_label(out+'.label', l) 112 | export_pts_label(out+'.feats', v, l) 113 | cmd = 'RenderShape %s.pts -f %s.feats %s.png 448 448 -v 1,0,0,-5,0,0,0,0,1 >> /dev/null' % (out, out, out) 114 | call(cmd, shell=True) 115 | 116 | def export_pts_color_obj(out, v, c): 117 | with open(out+'.obj', 'w') as fout: 118 | for i in range(v.shape[0]): 119 | fout.write('v %f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 120 | 121 | def export_pts_color_pts(out, v, c): 122 | with open(out+'.pts', 'w') as fout: 123 | for i in range(v.shape[0]): 124 | fout.write('%f %f %f %f %f %f\n' % (v[i, 0], v[i, 1], v[i, 2], c[i, 0], c[i, 1], c[i, 2])) 125 | 126 | def load_checkpoint(models, model_names, dirname, epoch=None, optimizers=None, optimizer_names=None, strict=True): 127 | if 
len(models) != len(model_names) or (optimizers is not None and len(optimizers) != len(optimizer_names)): 128 | raise ValueError('Number of models, model names, or optimizers does not match.') 129 | 130 | for model, model_name in zip(models, model_names): 131 | filename = f'net_{model_name}.pth' 132 | if epoch is not None: 133 | filename = f'{epoch}_' + filename 134 | model.load_state_dict(torch.load(os.path.join(dirname, filename)), strict=strict) 135 | 136 | start_epoch = 0 137 | if optimizers is not None: 138 | filename = os.path.join(dirname, 'checkpt.pth') 139 | if epoch is not None: 140 | filename = f'{epoch}_' + filename 141 | if os.path.exists(filename): 142 | checkpt = torch.load(filename) 143 | start_epoch = checkpt['epoch'] 144 | for opt, optimizer_name in zip(optimizers, optimizer_names): 145 | opt.load_state_dict(checkpt[f'opt_{optimizer_name}']) 146 | print(f'resuming from checkpoint {filename}') 147 | else: 148 | response = input(f'Checkpoint {filename} not found for resuming, refine saved models instead? (y/n) ') 149 | if response != 'y': 150 | sys.exit() 151 | 152 | return start_epoch 153 | 154 | def get_global_position_from_camera(camera, depth, x, y): 155 | """ 156 | This function is provided only to show how to convert camera observation to world space coordinates. 157 | It can be removed if not needed. 158 | 159 | camera: an camera agent 160 | depth: the depth obsrevation 161 | x, y: the horizontal, vertical index for a pixel, you would access the images by image[y, x] 162 | """ 163 | cm = camera.get_metadata() 164 | proj, model = cm['projection_matrix'], cm['model_matrix'] 165 | print('proj:', proj) 166 | print('model:', model) 167 | w, h = cm['width'], cm['height'] 168 | 169 | # get 0 to 1 coordinate for (x, y) coordinates 170 | xf, yf = (x + 0.5) / w, 1 - (y + 0.5) / h 171 | 172 | # get 0 to 1 depth value at (x,y) 173 | zf = depth[int(y), int(x)] 174 | 175 | # get the -1 to 1 (x,y,z) coordinate 176 | ndc = np.array([xf, yf, zf, 1]) * 2 - 1 177 | 178 | # transform from image space to view space 179 | v = np.linalg.inv(proj) @ ndc 180 | v /= v[3] 181 | 182 | # transform from view space to world space 183 | v = model @ v 184 | 185 | return v 186 | 187 | def rot2so3(rotation): 188 | assert rotation.shape == (3, 3) 189 | if np.isclose(rotation.trace(), 3): 190 | return np.zeros(3), 1 191 | if np.isclose(rotation.trace(), -1): 192 | raise RuntimeError 193 | theta = np.arccos((rotation.trace() - 1) / 2) 194 | omega = 1 / 2 / np.sin(theta) * np.array( 195 | [rotation[2, 1] - rotation[1, 2], rotation[0, 2] - rotation[2, 0], rotation[1, 0] - rotation[0, 1]]).T 196 | return omega, theta 197 | 198 | def skew(vec): 199 | return np.array([[0, -vec[2], vec[1]], 200 | [vec[2], 0, -vec[0]], 201 | [-vec[1], vec[0], 0]]) 202 | 203 | def adjoint_matrix(pose): 204 | adjoint = np.zeros([6, 6]) 205 | adjoint[:3, :3] = pose[:3, :3] 206 | adjoint[3:6, 3:6] = pose[:3, :3] 207 | adjoint[3:6, 0:3] = skew(pose[:3, 3]) @ pose[:3, :3] 208 | return adjoint 209 | 210 | def pose2exp_coordinate(pose): 211 | """ 212 | Compute the exponential coordinate corresponding to the given SE(3) matrix 213 | Note: unit twist is not a unit vector 214 | 215 | Args: 216 | pose: (4, 4) transformation matrix 217 | 218 | Returns: 219 | Unit twist: (6, ) vector represent the unit twist 220 | Theta: scalar represent the quantity of exponential coordinate 221 | """ 222 | 223 | omega, theta = rot2so3(pose[:3, :3]) 224 | ss = skew(omega) 225 | inv_left_jacobian = np.eye(3, dtype=np.float32) / theta - 0.5 * ss + ( 226 | 1.0 
/ theta - 0.5 / np.tan(theta / 2)) * ss @ ss 227 | v = inv_left_jacobian @ pose[:3, 3] 228 | return np.concatenate([omega, v]), theta 229 | 230 | def viz_mask(ids): 231 | return colors[ids] 232 | 233 | def process_angle_limit(x): 234 | if np.isneginf(x): 235 | x = -10 236 | if np.isinf(x): 237 | x = 10 238 | return x 239 | 240 | def get_random_number(l, r): 241 | return np.random.rand() * (r - l) + l 242 | 243 | def save_h5(fn, data): 244 | fout = h5py.File(fn, 'w') 245 | for d, n, t in data: 246 | fout.create_dataset(n, data=d, compression='gzip', compression_opts=4, dtype=t) 247 | fout.close() 248 | -------------------------------------------------------------------------------- /train/data/create_dataset_aff.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from PIL import Image, ImageDraw, ImageOps 4 | import numpy as np 5 | import argparse 6 | from tqdm import tqdm 7 | 8 | print('Start generating training json..............') 9 | count = 0 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--folder_dir', type=str, help='dataset dir') 12 | parser.add_argument('--output_dir', type=str, help='training json dir') 13 | parser.add_argument('--num_point', type=int, help='training json dir') 14 | args = parser.parse_args() 15 | 16 | folder_dir = args.folder_dir 17 | folder_names = os.listdir(folder_dir) 18 | output_dir = args.output_dir 19 | if not os.path.exists(output_dir): 20 | os.makedirs(output_dir) 21 | else: 22 | print('json files already exists, beginning training') 23 | exit() 24 | cal_cat = dict() 25 | 26 | for item in tqdm(folder_names): 27 | NUM_OF_POINTS = args.num_point 28 | cur_dir = os.path.join(folder_dir,str(item)) 29 | cat = item.split('_')[1] 30 | if os.path.exists(os.path.join(cur_dir, 'result.json')): 31 | with open(os.path.join(cur_dir, 'result.json'), 'r') as fin: 32 | data_inf = json.load(fin) 33 | if data_inf['mani_succ'] != 'True': 34 | continue 35 | 36 | aff_gt_dir = os.path.join(cur_dir, 'aff_gt_all.png') 37 | if not os.path.exists(aff_gt_dir): 38 | continue 39 | img_pil = Image.open(os.path.join(cur_dir, 'original_rgb.png')) 40 | intermask_pil = np.array(Image.open(os.path.join(cur_dir, 'interaction_mask.png'))) 41 | gray_image = ImageOps.grayscale(img_pil) 42 | threshold = 200 # Adjust the threshold value as needed 43 | object_mask = gray_image.point(lambda p: p < threshold and 255) 44 | object_mask.save(os.path.join(cur_dir, 'object_mask.png')) 45 | 46 | object_mask = np.array(object_mask)/255 47 | 48 | 49 | aff_gt_pil = Image.open(aff_gt_dir) 50 | aff_gt = np.array(aff_gt_pil)/255 51 | result_mask = np.where(aff_gt < 0.2, intermask_pil, 0).astype(np.uint8) 52 | object_mask = np.where(aff_gt < 0.2, object_mask, 0).astype(np.uint8) 53 | Image.fromarray((result_mask).astype(np.uint8)).save(os.path.join(cur_dir, 'result_mask.png')) 54 | Image.fromarray((object_mask*255).astype(np.uint8)).save(os.path.join(cur_dir, 'object_mask.png')) 55 | 56 | row_indices_pos, col_indices_pos = np.where(aff_gt > 0.8) 57 | if NUM_OF_POINTS > len(row_indices_pos): 58 | NUM_OF_POINTS = len(row_indices_pos) 59 | 60 | row_indices_neg1, col_indices_neg1 = np.where(result_mask > 0.8) 61 | 62 | 63 | if NUM_OF_POINTS > len(row_indices_neg1) and len(row_indices_neg1) != 0: 64 | NUM_OF_POINTS = len(row_indices_neg1) 65 | 66 | if NUM_OF_POINTS == 0: 67 | continue 68 | 69 | if len(row_indices_neg1) != 0 : 70 | indices_neg = np.random.choice(len(row_indices_neg1), size=NUM_OF_POINTS//2, replace=False) 71 | 
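# Illustrative helper (not part of the original script): the surrounding block samples
# pixel coordinates from several masks -- positives where the affordance ground truth
# aff_gt is high (> 0.8), negatives from the low-affordance interaction mask and object
# mask -- and shuffles them into one affordance question/answer pair below. The recurring
# "pick up to k random (row, col) locations above a threshold" pattern can be written as:
import numpy as np

def sample_mask_pixels(mask, threshold, k, rng=np.random):
    """Return up to k random (row, col) coordinates where mask > threshold."""
    rows, cols = np.where(mask > threshold)
    if len(rows) == 0:
        return np.empty((0, 2), dtype=np.int64)
    idx = rng.choice(len(rows), size=min(k, len(rows)), replace=False)
    return np.stack([rows[idx], cols[idx]], axis=1)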
selected_row_indices_neg = row_indices_neg1[indices_neg].reshape(-1, 1) 72 | selected_col_indices_neg = col_indices_neg1[indices_neg].reshape(-1, 1) 73 | top_indices_neg1 = np.hstack((selected_row_indices_neg, selected_col_indices_neg)) 74 | top_indices_neg1_gt = np.zeros(top_indices_neg1.shape[0]) 75 | 76 | row_indices_neg, col_indices_neg = np.where(object_mask > 0.8) 77 | 78 | if len(row_indices_neg) != 0 and len(row_indices_neg1) != 0: 79 | indices_neg = np.random.choice(len(row_indices_neg), size=NUM_OF_POINTS//2, replace=False) 80 | selected_row_indices_neg = row_indices_neg[indices_neg].reshape(-1, 1) 81 | selected_col_indices_neg = col_indices_neg[indices_neg].reshape(-1, 1) 82 | top_indices_neg2 = np.hstack((selected_row_indices_neg, selected_col_indices_neg)) 83 | top_indices_neg2_gt = np.zeros(top_indices_neg2.shape[0]) 84 | else: 85 | try: 86 | indices_neg = np.random.choice(len(row_indices_neg), size=NUM_OF_POINTS, replace=False) 87 | selected_row_indices_neg = row_indices_neg[indices_neg].reshape(-1, 1) 88 | selected_col_indices_neg = col_indices_neg[indices_neg].reshape(-1, 1) 89 | top_indices_neg2 = np.hstack((selected_row_indices_neg, selected_col_indices_neg)) 90 | top_indices_neg2_gt = np.zeros(top_indices_neg2.shape[0]) 91 | except: 92 | continue 93 | 94 | 95 | indices_pos = np.random.choice(len(row_indices_pos), size=NUM_OF_POINTS, replace=False) 96 | selected_row_indices_pos = row_indices_pos[indices_pos].reshape(-1, 1) 97 | selected_col_indices_pos = col_indices_pos[indices_pos].reshape(-1, 1) 98 | top_indices_pos = np.hstack((selected_row_indices_pos, selected_col_indices_pos)) 99 | top_indices_pos_gt = np.ones(top_indices_pos.shape[0]) 100 | 101 | if len(row_indices_neg1) == 0 : 102 | 103 | select_indices = np.vstack((top_indices_neg2, top_indices_pos)) 104 | select_indices_gt = np.concatenate((top_indices_neg2_gt, top_indices_pos_gt)) 105 | 106 | else: 107 | 108 | select_indices = np.vstack((top_indices_neg1, top_indices_neg2, top_indices_pos)) 109 | select_indices_gt = np.concatenate((top_indices_neg1_gt, top_indices_neg2_gt, top_indices_pos_gt)) 110 | 111 | permutation = np.random.permutation(len(select_indices_gt)) 112 | select_indices = select_indices[permutation] 113 | select_indices_gt = select_indices_gt[permutation] 114 | 115 | mapping = {0: "no", 1: "yes"} 116 | if len(select_indices_gt) == 0: 117 | continue 118 | select_string_gt = np.vectorize(mapping.get)(select_indices_gt) 119 | 120 | 121 | select_string = np.array2string(select_indices, separator=',', formatter={'all': lambda x: str(x)})[1:-1].strip().replace("\n", " ") 122 | select_string_gt = np.array2string(select_string_gt, separator=',', formatter={'all': lambda x: str(x)})[1:-1].strip().replace("\n", " ") 123 | 124 | aff_question = 'Determine if operating on each following point can effectively manipulate the object within the image: {}'.format(select_string) 125 | aff_gt = select_string_gt 126 | 127 | 128 | #draw the selected point in the image 129 | draw = ImageDraw.Draw(img_pil) 130 | if len(row_indices_neg1) != 0 : 131 | for index in range(top_indices_neg1.shape[0]): 132 | draw.point((top_indices_neg1[index][1],top_indices_neg1[index][0]),'blue') 133 | for index in range(top_indices_neg2.shape[0]): 134 | draw.point((top_indices_neg2[index][1],top_indices_neg2[index][0]),'blue') 135 | for index in range(top_indices_pos.shape[0]): 136 | draw.point((top_indices_pos[index][1],top_indices_pos[index][0]),'red') 137 | img_pil.save(os.path.join(cur_dir, 'select_point.png')) 138 | 139 | up_cam = 
data_inf['gripper_up_direction_camera'] 140 | forward_cam = data_inf['gripper_forward_direction_camera'] 141 | x,y = data_inf['pixel_locs'] 142 | data = { 143 | 144 | "conversations": [ 145 | { 146 | "prompt": "Specify the contact point and gripper direction of manipulating the object." 147 | }, 148 | { 149 | "gt": f"The contact point is ({int(x)}, {int(y)}), the gripper up direction is {up_cam}, the gripper forward direction is {forward_cam}." 150 | 151 | } 152 | ], 153 | 'cat_prompt': 'What is the category of the object in the image?', 154 | 'cat_ans': item.split('_')[1], 155 | "instruction": "Specify the contact point and gripper direction of manipulating the object.", 156 | "input": os.path.join(cur_dir, 'original_rgb.png'), 157 | 'aff_question': aff_question, 158 | 'aff_gt': aff_gt.strip() 159 | 160 | } 161 | if not os.path.exists(os.path.join(cur_dir, 'original_rgb.png')): 162 | continue 163 | 164 | json_data = json.dumps(data, indent=4) 165 | cat = item.split('_')[1] 166 | 167 | if cat not in list(cal_cat.keys()): 168 | cal_cat[cat] = 1 169 | else: 170 | if cal_cat[cat] > 900: 171 | continue 172 | else: 173 | cal_cat[cat] += 1 174 | 175 | 176 | with open(os.path.join(output_dir,'{}.json'.format(item)), "w") as file: 177 | file.write(json_data) 178 | 179 | print('Numbers of each training category: ', cal_cat) 180 | print('Finish generating training json..............') -------------------------------------------------------------------------------- /test/test_one_stick_clean.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import shutil 5 | from argparse import ArgumentParser 6 | from PIL import Image, ImageDraw 7 | import numpy as np 8 | import torch 9 | import torch.nn.functional as F 10 | from sapien.core import Pose 11 | from env_ori import Env,ContactError 12 | from camera import Camera 13 | from robots.panda_robot import Robot 14 | import imageio 15 | import cv2 16 | import json 17 | import random 18 | import matplotlib.pyplot as plt 19 | import matplotlib as mpl 20 | from matplotlib.colors import ListedColormap, LinearSegmentedColormap 21 | import llama 22 | 23 | 24 | parser = ArgumentParser() 25 | parser.add_argument('--llama_dir', type=str, help='llama directory') 26 | parser.add_argument('--adapter_dir', type=str,default='./', help='adapter directory') 27 | parser.add_argument('--result_suffix', type=str, default='nothing') 28 | parser.add_argument('--device', type=str, default='cuda:0', help='cpu or cuda:x for using cuda on GPU number x') 29 | parser.add_argument('--overwrite', action='store_true', default=False, help='overwrite if out_dir exists [default: False]') 30 | 31 | parser.add_argument('--no_gui', action='store_true', default=False, help='no_gui [default: False]') 32 | parser.add_argument('--data_dir', type=str) 33 | parser.add_argument('--record_name', type=str) 34 | parser.add_argument('--out_dir', type=str) 35 | parser.add_argument('--use_mask', type=str, help='whether use movable mask') 36 | eval_conf = parser.parse_args() 37 | 38 | random.seed(0) 39 | np.random.seed(0) 40 | torch.manual_seed(0) 41 | 42 | 43 | 44 | 45 | #previous info are saved in result.json 46 | shape_id, category, cnt_id, primact_type, trial_id = eval_conf.record_name.split('_') 47 | 48 | out_dir = os.path.join(eval_conf.out_dir, '%s_%s_%s_%s_%d' % (shape_id, category, cnt_id, primact_type, int(trial_id))) 49 | 50 | 51 | flog = open(os.path.join(out_dir, 'log.txt'), 'w') 52 | out_info = dict() 53 | try: 54 | with 
open(os.path.join(eval_conf.data_dir, eval_conf.record_name, 'result.json'), 'r') as fin: 55 | replay_data = json.load(fin) 56 | except: 57 | print('no replay data') 58 | exit(1) 59 | 60 | 61 | env = Env(flog=flog, show_gui=(not eval_conf.no_gui)) 62 | 63 | # setup camera 64 | cam_theta = replay_data['camera_metadata']['theta'] 65 | cam_phi = replay_data['camera_metadata']['phi'] 66 | cam_dist = replay_data['camera_metadata']['dist'] 67 | cam = Camera(env, theta=cam_theta, phi=cam_phi, dist=cam_dist) 68 | out_info['camera_metadata_init'] = cam.get_metadata_json() 69 | 70 | 71 | if not eval_conf.no_gui: 72 | env.set_controller_camera_pose(cam.pos[0], cam.pos[1], cam.pos[2], np.pi+cam_theta, -cam_phi) 73 | 74 | 75 | 76 | # load shape 77 | object_urdf_fn = '../data_collection/asset/original_sapien_dataset/%s/mobility.urdf' % shape_id 78 | flog.write('object_urdf_fn: %s\n' % object_urdf_fn) 79 | object_material = env.get_material(4, 4, 0.01) 80 | state = replay_data['object_state'] 81 | flog.write('Object State: %s\n' % state) 82 | out_info['object_state'] = state 83 | scale = replay_data['scale'] 84 | env.load_object(scale, object_urdf_fn, object_material, state=state) 85 | joint_angles = replay_data['joint_angles'] 86 | env.set_object_joint_angles(joint_angles) 87 | out_info['joint_angles'] = joint_angles 88 | out_info['joint_angles_lower'] = env.joint_angles_lower 89 | out_info['joint_angles_upper'] = env.joint_angles_upper 90 | cur_qpos = env.get_object_qpos() 91 | 92 | # simulate some steps for the object to stay rest 93 | still_timesteps = 0 94 | wait_timesteps = 0 95 | while still_timesteps < 5000 and wait_timesteps < 20000: 96 | env.step() 97 | env.render() 98 | cur_new_qpos = env.get_object_qpos() 99 | invalid_contact = False 100 | for c in env.scene.get_contacts(): 101 | for p in c.points: 102 | if abs(p.impulse @ p.impulse) > 1e-4: 103 | invalid_contact = True 104 | break 105 | if invalid_contact: 106 | break 107 | if np.max(np.abs(cur_new_qpos - cur_qpos)) < 1e-6 and (not invalid_contact): 108 | still_timesteps += 1 109 | else: 110 | still_timesteps = 0 111 | cur_qpos = cur_new_qpos 112 | wait_timesteps += 1 113 | 114 | if still_timesteps < 5000: 115 | printout(flog, 'Object Not Still!') 116 | flog.close() 117 | env.close() 118 | exit(1) 119 | 120 | rgb, depth = cam.get_observation() 121 | Image.fromarray((rgb*255).astype(np.uint8)).save(os.path.join(out_dir, 'rgb_img.png')) 122 | img = Image.fromarray((rgb*255).astype(np.uint8)) 123 | 124 | gt_nor = cam.get_normal_map() 125 | Image.fromarray(((gt_nor+1)/2*255).astype(np.uint8)).save(os.path.join(out_dir, 'gt_nor.png')) 126 | 127 | object_link_ids = env.movable_link_ids 128 | gt_movable_link_mask = cam.get_movable_link_mask(object_link_ids) 129 | mask = (gt_movable_link_mask > 0) 130 | 131 | 132 | if os.path.exists(os.path.join(out_dir, 'prediction.json')): 133 | with open(os.path.join(out_dir, 'prediction.json'), 'r') as fin: 134 | result = json.load(fin) 135 | else: 136 | print('!!!!!!!!!!!!!!!!!!!!!!no prediction !!!!!!!!!!!!!!!!!!!!!!!!') 137 | flog.close() 138 | env.close() 139 | exit(2) 140 | 141 | 142 | print('answer from model: ', result) 143 | 144 | object_link_ids = env.movable_link_ids 145 | gt_movable_link_mask = cam.get_movable_link_mask(object_link_ids) 146 | x, y = result.split('(')[1].split(')')[0].split(', ') 147 | x = int(x) 148 | y = int(y) 149 | if eval_conf.use_mask == 'True': 150 | if gt_movable_link_mask[x,y] == 0: 151 | exit() 152 | 153 | norm_dir = gt_nor[x,y] 154 | 155 | gt_nor = cam.get_normal_map() 156 | 
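# Note (illustrative, not from the original script): the split-based parsing of `result`
# above (contact point) and below (gripper up / forward directions) assumes the model's
# answer exactly matches the training template, e.g.
#   "The contact point is (x, y), the gripper up direction is [a, b, c],
#    the gripper forward direction is [d, e, f]."
# with integer components, as implied by the int() casts. A slightly more defensive
# equivalent using regular expressions could look like this hypothetical helper:
import re

def parse_prediction(answer):
    point = re.search(r"\(\s*(-?\d+)\s*,\s*(-?\d+)\s*\)", answer)
    dirs = re.findall(r"\[\s*(-?\d+)\s*,\s*(-?\d+)\s*,\s*(-?\d+)\s*\]", answer)
    if point is None or len(dirs) < 2:
        raise ValueError("unexpected answer format: %r" % answer)
    x, y = (int(v) for v in point.groups())
    up, forward = [int(v) for v in dirs[0]], [int(v) for v in dirs[1]]
    return (x, y), up, forward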
Image.fromarray(((gt_nor+1)/2*255).astype(np.uint8)).save(os.path.join(out_dir, 'gt_nor.png')) 157 | 158 | d_x, d_y, d_z = result.split('[')[1].split(']')[0].split(', ') 159 | gripper_direction_camera = np.array([int(d_x)*0.02, int(d_y)*0.02, int(d_z)*0.02]) 160 | fd_x, fd_y, fd_z = result.split('[')[2].split(']')[0].split(', ') 161 | gripper_forward_direction_camera = np.array([int(fd_x)*0.02, int(fd_y)*0.02, int(fd_z)*0.02]) 162 | 163 | draw = ImageDraw.Draw(img) 164 | draw.point((y,x),'red') 165 | img.save(os.path.join(out_dir, 'contact_point.png')) 166 | 167 | cam_XYZA_id1, cam_XYZA_id2, cam_XYZA_pts,out = cam.compute_camera_XYZA(depth) 168 | cam_XYZA = cam.compute_XYZA_matrix(cam_XYZA_id1, cam_XYZA_id2, cam_XYZA_pts, depth.shape[0], depth.shape[1]) 169 | position_cam = cam_XYZA[x, y, :3] 170 | 171 | position_cam_xyz1 = np.ones((4), dtype=np.float32) 172 | position_cam_xyz1[:3] = position_cam 173 | position_world_xyz1 = cam.get_metadata()['mat44'] @ position_cam_xyz1 174 | position_world = position_world_xyz1[:3] 175 | target_part_id = object_link_ids[gt_movable_link_mask[x, y] - 1] 176 | env.set_target_object_part_actor_id(target_part_id) 177 | out_info['target_object_part_actor_id'] = env.target_object_part_actor_id 178 | out_info['target_object_part_joint_id'] = env.target_object_part_joint_id 179 | 180 | 181 | def plot_mani(cam,up, forward): 182 | # we use the norm of the contact point to correct tge z-axis of end-effector 183 | if (up @ norm_dir[:3] ) > 0: 184 | up = -up 185 | 186 | up /= np.linalg.norm(up) 187 | up = cam.get_metadata()['mat44'][:3,:3] @ up 188 | forward = cam.get_metadata()['mat44'][:3,:3] @ forward 189 | out_info['gripper_direction_world'] = up.tolist() 190 | 191 | up = np.array(up, dtype=np.float32) 192 | up /= np.linalg.norm(up) 193 | forward = np.array(forward, dtype=np.float32) 194 | forward /= np.linalg.norm(forward) 195 | left = np.cross(up, forward) 196 | left /= np.linalg.norm(left) 197 | forward = np.cross(left, up) 198 | forward /= np.linalg.norm(forward) 199 | 200 | rotmat = np.eye(4).astype(np.float32) 201 | rotmat[:3, 0] = forward 202 | rotmat[:3, 1] = left 203 | rotmat[:3, 2] = up 204 | 205 | final_rotmat = np.array(rotmat, dtype=np.float32) 206 | final_rotmat[:3, 3] = position_world - up * 0.1 207 | final_pose = Pose().from_transformation_matrix(final_rotmat) 208 | 209 | start_rotmat = np.array(rotmat, dtype=np.float32) 210 | start_rotmat[:3, 3] = position_world - up * 0.15 211 | # start_rotmat[:3, 3] = position_world 212 | start_pose = Pose().from_transformation_matrix(start_rotmat) 213 | 214 | pull_rotmat = np.array(rotmat, dtype=np.float32) 215 | pull_rotmat[:3, 3] = position_world - up * 0.5 216 | pull_pose = Pose().from_transformation_matrix(pull_rotmat) 217 | out_info['pull_rotmat_world'] = pull_rotmat.tolist() 218 | 219 | #load robot 220 | robot_urdf_fn = './robots/panda_gripper.urdf' 221 | robot_material = env.get_material(4, 4, 0.01) 222 | robot = Robot(env, robot_urdf_fn, robot_material, open_gripper=('pulling' in primact_type)) 223 | 224 | 225 | robot.robot.set_root_pose(start_pose) 226 | 227 | 228 | env.render() 229 | rgb_final_pose, _ = cam.get_observation() 230 | Image.fromarray((rgb_final_pose*255).astype(np.uint8)).save(os.path.join(out_dir, 'viz_start_pose.png')) 231 | 232 | 233 | 234 | out_info['start_target_part_qpos'],_,_ = env.get_target_part_qpos() 235 | 236 | 237 | success = True 238 | target_link_mat44 = env.get_target_part_pose().to_transformation_matrix() 239 | position_local_xyz1 = np.linalg.inv(target_link_mat44) @ 
position_world_xyz1 240 | 241 | 242 | robot.close_gripper() 243 | robot.wait_n_steps(2000) 244 | 245 | 246 | # approach 247 | robot.move_to_target_pose(final_rotmat, 2000) 248 | robot.wait_n_steps(2000) 249 | rgb_final_pose, _ = cam.get_observation() 250 | Image.fromarray((rgb_final_pose*255).astype(np.uint8)).save(os.path.join(out_dir, 'viz_mid_pose.png')) 251 | 252 | suction_drive = env.scene.create_drive( 253 | robot.robot.get_links()[-1], 254 | robot.robot.get_links()[-1].get_cmass_local_pose(), 255 | env.target_object_part_actor_link, 256 | env.target_object_part_actor_link.get_cmass_local_pose(), 257 | ) 258 | suction_drive.set_x_properties(stiffness=45000, damping=0) 259 | suction_drive.set_y_properties(stiffness=45000, damping=0) 260 | suction_drive.set_z_properties(stiffness=45000, damping=0) 261 | 262 | 263 | if primact_type == 'pulling': 264 | robot.move_to_target_pose(pull_rotmat, 2000) 265 | robot.wait_n_steps(2000) 266 | 267 | 268 | 269 | target_link_mat44 = env.get_target_part_pose().to_transformation_matrix() 270 | position_world_xyz1_end = target_link_mat44 @ position_local_xyz1 271 | out_info['touch_position_world_xyz_start'] = position_world_xyz1[:3].tolist() 272 | out_info['touch_position_world_xyz_end'] = position_world_xyz1_end[:3].tolist() 273 | if success==True: 274 | succ=True 275 | out_info['final_target_part_qpos'],_,_ = env.get_target_part_qpos() 276 | print(out_info['final_target_part_qpos'],out_info['start_target_part_qpos']) 277 | abs_motion = abs(out_info['final_target_part_qpos'] - out_info['start_target_part_qpos']) 278 | j = out_info['target_object_part_joint_id'] 279 | tot_motion = out_info['joint_angles_upper'][j] - out_info['joint_angles_lower'][j] + 1e-8 280 | mani_success = (abs_motion > 0.03) or (abs_motion / tot_motion > 0.5) 281 | else: 282 | mani_success = False 283 | if mani_success: 284 | if primact_type == 'pushing': 285 | mani_success = mani_success 286 | elif primact_type == 'pulling': 287 | mov_dir = np.array(out_info['touch_position_world_xyz_end'], dtype=np.float32) - \ 288 | np.array(out_info['touch_position_world_xyz_start'], dtype=np.float32) 289 | mov_dir /= np.linalg.norm(mov_dir) 290 | intended_dir = -np.array(out_info['gripper_direction_world'], dtype=np.float32) 291 | mani_success = (intended_dir @ mov_dir > 0.3) 292 | return success, mani_success 293 | 294 | success, mani_succ = plot_mani(cam,gripper_direction_camera, gripper_forward_direction_camera) 295 | out_info['succ'] = np.array(success, dtype=bool).tolist() 296 | 297 | out_info['mani_succ'] = np.array(mani_succ, dtype=bool).tolist() 298 | rgb_final_pose, _ = cam.get_observation() 299 | Image.fromarray((rgb_final_pose*255).astype(np.uint8)).save(os.path.join(out_dir, 'viz_target_pose.png')) 300 | 301 | print(success, mani_succ) 302 | with open(os.path.join(out_dir, 'result.json'), 'w') as fout: 303 | json.dump(out_info, fout) 304 | print(out_dir) 305 | flog.close() 306 | env.close() 307 | -------------------------------------------------------------------------------- /train/llama/llama_adapter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from pathlib import Path 4 | 5 | import clip 6 | import torch 7 | import torch.nn as nn 8 | from timm.models.vision_transformer import Block 9 | 10 | from .llama import ModelArgs, Transformer, BERTTransformer 11 | from .tokenizer import Tokenizer 12 | from .utils import sample_top_p, _download 13 | class RMSNorm(torch.nn.Module): 14 | def __init__(self, dim: int, 
eps: float = 1e-6): 15 | super().__init__() 16 | self.eps = eps 17 | self.weight = nn.Parameter(torch.ones(dim)) 18 | 19 | def _norm(self, x): 20 | return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) 21 | 22 | def forward(self, x): 23 | output = self._norm(x.float()).type_as(x) 24 | return output * self.weight 25 | 26 | class LLaMA_adapter(nn.Module): 27 | 28 | def __init__(self, llama_ckpt_dir, llama_tokenizer, 29 | max_seq_len=512, max_batch_size=1, 30 | clip_model='ViT-L/14@336px', 31 | v_embed_dim=1024, v_depth=16, 32 | v_num_heads=16, v_mlp_ratio=4.0, 33 | query_len=577, query_layer=32, phase="finetune"): 34 | super().__init__() 35 | # llama configs 36 | # with open(os.path.join(llama_ckpt_dir, "7B/params.json"), "r") as f: 37 | with open("./ckpts/llama_model_weights/7B/params.json", "r") as f: 38 | params = json.loads(f.read()) 39 | bias_lora = phase == "finetune" 40 | model_args: ModelArgs = ModelArgs( 41 | max_seq_len=max_seq_len, max_batch_size=max_batch_size, **params 42 | ) # max_batch_size only affects inferenc 43 | 44 | # 1. clip and clip projector 45 | self.clip, self.clip_transform = clip.load(clip_model,download_root='./ckpts') 46 | 47 | clip_dim = self.clip.visual.proj.shape[1] 48 | self.clip_proj = nn.Linear(clip_dim, v_embed_dim) 49 | self.clip_proj_norm = nn.LayerNorm(v_embed_dim) 50 | 51 | self.query_len = query_len 52 | self.query_layer = query_layer 53 | 54 | # 2. visual query, blocks and projector 55 | 56 | visual_model_args = ModelArgs(dim=1024, n_layers=16, n_heads=8, max_seq_len=577) 57 | visual_model_args.vocab_size = 1024 58 | self.visual_blocks = BERTTransformer(visual_model_args) 59 | self.visual_proj = nn.Linear(v_embed_dim, model_args.dim) 60 | self.visual_proj_norm = nn.LayerNorm(model_args.dim) 61 | 62 | # 3. adapter query 63 | self.adapter_query = nn.Embedding( 64 | query_len * query_layer, model_args.dim) 65 | 66 | # 4. tokenizer 67 | self.tokenizer = Tokenizer(model_path=llama_tokenizer) 68 | 69 | # 5. llama 70 | model_args.w_bias = bias_lora 71 | model_args.w_lora = bias_lora 72 | model_args.vocab_size = self.tokenizer.n_words 73 | torch.set_default_tensor_type(torch.cuda.HalfTensor) 74 | self.llama = Transformer(model_args) 75 | torch.set_default_tensor_type(torch.FloatTensor) 76 | 77 | ckpts = ['./ckpts/llama_model_weights/7B/consolidated.00.pth'] 78 | 79 | for ckpt in ckpts: 80 | # print('load_ckpt_path:', ckpt) 81 | ckpt = torch.load(ckpt, map_location='cpu') 82 | self.llama.load_state_dict(ckpt, strict=False) 83 | 84 | 85 | for name, param in self.named_parameters(): 86 | param.requires_grad = False 87 | 88 | for name, para in self.llama.named_parameters(): 89 | if 'norm' in name: 90 | para.data = para.data.float() 91 | para.requires_grad = True 92 | if 'bias' in name: 93 | para.data = para.data.float() 94 | para.requires_grad = True 95 | if 'lora' in name: 96 | para.data = para.data.float() 97 | para.requires_grad = True 98 | count = 0 99 | for name, param in self.named_parameters(): 100 | if param.requires_grad: 101 | count += 1 102 | print(f"Trainable param: {name}, {param.shape}, {param.dtype}") 103 | 104 | 105 | # 6. 
training criterion 106 | self.criterion = torch.nn.CrossEntropyLoss(ignore_index=0) 107 | 108 | def clip_encode_image(self, x): 109 | 110 | # modified from CLIP 111 | x = self.clip.visual.conv1(x) # shape = [*, width, grid, grid] 112 | 113 | # shape = [*, width, grid ** 2] 114 | x = x.reshape(x.shape[0], x.shape[1], -1) 115 | x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] 116 | x = torch.cat([self.clip.visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, 117 | x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width] 118 | x = x + self.clip.visual.positional_embedding.to(x.dtype) 119 | x = self.clip.visual.ln_pre(x) 120 | 121 | x = x.permute(1, 0, 2) # NLD -> LND 122 | x = self.clip.visual.transformer(x) 123 | x = x.permute(1, 0, 2) # LND -> NLD 124 | 125 | # preserve all spatial tokens 126 | x = self.clip.visual.ln_post(x[:, :, :]) 127 | 128 | if self.clip.visual.proj is not None: 129 | x = x @ self.clip.visual.proj 130 | 131 | return x 132 | 133 | def forward_visual(self, imgs): 134 | clip_feats = self.clip_encode_image(imgs) 135 | clip_feats = self.clip_proj_norm(self.clip_proj(clip_feats.float())) 136 | 137 | visual_query = clip_feats 138 | visual_query = self.visual_blocks(visual_query, 0) 139 | 140 | visual_query = self.visual_proj(visual_query) 141 | visual_query = self.visual_proj_norm(visual_query) 142 | 143 | return visual_query 144 | 145 | def forward(self, tokens, labels, imgs): 146 | 147 | visual_proj = self.forward_visual(imgs) 148 | 149 | _bsz, seqlen = tokens.shape 150 | 151 | h = self.llama.tok_embeddings(tokens) 152 | freqs_cis = self.llama.freqs_cis.to(h.device) 153 | freqs_cis = freqs_cis[:seqlen] 154 | mask = None 155 | mask = torch.full((1, 1, seqlen, seqlen), float("-inf"), device=h.device) 156 | mask = torch.triu(mask, diagonal=0 + 1).type_as(h) 157 | 158 | adapter = self.adapter_query.weight.reshape(self.query_layer, self.query_len, -1).unsqueeze(1) 159 | adapter_index = 0 160 | for layer in self.llama.layers: 161 | h = layer(h, 0, freqs_cis, mask, visual_proj + adapter[adapter_index]) 162 | adapter_index = adapter_index + 1 163 | 164 | h = self.llama.norm(h) 165 | output = self.llama.output(h) 166 | output = output[:, :-1, :] 167 | labels = labels[:, 1:] 168 | 169 | if labels.sum() == 0: 170 | c_loss = output.mean() * 0 171 | else: 172 | assert self.llama.vocab_size == 32000 173 | c_loss = self.criterion(output.reshape(-1, self.llama.vocab_size), labels.flatten()) 174 | 175 | return c_loss, c_loss 176 | 177 | #@torch.inference_mode() 178 | @torch.no_grad() 179 | def forward_inference(self, visual_proj, tokens, start_pos: int): 180 | _bsz, seqlen = tokens.shape 181 | h = self.llama.tok_embeddings(tokens) 182 | freqs_cis = self.llama.freqs_cis.to(h.device) 183 | freqs_cis = freqs_cis[start_pos : start_pos + seqlen] 184 | mask = None 185 | mask = torch.full((1, 1, seqlen, seqlen), float("-inf"), device=h.device) 186 | mask = torch.triu(mask, diagonal=start_pos + 1).type_as(h) 187 | 188 | 189 | adapter = self.adapter_query.weight.reshape(self.query_layer, self.query_len, -1).unsqueeze(1) 190 | adapter_index = 0 191 | 192 | for layer in self.llama.layers: 193 | h = layer(h, start_pos, freqs_cis, mask, visual_proj + adapter[adapter_index].repeat(_bsz, 1, 1)) 194 | adapter_index = adapter_index + 1 195 | 196 | h = self.llama.norm(h) 197 | output = self.llama.output(h[:, -1, :]) 198 | 199 | return output.float() 200 | 201 | #@torch.inference_mode() 202 | @torch.no_grad() 203 | def generate( 204 | self, imgs, prompts, 
205 | max_gen_len: int = 256, 206 | temperature: float = 0.1, 207 | top_p: float = 0.75, 208 | ): 209 | bsz = len(imgs) 210 | params = self.llama.params 211 | assert bsz <= params.max_batch_size, (bsz, params.max_batch_size) 212 | assert len(imgs) == len(prompts) 213 | 214 | with torch.cuda.amp.autocast(): 215 | visual_query = self.forward_visual(imgs) 216 | 217 | if isinstance(prompts[0], str): 218 | prompts = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts] 219 | 220 | min_prompt_size = min([len(t) for t in prompts]) 221 | max_prompt_size = max([len(t) for t in prompts]) 222 | 223 | total_len = min(params.max_seq_len, max_gen_len + max_prompt_size) 224 | 225 | tokens = torch.full((bsz, total_len), self.tokenizer.pad_id).cuda().long() 226 | 227 | for k, t in enumerate(prompts): 228 | tokens[k, : len(t)] = torch.tensor(t).cuda().long() 229 | input_text_mask = tokens != self.tokenizer.pad_id 230 | start_pos = min_prompt_size 231 | prev_pos = 0 232 | for cur_pos in range(start_pos, total_len): 233 | with torch.cuda.amp.autocast(): 234 | logits = self.forward_inference(visual_query, tokens[:, prev_pos:cur_pos], prev_pos) 235 | if temperature > 0: 236 | probs = torch.softmax(logits / temperature, dim=-1) 237 | next_token = sample_top_p(probs, top_p) 238 | else: 239 | next_token = torch.argmax(logits, dim=-1) 240 | next_token = next_token.reshape(-1) 241 | 242 | next_token = torch.where( 243 | input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token 244 | ) 245 | tokens[:, cur_pos] = next_token 246 | # trick: early stop if bsz==1 247 | if bsz == 1 and next_token[0] == self.tokenizer.eos_id: 248 | break 249 | prev_pos = cur_pos 250 | 251 | decoded = [] 252 | for i, t in enumerate(tokens.tolist()): 253 | 254 | # cut to max gen len 255 | t = t[len(prompts[i]): len(prompts[i]) + max_gen_len] 256 | # cut to eos tok if any 257 | try: 258 | t = t[: t.index(self.tokenizer.eos_id)] 259 | except ValueError: 260 | pass 261 | decoded.append(self.tokenizer.decode(t)) 262 | 263 | return decoded 264 | 265 | 266 | _MODELS = { 267 | "BIAS-7B": "https://github.com/ZrrSkywalker/LLaMA-Adapter/releases/download/v.2.0.0/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth", 268 | # "LORA16-7B": "", 269 | # "PARTIAL-7B": "" 270 | } 271 | 272 | def available_models(): 273 | return list(_MODELS.keys()) 274 | 275 | def load(name, llama_dir, device="cuda" if torch.cuda.is_available() else "cpu", download_root='ckpts', max_seq_len=512, 276 | phase="finetune"): 277 | if name in _MODELS: 278 | model_path = _download(_MODELS[name], download_root) 279 | elif os.path.isfile(name): 280 | model_path = name 281 | else: 282 | raise RuntimeError(f"Model {name} not found; available models = {available_models()}") 283 | 284 | ckpt = torch.load(model_path, map_location='cpu') 285 | 286 | # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B 287 | llama_type = name.split('.')[0].split('-')[-1] 288 | llama_ckpt_dir = os.path.join(llama_dir, llama_type) 289 | llama_tokenzier_path = os.path.join(llama_dir, 'tokenizer.model') 290 | 291 | # load llama_adapter weights and model_cfg 292 | print(f'Loading LLaMA-Adapter from {model_path}') 293 | ckpt = torch.load(model_path, map_location='cpu') 294 | 295 | model = LLaMA_adapter( 296 | llama_ckpt_dir, llama_tokenzier_path, 297 | max_seq_len=max_seq_len, max_batch_size=1, 298 | clip_model='ViT-L/14@336px', 299 | v_embed_dim=1024, v_depth=16, 300 | v_num_heads=16, v_mlp_ratio=4.0, 301 | query_len=577, query_layer=32, 302 | phase=phase) 303 | 304 |
load_result = model.load_state_dict(ckpt['model'], strict=False) 305 | 306 | # assert len(load_result.unexpected_keys) == 0, f"Unexpected keys: {load_result.unexpected_keys}" 307 | return model.to(device), model.clip_transform -------------------------------------------------------------------------------- /test/llama/llama_adapter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from pathlib import Path 4 | 5 | import clip 6 | import torch 7 | import torch.nn as nn 8 | from timm.models.vision_transformer import Block 9 | 10 | from .llama import ModelArgs, Transformer, BERTTransformer 11 | from .tokenizer import Tokenizer 12 | from .utils import sample_top_p, _download 13 | class RMSNorm(torch.nn.Module): 14 | def __init__(self, dim: int, eps: float = 1e-6): 15 | super().__init__() 16 | self.eps = eps 17 | self.weight = nn.Parameter(torch.ones(dim)) 18 | 19 | def _norm(self, x): 20 | return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) 21 | 22 | def forward(self, x): 23 | output = self._norm(x.float()).type_as(x) 24 | return output * self.weight 25 | 26 | class LLaMA_adapter(nn.Module): 27 | 28 | def __init__(self, llama_ckpt_dir, llama_tokenizer, 29 | max_seq_len=512, max_batch_size=1, 30 | clip_model='ViT-L/14@336px', 31 | v_embed_dim=1024, v_depth=16, 32 | v_num_heads=16, v_mlp_ratio=4.0, 33 | query_len=577, query_layer=32, phase="finetune"): 34 | super().__init__() 35 | # llama configs 36 | with open(os.path.join(llama_ckpt_dir, "7B/params.json"), "r") as f: 37 | params = json.loads(f.read()) 38 | bias_lora = phase == "finetune" 39 | model_args: ModelArgs = ModelArgs( 40 | max_seq_len=max_seq_len, max_batch_size=max_batch_size, **params 41 | ) # max_batch_size only affects inferenc 42 | 43 | # 1. clip and clip projector 44 | self.clip, self.clip_transform = clip.load(clip_model,download_root='./ckpts') 45 | 46 | clip_dim = self.clip.visual.proj.shape[1] 47 | self.clip_proj = nn.Linear(clip_dim, v_embed_dim) 48 | self.clip_proj_norm = nn.LayerNorm(v_embed_dim) 49 | 50 | self.query_len = query_len 51 | self.query_layer = query_layer 52 | 53 | # 2. visual query, blocks and projector 54 | 55 | visual_model_args = ModelArgs(dim=1024, n_layers=16, n_heads=8, max_seq_len=577) 56 | visual_model_args.vocab_size = 1024 57 | self.visual_blocks = BERTTransformer(visual_model_args) 58 | self.visual_proj = nn.Linear(v_embed_dim, model_args.dim) 59 | self.visual_proj_norm = nn.LayerNorm(model_args.dim) 60 | 61 | # 3. adapter query 62 | self.adapter_query = nn.Embedding( 63 | query_len * query_layer, model_args.dim) 64 | 65 | # 4. tokenizer 66 | self.tokenizer = Tokenizer(model_path=llama_tokenizer) 67 | 68 | # 5. 
llama 69 | model_args.w_bias = bias_lora 70 | model_args.w_lora = bias_lora 71 | model_args.vocab_size = self.tokenizer.n_words 72 | torch.set_default_tensor_type(torch.cuda.HalfTensor) 73 | self.llama = Transformer(model_args) 74 | torch.set_default_tensor_type(torch.FloatTensor) 75 | 76 | ckpts = ['./ckpts/llama_model_weights/7B/consolidated.00.pth'] 77 | #ckpts = sorted(Path().glob("*.pth")) 78 | for ckpt in ckpts: 79 | # print('load_ckpt_path:', ckpt) 80 | ckpt = torch.load(ckpt, map_location='cpu') 81 | self.llama.load_state_dict(ckpt, strict=False) 82 | # print(ckpt['layers.31.attention.wo.weight']) 83 | # assert(0) 84 | # for name, para in self.llama.named_parameters(): 85 | # if 'layers.31.attention.wo.weight' in name: 86 | # print(para.data) 87 | # assert(0) 88 | 89 | for name, param in self.named_parameters(): 90 | param.requires_grad = False 91 | 92 | for name, para in self.llama.named_parameters(): 93 | if 'norm' in name: 94 | para.data = para.data.float() 95 | para.requires_grad = True 96 | if 'bias' in name: 97 | para.data = para.data.float() 98 | para.requires_grad = True 99 | if 'lora' in name: 100 | para.data = para.data.float() 101 | para.requires_grad = True 102 | count = 0 103 | for name, param in self.named_parameters(): 104 | if param.requires_grad: 105 | count += 1 106 | print(f"Trainable param: {name}, {param.shape}, {param.dtype}") 107 | 108 | 109 | # 6. training criterion 110 | self.criterion = torch.nn.CrossEntropyLoss(ignore_index=0) 111 | 112 | def clip_encode_image(self, x): 113 | # print(x.dtype) 114 | # print(self.clip.visual.conv1.weight.dtype) 115 | # assert(0) 116 | # modified from CLIP 117 | x = self.clip.visual.conv1(x) # shape = [*, width, grid, grid] 118 | 119 | # shape = [*, width, grid ** 2] 120 | x = x.reshape(x.shape[0], x.shape[1], -1) 121 | x = x.permute(0, 2, 1) # shape = [*, grid ** 2, width] 122 | x = torch.cat([self.clip.visual.class_embedding.to(x.dtype) + torch.zeros(x.shape[0], 1, 123 | x.shape[-1], dtype=x.dtype, device=x.device), x], dim=1) # shape = [*, grid ** 2 + 1, width] 124 | x = x + self.clip.visual.positional_embedding.to(x.dtype) 125 | x = self.clip.visual.ln_pre(x) 126 | 127 | x = x.permute(1, 0, 2) # NLD -> LND 128 | x = self.clip.visual.transformer(x) 129 | x = x.permute(1, 0, 2) # LND -> NLD 130 | 131 | # preserve all spatial tokens 132 | x = self.clip.visual.ln_post(x[:, :, :]) 133 | 134 | if self.clip.visual.proj is not None: 135 | x = x @ self.clip.visual.proj 136 | 137 | return x 138 | 139 | def forward_visual(self, imgs): 140 | clip_feats = self.clip_encode_image(imgs) 141 | clip_feats = self.clip_proj_norm(self.clip_proj(clip_feats.float())) 142 | 143 | visual_query = clip_feats 144 | visual_query = self.visual_blocks(visual_query, 0) 145 | 146 | visual_query = self.visual_proj(visual_query) 147 | visual_query = self.visual_proj_norm(visual_query) 148 | 149 | return visual_query 150 | 151 | def forward(self, tokens, labels, imgs): 152 | 153 | visual_proj = self.forward_visual(imgs) 154 | 155 | _bsz, seqlen = tokens.shape 156 | 157 | h = self.llama.tok_embeddings(tokens) 158 | freqs_cis = self.llama.freqs_cis.to(h.device) 159 | freqs_cis = freqs_cis[:seqlen] 160 | mask = None 161 | mask = torch.full((1, 1, seqlen, seqlen), float("-inf"), device=h.device) 162 | mask = torch.triu(mask, diagonal=0 + 1).type_as(h) 163 | 164 | adapter = self.adapter_query.weight.reshape(self.query_layer, self.query_len, -1).unsqueeze(1) 165 | adapter_index = 0 166 | for layer in self.llama.layers: 167 | h = layer(h, 0, freqs_cis, mask, 
visual_proj + adapter[adapter_index]) 168 | adapter_index = adapter_index + 1 169 | 170 | h = self.llama.norm(h) 171 | output = self.llama.output(h) 172 | output = output[:, :-1, :] 173 | labels = labels[:, 1:] 174 | 175 | if labels.sum() == 0: 176 | c_loss = output.mean() * 0 177 | else: 178 | assert self.llama.vocab_size == 32000 179 | c_loss = self.criterion(output.reshape(-1, self.llama.vocab_size), labels.flatten()) 180 | 181 | return c_loss, c_loss 182 | 183 | #@torch.inference_mode() 184 | @torch.no_grad() 185 | def forward_inference(self, visual_proj, tokens, start_pos: int): 186 | _bsz, seqlen = tokens.shape 187 | h = self.llama.tok_embeddings(tokens) 188 | freqs_cis = self.llama.freqs_cis.to(h.device) 189 | freqs_cis = freqs_cis[start_pos : start_pos + seqlen] 190 | mask = None 191 | mask = torch.full((1, 1, seqlen, seqlen), float("-inf"), device=h.device) 192 | mask = torch.triu(mask, diagonal=start_pos + 1).type_as(h) 193 | 194 | 195 | adapter = self.adapter_query.weight.reshape(self.query_layer, self.query_len, -1).unsqueeze(1) 196 | adapter_index = 0 197 | 198 | for layer in self.llama.layers: 199 | h = layer(h, start_pos, freqs_cis, mask, visual_proj + adapter[adapter_index].repeat(_bsz, 1, 1)) 200 | adapter_index = adapter_index + 1 201 | 202 | h = self.llama.norm(h) 203 | output = self.llama.output(h[:, -1, :]) 204 | 205 | return output.float() 206 | 207 | #@torch.inference_mode() 208 | @torch.no_grad() 209 | def generate( 210 | self, imgs, prompts, 211 | max_gen_len: int = 256, 212 | temperature: float = 0.1, 213 | top_p: float = 0.75, 214 | ): 215 | bsz = len(imgs) 216 | params = self.llama.params 217 | assert bsz <= params.max_batch_size, (bsz, params.max_batch_size) 218 | assert len(imgs) == len(prompts) 219 | 220 | with torch.cuda.amp.autocast(): 221 | visual_query = self.forward_visual(imgs) 222 | 223 | if isinstance(prompts[0], str): 224 | prompts = [self.tokenizer.encode(x, bos=True, eos=False) for x in prompts] 225 | 226 | min_prompt_size = min([len(t) for t in prompts]) 227 | max_prompt_size = max([len(t) for t in prompts]) 228 | 229 | total_len = min(params.max_seq_len, max_gen_len + max_prompt_size) 230 | 231 | tokens = torch.full((bsz, total_len), self.tokenizer.pad_id).cuda().long() 232 | 233 | for k, t in enumerate(prompts): 234 | tokens[k, : len(t)] = torch.tensor(t).cuda().long() 235 | input_text_mask = tokens != self.tokenizer.pad_id 236 | start_pos = min_prompt_size 237 | prev_pos = 0 238 | for cur_pos in range(start_pos, total_len): 239 | with torch.cuda.amp.autocast(): 240 | logits = self.forward_inference(visual_query, tokens[:, prev_pos:cur_pos], prev_pos) 241 | if temperature > 0: 242 | probs = torch.softmax(logits / temperature, dim=-1) 243 | next_token = sample_top_p(probs, top_p) 244 | else: 245 | next_token = torch.argmax(logits, dim=-1) 246 | next_token = next_token.reshape(-1) 247 | 248 | next_token = torch.where( 249 | input_text_mask[:, cur_pos], tokens[:, cur_pos], next_token 250 | ) 251 | tokens[:, cur_pos] = next_token 252 | # trick: early stop if bsz==1 253 | if bsz == 1 and next_token[0] == self.tokenizer.eos_id: 254 | break 255 | prev_pos = cur_pos 256 | 257 | decoded = [] 258 | for i, t in enumerate(tokens.tolist()): 259 | 260 | # cut to max gen len 261 | t = t[len(prompts[i]): len(prompts[i]) + max_gen_len] 262 | # cut to eos tok if any 263 | try: 264 | t = t[: t.index(self.tokenizer.eos_id)] 265 | except ValueError: 266 | pass 267 | decoded.append(self.tokenizer.decode(t)) 268 | 269 | return decoded 270 | 271 | 272 | _MODELS = { 273 
| "BIAS-7B": "https://github.com/ZrrSkywalker/LLaMA-Adapter/releases/download/v.2.0.0/7fa55208379faf2dd862565284101b0e4a2a72114d6490a95e432cf9d9b6c813_BIAS-7B.pth", 274 | # "LORA16-7B": "", 275 | # "PARTIAL-7B": "" 276 | } 277 | 278 | def available_models(): 279 | return list(_MODELS.keys()) 280 | 281 | def load(name, llama_dir, device="cuda" if torch.cuda.is_available() else "cpu", download_root='ckpts', max_seq_len=512, 282 | phase="finetune"): 283 | if name in _MODELS: 284 | model_path = _download(_MODELS[name], download_root) 285 | elif os.path.isfile(name): 286 | model_path = name 287 | else: 288 | return RuntimeError(f"Model {name} not found; available models = {available_models()}") 289 | 290 | ckpt = torch.load(model_path, map_location='cpu') 291 | 292 | # BIAS-7B or https://xxx/sha256_BIAS-7B.pth -> 7B 293 | llama_type = name.split('.')[0].split('-')[-1] 294 | llama_ckpt_dir = os.path.join(llama_dir, llama_type) 295 | llama_tokenzier_path = os.path.join(llama_dir, 'tokenizer.model') 296 | 297 | # load llama_adapter weights and model_cfg 298 | print(f'Loading checkpoint from {model_path}') 299 | 300 | 301 | model = LLaMA_adapter( 302 | llama_dir, llama_tokenzier_path, 303 | max_seq_len=max_seq_len, max_batch_size=1, 304 | clip_model='ViT-L/14@336px', 305 | v_embed_dim=1024, v_depth=16, 306 | v_num_heads=16, v_mlp_ratio=4.0, 307 | query_len=577, query_layer=32, 308 | phase=phase) 309 | 310 | load_result = model.load_state_dict(ckpt['model'], strict=False) 311 | 312 | # assert len(load_result.unexpected_keys) == 0, f"Unexpected keys: {load_result.unexpected_keys}" 313 | return model.to(device), model.clip_transform 314 | -------------------------------------------------------------------------------- /data_collection/stats/test_id.txt: -------------------------------------------------------------------------------- 1 | 24931 Table 2 | 103319 Window 3 | 26387 Table 4 | 100082 USB 5 | 2095 Faucet 6 | 34178 Table 7 | 28164 Table 8 | 102625 Toilet 9 | 102018 Oven 10 | 102001 Oven 11 | 102765 Kettle 12 | 1712 Faucet 13 | 102678 Toilet 14 | 102984 Window 15 | 30666 Table 16 | 103051 Window 17 | 100065 USB 18 | 102669 Toilet 19 | 102037 USB 20 | 148 Faucet 21 | 27619 Table 22 | 100513 USB 23 | 102663 Toilet 24 | 103813 Phone 25 | 1788 Faucet 26 | 7332 Oven 27 | 103285 Phone 28 | 149 Faucet 29 | 102732 Kettle 30 | 1653 Faucet 31 | 103305 Stapler 32 | 11040 Scissors 33 | 7290 Oven 34 | 10960 Scissors 35 | 1626 Faucet 36 | 25308 Table 37 | 2035 Faucet 38 | 100061 USB 39 | 102641 Toilet 40 | 102629 Toilet 41 | 103238 Window 42 | 102690 Toilet 43 | 27267 Table 44 | 153 Faucet 45 | 33810 Table 46 | 102703 Toilet 47 | 1444 Faucet 48 | 25160 Table 49 | 2054 Faucet 50 | 102687 Toilet 51 | 1556 Faucet 52 | 102805 Window 53 | 103040 Window 54 | 103340 Window 55 | 2115 Faucet 56 | 102621 Toilet 57 | 1931 Faucet 58 | 34610 Table 59 | 26886 Table 60 | 101924 Oven 61 | 32174 Table 62 | 100095 USB 63 | 100116 USB 64 | 11111 Scissors 65 | 33914 Table 66 | 3971 Kettle 67 | 7347 Oven 68 | 102654 Toilet 69 | 1596 Faucet 70 | 102675 Toilet 71 | 102055 Oven 72 | 1028 Faucet 73 | 103293 Stapler 74 | 103814 Phone 75 | 102677 Toilet 76 | 103312 Window 77 | 103276 Stapler 78 | 103518 WashingMachine 79 | 101931 Oven 80 | 1721 Faucet 81 | 10569 Scissors 82 | 20985 Table 83 | 101943 Oven 84 | 102801 Window 85 | 102647 Toilet 86 | 103917 Phone 87 | 102756 Kettle 88 | 32625 Table 89 | 26525 Table 90 | 102620 Toilet 91 | 100106 USB 92 | 103935 Phone 93 | 102634 Toilet 94 | 1401 Faucet 95 | 102707 Toilet 96 | 102657 Toilet 
97 | 32746 Table 98 | 103095 Stapler 99 | 19179 Table 100 | 103235 Window 101 | 102679 Toilet 102 | 1646 Faucet 103 | 1488 Faucet 104 | 11047 Scissors 105 | 102684 Toilet 106 | 102738 Kettle 107 | 102977 Window 108 | 10902 Scissors 109 | 100031 Kettle 110 | 23782 Table 111 | 10559 Scissors 112 | 26503 Table 113 | 103480 WashingMachine 114 | 32259 Table 115 | 103332 Window 116 | 102724 Kettle 117 | 10889 Scissors 118 | 7179 Oven 119 | 102021 USB 120 | 100283 WashingMachine 121 | 19740 Table 122 | 103892 Phone 123 | 929 Faucet 124 | 10622 Scissors 125 | 20411 Table 126 | 103425 WashingMachine 127 | 102981 Window 128 | 10499 Scissors 129 | 103775 WashingMachine 130 | 101983 USB 131 | 10564 Scissors 132 | 102665 Toilet 133 | 102676 Toilet 134 | 102903 Window 135 | 32213 Table 136 | 33457 Table 137 | 29921 Table 138 | 1925 Faucet 139 | 103099 Stapler 140 | 100068 USB 141 | 2084 Faucet 142 | 101947 Oven 143 | 101311 Kettle 144 | 103320 Window 145 | 103201 Kettle 146 | 103208 Kettle 147 | 7220 Oven 148 | 27189 Table 149 | 102704 Toilet 150 | 102688 Toilet 151 | 10962 Scissors 152 | 26657 Table 153 | 1427 Faucet 154 | 23472 Table 155 | 2113 Faucet 156 | 22339 Table 157 | 25493 Table 158 | 102804 Window 159 | 11077 Scissors 160 | 1741 Faucet 161 | 22508 Table 162 | 102730 Kettle 163 | 101319 Toilet 164 | 1794 Faucet 165 | 24644 Table 166 | 103828 Phone 167 | 103301 Stapler 168 | 20279 Table 169 | 23807 Table 170 | 101999 USB 171 | 102024 USB 172 | 23372 Table 173 | 102736 Kettle 174 | 102062 USB 175 | 101917 Oven 176 | 11013 Scissors 177 | 1903 Faucet 178 | 103236 Window 179 | 102667 Toilet 180 | 1280 Faucet 181 | 102906 Window 182 | 103941 Phone 183 | 102710 Toilet 184 | 102628 Toilet 185 | 100064 USB 186 | 2108 Faucet 187 | 102708 Toilet 188 | 102063 USB 189 | 34617 Table 190 | 103792 Stapler 191 | 100071 USB 192 | 1795 Faucet 193 | 30869 Table 194 | 100079 USB 195 | 103255 Window 196 | 2083 Faucet 197 | 26608 Table 198 | 22241 Table 199 | 25959 Table 200 | 103275 Stapler 201 | 103253 Window 202 | 102896 Window 203 | 101948 USB 204 | 102658 Toilet 205 | 102697 Toilet 206 | 31601 Table 207 | 103351 WashingMachine 208 | 32324 Table 209 | 19898 Table 210 | 29557 Table 211 | 103050 Window 212 | 102025 USB 213 | 7187 Oven 214 | 1896 Faucet 215 | 102660 Toilet 216 | 103052 Window 217 | 103369 WashingMachine 218 | 102753 Kettle 219 | 102631 Toilet 220 | 100133 USB 221 | 102692 Toilet 222 | 26670 Table 223 | 102662 Toilet 224 | 103113 Stapler 225 | 866 Faucet 226 | 103058 Window 227 | 27044 Table 228 | 103032 Window 229 | 167 Faucet 230 | 103227 Toilet 231 | 101909 Oven 232 | 101315 Kettle 233 | 102649 Toilet 234 | 10557 Scissors 235 | 102786 Kettle 236 | 102639 Toilet 237 | 19825 Table 238 | 102060 Oven 239 | 103063 Window 240 | 100086 USB 241 | 10561 Scissors 242 | 102622 Toilet 243 | 103044 Window 244 | 103593 Phone 245 | 102016 USB 246 | 102698 Toilet 247 | 24152 Table 248 | 1885 Faucet 249 | 26073 Table 250 | 21467 Table 251 | 101952 USB 252 | 32086 Table 253 | 100073 USB 254 | 103042 Window 255 | 102798 Window 256 | 22301 Table 257 | 100113 USB 258 | 32932 Table 259 | 103325 Window 260 | 102739 Kettle 261 | 931 Faucet 262 | 10562 Scissors 263 | 1961 Faucet 264 | 103251 Phone 265 | 102651 Toilet 266 | 103015 Window 267 | 102726 Kettle 268 | 102664 Toilet 269 | 102636 Toilet 270 | 20453 Table 271 | 102905 Window 272 | 10973 Scissors 273 | 101921 Oven 274 | 22367 Table 275 | 103271 Stapler 276 | 33930 Table 277 | 1785 Faucet 278 | 10502 Scissors 279 | 1823 Faucet 280 | 152 Faucet 281 | 2140 Faucet 282 
| 102773 Kettle 283 | 101930 Oven 284 | 102648 Toilet 285 | 103233 Toilet 286 | 1802 Faucet 287 | 1935 Faucet 288 | 26692 Table 289 | 1034 Faucet 290 | 2170 Faucet 291 | 101960 USB 292 | 102619 Toilet 293 | 1011 Faucet 294 | 32354 Table 295 | 10567 Scissors 296 | 103297 Stapler 297 | 10686 Scissors 298 | 103361 WashingMachine 299 | 11089 Scissors 300 | 103778 WashingMachine 301 | 10537 Scissors 302 | 23511 Table 303 | 103234 Toilet 304 | 30663 Table 305 | 100085 USB 306 | 26899 Table 307 | 103776 WashingMachine 308 | 103350 Phone 309 | 26800 Table 310 | 102763 Kettle 311 | 102009 USB 312 | 32052 Table 313 | 103329 Window 314 | 103781 WashingMachine 315 | 156 Faucet 316 | 857 Faucet 317 | 30857 Table 318 | 101320 Toilet 319 | 101808 Oven 320 | 11080 Scissors 321 | 11029 Scissors 322 | 1435 Faucet 323 | 102052 USB 324 | 103230 Toilet 325 | 102666 Toilet 326 | 103927 Phone 327 | 10894 Scissors 328 | 10844 Scissors 329 | 1901 Faucet 330 | 10558 Scissors 331 | 11021 Scissors 332 | 7130 Oven 333 | 1466 Faucet 334 | 20745 Table 335 | 103077 Window 336 | 21473 Table 337 | 103273 Stapler 338 | 1817 Faucet 339 | 101950 USB 340 | 101946 Oven 341 | 103315 Window 342 | 103283 Stapler 343 | 103070 Window 344 | 100128 USB 345 | 960 Faucet 346 | 102646 Toilet 347 | 103239 Window 348 | 101940 Oven 349 | 1380 Faucet 350 | 100072 USB 351 | 1633 Faucet 352 | 1986 Faucet 353 | 103223 Kettle 354 | 102670 Toilet 355 | 1053 Faucet 356 | 102689 Toilet 357 | 103684 Window 358 | 822 Faucet 359 | 103333 Window 360 | 103207 Kettle 361 | 21718 Table 362 | 102715 Kettle 363 | 100087 USB 364 | 103299 Stapler 365 | 102042 USB 366 | 7201 Oven 367 | 103311 Window 368 | 11099 Scissors 369 | 10968 Scissors 370 | 103452 WashingMachine 371 | 811 Faucet 372 | 862 Faucet 373 | 103540 Window 374 | 1370 Faucet 375 | 103149 Window 376 | 103303 Stapler 377 | 102652 Toilet 378 | 102068 USB 379 | 100511 USB 380 | 26545 Table 381 | 100078 USB 382 | 100103 USB 383 | 103056 Window 384 | 22870 Table 385 | 102701 Toilet 386 | 32601 Table 387 | 101323 Toilet 388 | 2017 Faucet 389 | 1288 Faucet 390 | 27478 Table 391 | 103886 Phone 392 | 23724 Table 393 | 101886 USB 394 | 20077 Table 395 | 101305 Kettle 396 | 1479 Faucet 397 | 11036 Scissors 398 | 103925 Phone 399 | 102761 Kettle 400 | 102985 Window 401 | 103347 Phone 402 | 19855 Table 403 | 1668 Faucet 404 | 154 Faucet 405 | 103321 Window 406 | 11026 Scissors 407 | 1528 Faucet 408 | 100108 USB 409 | 102768 Kettle 410 | 31249 Table 411 | 103339 Window 412 | 7120 Oven 413 | 102720 Kettle 414 | 103669 Window 415 | 885 Faucet 416 | 1667 Faucet 417 | 28594 Table 418 | 11052 Scissors 419 | 32566 Table 420 | 101971 Oven 421 | 1343 Faucet 422 | 101773 Oven 423 | 11113 Scissors 424 | 102065 USB 425 | 103318 Window 426 | 29525 Table 427 | 102803 Window 428 | 102714 Kettle 429 | 100982 Window 430 | 2082 Faucet 431 | 908 Faucet 432 | 103292 Stapler 433 | 10495 Scissors 434 | 103242 Window 435 | 101994 USB 436 | 103521 WashingMachine 437 | 26875 Table 438 | 11020 Scissors 439 | 1492 Faucet 440 | 102702 Toilet 441 | 1386 Faucet 442 | 103280 Stapler 443 | 10893 Scissors 444 | 1052 Faucet 445 | 10449 Scissors 446 | 103268 Window 447 | 168 Faucet 448 | 103222 Kettle 449 | 10907 Scissors 450 | 22692 Table 451 | 103323 Window 452 | 100123 USB 453 | 102100 Fan 454 | 101539 Dispenser 455 | 41452 StorageFurniture 456 | 101531 Dispenser 457 | 47686 StorageFurniture 458 | 46768 StorageFurniture 459 | 45523 StorageFurniture 460 | 12727 Keyboard 461 | 9148 Door 462 | 101457 Fan 463 | 45759 StorageFurniture 464 | 46440 
StorageFurniture 465 | 46057 StorageFurniture 466 | 45747 StorageFurniture 467 | 100550 Suitcase 468 | 101611 Safe 469 | 100740 Globe 470 | 101117 Remote 471 | 101010 Remote 472 | 45134 StorageFurniture 473 | 103372 Dispenser 474 | 46380 StorageFurniture 475 | 101332 Eyeglasses 476 | 100794 Globe 477 | 100162 Box 478 | 10612 Refrigerator 479 | 101467 Fan 480 | 100526 FoldingChair 481 | 100760 Globe 482 | 15084 Lamp 483 | 102939 Pen 484 | 100426 Box 485 | 102588 Eyeglasses 486 | 102617 Eyeglasses 487 | 100498 Cart 488 | 46230 StorageFurniture 489 | 102095 Fan 490 | 48746 StorageFurniture 491 | 12477 TrashCan 492 | 3571 Bottle 493 | 9032 Door 494 | 102996 TrashCan 495 | 101793 Pen 496 | 46616 StorageFurniture 497 | 48876 StorageFurniture 498 | 100038 KitchenPot 499 | 14205 Lamp 500 | 3933 Bottle 501 | 101068 Knife 502 | 100756 Globe 503 | 12923 Keyboard 504 | 100924 Switch 505 | 103755 Suitcase 506 | 45001 StorageFurniture 507 | 101054 Knife 508 | 100172 Pliers 509 | 101541 Dispenser 510 | 102556 Cart 511 | 7366 Microwave 512 | 100999 Remote 513 | 49038 StorageFurniture 514 | 46108 StorageFurniture 515 | 102209 TrashCan 516 | 45690 StorageFurniture 517 | 11030 Laptop 518 | 103866 Printer 519 | 13928 Lamp 520 | 44962 StorageFurniture 521 | 45219 StorageFurniture 522 | 101565 Dispenser 523 | 46549 StorageFurniture 524 | 11211 Refrigerator 525 | 103010 TrashCan 526 | 100214 Box 527 | 100750 Globe 528 | 103582 Knife 529 | 103353 Dispenser 530 | 100189 Box 531 | 104040 Remote 532 | 46966 StorageFurniture 533 | 102186 TrashCan 534 | 102966 Pen 535 | 102423 Safe 536 | 48010 StorageFurniture 537 | 45132 StorageFurniture 538 | 101845 Eyeglasses 539 | 46825 StorageFurniture 540 | 100590 FoldingChair 541 | 100914 Switch 542 | 103770 Suitcase 543 | 103408 Dispenser 544 | 103996 Printer 545 | 45135 StorageFurniture 546 | 102177 TrashCan 547 | 10040 Laptop 548 | 100191 Box 549 | 48013 StorageFurniture 550 | 10144 Refrigerator 551 | 100792 Globe 552 | 10068 Refrigerator 553 | 46801 StorageFurniture 554 | 45677 StorageFurniture 555 | 102155 TrashCan 556 | 4094 Display 557 | 48740 StorageFurniture 558 | 103728 Knife 559 | 10707 Laptop 560 | 103740 Knife 561 | 100408 Remote 562 | 100056 KitchenPot 563 | 46466 StorageFurniture 564 | 100292 Lighter 565 | 104030 Printer 566 | 102922 Pen 567 | 13525 Lamp 568 | 45194 StorageFurniture 569 | 101440 Fan 570 | 3854 Bottle 571 | 10280 Laptop 572 | 102314 FoldingChair 573 | 102254 TrashCan 574 | 45423 StorageFurniture 575 | 100013 Remote 576 | 101593 Safe 577 | 48379 StorageFurniture 578 | 102171 TrashCan 579 | 100974 Switch 580 | 44817 StorageFurniture 581 | 47926 StorageFurniture 582 | 46439 StorageFurniture 583 | 10849 Refrigerator 584 | 3616 Bottle 585 | 4681 Display 586 | 46060 StorageFurniture 587 | 35059 StorageFurniture 588 | 101476 Fan 589 | 100767 Suitcase 590 | 45622 StorageFurniture 591 | 104004 Printer 592 | 101060 Cart 593 | 14306 Lamp 594 | 101605 Safe 595 | 4541 Display 596 | 4542 Display 597 | 102210 TrashCan 598 | 4592 Display 599 | 103967 Globe 600 | 4500 Bottle 601 | 9117 Door 602 | 46700 StorageFurniture 603 | 12851 Keyboard 604 | 101260 Knife 605 | 100948 Switch 606 | 10270 Laptop 607 | 102946 Pen 608 | 101115 Knife 609 | 100021 KitchenPot 610 | 101493 Fan 611 | 10101 Laptop 612 | 45822 StorageFurniture 613 | 45908 StorageFurniture 614 | 47645 Box 615 | 10697 Laptop 616 | 101375 Fan 617 | 101524 Fan 618 | 48479 StorageFurniture 619 | 101090 Cart 620 | 9263 Door 621 | 9968 Laptop 622 | 103762 Suitcase 623 | 9016 Door 624 | 101139 Remote 625 | 5688 
Bottle 626 | 103194 Eyeglasses 627 | 10638 Refrigerator 628 | 102608 Eyeglasses 629 | 11141 Laptop 630 | 4590 Display 631 | 103989 Printer 632 | 45427 StorageFurniture 633 | 103358 Dispenser 634 | 46403 StorageFurniture 635 | 48686 StorageFurniture 636 | 100279 Printer 637 | 3593 Bottle 638 | 7119 Microwave 639 | 100491 Cart 640 | 100828 Remote 641 | 12059 Refrigerator 642 | 15423 Lamp 643 | 16047 Lamp 644 | 6040 Bottle 645 | 101860 Eyeglasses 646 | 3519 Bottle 647 | 100057 KitchenPot 648 | 101450 Fan 649 | 100310 Lighter 650 | 46180 StorageFurniture 651 | 102318 Safe 652 | 100853 Cart 653 | 101416 Mouse 654 | 102160 TrashCan 655 | 45780 StorageFurniture 656 | 104044 Remote 657 | 12447 TrashCan 658 | 100075 Cart 659 | 101861 Eyeglasses 660 | 48413 StorageFurniture 661 | 100234 Box 662 | 45403 StorageFurniture 663 | 100350 Lighter 664 | 16675 Lamp 665 | 100968 Switch 666 | 102292 Pliers 667 | 101066 Cart 668 | 45397 StorageFurniture 669 | 103013 TrashCan 670 | 46653 StorageFurniture 671 | 100221 Box 672 | 103380 Dispenser 673 | 45092 StorageFurniture 674 | 100759 Globe 675 | 100557 FoldingChair 676 | 100825 Suitcase 677 | 7263 Microwave 678 | 100243 Box 679 | 11242 Laptop 680 | 48381 StorageFurniture 681 | 102943 Pen 682 | 103978 Printer 683 | 48243 StorageFurniture 684 | 103583 Knife 685 | 102193 TrashCan 686 | 45699 StorageFurniture 687 | 100202 Box 688 | 9987 Laptop 689 | 101420 Fan 690 | 46084 StorageFurniture 691 | 100032 KitchenPot 692 | 46033 StorageFurniture 693 | 101099 Cart 694 | 100965 Switch 695 | 100801 Globe 696 | 101419 Fan 697 | 102173 TrashCan 698 | 48467 StorageFurniture 699 | 102573 Eyeglasses 700 | 100720 Globe 701 | 3615 Bottle 702 | 101432 Fan 703 | 45213 StorageFurniture 704 | 47238 StorageFurniture 705 | 100776 Suitcase 706 | 101055 Cart 707 | 41086 StorageFurniture 708 | 45420 StorageFurniture 709 | 41083 StorageFurniture 710 | 13004 Keyboard 711 | 12902 Keyboard 712 | 101489 Dispenser 713 | 102130 Remote 714 | --------------------------------------------------------------------------------
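Usage note (not part of the repository files above): a minimal sketch of how the load/generate interface defined in test/llama/llama_adapter.py might be driven end to end. It assumes a CUDA device, the LLaMA weights and tokenizer placed under ./ckpts/llama_model_weights as the code expects, and that it is run from the test/ directory so the llama package is importable; the adapter checkpoint path, image file, and prompt string below are placeholders, not values taken from the repository.

# Hypothetical driver for the LLaMA-Adapter wrapper shown above (sketch only).
# Assumptions: CUDA is available, LLaMA weights sit under ./ckpts/llama_model_weights,
# and 'ckpts/manipllm_adapter.pth', 'example_rgb.png', and the prompt text are placeholders.
import torch
from PIL import Image
from llama.llama_adapter import load

# load() returns the adapter-wrapped model and the CLIP ViT-L/14@336px preprocessing transform.
model, preprocess = load("ckpts/manipllm_adapter.pth", llama_dir="./ckpts/llama_model_weights", phase="finetune")
model.eval()

# Batch of one RGB image, preprocessed to the 336x336 resolution CLIP expects.
img = preprocess(Image.open("example_rgb.png").convert("RGB")).unsqueeze(0).cuda()
prompt = "Predict the contact point and gripper direction for manipulating the object."  # placeholder instruction
with torch.no_grad():
    outputs = model.generate(img, [prompt], max_gen_len=256, temperature=0.1, top_p=0.75)
print(outputs[0])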