├── evaluate ├── cross_view │ ├── Readme.md │ ├── STV_compare │ │ ├── STV_compare_inference.py │ │ └── STV_compare_stats.py │ ├── STV_SAT_mapping │ │ ├── STV_SAT_mapping_inference.py │ │ └── STV_SAT_mapping_stats.py │ ├── STV_SAT_location │ │ ├── STV_SAT_location_inference.py │ │ └── STV_SAT_location_stats.py │ ├── SAT_count_pois │ │ ├── SAT_count_pois_inference.py │ │ └── SAT_count_pois_stats.py │ ├── SAT_count_buildings │ │ ├── SAT_count_buildings_inference.py │ │ └── SAT_count_buildings_stats.py │ ├── eval_inference.py │ └── eval_analysis.py ├── mobility_prediction │ ├── run_parallel.py │ └── metrics.py ├── geoqa │ └── analyse_result.py ├── uniimage │ ├── sat_address │ │ ├── sat_address_stats.py │ │ ├── sat_address_inference.py │ │ └── sat_address_convert.py │ ├── sat_landuse │ │ ├── sat_landuse_stats.py │ │ ├── sat_landuse_inference.py │ │ └── sat_landuse_convert.py │ ├── stv_address │ │ ├── stv_address_stats.py │ │ ├── stv_address_inference.py │ │ └── stv_address_convert.py │ └── stv_landmark │ │ ├── stv_landmark_stats.py │ │ ├── stv_landmark_inference.py │ │ └── stv_landmark_convert.py ├── evaluate.py └── outdoor_navigation │ └── utils.py ├── assets └── UrbanLLaVA.png ├── .gitmodules ├── simulate ├── uni_image_basic_construct.bash ├── address.bash ├── all.bash ├── uni_image_mc_construct.bash ├── streetview │ ├── process_stv_near.py │ ├── osm_address_web_stv_my.py │ ├── stv_nearest_pois.py │ └── spatial_join.py ├── STV_pipeline.bash ├── address │ ├── osm_address_web_my.py │ └── interpolate_sat_coord.py ├── CoT_construct.bash ├── multi_image_mc_construct.bash ├── advance │ ├── cross-view │ │ ├── stv_in_sat_partition.py │ │ ├── SAT_stv_corres.py │ │ └── generate_poi_building_count.py │ └── CoT │ │ ├── stv-landmark-cot │ │ └── gpt_polish.py │ │ ├── stv_address_cot │ │ ├── gpt_polish.py │ │ └── gen_CoT_template.py │ │ ├── sat_address_cot │ │ ├── gen_CoT_template.py │ │ └── gpt_polish.py │ │ ├── sat_count_cot │ │ └── gpt_polish.py │ │ └── sat_cross_stv_cot │ │ └── gpt_polish.py ├── satelite │ ├── clip_shp_point.py │ ├── make_sat_shp.py │ ├── process_landuse.py │ ├── process_driving.py │ └── process_poi.py ├── annotate.bash ├── annotate │ ├── sat_landuse_template.py │ ├── sat_combine_address.py │ ├── stv_description_gpt.py │ └── stv_landmark_gpt.py ├── format │ ├── uni_mc_format_llava.py │ └── uni_mc_SAT_landuse.py └── SAT_pipeline.bash ├── examples ├── run_eval_general_inference_stats.sh ├── geoqa.sh ├── mobility.sh ├── navigation.sh ├── run_eval_multi_image_inference_stats.sh └── run_eval_uniimage_inference_stats.sh ├── serving ├── llm_serving.sh ├── test_llm_api.py └── vlm_serving.py ├── LICENSE ├── train └── vila_train_scripts │ └── sft_mix_v1.sh └── .gitignore /evaluate/cross_view/Readme.md: -------------------------------------------------------------------------------- 1 | # Cross_view eval 2 | -------------------------------------------------------------------------------- /assets/UrbanLLaVA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tsinghua-fib-lab/UrbanLLaVA/HEAD/assets/UrbanLLaVA.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "train/VILA"] 2 | path = train/VILA 3 | url = https://github.com/NVlabs/VILA.git 4 | [submodule "evaluate/VLMEvalKit"] 5 | path = evaluate/VLMEvalKit 6 | url = https://github.com/open-compass/VLMEvalKit.git 7 | 
-------------------------------------------------------------------------------- /simulate/uni_image_basic_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | 5 | # Dependency: rs_osm_description_{city}_{zl}.csv 6 | # sat_address_combined_{city}_{zl}.csv 7 | # rs_grounding_selfmade_{zl}.csv 8 | # rs_landuse_description_{zl}.jsonl 9 | # stv_in_sat_address_deploy_{zl}.csv 10 | # stv_description.jsonl 11 | # stv_poi_landmark_update.jsonl 12 | 13 | for city in "${cities[@]}"; do 14 | echo "Formatting data to VILA format for $city" 15 | python ./format/uni_basic_llava.py --city $city --work_dir $work_dir 16 | done 17 | wait 18 | echo "Finish formatting data to VILA format" 19 | 20 | # Get llava/format/{city}_basic_all_data_llava.json, etc. -------------------------------------------------------------------------------- /examples/run_eval_general_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=5 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | MODELS=("GPT4o_MINI" "Llama-3-VILA1.5-8b") 10 | 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on general tasks" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | python -m evaluate.general.general_inference --model_name $MODEL --data_name $DATA_VERSION 17 | python -m evaluate.general.general_stats --model_name $MODEL 18 | done -------------------------------------------------------------------------------- /simulate/address.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | # Depend on SAT_{city}_{zl}.csv 5 | for city in "${cities[@]}"; do 6 | echo "Interpolating SAT coordinates (25 points for zl15, 9 for zl17) for $city" 7 | python ./address/interpolate_sat_coord.py --city $city --work_dir $work_dir 8 | done 9 | wait 10 | echo "Finish interpolating SAT coordinates (25 points for zl15, 9 for zl17)" 11 | # Get SAT_interpolate_{city}_{zl}.csv 12 | 13 | # Depend on SAT_interpolate_{city}_{zl}.csv 14 | for city in "${cities[@]}"; do 15 | echo "Getting addresses for interpolated SAT coordinates for $city" 16 | python ./address/osm_address_web_my.py --city $city --work_dir $work_dir 17 | done 18 | wait 19 | echo "Finish getting addresses for interpolated SAT coordinates" 20 | # Get SAT_interpolate_address_{city}_{zl}.csv -------------------------------------------------------------------------------- /examples/geoqa.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=0 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | echo "Start running geoqa" 17 | for MODEL in "${MODELS[@]}"; do 18 | echo "Current model: $MODEL" 19 | for CITY in
"${CITIES[@]}"; do 20 | echo "Current city: $CITY" 21 | python -m evaluate.geoqa.run_eval --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 22 | done 23 | done 24 | echo "Finish running geoqa" 25 | -------------------------------------------------------------------------------- /examples/mobility.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=1 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | echo "Start running mobility" 17 | for MODEL in "${MODELS[@]}"; do 18 | echo "Current model: $MODEL" 19 | for CITY in "${CITIES[@]}"; do 20 | echo "Current city: $CITY" 21 | python -m evaluate.mobility_prediction.llm_mob --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 22 | done 23 | done 24 | echo "Finish running mobility" 25 | -------------------------------------------------------------------------------- /examples/navigation.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=2 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | DATA_VERSION='all' 17 | 18 | echo "Start running navigation" 19 | for MODEL in "${MODELS[@]}"; do 20 | echo "Current model: $MODEL" 21 | for CITY in "${CITIES[@]}"; do 22 | echo "Current city: $CITY" 23 | python -m evaluate.outdoor_navigation.eval --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 24 | done 25 | done 26 | echo "Finish running navigation" 27 | -------------------------------------------------------------------------------- /simulate/all.bash: -------------------------------------------------------------------------------- 1 | echo "Run data curation pipeline" 2 | 3 | bash ./SAT_pipeline.bash 4 | echo "Finish SAT pipeline" 5 | 6 | bash ./STV_pipeline.bash 7 | echo "Finish STV pipeline" 8 | 9 | bash ./address.bash 10 | echo "Finish address querying" 11 | 12 | bash ./annotate.bash 13 | echo "Finish annotation" 14 | 15 | echo "Finish data preparation, start uni_image_basic_construct" 16 | 17 | bash ./uni_image_basic_construct.bash 18 | echo "Finish uni_image_basic_construct" 19 | 20 | echo "Finish uni_image_basic_construct, start uni_image_mc_construct" 21 | 22 | bash ./uni_image_mc_construct.bash 23 | echo "Finish uni_image_mc_construct" 24 | 25 | echo "Finish uni_image_mc_construct, start multi_image_mc_construct" 26 | 27 | bash ./multi_image_mc_construct.bash 28 | echo "Finish multi_image_mc_construct" 29 | 30 | echo "Finish multi_image_mc_construct, start CoT_construct" 31 | 32 | bash ./CoT_construct.bash 33 | echo "Finish CoT_construct" 34 | 35 | echo "Finish CoT_construct" 36 | echo "Finish data curation pipeline" -------------------------------------------------------------------------------- 
/serving/llm_serving.sh: -------------------------------------------------------------------------------- 1 | source /usr/local/anaconda3/bin/activate vllm 2 | export CUDA_VISIBLE_DEVICES=4 3 | 4 | USER="" 5 | API_KEY="" 6 | SERVER_IP="" 7 | SERVER_PORT=23199 8 | MODEL_NAME=llama3-8B 9 | MODEL_PATH=/path/Meta-Llama-3-8B-Instruct/ 10 | 11 | exec -a "vllm-$MODEL_NAME@$USER" python -m vllm.entrypoints.openai.api_server \ 12 | --served-model-name $MODEL_NAME \ 13 | --api-key $API_KEY \ 14 | --model $MODEL_PATH \ 15 | --trust-remote-code \ 16 | --host $SERVER_IP \ 17 | --port $SERVER_PORT \ 18 | --max-model-len 4096 \ 19 | --disable-log-stats \ 20 | --tensor-parallel-size 1 \ 21 | --gpu-memory-utilization 0.95 22 | 23 | # more settings please refer to the following docs 24 | # vllm installation https://docs.vllm.ai/en/latest/getting_started/installation.html 25 | # autoAWQ https://docs.vllm.ai/en/latest/quantization/auto_awq.html 26 | # vllm engine parameters: https://docs.vllm.ai/en/latest/models/engine_args.html 27 | # vllm openai server parameters: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html -------------------------------------------------------------------------------- /simulate/uni_image_mc_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | 5 | for city in "${cities[@]}"; do 6 | echo "Making sat_addr task data for $city" 7 | python ./format/uni_mc_SAT_addr.py --city $city --work_dir $work_dir 8 | done 9 | wait 10 | echo "Finish making sat_addr task data" 11 | 12 | for city in "${cities[@]}"; do 13 | echo "Making sat_landuse task data for $city" 14 | python ./format/uni_mc_SAT_landuse.py --city $city --work_dir $work_dir 15 | done 16 | wait 17 | echo "Finish making sat_landuse task data" 18 | 19 | for city in "${cities[@]}"; do 20 | echo "Making stv_addr task data for $city" 21 | python ./format/uni_mc_STV_addr.py --city $city --work_dir $work_dir 22 | done 23 | wait 24 | echo "Finish making stv_addr task data" 25 | 26 | for city in "${cities[@]}"; do 27 | echo "Making stv_landmark task data for $city" 28 | python ./format/uni_mc_STV_landmark.py --city $city --work_dir $work_dir 29 | done 30 | wait 31 | echo "Finish making stv_landmark task data" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 FIB LAB, Tsinghua University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /simulate/streetview/process_stv_near.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import argparse 3 | 4 | def process_stv_near(input_file, output_file): 5 | df = pd.read_csv(input_file) 6 | 7 | result = pd.DataFrame(columns=['image_name', 'feature_names']) 8 | 9 | for image_name, group in df.groupby('image_name'): 10 | feature_names = group['nearest_feature_name'].dropna().head(10) 11 | 12 | feature_names_str = ','.join(feature_names) 13 | 14 | result_tmp = pd.DataFrame({'image_name': [image_name], 'feature_names': [feature_names_str]}) 15 | result = pd.concat([result, result_tmp], ignore_index=True) 16 | 17 | result.to_csv(output_file, index=False) 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 22 | parser.add_argument('--work_dir', type=str, default='../../data/') 23 | args = parser.parse_args() 24 | city = args.city 25 | work_dir = args.work_dir 26 | for zl in ['zl15', 'zl17']: 27 | input_path = work_dir + f'dev-{city}/stv_in_sat_nearest_features_{city}_{zl}.csv' 28 | output_path = work_dir + f'dev-{city}/stv_in_sat_nearest_features_update_{city}_{zl}.csv' 29 | process_stv_near(input_path, output_path) 30 | -------------------------------------------------------------------------------- /evaluate/mobility_prediction/run_parallel.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from multiprocessing import Pool 3 | 4 | from .llm_mob import main 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--user_cnt', type=int, default=50) 11 | parser.add_argument('--traj_cnt', type=int, default=10) 12 | 13 | args = parser.parse_args() 14 | user_cnt = args.user_cnt # users 15 | sample_single_user = args.traj_cnt # trajectory for each user 16 | data_version="mini" 17 | split_path="citydata/mobility/checkin_split/" 18 | test_path="citydata/mobility/checkin_test_pk/" 19 | 20 | models = ["gpt4omini"] 21 | # models = [ 22 | # "gpt-3.5", "gpt-4", "meta-llama/Meta-Llama-3-70B-Instruct", "mistralai/Mixtral-8x22B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2", 23 | # "meta-llama/Meta-Llama-3-8B-Instruct", "deepseek-chat" 24 | # ] 25 | cities = [ 26 | "Beijing", "Cape", "London", "Moscow", "Mumbai", "Nairobi", "NewYork" ,"Paris" ,"San", "Sao", "Shanghai", "Sydney","Tokyo" 27 | ] 28 | 29 | # main(city, model, user_cnt=50, sample_single_user=10, num_historical_stay=40, num_context_stay=5, split_path="./checkin_split/", test_path="./checkin_test_pk/", data_version="all") 30 | para_group = [] 31 | for c in cities: 32 | for m in models: 33 | para_group.append([c, m, user_cnt, sample_single_user, 40, 5, split_path, test_path, data_version]) 34 | 35 | with Pool(6) as pool: 36 | results = pool.starmap(main, para_group) -------------------------------------------------------------------------------- /simulate/STV_pipeline.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" 
"London") 2 | work_dir="../../data/" 3 | 4 | # Depend on /ThreeCityImage/{city}/StreetView/ 5 | # Depend on SAT_{city}_{zl}.csv 6 | for city in "${cities[@]}"; do 7 | echo "Finding corresponding streetview images for $city" 8 | python ./streetview/spatial_join.py --city $city --work_dir $work_dir 9 | done 10 | wait 11 | echo "Finish finding corresponding streetview images" 12 | # Get stv_in_sat_{city}_{zl}.csv 13 | # Get sampled_stv_images/ 14 | 15 | # Depend on stv_in_sat_{city}_{zl}.csv 16 | for city in "${cities[@]}"; do 17 | echo "Querying address for streetview images for $city" 18 | python ./streetview/osm_address_web_stv_my.py --city $city --work_dir $work_dir 19 | done 20 | wait 21 | echo "Finish querying address for streetview images" 22 | # Get stv_in_sat_address_deploy_{zl}.csv 23 | 24 | # Depend on stv_in_sat_{city}_{zl}.csv 25 | for city in "${cities[@]}"; do 26 | echo "Getting nearest POI for streetview images for $city" 27 | python ./streetview/stv_nearest_pois.py --city $city --work_dir $work_dir 28 | done 29 | wait 30 | echo "Finish getting nearest POI for streetview images" 31 | # Get stv_in_sat_nearest_features_{city}_{zl}.csv 32 | 33 | # Depend on stv_in_sat_nearest_features_{city}_{zl}.csv 34 | for city in "${cities[@]}"; do 35 | echo "Getting 10 POI for streetview images for $city" 36 | python ./streetview/process_stv_near.py --city $city --work_dir $work_dir 37 | done 38 | wait 39 | echo "Finish getting 10 POI for streetview images" 40 | # Get stv_in_sat_nearest_features_updated_{city}_{zl}.csv -------------------------------------------------------------------------------- /simulate/address/osm_address_web_my.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import trange 3 | 4 | import pandas as pd 5 | from geopy.geocoders import Nominatim 6 | from geopy.extra.rate_limiter import RateLimiter 7 | 8 | import argparse 9 | 10 | def reverse_geocode(lat, lon): 11 | geolocator = Nominatim(user_agent="MyGeocodingApp2",timeout=1,proxies="http://127.0.0.1:10190") 12 | geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1) 13 | location = geocode((lat, lon), exactly_one=True,language='en') 14 | return location.address if location else None 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 19 | parser.add_argument('--work_dir', type=str, default='../../data/') 20 | args = parser.parse_args() 21 | city = args.city 22 | work_dir = args.work_dir 23 | 24 | working_dir = work_dir + f'dev-{city}/' 25 | 26 | for zl in ['zl15','zl17']: 27 | input_path = working_dir + f'SAT_interpolate_{city}_{zl}.csv' 28 | assert os.path.exists(input_path) 29 | output_path = working_dir + f'SAT_interpolate_address_{city}_{zl}.csv' 30 | 31 | df = pd.read_csv(input_path) 32 | df['adr'] = 's' 33 | 34 | for i in trange(len(df)): 35 | lng = (df.at[i,'lng']) 36 | lat = (df.at[i,'lat']) 37 | 38 | try: 39 | address = reverse_geocode(lat, lng) 40 | df.at[i,'adr'] = str(address) 41 | except Exception as e: 42 | pass 43 | # continue 44 | df.to_csv(output_path, index=False) -------------------------------------------------------------------------------- /simulate/CoT_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../../data/" 3 | 4 | for city in "${cities[@]}"; do 5 | echo "Generating sat_count CoT with 
template for $city" 6 | python ./advance/CoT/sat_address_cot/gen_CoT_template.py --city $city --work_dir $work_dir 7 | echo "Using GPT to polish CoT for $city" 8 | python ./advance/CoT/sat_address_cot/gpt_polish.py --city $city --work_dir $work_dir 9 | done 10 | wait 11 | echo "Finish generating CoT for sat_address" 12 | 13 | for city in "${cities[@]}"; do 14 | echo "Generating sat_count CoT with template for $city" 15 | python ./advance/CoT/sat_count_cot/gen_CoT_template.py --city $city --work_dir $work_dir 16 | echo "Using GPT to polish CoT for $city" 17 | python ./advance/CoT/sat_count_cot/gpt_polish.py --city $city --work_dir $work_dir 18 | done 19 | wait 20 | echo "Finish generating CoT for sat_count" 21 | 22 | for city in "${cities[@]}"; do 23 | echo "Generating stv_address CoT with template for $city" 24 | python ./advance/CoT/stv_address_cot/gen_CoT_template.py --city $city --work_dir $work_dir 25 | echo "Using GPT to polish CoT for $city" 26 | python ./advance/CoT/stv_address_cot/gpt_polish.py --city $city --work_dir $work_dir 27 | done 28 | wait 29 | echo "Finish generating CoT for stv_address" 30 | 31 | for city in "${cities[@]}"; do 32 | echo "Generating sat_cross_stv CoT with template for $city" 33 | python ./advance/CoT/sat_cross_stv_cot/gen_CoT_template.py --city $city --work_dir $work_dir 34 | echo "Using GPT to polish CoT for $city" 35 | python ./advance/CoT/sat_cross_stv_cot/gpt_polish.py --city $city --work_dir $work_dir 36 | done 37 | wait 38 | echo "Finish generating CoT for sat_cross_stv" -------------------------------------------------------------------------------- /simulate/multi_image_mc_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../../data/" 3 | 4 | for city in "${cities[@]}"; do 5 | echo "Getting POIs and buildings number for $city" 6 | python ./advance/cross-view/generate_poi_building_count.py --city $city --work_dir $work_dir 7 | done 8 | wait 9 | echo "Finish getting POIs and buildings number" 10 | 11 | for city in "${cities[@]}"; do 12 | echo "Generating SAT-Count data for $city" 13 | python ./format/multi_SAT_count_llava.py --city $city --work_dir $work_dir 14 | done 15 | wait 16 | echo "Finish generating SAT-Count data" 17 | 18 | for city in "${cities[@]}"; do 19 | echo "Getting street view images and corresponding satellite images for $city" 20 | python ./advance/cross-view/SAT_stv_corres.py --city $city --work_dir $work_dir 21 | done 22 | wait 23 | echo "Finish getting street view images and corresponding satellite images" 24 | 25 | for city in "${cities[@]}"; do 26 | echo "Getting partition information between street view images and satellite images for $city" 27 | python ./advance/cross-view/stv_in_sat_partition.py --city $city --work_dir $work_dir 28 | done 29 | wait 30 | echo "Finish getting partition information between street view images and satellite images" 31 | 32 | for city in "${cities[@]}"; do 33 | echo "Generating cross SAT-STV data for $city" 34 | python ./format/multi_SAT_cross_STV_llava.py --city $city --work_dir $work_dir 35 | done 36 | wait 37 | echo "Finish generating cross SAT-STV data" 38 | 39 | for city in "${cities[@]}"; do 40 | echo "GeneratinG STV_compare data for $city" 41 | python ./format/multi_STV_compare_llava.py --city $city --work_dir $work_dir 42 | done 43 | wait 44 | echo "Finish generating STV_compare data" -------------------------------------------------------------------------------- 
/simulate/advance/cross-view/stv_in_sat_partition.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import argparse 4 | import tqdm 5 | from tqdm import trange 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 10 | parser.add_argument('--work_dir', type=str, default='../../data/') 11 | args = parser.parse_args() 12 | city = args.city 13 | work_dir = args.work_dir 14 | working_dir = work_dir + f"dev-{city}/" 15 | 16 | for zl in ['zl15','zl17']: 17 | 18 | # df = pd.read_csv('sat_stv_corr_'+zl+'_'+area+'.csv') 19 | df = pd.read_csv(working_dir + f'sat_stv_corr_{city}_{zl}.csv') 20 | 21 | df['partition'] = 's' 22 | df['x_min'] = 0 23 | df['x_max'] = 0 24 | df['y_min'] = 0 25 | df['y_max'] = 0 26 | 27 | for i in trange(len(df)): 28 | x_pixel = df.at[i,'x_pixel'] 29 | y_pixel = df.at[i,'y_pixel'] 30 | if x_pixel<=127: 31 | if y_pixel<=127: 32 | df.at[i,'partition'] = 'Top_left' 33 | else: 34 | df.at[i,'partition'] = 'Bottom_left' 35 | else: 36 | if y_pixel<=127: 37 | df.at[i,'partition'] = 'Top_right' 38 | else: 39 | df.at[i,'partition'] = 'Bottom_right' 40 | df.at[i,'x_min'] = max(0,x_pixel-10) 41 | df.at[i,'x_max'] = min(255,x_pixel+10) 42 | df.at[i,'y_min'] = max(0,y_pixel-10) 43 | df.at[i,'y_max'] = min(255,y_pixel+10) 44 | 45 | # df.to_csv('sat_stv_corr_'+zl+'_'+area+'_partition.csv',index=False) 46 | df.to_csv(working_dir + f'sat_stv_corr_{city}_{zl}_partition.csv',index=False) 47 | print(f'{working_dir}sat_stv_corr_{city}_{zl}_partition.csv saved. {len(df)} records processed.') 48 | 49 | -------------------------------------------------------------------------------- /simulate/streetview/osm_address_web_stv_my.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import trange 3 | 4 | import pandas as pd 5 | from geopy.geocoders import Nominatim 6 | from geopy.extra.rate_limiter import RateLimiter 7 | 8 | import argparse 9 | 10 | def reverse_geocode(lat, lon): 11 | geolocator = Nominatim(user_agent="MyGeocodingApp2",timeout=1,proxies="http://127.0.0.1:10190") 12 | geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1) 13 | location = geocode((lat, lon), exactly_one=True,language='en') 14 | return location.address if location else None 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 19 | parser.add_argument('--work_dir', type=str, default='../../data/') 20 | args = parser.parse_args() 21 | city = args.city 22 | work_dir = args.work_dir 23 | 24 | for zl in ['zl15','zl17']: 25 | working_dir = work_dir + f'dev-{city}/' 26 | 27 | input_path= working_dir + f'stv_in_sat_{city}_{zl}.csv' 28 | assert os.path.exists(input_path) 29 | 30 | output_path = working_dir + f'stv_in_sat_address_deploy_{zl}.csv' 31 | 32 | df = pd.read_csv(input_path) 33 | df['adr'] = 's' 34 | 35 | for i in trange(len(df)): 36 | # lng = (df.at[i,'tl_lng']+df.at[i,'bt_lng'])/2 37 | # lat = (df.at[i,'tl_lat']+df.at[i,'bt_lat'])/2 38 | # lng = (df.at[i,'lng']), 39 | # lat = (df.at[i,'lat']) 40 | lng = (df.at[i,'longitude']) 41 | lat = (df.at[i,'latitude']) 42 | 43 | # lat, lng= 51.58425973969619,0.13408350251072 # 39.882027527944864, 116.38185151260446 44 | try: 45 | address = reverse_geocode(lat, lng) 46 | # print(address) 47 | 
df.at[i,'adr'] = str(address) 48 | except Exception as e: 49 | pass 50 | # continue 51 | df.to_csv(output_path, index=False) -------------------------------------------------------------------------------- /serving/test_llm_api.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | 5 | #### Register your API key in the environment; do not write your key directly in the code 6 | #export SiliconFlow_API_KEY="xx" 7 | #export DeepInfra_API_KEY="xx" 8 | #export OpenAI_API_KEY="xx" 9 | 10 | #### Define your proxy 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | #### Select a platform 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", # https://siliconflow.cn/models 16 | "DeepInfra": "DeepInfra_API_KEY", # https://deepinfra.com/models 17 | "OpenAI": "OpenAI_API_KEY", # https://openai.com/api/pricing/ 18 | "vllm": "vllm_KEY" 19 | } 20 | API_URL_MAPPING = { 21 | "siliconflow": "https://api.siliconflow.cn/v1", 22 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 23 | "OpenAI": "https://api.openai.com/v1", 24 | "vllm": "http://your_server_ip:port/v1", 25 | } 26 | 27 | 28 | API_TYPE = "OpenAI" 29 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 30 | API_URL = API_URL_MAPPING[API_TYPE] 31 | model_name = "google/gemma-2-9b-it" 32 | 33 | 34 | #### OpenAI client 35 | if API_TYPE == "OpenAI": 36 | model_name = "gpt-3.5-turbo-0125" 37 | client = OpenAI( 38 | base_url=API_URL, 39 | api_key=API_KEY, 40 | http_client=httpx.Client(proxies=PROXY) 41 | ) 42 | elif API_TYPE == "siliconflow": 43 | client = OpenAI( 44 | base_url=API_URL, 45 | api_key=API_KEY 46 | ) 47 | elif API_TYPE=="DeepInfra": 48 | client = OpenAI( 49 | base_url=API_URL, 50 | api_key=API_KEY, 51 | http_client=httpx.Client(proxies=PROXY), 52 | ) 53 | elif API_TYPE=="vllm": 54 | client = OpenAI( 55 | base_url=API_URL, 56 | api_key=API_KEY 57 | ) 58 | 59 | 60 | #### One example 61 | dialogs = [{ 62 | "role": "user", 63 | "content": "Who are you? Please output your name with JSON format."
64 | }] 65 | 66 | completion = client.chat.completions.create( 67 | model=model_name, 68 | messages=dialogs, 69 | max_tokens=100, 70 | temperature=0 71 | ) 72 | 73 | print(completion.choices[0].message.content) 74 | -------------------------------------------------------------------------------- /simulate/advance/cross-view/SAT_stv_corres.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import argparse 3 | from tqdm import tqdm, trange 4 | 5 | x_pi = 3.14159265358979324 * 3000.0 / 180.0 6 | pi = 3.1415926535897932384626 # π 7 | a = 6378245.0 # Semi-major axis of the Krasovsky 1940 ellipsoid (GCJ-02) 8 | ee = 0.00669342162296594323 # Square of eccentricity (GCJ-02 constants; not used below) 9 | 10 | 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 15 | parser.add_argument('--work_dir', type=str, default='../../data/') 16 | args = parser.parse_args() 17 | city = args.city 18 | work_dir = args.work_dir 19 | working_dir = work_dir + f"dev-{city}/" 20 | 21 | for zl in ['zl15','zl17']: 22 | df_stv = pd.read_csv(working_dir + f'stv_in_sat_{city}_{zl}.csv') 23 | df_sat = pd.read_csv(working_dir + f'SAT_{city}_{zl}.csv') 24 | 25 | df_stv['x_pixel'] = 0 26 | df_stv['y_pixel'] = 0 27 | df_stv['sat'] = 's' 28 | 29 | for i_stv in trange(len(df_stv)): 30 | lng = df_stv.at[i_stv,'longitude'] 31 | lat = df_stv.at[i_stv,'latitude'] 32 | 33 | for i_sat in range(len(df_sat)): 34 | sat_tl_lat = df_sat.at[i_sat,'tl_lat'] #tl_lat,tl_lng,bt_lat,bt_lng 35 | sat_tl_lng = df_sat.at[i_sat,'tl_lng'] 36 | sat_bt_lat = df_sat.at[i_sat,'bt_lat'] 37 | sat_bt_lng = df_sat.at[i_sat,'bt_lng'] 38 | 39 | y_pixel = int(255*((sat_tl_lat-lat)/(sat_tl_lat-sat_bt_lat))) 40 | x_pixel = int(255*((lng-sat_tl_lng)/(sat_bt_lng-sat_tl_lng))) 41 | # print(x_pixel, y_pixel) 42 | if 0<=x_pixel and x_pixel<=255 and 0<=y_pixel and y_pixel<=255: 43 | df_stv.at[i_stv,'x_pixel'] = x_pixel 44 | df_stv.at[i_stv,'y_pixel'] = y_pixel 45 | df_stv.at[i_stv,'sat_img_name'] = df_sat.at[i_sat,'img_name'] 46 | break 47 | df_stv.to_csv(working_dir + f'sat_stv_corr_{city}_{zl}.csv', index=False) 48 | print(f'{working_dir}sat_stv_corr_{city}_{zl}.csv saved. {len(df_stv)} records processed.') 49 | 50 | -------------------------------------------------------------------------------- /simulate/satelite/clip_shp_point.py: -------------------------------------------------------------------------------- 1 | # Function: Clip POI/driving/landuse/natural/buildings data from a GeoJSON file based on the shapefile of the region.
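# Note: the clipping below uses geometry.intersects(), which keeps any feature that
# touches a tile, so a road or landuse polygon crossing a tile boundary will appear
# in every tile it intersects rather than being cut at the tile edge.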
2 | 3 | import geopandas as gpd 4 | import os 5 | from tqdm import tqdm 6 | import argparse 7 | 8 | def clip(shp_file, geojson_file, output_dir, typ): 9 | 10 | shp_gdf = gpd.read_file(shp_file) 11 | geojson_gdf = gpd.read_file(geojson_file) 12 | 13 | if shp_gdf.crs is None: 14 | shp_gdf = shp_gdf.set_crs(epsg=4326) 15 | 16 | if shp_gdf.crs != geojson_gdf.crs: 17 | geojson_gdf = geojson_gdf.to_crs(shp_gdf.crs) 18 | 19 | os.makedirs(output_dir, exist_ok=True) 20 | 21 | for index, polygon in shp_gdf.iterrows(): 22 | clipped_gdf = geojson_gdf[geojson_gdf.geometry.intersects(polygon.geometry)] 23 | 24 | # output_filename = os.path.join(output_dir, f"clipped_part_{index}.geojson") 25 | output_filename = os.path.join(output_dir, f"clipped_{typ}_{polygon['region_nam'].split('.')[0]}.geojson") 26 | 27 | if not clipped_gdf.empty: 28 | clipped_gdf.to_file(output_filename, driver="GeoJSON") 29 | print(f"Saved clipped data to {output_filename}") 30 | else: 31 | print(f"No intersecting features for Polygon {index}") 32 | 33 | 34 | if __name__ == "__main__": 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 38 | parser.add_argument('--work_dir', type=str, default='../../data/') 39 | args = parser.parse_args() 40 | city = args.city 41 | work_dir = args.work_dir 42 | 43 | for zl in ["zl15", "zl17"]: 44 | shp_file = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.shp') 45 | output_dir = os.path.join(work_dir, f'dev-{city}/clipped_results_{zl}') 46 | 47 | for typ in ['buildings','pois','landuse','natural','driving']: 48 | # TODO: Change the path to the actual geojson file 49 | geojson_dir = "....../ThreeCityImage/city_geojson_three_cities" 50 | geojson_file = os.path.join(geojson_dir, f'{city}_{typ}.geojson') 51 | clip(shp_file, geojson_file, output_dir, typ) -------------------------------------------------------------------------------- /evaluate/cross_view/STV_compare/STV_compare_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_compare multiple-choice task.
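# Illustrative invocation, following the `python -m evaluate.cross_view.<task>.<task>_inference`
# pattern used in examples/run_eval_multi_image_inference_stats.sh (STV_compare itself is not
# wired into that driver script, so this exact command is an assumption):
# python -m evaluate.cross_view.STV_compare.STV_compare_inference --city_name Beijing --model_name GPT4o_MINI --data_name mini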
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_compare', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_mapping/STV_SAT_mapping_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_SAT_mapping multiple-choice task.
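# Responses are written to MULTI_IMAGE_FOLDER/<task_name>/<city_name>/<model_name>/
# {city_name}_{task_name}_response.json and scored afterwards by STV_SAT_mapping_stats.py.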
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_mapping', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_location/STV_SAT_location_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_SAT_location multiple-choice task.
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_location', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /simulate/annotate.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | model="gpt-4o-mini-2024-07-18" 4 | 5 | # Depend on SAT_interpolate_address_{city}_{zl}.csv 6 | for city in "${cities[@]}"; do 7 | echo "Getting SAT combined address for $city" 8 | python ./annotate/sat_combine_address.py --city $city --work_dir $work_dir --model_name $model 9 | done 10 | wait 11 | echo "Finish getting SAT combined address" 12 | # Get sat_address_combined_{city}_{zl}.csv 13 | 14 | # Depend on SAT_interpolate_address_{city}_{zl}.csv 15 | # Depend on short_clipped_results_{zl}/driving_{img_name}.txt 16 | # Depend on short_clipped_results_{zl}/pois_{img_name}.txt 17 | for city in "${cities[@]}"; do 18 | echo "Getting SAT scene description for $city" 19 | python ./annotate/sat_scene_description.py --city $city --work_dir $work_dir --model_name $model 20 | done 21 | wait 22 | echo "Finish getting SAT scene description" 23 | # Get rs_osm_description_{city}_{zl}.csv 24 | 25 | # Depend on SAT_{city}_{zl}.csv 26 | for city in "${cities[@]}"; do 27 | echo "Getting SAT grounding description for $city" 28 | python ./annotate/sat_generate_grounding_template.py --city $city --work_dir $work_dir 29 | done 30 | wait 31 | echo "Finish getting SAT grounding description" 32 | # Get rs_grounding_selfmade_{zl}.csv 33 | 34 | # Depend on SAT_{city}_{zl}.csv 35 | # Depend on 
short_clipped_results_{zl}/landuse_{img_name}.txt 36 | for city in "${cities[@]}"; do 37 | echo "Getting SAT landuse for $city" 38 | python ./annotate/sat_landuse_template.py --city $city --work_dir $work_dir 39 | done 40 | wait 41 | echo "Finish getting SAT landuse" 42 | # Get rs_landuse_description_{zl}.jsonl 43 | 44 | # Depend on stv_in_sat_{city}_{zl}.csv 45 | for city in "${cities[@]}"; do 46 | echo "Getting STV scene description for $city" 47 | python ./annotate/stv_description_gpt.py --city $city --work_dir $work_dir --model_name $model 48 | done 49 | wait 50 | echo "Finish getting STV scene description" 51 | # Get stv_description.jsonl 52 | 53 | # Depend on stv_in_sat_nearest_features_update_{city}_{zl}.csv 54 | for city in "${cities[@]}"; do 55 | echo "Getting STV landmark for $city" 56 | python ./annotate/stv_landmark_gpt.py --city $city --work_dir $work_dir --model_name $model 57 | done 58 | wait 59 | echo "Finish getting STV landmark" 60 | # Get stv_poi_landmark_update.jsonl -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_pois/SAT_count_pois_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the SAT_count_pois multiple-choice task. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_pois', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | for zl in ["zl15", "zl17"]: 30 | 31 | print("Load the image list") 32 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 33 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}_test.json") 34 | with open(path, "r") as f: 35 | data = json.load(f) 36 | 37 | if args.data_name == "mini": 38 | data = data[:10] 39 | 40 | response = [] 41 | for d in tqdm(data): 42 | prompt = d["prompt"] 43 | reference = d["reference"] 44 | img_path = d["image"] 45 | 46 | ret = model.generate(img_path + [prompt]) 47 | response.append({ 48 | "image": img_path, 49 | "prompt": prompt, 50 | "reference": reference, 51 | "response": ret 52 | }) 53 | 54 | print("Save the response") 55 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 56 | os.makedirs(output_path, exist_ok=True) 57 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 58 | json.dump(response, f, indent=4, ensure_ascii=False) 59 | 60 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /simulate/annotate/sat_landuse_template.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2
| import os 3 | import json 4 | import argparse 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 9 | parser.add_argument('--work_dir', type=str, default='../../data/') 10 | args = parser.parse_args() 11 | city = args.city 12 | work_dir = args.work_dir 13 | 14 | # work_dir = "../../data/dev-Beijing/" 15 | work_dir = work_dir + f"dev-{city}/" 16 | for zl in ["zl15", "zl17"]: 17 | if os.path.exists(work_dir + f"rs_landuse_description_{zl}.jsonl"):  # remove stale output from the data dir (not the CWD) before appending 18 | os.remove(work_dir + f"rs_landuse_description_{zl}.jsonl") 19 | print(f"Removed rs_landuse_description_{zl}.jsonl") 20 | df = pd.read_csv(work_dir + f"SAT_{city}_{zl}.csv") 21 | for cnt in range(len(df)): 22 | img_name = df.at[cnt,'img_name'].split('.')[0] 23 | 24 | if not os.path.exists(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 25 | continue 26 | 27 | with open(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 28 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 29 | lines = file.readlines() 30 | 31 | for line in lines: 32 | parts = line.split('location:') 33 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 34 | coordinates = parts[1].strip() 35 | 36 | question = f"You are provided a 256*256 satellite image. What is the landuse type in region {coordinates}?" 37 | answer = f"{landuse_type}" 38 | 39 | print(f"Q: {question}") 40 | print(f"A: {answer}") 41 | with open(work_dir + f"rs_landuse_description_{zl}.jsonl", "a") as fout: 42 | value = { 43 | "img_name": img_name, 44 | "Q": question, 45 | "A": answer 46 | } 47 | fout.write(json.dumps(value, ensure_ascii=False) + "\n") 48 | print(f"Finished generating rs_landuse_description_{zl}.jsonl") -------------------------------------------------------------------------------- /simulate/satelite/make_sat_shp.py: -------------------------------------------------------------------------------- 1 | # Function: Generate a shapefile for satellite images in order to visualize them in GIS.
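# Note: dBase (.dbf) field names are limited to 10 characters, so the
# 'region_name' field created below ends up stored as 'region_nam' --
# the key that clip_shp_point.py reads back from these shapefiles.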
2 | 3 | import shapefile # Using pyshp 4 | import pandas as pd 5 | import argparse 6 | import os 7 | 8 | def make_sat_shp(data_address, csv_address) -> None: 9 | sat = pd.read_csv(csv_address, header=0, sep=',') 10 | file = shapefile.Writer(data_address) 11 | file.field('num') 12 | file.field('region_name') 13 | file.field('type', 'C', '40') 14 | 15 | for i in range(len(sat)): 16 | # Extract image name 17 | img_name = sat.at[i, 'img_name'] 18 | 19 | # Define the polygon coordinates 20 | polygon = [ 21 | [sat.at[i, 'tl_lng'], sat.at[i, 'tl_lat']], 22 | [sat.at[i, 'bt_lng'], sat.at[i, 'tl_lat']], 23 | [sat.at[i, 'bt_lng'], sat.at[i, 'bt_lat']], 24 | [sat.at[i, 'tl_lng'], sat.at[i, 'bt_lat']], 25 | [sat.at[i, 'tl_lng'], sat.at[i, 'tl_lat']] # Close the polygon 26 | ] 27 | 28 | # Add the polygon to the shapefile 29 | file.poly([polygon]) 30 | file.record(str(i), img_name, 'Polygon') 31 | 32 | file.close() 33 | 34 | # Write the projection file with WKT for EPSG:4326 35 | wkt = """GEOGCS["WGS 84", 36 | DATUM["WGS_1984", 37 | SPHEROID["WGS 84",6378137,298.257223563, 38 | AUTHORITY["EPSG","7030"]], 39 | AUTHORITY["EPSG","6326"]], 40 | PRIMEM["Greenwich",0, 41 | AUTHORITY["EPSG","8901"]], 42 | UNIT["degree",0.0174532925199433, 43 | AUTHORITY["EPSG","9122"]], 44 | AUTHORITY["EPSG","4326"]]""" 45 | 46 | # Write the WKT to the .prj file 47 | with open(data_address.replace(".shp", ".prj"), 'w') as f: 48 | f.write(wkt) 49 | 50 | print(f"Shapefile and projection file have been created at {data_address}") 51 | 52 | if __name__ == '__main__': 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 55 | parser.add_argument('--work_dir', type=str, default='../../data/') 56 | args = parser.parse_args() 57 | 58 | city = args.city 59 | work_dir = args.work_dir 60 | 61 | for zl in ["zl15", "zl17"]: 62 | csv_path = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.csv') 63 | shp_path = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.shp') 64 | make_sat_shp(shp_path, csv_path) -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_buildings/SAT_count_buildings_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | from tqdm import tqdm 5 | import json 6 | 7 | from config import MULTI_IMAGE_FOLDER 8 | from serving.vlm_serving import VLMWrapper 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 13 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 14 | parser.add_argument('--task_name', type=str, default='SAT_count_buildings', help='task name') 15 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 16 | args = parser.parse_args() 17 | 18 | model_name = args.model_name 19 | city_name = args.city_name 20 | task_name = args.task_name 21 | 22 | print("Load the model") 23 | model_wrapper = VLMWrapper(args.model_name) 24 | model = model_wrapper.get_vlm_model() 25 | 26 | for zl in ["zl15", "zl17"]: 27 | 28 | print("Load the image list") 29 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 30 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}_test.json") 31 | with open(path, "r") as f: 
32 | data = json.load(f) 33 | 34 | if args.data_name == "mini": 35 | data = data[:10] 36 | 37 | response = [] 38 | for d in tqdm(data): 39 | prompt = d["prompt"] 40 | reference = d["reference"] 41 | img_path = d["image"] 42 | # img_name = img_path.split("/")[-1] 43 | 44 | # assert os.path.exists(img_path), f"Image {img_path} not found" 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_compare/STV_compare_stats.py: -------------------------------------------------------------------------------- 1 | # This script computes accuracy statistics for the STV_compare multiple-choice responses. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_compare', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/cross_view/eval_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | # from setproctitle import
setproctitle 5 | import jsonlines 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import CROSS_VIEW_PATH, CROSS_VIEW_RESULTS_PATH 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') # InternVL2-40B, GPT4o_MINI, Qwen2-VL-2B-Instruct 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') # Beijing, London, NewYork 17 | parser.add_argument('--data_name', type=str, default="mini", help='dataset size') 18 | parser.add_argument('--task_name', type=str, default='IR', help='task name', choices=["IR", "CL","SC_Buildings","SC_POIs"]) 19 | # Task names: IR = Image Retrieval, CL = Camera Localization, SC_Buildings = Scene Comparison (buildings), SC_POIs = Scene Comparison (POIs: restaurant, education, shopping) 20 | 21 | args = parser.parse_args() 22 | 23 | print("Load the model") 24 | model_wrapper = VLMWrapper(args.model_name) 25 | model = model_wrapper.get_vlm_model() 26 | 27 | print("Load the test data jsonl") 28 | 29 | with jsonlines.open(os.path.join(CROSS_VIEW_PATH, f"{args.city_name}_{args.task_name}_eval.jsonl")) as reader: 30 | eval_data = list(reader) 31 | 32 | if args.data_name == 'mini': 33 | eval_data = eval_data[:int(0.1*len(eval_data))] 34 | 35 | 36 | response_list = [] 37 | 38 | ### Model inference 39 | for obj in eval_data: 40 | img_names = obj['image'] 41 | prompt = obj['conversations'][0]['value'].replace("<image>", "") 42 | GT = obj['conversations'][1]['value'] 43 | ret = model.generate(img_names+[prompt]) 44 | response_list.append([img_names, ret, GT]) 45 | 46 | os.makedirs(CROSS_VIEW_RESULTS_PATH, exist_ok=True) 47 | # Save the response 48 | with open(os.path.join(CROSS_VIEW_RESULTS_PATH, f"{args.city_name}_{args.model_name}_{args.task_name}_eval.jsonl"), "w") as fout: 49 | 50 | for i in range(len(response_list)): 51 | value = { 52 | "img_name": response_list[i][0], 53 | "text": response_list[i][1], 54 | "GT": response_list[i][2], ## saving GT for quick human evaluation 55 | } 56 | fout.write(json.dumps(value) + "\n") 57 | 58 | model_wrapper.clean_proxy() 59 | -------------------------------------------------------------------------------- /examples/run_eval_multi_image_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=3 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | CITIES=('Beijing' 'NewYork' 'London') 10 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on SAT_count_buildings task" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | for CITY in "${CITIES[@]}"; do 17 | echo "Current city: $CITY" 18 | python -m evaluate.cross_view.SAT_count_buildings.SAT_count_buildings_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 19 | python -m evaluate.cross_view.SAT_count_buildings.SAT_count_buildings_stats --city_name $CITY --model_name $MODEL 20 | done 21 | done 22 | 23 | echo "Start running evaluation on SAT_count_pois task" 24 | for MODEL in "${MODELS[@]}"; do 25 | echo "Current model: $MODEL" 26 | for CITY in "${CITIES[@]}"; do 27 | echo "Current
city: $CITY" 28 | python -m evaluate.cross_view.SAT_count_pois.SAT_count_pois_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 29 | python -m evaluate.cross_view.SAT_count_pois.SAT_count_pois_stats --city_name $CITY --model_name $MODEL 30 | done 31 | done 32 | 33 | echo "Start running evaluation on STV_SAT_location task" 34 | for MODEL in "${MODELS[@]}"; do 35 | echo "Current model: $MODEL" 36 | for CITY in "${CITIES[@]}"; do 37 | echo "Current city: $CITY" 38 | python -m evaluate.cross_view.STV_SAT_location.STV_SAT_location_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 39 | python -m evaluate.cross_view.STV_SAT_location.STV_SAT_location_stats --city_name $CITY --model_name $MODEL 40 | done 41 | done 42 | 43 | echo "Start running evaluation on STV_SAT_mapping task" 44 | for MODEL in "${MODELS[@]}"; do 45 | echo "Current model: $MODEL" 46 | for CITY in "${CITIES[@]}"; do 47 | echo "Current city: $CITY" 48 | python -m evaluate.cross_view.STV_SAT_mapping.STV_SAT_mapping_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 49 | python -m evaluate.cross_view.STV_SAT_mapping.STV_SAT_mapping_stats --city_name $CITY --model_name $MODEL 50 | done 51 | done -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_location/STV_SAT_location_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_location', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": 
num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_mapping/STV_SAT_mapping_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_mapping', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /simulate/format/uni_mc_format_llava.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | import argparse 5 | random.seed(0) 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 10 | parser.add_argument('--work_dir', type=str, default='../../data/') 11 | 12 | args = parser.parse_args() 13 | city_name = args.city_name 14 | work_dir = args.work_dir 15 | 16 | cur_dir = os.path.join(work_dir, f"dev-{city_name}/") 17 | 18 | output_dir = os.path.join(cur_dir, "llava_uniimage_mc_train") 19 | os.makedirs(output_dir, exist_ok=True) 20 | 21 | task_list = ['sat_address_mc', 'sat_landuse_mc', 'stv_address_mc', 'stv_landmark_mc'] 22 | summary_info = {} 23 | 24 | for task_name in task_list: 25 | unformatted_file_path = os.path.join(cur_dir, "uni_image_data", task_name, city_name, 
f"{city_name}_{task_name}_train.json") 26 | 27 | with open(unformatted_file_path, 'r') as f: 28 | unformatted_data = json.load(f) 29 | 30 | if len(unformatted_data) > 20000: 31 | print(f"Length of {task_name} is {len(unformatted_data)}, truncated to 20000") 32 | unformatted_data = random.sample(unformatted_data, 20000) 33 | 34 | formatted_data = [] 35 | for item in unformatted_data: 36 | formatted_item = {} 37 | image_name = item['image'].split('/')[-1] 38 | formatted_item['id'] = image_name 39 | formatted_item['image'] = item['image'] 40 | formatted_item['conversations'] = [ 41 | { 42 | "from": "human", 43 | 'value': item['prompt'] 44 | }, 45 | { 46 | "from": "gpt", 47 | "value": item['reference'] 48 | } 49 | ] 50 | 51 | formatted_data.append(formatted_item) 52 | 53 | output_file_path = os.path.join(output_dir, f"{city_name}_{task_name}_train_llava.json") 54 | with open(output_file_path, 'w') as f: 55 | json.dump(formatted_data, f, indent=4, ensure_ascii=False) 56 | print(f"Formatted data for {task_name} saved to {output_file_path}") 57 | summary_info[task_name] = len(formatted_data) 58 | 59 | summary_file_path = os.path.join(output_dir, "summary.json") 60 | with open(summary_file_path, 'w') as f: 61 | json.dump(summary_info, f, indent=4, ensure_ascii=False) 62 | print(f"Summary info saved to {summary_file_path}") -------------------------------------------------------------------------------- /train/vila_train_scripts/sft_mix_v1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set the master address to localhost for single node 4 | export MASTER_ADDR="127.0.0.1" 5 | export CURRENT_RANK=0 6 | 7 | # Since it's single node, we don't need worker_list or SLURM_JOB_NODELIST 8 | n_node=1 9 | 10 | echo "MASTER_ADDR="$MASTER_ADDR 11 | echo "Single node setup, no SLURM required." 
12 | 13 | 14 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 15 | # TODO: Set the output directory 16 | OUTPUT_DIR="" 17 | mkdir -p $OUTPUT_DIR 18 | # TODO: Set the path to the training script 19 | CODE_PATH=//train/VILA/llava/train/train_mem.py 20 | # TODO: Make sure the actual data mixture is correct 21 | DATA_MIX=llava_instruct+sharegpt4v_gpt4_100k+UrbanLLaVA_multi+UrbanLLaVA_single+UrbanLLaVA_text2img2text+UrbanLLaVA_img2text2img+UrbanLLaVA_citywalk_vison 22 | MODEL_MAX_LENGTH=2048 23 | bs=8 # Adjust batch size as needed for your single GPU 24 | echo "number of nodes:" $n_node 25 | echo "per device batch size:" $bs 26 | echo "node rank:" $CURRENT_RANK 27 | NUM_GPUS=$(echo $CUDA_VISIBLE_DEVICES | tr ',' ' ' | wc -w) 28 | 29 | # TODO: Set the path to the model and the vision tower 30 | torchrun --nnodes=$n_node --nproc_per_node=$NUM_GPUS --master_port=25001 \ 31 | --master_addr $MASTER_ADDR --node_rank=$CURRENT_RANK \ 32 | $CODE_PATH \ 33 | --deepspeed ./zero3.json \ 34 | --model_name_or_path //Llama-3-VILA1.5-8B \ 35 | --version llama_3 \ 36 | --data_mixture $DATA_MIX \ 37 | --vision_tower //siglip-so400m-patch14-384 \ 38 | --mm_vision_select_feature cls_patch \ 39 | --mm_projector mlp_downsample \ 40 | --tune_vision_tower False \ 41 | --tune_mm_projector True \ 42 | --tune_language_model True \ 43 | --mm_vision_select_layer -2 \ 44 | --mm_use_im_start_end False \ 45 | --mm_use_im_patch_token False \ 46 | --image_aspect_ratio resize \ 47 | --bf16 True \ 48 | --output_dir $OUTPUT_DIR \ 49 | --num_train_epochs 1 \ 50 | --per_device_train_batch_size $bs \ 51 | --per_device_eval_batch_size 4 \ 52 | --gradient_accumulation_steps 2 \ 53 | --evaluation_strategy "no" \ 54 | --save_strategy "steps" \ 55 | --save_steps 500 \ 56 | --save_total_limit 1 \ 57 | --learning_rate 1e-4 \ 58 | --weight_decay 0. 
\ 59 | --warmup_ratio 0.03 \ 60 | --lr_scheduler_type "cosine" \ 61 | --logging_steps 1 \ 62 | --tf32 True \ 63 | --model_max_length $MODEL_MAX_LENGTH \ 64 | --gradient_checkpointing True \ 65 | --dataloader_num_workers 16 \ 66 | --lazy_preprocess True \ 67 | --vflan_no_system_prompt True \ 68 | --report_to tensorboard 69 | -------------------------------------------------------------------------------- /evaluate/geoqa/analyse_result.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | 5 | def get_result(result_files): 6 | final_result = {} 7 | for result_file in result_files: 8 | result = pd.read_csv(os.path.join(file_path,result_file)) 9 | for _, row in result.iterrows(): 10 | # print(result_file) 11 | task = row['task_name'] 12 | model = row['model_name'] 13 | acc = row['accuracy'] 14 | if model not in final_result: 15 | final_result[model] = {} 16 | for cat,tasks in map_task.items(): 17 | if cat not in final_result[model]: 18 | final_result[model][cat] = [] 19 | if task in tasks: 20 | final_result[model][cat].append(acc) 21 | break 22 | 23 | final_result2 = {} 24 | for model, sub_dict in final_result.items(): 25 | if model not in final_result2: 26 | final_result2[model]={} 27 | for cat, acc_list in sub_dict.items(): 28 | if cat not in final_result2[model]: 29 | final_result2[model][cat] = {'mean':0,'var':0} 30 | final_result2[model][cat]['result'] = f"{format(np.mean(acc_list),'.4f')}" 31 | # ±{format(np.var(acc_list),'.4f')} 32 | 33 | # columns = ['node', 'landmark', 'path', 'districts', 'boundary', 'others'] 34 | columns = ['model_name', 'node', 'landmark', 'path', 'districts', 'boundary', 'others'] 35 | 36 | index = list(final_result2.keys()) 37 | df = pd.DataFrame(index=index, columns=columns) 38 | for model, features in final_result2.items(): 39 | df.at[model, 'model_name'] = model 40 | for feature, results in features.items(): 41 | df.at[model, feature] = results['result'] 42 | return df 43 | 44 | 45 | if __name__ == '__main__': 46 | file_path = "results/geo_knowledge_result" 47 | output_path = "results/geo_knowledge_result" 48 | result_files = os.listdir(file_path) 49 | map_task = { 50 | "node": ["poi2coor", "AOI_POI_road4", "poi2addr", "poi2type", "type2poi" ], 51 | "landmark": ["landmark_env", "landmark_path"], 52 | "path": ["road_link", "road_od", "road_length", "road_arrived_pois"], 53 | "districts": ["aoi2addr", "AOI_POI5", "AOI_POI6", "aoi_group", "aoi2type", "type2aoi", "aoi_poi", "poi_aoi", "districts_poi_type"], 54 | "boundary": ["aoi_boundary_poi", "AOI_POI_road1", "AOI_POI_road2", "AOI_POI_road3", "boundary_road"], 55 | "others": ["AOI_POI", "AOI_POI2", "AOI_POI3", "AOI_POI4"] 56 | } 57 | result = get_result(result_files) 58 | result.to_csv(os.path.join(output_path,"geoqa_benchmark_result.csv")) 59 | 60 | -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_pois/SAT_count_pois_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
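# [Illustrative sketch, not part of the original repository] Every stats
# script in this dump relies on `extract_choice` from serving.llm_api, which
# the dump does not include. A minimal plausible implementation, assuming it
# returns the first option letter (in option order) that appears as a
# standalone token in the response, might look like this; the real helper may
# parse more strictly or fall back to an LLM:
import re

def extract_choice(response, options):
    # Return the first option in `options` that occurs as a standalone
    # token in the model response (e.g. "A", "(B)", "Answer: C"), else None.
    for opt in options:
        if re.search(rf"\b{re.escape(opt)}\b", response):
            return opt
    return None
# (The real import from serving.llm_api below supersedes this sketch at runtime.)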
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_pois', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | for zl in ["zl15", "zl17"]: 25 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 26 | 27 | with open(path, "r") as f: 28 | data = json.load(f) 29 | 30 | correct = 0 31 | num_A = 0 32 | num_B = 0 33 | num_C = 0 34 | num_D = 0 35 | 36 | for d in data: 37 | prompt = d["prompt"] 38 | reference = d["reference"] 39 | response = d["response"] 40 | img_name = d["image"] 41 | 42 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 43 | 44 | if model_choice == reference: 45 | correct += 1 46 | 47 | if model_choice == "A": 48 | num_A += 1 49 | elif model_choice == "B": 50 | num_B += 1 51 | elif model_choice == "C": 52 | num_C += 1 53 | elif model_choice == "D": 54 | num_D += 1 55 | 56 | 57 | print("For Response file:", path) 58 | print("Accuracy:", correct / len(data)) 59 | print("Num A:", num_A) 60 | print("Num B:", num_B) 61 | print("Num C:", num_C) 62 | print("Num D:", num_D) 63 | print() 64 | 65 | # save the stats 66 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 67 | 68 | os.makedirs(stats_folder, exist_ok=True) 69 | 70 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 71 | json.dump({ 72 | "Length of Data": len(data), 73 | "Accuracy": correct / len(data), 74 | "Num A": num_A, 75 | "Num B": num_B, 76 | "Num C": num_C, 77 | "Num D": num_D 78 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | # It aggregates per-model accuracy for the zl17 street-view address task into a shared summary CSV. 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | # NOTE: csv_path_zl17 was referenced but never defined in the original script; the summary-CSV location below is an assumed fix. 26 | csv_path_zl17 = os.path.join(RESULTS_PATH, task_name, city, f"{task_name}_{city}_zl17_summary.csv") 27 | 28 | 29 | headers_needed_zl17 = not os.path.exists(csv_path_zl17) 30 | 31 | with open(csv_path_zl17, mode='a', newline='') as file_zl17: 32 | writer_zl17 = csv.writer(file_zl17) 33 | if headers_needed_zl17: 34 | writer_zl17.writerow(["model_name", "city", "Accuracy"]) 35 | 36 | for zl in ["zl17"]: 37 | 38 | path = os.path.join(RESULTS_PATH, task_name, city, f"{city}_{task_name}_{zl}_{args.model_name}_response.json") 39 | 40 | with open(path, "r") as f: 41 | data = json.load(f) 42 | 43 | correct = 0 44 | num_A = 0 45 | num_B = 0 46 | num_C = 0 47 | num_D = 0 48 | 49 | for d in data: 50 | prompt = d["prompt"] 51 | choices = d["choices"] 52 | reference = d["reference"] 53 | response = d["response"] 54 | img_name = d["image"] 55 | 56 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 57 | 58 | if model_choice == reference: 59 | correct += 1 60 | 61 | if model_choice == "A": 62 | num_A += 1 63 | elif model_choice == "B": 64 | num_B += 1 65 | elif model_choice == "C": 66 | num_C += 1 67 | elif model_choice == "D": 68 | num_D += 1 69 | 70 | accuracy = correct / len(data) 71 | writer_zl17.writerow([model_name, city, accuracy]) 72 | print("For Response file:", path) 73 | print("Accuracy:", accuracy) 74 | -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_buildings/SAT_count_buildings_stats.py: -------------------------------------------------------------------------------- 1 | # This script computes accuracy statistics for the SAT_count_buildings multi-choice evaluation task. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_buildings', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | for zl in ["zl15", "zl17"]: 25 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 26 | 27 | with open(path, "r") as f: 28 | data = json.load(f) 29 | 30 | correct = 0 31 | num_A = 0 32 | num_B = 0 33 | num_C = 0 34 | num_D = 0 35 | 36 | for d in data: 37 | prompt = d["prompt"] 38 | reference = d["reference"] 39 | response = d["response"] 40 | img_name = d["image"] 41 | 42 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 43 | 44 | if model_choice == reference: 45 | correct += 1 46 | 47 | if model_choice == "A": 48 | num_A += 1 49 | elif model_choice == "B": 50 | num_B += 1 51 | elif model_choice == "C": 52 | num_C += 1 53 | elif model_choice == "D": 54 | num_D += 1 55 | 56 | 57 | print("For Response file:", path) 58 | print("Accuracy:", correct / len(data)) 59 | print("Num A:", num_A) 60 | print("Num B:", num_B) 61 | print("Num C:", num_C) 62 | print("Num D:", num_D) 63 | print() 64 | 65 | # save the stats 66 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 67 | 68 | os.makedirs(stats_folder, exist_ok=True) 69 | 70 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 71 | json.dump({ 72 | "Length of Data": len(data), 73 | "Accuracy": correct / len(data), 74 | "Num A": num_A, 75 | "Num B": num_B, 76 | "Num C": num_C, 77 | "Num D": num_D 78 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
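# [Illustrative sketch, not part of the original repository] All inference
# scripts in this dump assume the same minimal contract from
# serving.vlm_serving, which the dump does not include. A stub capturing the
# assumed interface (names taken from the call sites below):
class _VLMWrapperSketch:
    """Sketch of the VLMWrapper contract; the real class loads an actual model or API client."""

    def __init__(self, model_name):
        self.model_name = model_name  # e.g. "GPT4o_MINI" or "Llama-3-VILA1.5-8b"

    def get_vlm_model(self):
        # Returns an object whose generate() takes a flat list of image paths
        # followed by one text prompt and returns the response string.
        raise NotImplementedError("sketch only")

    def clean_proxy(self):
        # Tears down any proxy environment configured for API-backed models.
        pass
# (The real import from serving.vlm_serving below supersedes this sketch at runtime.)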
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | args = parser.parse_args() 21 | 22 | model_name = args.model_name 23 | city_name = args.city_name 24 | task_name = args.task_name 25 | 26 | print("Load the model") 27 | model_wrapper = VLMWrapper(args.model_name) 28 | model = model_wrapper.get_vlm_model() 29 | 30 | for zl in ["zl15", "zl17"]: 31 | 32 | print("Load the image list") 33 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 34 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 35 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 36 | output_dir = os.path.dirname(output_path) 37 | if not os.path.exists(output_dir): 38 | os.makedirs(output_dir, exist_ok=True) 39 | with open(path, "r") as f: 40 | data = json.load(f) 41 | 42 | if args.data_name == "mini": 43 | data = data[:10] 44 | 45 | response = [] 46 | for d in tqdm(data): 47 | prompt = d["prompt"] 48 | choices = d["choices"] 49 | reference = d["reference"] 50 | img_path = d["image"] 51 | img_name = img_path.split("/")[-1] 52 | 53 | assert os.path.exists(img_path), f"Image {img_path} not found" 54 | ret = model.generate([img_path, prompt]) 55 | response.append({ 56 | "image": img_name, 57 | "prompt": prompt, 58 | "choices": choices, 59 | "reference": reference, 60 | "response": ret 61 | }) 62 | 63 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 64 | os.makedirs(output_path, exist_ok=True) 65 | print("Save the response in" + output_path) 66 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 67 | json.dump(response, f, indent=4, ensure_ascii=False) 68 | 69 | model_wrapper.clean_proxy() 70 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | 21 | args = parser.parse_args() 22 | 23 | model_name = args.model_name 24 | city_name = args.city_name 25 | task_name = args.task_name 26 | 27 | print("Load the model") 28 | model_wrapper = VLMWrapper(args.model_name) 29 | model = model_wrapper.get_vlm_model() 30 | 31 | for zl in ["zl17"]: 32 | 33 | print("Load the image list") 34 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 35 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 36 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 37 | output_dir = os.path.dirname(output_path) 38 | if not os.path.exists(output_dir): 39 | os.makedirs(output_dir, exist_ok=True) 40 | 41 | with open(path, "r") as f: 42 | data = json.load(f) 43 | 44 | if args.data_name == "mini": 45 | data = data[:10] 46 | 47 | response = [] 48 | for d in tqdm(data): 49 | prompt = d["prompt"] 50 | choices = d["choices"] 51 | reference = d["reference"] 52 | img_path = d["image"] 53 | img_name = img_path.split("/")[-1] 54 | 55 | assert os.path.exists(img_path), f"Image {img_path} not found" 56 | ret = model.generate([img_path, prompt]) 57 | response.append({ 58 | "image": img_name, 59 | "prompt": prompt, 60 | "choices": choices, 61 | "reference": reference, 62 | "response": ret 63 | }) 64 | 65 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 66 | os.makedirs(output_path, exist_ok=True) 67 | print("Save the response in" + output_path) 68 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 69 | json.dump(response, f, indent=4, ensure_ascii=False) 70 | 71 | model_wrapper.clean_proxy() 72 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | 21 | args = parser.parse_args() 22 | 23 | model_name = args.model_name 24 | city_name = args.city_name 25 | task_name = args.task_name 26 | 27 | print("Load the model") 28 | model_wrapper = VLMWrapper(args.model_name) 29 | model = model_wrapper.get_vlm_model() 30 | 31 | for zl in ["zl15", "zl17"]: 32 | 33 | print("Load the image list") 34 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 35 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 36 | 37 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 38 | output_dir = os.path.dirname(output_path) 39 | if not os.path.exists(output_dir): 40 | os.makedirs(output_dir, exist_ok=True) 41 | 42 | with open(path, "r") as f: 43 | data = json.load(f) 44 | 45 | if args.data_name == "mini": 46 | data = data[:10] 47 | 48 | response = [] 49 | for d in tqdm(data): 50 | prompt = d["prompt"] 51 | choices = d["choices"] 52 | reference = d["reference"] 53 | img_path = d["image"] 54 | img_name = img_path.split("/")[-1] 55 | 56 | assert os.path.exists(img_path), f"Image {img_path} not found" 57 | ret = model.generate([img_path, prompt]) 58 | response.append({ 59 | "image": img_name, 60 | "prompt": prompt, 61 | "choices": choices, 62 | "reference": reference, 63 | "response": ret 64 | }) 65 | 66 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 67 | os.makedirs(output_path, exist_ok=True) 68 | print("Save the response in" + output_path) 69 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 70 | json.dump(response, f, indent=4, ensure_ascii=False) 71 | 72 | model_wrapper.clean_proxy() 73 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
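# [Illustrative example, not part of the original repository] Each
# {city}_{task_name}_{zl}.json file consumed by these uniimage inference
# scripts is a JSON list of records with exactly the fields read in the loop
# below; a single record might look like this (all values hypothetical):
_EXAMPLE_RECORD = {
    "image": "/data/dev-Beijing/stv_images/000123.jpg",  # hypothetical path
    "prompt": "Which landmark appears in this street view? A. ... B. ... C. ... D. ... Answer with the option letter.",
    "choices": ["A", "B", "C", "D"],
    "reference": "B",
}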
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | args = parser.parse_args() 21 | 22 | model_name = args.model_name 23 | city_name = args.city_name 24 | task_name = args.task_name 25 | 26 | print("Load the model") 27 | model_wrapper = VLMWrapper(args.model_name) 28 | model = model_wrapper.get_vlm_model() 29 | 30 | for zl in ["zl17"]: 31 | 32 | print("Load the image list") 33 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 34 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 35 | 36 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 37 | output_dir = os.path.dirname(output_path) 38 | if not os.path.exists(output_dir): 39 | os.makedirs(output_dir, exist_ok=True) 40 | 41 | with open(path, "r") as f: 42 | data = json.load(f) 43 | 44 | if args.data_name == "mini": 45 | data = data[:10] 46 | 47 | response = [] 48 | for d in tqdm(data): 49 | prompt = d["prompt"] 50 | choices = d["choices"] 51 | reference = d["reference"] 52 | img_path = d["image"] 53 | img_name = img_path.split("/")[-1] 54 | 55 | assert os.path.exists(img_path), f"Image {img_path} not found" 56 | ret = model.generate([img_path, prompt]) 57 | response.append({ 58 | "image": img_name, 59 | "prompt": prompt, 60 | "choices": choices, 61 | "reference": reference, 62 | "response": ret 63 | }) 64 | 65 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 66 | os.makedirs(output_path, exist_ok=True) 67 | print("Save the response in" + output_path) 68 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 69 | json.dump(response, f, indent=4, ensure_ascii=False) 70 | 71 | 72 | model_wrapper.clean_proxy() 73 | -------------------------------------------------------------------------------- /simulate/satelite/process_landuse.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | def parse_input_txt(input_txt): 4 | region_data = [] 5 | with open(input_txt, 'r', encoding='utf-8') as f: 6 | for line in f: 7 | if "is at location:" in line: 8 | region_type = line.split("is at location:")[0].strip() 9 | try: 10 | coordinates = ast.literal_eval(line.split("is at location:")[1].strip()) 11 | region_data.append((region_type, coordinates)) 12 | except (SyntaxError, ValueError): 13 | print(f"Error parsing coordinates in line: {line}") 14 | return region_data 15 | 16 | def is_valid_polygon(coordinates): 17 | if len(coordinates) < 4: 18 | return False 19 | if coordinates[0] == coordinates[-1]: 20 | if len(coordinates) == 3: 21 | return False 22 | return True 23 | 24 | def filter_invalid_regions(region_data): 25 | valid_regions = [] 26 | for region_type, coordinates in region_data: 27 | for polygon in 
coordinates: 28 | if is_valid_polygon(polygon): 29 | valid_regions.append((region_type, polygon)) 30 | return valid_regions 31 | 32 | def write_output_txt(output_txt, region_data): 33 | with open(output_txt, 'w', encoding='utf-8') as f: 34 | for region_type, coordinates in region_data: 35 | f.write(f"{region_type} is at location: {coordinates}\n") 36 | 37 | 38 | import pandas as pd 39 | import os 40 | import argparse 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 45 | parser.add_argument('--work_dir', type=str, default='../../data/') 46 | args = parser.parse_args() 47 | city = args.city 48 | work_dir = args.work_dir 49 | working_dir = work_dir + f"dev-{city}/" 50 | for zl in ['zl17','zl15']: 51 | df = pd.read_csv( working_dir + f'SAT_{city}_'+zl+'.csv') 52 | for cnt in range(len(df)): 53 | img_name = df.at[cnt,'img_name'].split('.')[0] 54 | output_dir = working_dir +"short_clipped_results_"+zl 55 | os.makedirs(output_dir, exist_ok=True) 56 | # input_txt = "clipped_results_wudaokou_pixel_non_null/landuse_"+img_name+".txt" 57 | input_txt = working_dir + "clipped_results_pixel_non_null_"+zl+"/landuse_"+img_name+".txt" 58 | 59 | 60 | if not os.path.exists(input_txt): 61 | continue 62 | # output_txt = "short_clipped_results_wudaokou/landuse_"+img_name+".txt" 63 | output_txt = working_dir + "short_clipped_results_"+zl+"/landuse_"+img_name+".txt" 64 | 65 | region_data = parse_input_txt(input_txt) 66 | 67 | try: 68 | valid_region_data = filter_invalid_regions(region_data) 69 | except: 70 | continue 71 | 72 | write_output_txt(output_txt, valid_region_data) 73 | 74 | # print(f"Data has been processed and saved to {output_txt}") 75 | -------------------------------------------------------------------------------- /examples/run_eval_uniimage_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=4 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | CITIES=('Beijing' 'London' 'NewYork') 10 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on street view address task" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | for CITY in "${CITIES[@]}"; do 17 | echo "Current city: $CITY" 18 | # python -m evaluate.uniimage.stv_address.stv_address_convert --city_name $CITY --task_name stv_address_mc 19 | python -m evaluate.uniimage.stv_address.stv_address_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name stv_address_mc 20 | python -m evaluate.uniimage.stv_address.stv_address_stats --city_name $CITY --model_name $MODEL --task_name stv_address_mc 21 | done 22 | done 23 | 24 | echo "Start running evaluation on street view landmark task" 25 | for MODEL in "${MODELS[@]}"; do 26 | echo "Current model: $MODEL" 27 | for CITY in "${CITIES[@]}"; do 28 | echo "Current city: $CITY" 29 | # python -m evaluate.uniimage.stv_landmark.stv_landmark_convert --city_name $CITY --task_name stv_landmark_mc 30 | python -m evaluate.uniimage.stv_landmark.stv_landmark_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name 
stv_landmark_mc 31 | python -m evaluate.uniimage.stv_landmark.stv_landmark_stats --city_name $CITY --model_name $MODEL --task_name stv_landmark_mc 32 | done 33 | done 34 | 35 | echo "Start running evaluation on satellite address task" 36 | for MODEL in "${MODELS[@]}"; do 37 | echo "Current model: $MODEL" 38 | for CITY in "${CITIES[@]}"; do 39 | echo "Current city: $CITY" 40 | # python -m evaluate.uniimage.sat_address.sat_address_convert --city_name $CITY --task_name sat_address_mc 41 | python -m evaluate.uniimage.sat_address.sat_address_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name sat_address_mc 42 | python -m evaluate.uniimage.sat_address.sat_address_stats --city_name $CITY --model_name $MODEL --task_name sat_address_mc 43 | done 44 | done 45 | 46 | echo "Start running evaluation on satellite landuse task" 47 | for MODEL in "${MODELS[@]}"; do 48 | echo "Current model: $MODEL" 49 | for CITY in "${CITIES[@]}"; do 50 | echo "Current city: $CITY" 51 | # python -m evaluate.uniimage.sat_landuse.sat_landuse_convert --city_name $CITY --task_name sat_landuse_mc 52 | python -m evaluate.uniimage.sat_landuse.sat_landuse_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name sat_landuse_mc 53 | python -m evaluate.uniimage.sat_landuse.sat_landuse_stats --city_name $CITY --model_name $MODEL --task_name sat_landuse_mc 54 | done 55 | done -------------------------------------------------------------------------------- /evaluate/evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import argparse 4 | import pandas as pd 5 | from multiprocessing import Pool 6 | 7 | from config import CITY_BOUNDARY, VLM_MODELS, LLM_MODELS, TASK_DEST_MAPPING 8 | 9 | 10 | class Evaluator: 11 | def __init__(self, city_name, model_name, data_name, task_name, workers=1) -> None: 12 | self.city_list = list(CITY_BOUNDARY.keys()) 13 | self.model_list = {"vlm": VLM_MODELS, "llm": LLM_MODELS} 14 | self.task_list = list(TASK_DEST_MAPPING.keys()) 15 | self.workers = workers 16 | 17 | self.city_name_list = city_name.split(",") 18 | self.model_name_list = model_name.split(",") 19 | self.task_name_list = task_name.split(",") 20 | self.data_name = data_name 21 | 22 | def evaluate(self): 23 | # TODO: run single task or run task sets 24 | self.multiple_task_wrapper(self.task_name_list, self.model_name_list, self.city_name_list) 25 | 26 | def valid_inputs(self): 27 | # TODO: check if the inputs are valid 28 | pass 29 | 30 | @staticmethod 31 | def single_task_wrapper(task_name, model_name, city_name, data_name): 32 | # run single task 33 | task_desc = TASK_DEST_MAPPING[task_name] 34 | if task_name in ["population", "objects"]: 35 | eval_scipt = "python -m {} --city_name={} --data_name={} --model_name={} --task_name={}".format(task_desc, city_name, data_name, model_name, task_name) 36 | else: 37 | eval_scipt = "python -m {} --city_name={} --data_name={} --model_name={}".format(task_desc, city_name, data_name, model_name) 38 | 39 | return os.system(eval_scipt) 40 | 41 | # TODO: run multiple tasks 42 | def multiple_task_wrapper(self, task_list, model_list, city_list): 43 | # TODO running multi tasks efficiently 44 | para_group = [] 45 | for task in task_list: 46 | for model in model_list: 47 | for city in city_list: 48 | para_group.append([task, model, city, self.data_name]) 49 | 50 | if self.workers==1: 51 | for para in para_group: 52 | self.single_task_wrapper(*para) 53 | else: 54 | with 
Pool(self.workers) as pool:  # use the worker count stored on the instance rather than the module-level args 55 | pool.starmap(self.single_task_wrapper, para_group) 56 | 57 | 58 | def analyze_results(self): 59 | # TODO: analyze the results 60 | pass 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--city_name', type=str, default="Beijing") 66 | parser.add_argument('--task_name', type=str, default='traffic_signal') 67 | parser.add_argument('--data_name', type=str, default='mini') 68 | parser.add_argument('--model_name', type=str, default="GPT4o") 69 | args = parser.parse_args() 70 | 71 | # Evaluator Initialization 72 | Eval = Evaluator( 73 | city_name=args.city_name, 74 | model_name=args.model_name, 75 | data_name=args.data_name, 76 | task_name=args.task_name) 77 | # Running Evaluator 78 | Eval.evaluate() 79 | -------------------------------------------------------------------------------- /simulate/satelite/process_driving.py: -------------------------------------------------------------------------------- 1 | import re 2 | from tqdm import trange 3 | import argparse 4 | import ast 5 | def contains_chinese(text): 6 | return bool(re.search(r'[\u4e00-\u9fff]', text)) 7 | 8 | def merge_segments(road_segments): 9 | merged_segments = [] 10 | for segment in road_segments: 11 | if not merged_segments: 12 | merged_segments.append(segment) 13 | else: 14 | if merged_segments[-1][-1] == segment[0]: 15 | merged_segments[-1].extend(segment[1:]) 16 | else: 17 | merged_segments.append(segment) 18 | 19 | simplified_segments = [] 20 | for seg in merged_segments: 21 | if len(seg) > 1: 22 | simplified_segments.append([seg[0], seg[-1]]) 23 | 24 | return simplified_segments 25 | 26 | def process_road_data(road_data): 27 | compressed_data = {} 28 | 29 | for road_name, segments in road_data.items(): 30 | merged_segments = merge_segments(segments) 31 | 32 | if merged_segments: 33 | compressed_data[road_name] = merged_segments 34 | 35 | return compressed_data 36 | 37 | def parse_input_txt(input_txt): 38 | road_data = {} 39 | with open(input_txt, 'r', encoding='utf-8') as f: 40 | for line in f: 41 | if "is at location:" in line: 42 | road_name = line.split("is at location:")[0].strip() 43 | coordinates = ast.literal_eval(line.split("is at location:")[1].strip()) # literal_eval is safer than eval for parsing coordinate lists 44 | road_data[road_name] = coordinates 45 | return road_data 46 | 47 | def write_output_txt(output_txt, compressed_data): 48 | with open(output_txt, 'w', encoding='utf-8') as f: 49 | for road_name, segments in compressed_data.items(): 50 | for seg in segments: 51 | f.write(f"{road_name} from {seg[0]} to {seg[1]}\n") 52 | 53 | import pandas as pd 54 | import os 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 59 | parser.add_argument('--work_dir', type=str, default='../../data/') 60 | args = parser.parse_args() 61 | city = args.city 62 | work_dir = args.work_dir 63 | working_dir = work_dir + f"dev-{city}/" 64 | for zl in ['zl17','zl15']: 65 | # df = pd.read_csv('../SAT_BJ_wudaokou_zl15.csv') 66 | df = pd.read_csv(working_dir + f'SAT_{city}_'+zl+'.csv') 67 | for cnt in trange(len(df)): 68 | img_name = df.at[cnt,'img_name'].split('.')[0] 69 | 70 | output_dir = working_dir + "short_clipped_results_"+zl 71 | os.makedirs(output_dir, exist_ok=True) 72 | input_txt = working_dir + "clipped_results_pixel_non_null_"+zl+"/driving_"+img_name+".txt" 73 | if not os.path.exists(input_txt): 74 | continue 75 | 76 | output_txt = working_dir + 
"short_clipped_results_"+zl+"/driving_"+img_name+".txt" 77 | 78 | road_data = parse_input_txt(input_txt) 79 | 80 | compressed_data = process_road_data(road_data) 81 | 82 | write_output_txt(output_txt, compressed_data) 83 | 84 | # print(f"Data has been processed and saved to {output_txt}") 85 | -------------------------------------------------------------------------------- /simulate/SAT_pipeline.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | # Remove the work_dir if it exists 5 | # rm -rf $work_dir 6 | 7 | # Depend on /ThreeCityImage/{city}/Sat_{zl}/ 8 | for city in "${cities[@]}"; do 9 | echo "Filtering images for $city" 10 | python ./satelite/make_image_list_sat.py --city $city --work_dir $work_dir 11 | done 12 | wait 13 | echo "Finish filtering images" 14 | # Get /sample_sat_image_{zl}/ 15 | # Get SAT_{city}_{zl}.csv 16 | 17 | # Depend on SAT_{city}_{zl}.csv 18 | for city in "${cities[@]}"; do 19 | echo "Creating shp file for $city" 20 | python ./satelite/make_sat_shp.py --city $city --work_dir $work_dir 21 | done 22 | wait 23 | echo "Finish creating shp file" 24 | # Get SAT_{city}_{zl}.shp, SAT_{city}_{zl}.dbf, SAT_{city}_{zl}.shx, SAT_{city}_{zl}.prj 25 | 26 | # Depend on SAT_{city}_{zl}.shp, /ThreeCityImage/city_geojson_three_cities/{city}_{typ}.geojson 27 | for city in "${cities[@]}"; do 28 | echo "Clip shp point data for $city" 29 | python ./satelite/clip_shp_point.py --city $city --work_dir $work_dir 30 | done 31 | wait 32 | echo "Finish clipping shp point data" 33 | # Get clipped_results_{zl}/clipped_{typ}_{polygon['region_nam'].split('.')[0]}.geojson 34 | 35 | # Depend on SAT_{city}_{zl}.csv 36 | # Depend on clipped_results_{zl}/clipped_{typ}_{img_name}.geojson 37 | for city in "${cities[@]}"; do 38 | echo "Change OSM's lat and lon to SAT's pixel for $city" 39 | python ./satelite/coord_to_pixel.py --city $city --work_dir $work_dir 40 | done 41 | wait 42 | echo "Finish changing OSM's lat and lon to SAT's pixel" 43 | # Get clipped_results_{zl}_updated/clipped_{typ}_{img_name}_updated.geojson 44 | # Get clipped_results_pixel_{zl}/clipped_{typ}_{img_name}_pixel.geojson 45 | 46 | # Depend on clipped_results_pixel_{zl}/clipped_{typ}_{img_name}_pixel.geojson 47 | for city in "${cities[@]}"; do 48 | echo "Filter out none valid data for $city" 49 | python ./satelite/extract_non_null_values.py --city $city --work_dir $work_dir 50 | done 51 | wait 52 | echo "Finish filtering out none valid data" 53 | # Get clipped_results_pixel_non_null_{zl}/{typ}_{img_name}.txt 54 | 55 | # Depend on SAT_{city}_{zl}.csv 56 | # Depend on clipped_results_pixel_non_null_{zl}/driving_{img_name}.txt 57 | for city in "${cities[@]}"; do 58 | echo "Process driving data for $city" 59 | python ./satelite/process_driving.py --city $city --work_dir $work_dir 60 | done 61 | wait 62 | echo "Finish processing driving data" 63 | # Get "short_clipped_results_{zl}/driving_{img_name}.txt" 64 | 65 | # Depend on SAT_{city}_{zl}.csv 66 | # Depend on clipped_results_pixel_non_null_{zl}/landuse_{img_name}.txt 67 | for city in "${cities[@]}"; do 68 | echo "Process landuse data for $city" 69 | python ./satelite/process_landuse.py --city $city --work_dir $work_dir 70 | done 71 | wait 72 | echo "Finish processing landuse data" 73 | # Get "short_clipped_results_{zl}/landuse_{img_name}.txt" 74 | 75 | # Depend on SAT_{city}_{zl}.csv 76 | # Depend on clipped_results_pixel_non_null_{zl}/poi_{img_name}.txt 77 | for city in 
"${cities[@]}"; do 78 | echo "Process POI data for $city" 79 | python ./satelite/process_poi.py --city $city --work_dir $work_dir 80 | done 81 | wait 82 | echo "Finish processing POI data" 83 | # Get "short_clipped_results_{zl}/poi_{img_name}.txt" -------------------------------------------------------------------------------- /evaluate/cross_view/eval_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import jsonlines 3 | import os 4 | import argparse 5 | import pandas as pd 6 | # from setproctitle import setproctitle 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import CROSS_VIEW_PATH, CROSS_VIEW_RESULTS_PATH 11 | 12 | 13 | def calculate_acc(city_name_list, model_name_list,task_name,save_name): 14 | all_acc_list = [] 15 | all_city_list = [] 16 | all_model_name_list = [] 17 | for model_name in model_name_list: 18 | for city_name in city_name_list: 19 | city_pred_list = [] 20 | city_GT_list = [] 21 | print(model_name, city_name) 22 | try: 23 | json_file_path = os.path.join(CROSS_VIEW_RESULTS_PATH, city_name+'_'+model_name+'_'+task_name+'_eval.jsonl') 24 | with jsonlines.open(json_file_path) as reader: 25 | for obj in reader: 26 | city_pred_list.append(obj['text']) 27 | city_GT_list.append(obj['GT']) 28 | except FileNotFoundError as e: 29 | print("File not found! City:{} Model:{}".format(city_name, model_name)) 30 | continue 31 | 32 | if len(city_pred_list) != len(city_GT_list): 33 | raise ValueError("different length") 34 | 35 | city_pred_list_lower = [item.lower() for item in city_pred_list] 36 | city_GT_list_lower = [item.lower() for item in city_GT_list] 37 | 38 | count = sum(item1 == item2 for item1, item2 in zip(city_pred_list_lower, city_GT_list_lower)) 39 | total = len(city_GT_list_lower) 40 | 41 | acc = count / total * 100 42 | 43 | all_acc_list.append(acc) 44 | all_city_list.append(city_name) 45 | all_model_name_list.append(model_name) 46 | 47 | df = pd.DataFrame({'city': all_city_list, 'model': all_model_name_list, 'acc': all_acc_list}) 48 | df.to_csv(os.path.join(CROSS_VIEW_RESULTS_PATH, save_name), index=False) 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name')#InternVL2-40B GPT4o_MINI Qwen2-VL-2B-Instruct 54 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') #Beijing, London, NewYork 55 | parser.add_argument('--eval_all_city_all_model', type=str, default="no", help='If yes, automatically evaluate data from all cities on all models.') 56 | parser.add_argument('--task_name', type=str, default='IR', help='task name', choices=["IR", "CL","SC_Buildings","SC_POIs"]) 57 | #task_name include: Image Retrieval, Camera Localization, Scene Comparision Buildings, Scene Comparison POIs(restaurant, education, shopping) 58 | 59 | args = parser.parse_args() 60 | 61 | if args.eval_all_city_all_model == 'yes': 62 | args.city_name="Beijing,London,NewYork" 63 | args.model_name="InternVL2-40B,GPT4o_MINI" 64 | 65 | city_name_list = args.city_name.split(",") 66 | model_name_list = args.model_name.split(",") 67 | 68 | else: 69 | city_name_list = [args.city_name] 70 | model_name_list = [args.model_name] 71 | 72 | save_name = 'summary_all_models_all_cities_{}.csv'.format(args.task_name) 73 | 74 | calculate_acc(city_name_list,model_name_list,args.task_name,save_name) 75 | -------------------------------------------------------------------------------- 
/simulate/advance/CoT/stv-landmark-cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | args = parser.parse_args() 64 | 65 | city = args.city 66 | work_dir = args.work_dir 67 | 68 | model_name = "gpt-4o-mini-2024-07-18" 69 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 70 | 71 | for zl in ["zl15", "zl17"]: 72 | og_path = f"stv_landmark_{city}_{zl}.json" 73 | with open(og_path, 'r') as f: 74 | og_data = json.load(f) 75 | 76 | output = [] 77 | 78 | with ThreadPoolExecutor(max_workers=32) as executor: 79 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 80 | 81 | for future in tqdm(futures): 82 | item = futures[future] 83 | polished_CoT = future.result() 84 | output.append({ 85 | "image_name": item['image_name'], 86 | "near_pois": item['near_pois'], 87 | "landmark": item['landmark'], 88 | "CoT": item['CoT'], 89 | "polished_CoT": polished_CoT 90 | }) 91 | 92 | with open(f"polished_stv_landmark_{city}_{zl}.json", 'w') as f: 93 | json.dump(output, f, indent=4, ensure_ascii=False) 94 | -------------------------------------------------------------------------------- /simulate/streetview/stv_nearest_pois.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import pandas as pd 3 | from shapely.geometry import Point 4 | from math import radians, sin, cos, asin, sqrt 5 | import argparse 6 | 7 | 8 | 9 | import pandas 
as pd 12 | from scipy.spatial import KDTree 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 17 | parser.add_argument('--work_dir', type=str, default='../../data/') 18 | args = parser.parse_args() 19 | city = args.city 20 | work_dir = args.work_dir 21 | for zl in ['zl15', 'zl17']: 22 | csv_file = work_dir + f"dev-{city}/stv_in_sat_{city}_{zl}.csv" 23 | 24 | output_dir = work_dir + f"dev-{city}/" 25 | 26 | csv_data = pd.read_csv(csv_file) 27 | 28 | csv_data['geometry'] = csv_data.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1) 29 | csv_gdf = gpd.GeoDataFrame(csv_data, geometry='geometry', crs="EPSG:4326") 30 | 31 | # TODO: Change the following paths to the actual paths 32 | if city == "Beijing": 33 | geojson_file = "....../MLLM-wudaokou_new/make_dataset/beijing_pois_five_ring.geojson" 34 | elif city == "London": 35 | geojson_file = "....../ThreeCityImage/city_geojson/London_geojson/London_pois_five_ring.geojson" 36 | elif city == "NewYork": 37 | geojson_file = "....../ThreeCityImage/city_geojson/NewYork_geojson/NewYork_pois_five_ring.geojson" 38 | 39 | assert geojson_file is not None, "Please specify the path to the GeoJSON file." 40 | 41 | geojson_gdf = gpd.read_file(geojson_file) 42 | 43 | points_gdf = geojson_gdf[geojson_gdf.geometry.type == 'Point'].copy() 44 | polygons_gdf = geojson_gdf[geojson_gdf.geometry.type == 'Polygon'].copy() 45 | 46 | polygons_gdf['geometry'] = polygons_gdf['geometry'].centroid 47 | 48 | combined_gdf = pd.concat([points_gdf, polygons_gdf]) 49 | 50 | geojson_coords = [(geom.x, geom.y) for geom in combined_gdf.geometry] 51 | 52 | tree = KDTree(geojson_coords) 53 | 54 | results = [] 55 | 56 | for idx, row in csv_gdf.iterrows(): 57 | point_coords = (row.geometry.x, row.geometry.y) 58 | # KDTree distances here are Euclidean in degrees of lon/lat, not metres 59 | distances, indices = tree.query(point_coords, k=20) 60 | 61 | for distance, index in zip(distances, indices): 62 | nearest_feature = combined_gdf.iloc[index] 63 | feature_name = nearest_feature.get('name', 'Unknown') 64 | 65 | results.append({ 66 | 'image_name': row.image_name, 67 | 'csv_latitude': row.geometry.y, 68 | 'csv_longitude': row.geometry.x, 69 | 'nearest_feature_name': feature_name, 70 | 'nearest_feature_type': nearest_feature.geometry.type, 71 | 'distance': distance 72 | }) 73 | 74 | output_df = pd.DataFrame(results) 75 | # output_df.to_csv(output_dir + "stv_in_sat_zl17_wudaokou_nearest_features.csv", index=False) 76 | output_df.to_csv(output_dir + f"stv_in_sat_nearest_features_{city}_{zl}.csv", index=False) 77 | 78 | print("Results saved to:", output_dir + f"stv_in_sat_nearest_features_{city}_{zl}.csv") -------------------------------------------------------------------------------- /simulate/streetview/spatial_join.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import pandas as pd 3 | import argparse 4 | import os 5 | from tqdm import trange 6 | 7 | 8 | # Original Implementation assumes that the shapefile and the CSV file are in the same CRS. 9 | # This implementation checks the CRS of both files and reprojects them if necessary.
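# Note on the reprojection below: it follows the usual geopandas pattern of assuming
# EPSG:4326 (WGS84 lon/lat) whenever a layer carries no CRS metadata, then bringing the
# point layer into the polygon layer's CRS before gpd.sjoin, since a spatial join across
# mismatched coordinate systems silently yields empty or incorrect matches.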
10 | import shutil 11 | 12 | 13 | 14 | def process_spatial_join(shp_file, stv_index_path, output_file): 15 | gdf_polygon = gpd.read_file(shp_file) 16 | 17 | if gdf_polygon.crs is None: 18 | gdf_polygon.set_crs(epsg=4326, inplace=True) # Assuming WGS84 as default 19 | 20 | 21 | df_points = pd.read_csv(stv_index_path) 22 | geometry = gpd.points_from_xy(df_points['longitude'], df_points['latitude']) 23 | gdf_points = gpd.GeoDataFrame(df_points, geometry=geometry) 24 | 25 | # Set CRS for points if not already set (assuming WGS84) 26 | if gdf_points.crs is None: 27 | gdf_points.set_crs(epsg=4326, inplace=True) 28 | 29 | 30 | if gdf_polygon.crs != gdf_points.crs: 31 | gdf_points = gdf_points.to_crs(gdf_polygon.crs) 32 | 33 | result = gpd.sjoin(gdf_polygon, gdf_points, how='inner') 34 | 35 | print(result.head()) 36 | 37 | # result[['region_nam', 'sid', 'sid_84_long', 'sid_84_lat']].to_csv(output_file, index=False) 38 | result[['image_name', 'longitude', 'latitude']].to_csv(output_file, index=False) 39 | print(f"Saved result to {output_file}") 40 | print(f"Total records: {len(result)}") 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 46 | parser.add_argument('--work_dir', type=str, default='../../data/') 47 | args = parser.parse_args() 48 | city = args.city 49 | work_dir = args.work_dir 50 | 51 | # TODO: Change the path to the actual streetview index file 52 | stv_index_path = f"....../Tricity/index_{city}.csv" 53 | 54 | for zl in ['zl15', 'zl17']: 55 | process_spatial_join(f'{work_dir}dev-{city}/SAT_{city}_{zl}.shp', stv_index_path, f'{work_dir}dev-{city}/stv_in_sat_{city}_{zl}.csv') 56 | print(f"Processed {zl} for {city}") 57 | 58 | # sanity check and copy the stv images 59 | print(f"Copying streetview images for {city} {zl}") 60 | # TODO: Change the path to the actual streetview image directory 61 | stv_img_all_dir = f'....../ThreeCityImage/{city}/StreetView/' 62 | 63 | df = pd.read_csv(f'{work_dir}dev-{city}/stv_in_sat_{city}_{zl}.csv') 64 | print(f"Total records: {len(df)}") 65 | target_dir = f'{work_dir}dev-{city}/sampled_stv_images/' 66 | os.makedirs(target_dir, exist_ok=True) 67 | 68 | for i in trange(len(df)): 69 | row = df.iloc[i] 70 | img_name = row["image_name"] 71 | # shutil handles spaces and shell-special characters in paths, unlike os.system("cp ...") with hand-quoted arguments 72 | og_path = os.path.join(stv_img_all_dir, img_name) 73 | assert os.path.exists(og_path), f"Image {og_path} not found" 74 | new_image_path = os.path.join(target_dir, img_name) 75 | shutil.copy(og_path, new_image_path) 76 | 77 | 78 | # assert target_dir is not empty 79 | assert len(os.listdir(target_dir)) > 0, f"No images copied to {target_dir}" 80 | print(f"Streetview images copied for {city} {zl}") 81 | print(f"Total images: {len(os.listdir(target_dir))} in {target_dir}") 82 | -------------------------------------------------------------------------------- /simulate/advance/CoT/stv_address_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 |
API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='stv-address-cot', choices=['stv-address-cot']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | 70 | model_name = "gpt-4o" 71 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 72 | 73 | og_path = os.path.join(work_dir, f'dev-{city}/CoT/{task}/{task}_{city}.json') 74 | with open(og_path, 'r') as f: 75 | og_data = json.load(f) 76 | 77 | output = [] 78 | 79 | with ThreadPoolExecutor(max_workers=128) as executor: 80 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 81 | 82 | for future in tqdm(futures): 83 | item = futures[future] 84 | polished_CoT = future.result() 85 | output.append({ 86 | "img_name": item['img_name'], 87 | "CoT": item['CoT'], 88 | "polished_CoT": polished_CoT, 89 | "address": item['address'], 90 | "description": item['description'], 91 | "near_feature": item['near_feature'], 92 | }) 93 | 94 | output_path = f'polished_{task}_{city}.json' 95 | output_path = os.path.join(work_dir, f'dev-{city}/CoT/{task}', output_path) 96 | 97 | with open(output_path, 'w') as f: 98 | json.dump(output, f, indent=4, ensure_ascii=False) 99 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_address_cot/gen_CoT_template.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import pandas as pd 4 | from tqdm import trange 5 | 6 | # Generate CoT ground truth 7 | # Three reasoning steps: 8 | # 1. Tell the city name 9 | # 2. Extract the location's pois around 10 | # 3. 
Tell the location's address 11 | 12 | 13 | def sat_adr_prompt_template(city_name:str, description:str, address:str): 14 | """ 15 | Generate the prompt for the satellite view image addressing task 16 | """ 17 | prompt = f""" 18 | Step 1: Tell the city name 19 | According to the satellite view image, this image is taken in {city_name}. 20 | Step 2: Extract the location's features around 21 | From the image, I can see the following features: {description}. 22 | Step 3: Tell the location's address 23 | Based on my observation and knowledge about this region, the address of this region is {address}. 24 | """ 25 | prompt = str(prompt).replace('\n', ' ').strip() 26 | 27 | return prompt 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 32 | parser.add_argument('--work_dir', type=str, default='../../data/') 33 | parser.add_argument('--task', type=str, default='sat-address-cot', choices=['sat-address-cot', 'street-view-address']) 34 | args = parser.parse_args() 35 | city = args.city 36 | work_dir = args.work_dir 37 | task = args.task 38 | work_dir = work_dir + f'dev-{city}/' 39 | 40 | output_dir = work_dir + f'CoT/{task}/' 41 | import os 42 | os.makedirs(output_dir, exist_ok=True) 43 | 44 | CITY_NAME = city 45 | 46 | # Satellite view 47 | for zl in ["zl15", "zl17"]: 48 | description_csv = work_dir + f'rs_osm_description_{CITY_NAME}_{zl}.csv' 49 | 50 | address_csv = work_dir + f'sat_address_combined_{CITY_NAME}_{zl}.csv' 51 | 52 | df_description = pd.read_csv(description_csv) 53 | df_address = pd.read_csv(address_csv) 54 | 55 | df_address = df_address.dropna(subset=["combined_adr"]).reset_index(drop=True) 56 | df_description = df_description.dropna(subset=["text"]).reset_index(drop=True) 57 | 58 | # assert len(df_description) == len(df_address) 59 | print("After dropping NaN values:") 60 | print(f"Number of descriptions: {len(df_description)}") 61 | print(f"Number of addresses: {len(df_address)}") 62 | output = [] 63 | print("Generating CoT...") 64 | 65 | for i in trange(len(df_description)): 66 | img_name_1 = df_description.loc[i, 'img_name'] 67 | # O(n^2) img_name matching; acceptable at this scale, though a pandas merge on img_name would be faster 68 | for j in range(len(df_address)): 69 | img_name_2 = df_address.loc[j, 'img_name'] 70 | if img_name_1 == img_name_2: 71 | 72 | description = df_description.loc[i, 'text'] 73 | 74 | address = df_address.loc[j, 'combined_adr'] 75 | 76 | prompt = sat_adr_prompt_template(CITY_NAME, description, address) 77 | 78 | df_description.loc[i, 'CoT'] = prompt 79 | 80 | output.append({ 81 | "img_name": img_name_1, 82 | "CoT": prompt, 83 | "description": description, 84 | "address": address 85 | }) 86 | 87 | # output_path = f'sat_address_cot_{city}_{zl}.json' 88 | output_path = output_dir + f'{task}_{CITY_NAME}_{zl}.json' 89 | print(f"Saving CoT to {output_path}") 90 | print("Total number of CoT:", len(output)) 91 | 92 | with open(output_path, 'w') as f: 93 | json.dump(output, f, indent=4, ensure_ascii=False) 94 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_address_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 |
API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=2048, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='sat-address-cot', choices=['sat-address-cot', 'street-view-address']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 70 | 71 | model_name = "gpt-4o" 72 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 73 | 74 | for zl in ['zl15', 'zl17']: 75 | og_path = f"{task}_{city}_{zl}.json" 76 | og_path = os.path.join(CoT_dir, og_path) 77 | 78 | with open(og_path, 'r') as f: 79 | og_data = json.load(f) 80 | 81 | output = [] 82 | 83 | with ThreadPoolExecutor(max_workers=128) as executor: 84 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 85 | 86 | for future in tqdm(futures): 87 | item = futures[future] 88 | polished_CoT = future.result() 89 | output.append({ 90 | "img_name": item["img_name"], 91 | "polished_CoT": polished_CoT, 92 | "og_CoT": item["CoT"], 93 | "description": item["description"], 94 | "address": item["address"] 95 | }) 96 | 97 | output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 98 | 99 | with open(output_path, 'w') as f: 100 | json.dump(output, f, indent=4, ensure_ascii=False) 101 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_count_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | 
"siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='sat-count-cot', choices=['sat-address-cot', 'street-view-address']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 70 | 71 | model_name = "gpt-4o" 72 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 73 | 74 | for zl in ['zl15', 'zl17']: 75 | og_path = f"{task}_{city}_{zl}.json" 76 | og_path = os.path.join(CoT_dir, og_path) 77 | 78 | with open(og_path, 'r') as f: 79 | og_data = json.load(f) 80 | 81 | output = [] 82 | 83 | with ThreadPoolExecutor(max_workers=128) as executor: 84 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 85 | 86 | for future in tqdm(futures): 87 | item = futures[future] 88 | polished_CoT = future.result() 89 | output.append({ 90 | "img_name": item["img_name"], 91 | "polished_CoT": polished_CoT, 92 | "og_CoT": item["CoT"], 93 | "description": item["description"], 94 | "address": item["address"] 95 | }) 96 | 97 | output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 98 | 99 | with open(output_path, 'w') as f: 100 | json.dump(output, f, indent=4, ensure_ascii=False) 101 | 102 | print(f"Polished CoT saved to {output_path}") 103 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | 
*.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | data/* 165 | .vscode/ 166 | evaluate/citydata/ 167 | citydata/ 168 | results/ -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert an address QA into a multi-choice question for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | 10 | from config import UNI_IMAGE_FOLDER 11 | 12 | def prompt_template(choice1, choice2, choice3, choice4): 13 | s = f""" 14 | The following is a multiple-choice question about selecting the most appropriate address for a satellite image. 15 | A. {choice1} 16 | B. {choice2} 17 | C. {choice3} 18 | D. {choice4} 19 | Please choose the most suitable one among A, B, C and D as the answer to this question. 20 | Please output the option directly. No need for explanation.\n 21 | """ 22 | 23 | return s.strip() 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 28 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 29 | args = parser.parse_args() 30 | 31 | city_name = args.city_name 32 | task_name = args.task_name 33 | 34 | work_dir = UNI_IMAGE_FOLDER 35 | 36 | cur_dir = os.path.join(work_dir, f"{city_name}/") 37 | 38 | all_train_data = [] 39 | 40 | for zl in ['zl15', 'zl17']: 41 | sat_img_dir = cur_dir + f"sample_sat_image_{zl}/" 42 | sat_address_file = cur_dir + f"sat_address_combined_{city_name}_{zl}.csv" 43 | df = pd.read_csv(sat_address_file) 44 | # remove the rows with empty address 45 | df = df.dropna(subset=["combined_adr"]) 46 | 47 | 48 | print("Input file:", sat_address_file) 49 | print("Valid records:", len(df)) 50 | 51 | output = [] 52 | 53 | for i in range(len(df)): 54 | row = df.iloc[i] 55 | img_name = row["img_name"] 56 | combined_adr = row["combined_adr"] 57 | 58 | # print(combined_adr) 59 | 60 | assert os.path.exists(sat_img_dir + img_name), f"Image {img_name} not found" 61 | 62 | # Randomly select 3 other addresses 63 | other_choices = df[df["img_name"] != img_name].sample(3)["combined_adr"].tolist() 64 | choices = [combined_adr] + other_choices 65 | random.seed(i); random.shuffle(choices)  # shuffle's random= parameter is deprecated and was removed in Python 3.11 66 | 67 | # print(choices) 68 | 69 | reference = chr(ord('A') + choices.index(combined_adr)) 70 | 71 | # print(reference) 72 | 73 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 74 | 75 | # print(prompt) 76 | 77 | output.append({ 78 | "prompt": prompt, 79 | "choices": choices, 80 | "reference": reference, 81 | "image": sat_img_dir + img_name 82 | }) 83 | 84 | # print(output[-1]) 85 | 86 | # exit() 87 | 88 | output_dir = os.path.join(work_dir, task_name, city_name) 89 | 90 | os.makedirs(output_dir, exist_ok=True) 91 | 92 | test = random.sample(output, min(200, len(output))) 93 | train = [x for x in output if x not in test] 94 | 95 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 96 | json.dump(test, f, indent=4, ensure_ascii=False) 97 | 98 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 99 | json.dump(train, f, indent=4, ensure_ascii=False) 100 | 101 |
all_train_data.extend(train) 102 | 103 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 104 | print("Test size:", len(test)) 105 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 106 | print("Train size:", len(train)) 107 | 108 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 109 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) -------------------------------------------------------------------------------- /simulate/satelite/process_poi.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import ast 3 | def remove_duplicates(points): 4 | # drop duplicate coordinate pairs while preserving order 5 | seen = set() 6 | result = [] 7 | for point in points: 8 | if tuple(point) not in seen: 9 | seen.add(tuple(point)) 10 | result.append(point) 11 | return result 12 | 13 | def calculate_centroid(polygon): 14 | # average the (deduplicated) vertex coordinates to get an integer pixel centroid 15 | # print(polygon) 16 | polygon = remove_duplicates(polygon) 17 | 18 | x_coords = [point[0] for point in polygon] 19 | y_coords = [point[1] for point in polygon] 20 | 21 | cx = sum(x_coords) / len(polygon) 22 | cy = sum(y_coords) / len(polygon) 23 | return [int(cx), int(cy)] 24 | 25 | def is_near(p1, p2, threshold=3): 26 | return abs(p1[0] - p2[0]) <= threshold and abs(p1[1] - p2[1]) <= threshold 27 | 28 | def merge_nearby_points(locations): 29 | merged = calculate_centroid(locations) 30 | 31 | return merged 32 | 33 | def merge_poi_by_category(poi_data): 34 | merged_poi = {} 35 | 36 | for poi in poi_data: 37 | name, location = poi.split(" is at location: ") 38 | category = name.split(" ")[-1] 39 | location = ast.literal_eval(location.strip())  # safer than eval for parsing coordinate literals 40 | 41 | processed_locations = [] 42 | # print(location) 43 | coord = location 44 | if isinstance(coord[0], list) and len(coord[0])!=2:# and len(coord[0][0]) > 2 and all(isinstance(i, list) and len(i) == 2 for i in coord): 45 | loc = location[0] 46 | if isinstance(loc, list) and len(loc) > 1: 47 | processed_locations.append(loc) 48 | 49 | if processed_locations: 50 | # print(processed_locations) 51 | merged_points = merge_nearby_points(processed_locations[0]) 52 | # print(merged_points) 53 | 54 | if category not in merged_poi: 55 | merged_poi[category] = [] 56 | merged_poi[category].extend([merged_points]) 57 | else: 58 | if category not in merged_poi: 59 | merged_poi[category] = [] 60 | merged_poi[category].append(location) 61 | else: 62 | if category not in merged_poi: 63 | merged_poi[category] = [] 64 | merged_poi[category].append(location) 65 | 66 | 67 | return merged_poi 68 | 69 | def format_merged_poi(merged_poi): 70 | output = [] 71 | for category, locations in merged_poi.items(): 72 | output.append(f"{category}s are at locations: {', '.join(str(loc) for loc in locations)}") 73 | return output 74 | 75 | def read_poi_from_file(filename): 76 | with open(filename, 'r', encoding='utf-8') as file: 77 | return file.readlines() 78 | 79 | def write_poi_to_file(filename, formatted_output): 80 | with open(filename, 'w', encoding='utf-8') as file: 81 | for line in formatted_output: 82 | file.write(line + '\n') 83 | 84 | import os 85 | import argparse 86 | 87 | if __name__ == "__main__": 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 90 | parser.add_argument('--work_dir', type=str, default='../../data/') 91 | args = parser.parse_args() 92 | city = args.city 93 | work_dir = args.work_dir 94 | working_dir = work_dir + f"dev-{city}/" 95 | 96 | for zl
in ['zl17','zl15']: 97 | df = pd.read_csv(working_dir + f'SAT_{city}_'+zl+'.csv') 98 | for cnt in range(len(df)): 99 | img_name = df.at[cnt,'img_name'].split('.')[0] 100 | output_dir = working_dir + "short_clipped_results_"+zl 101 | os.makedirs(output_dir, exist_ok=True) 102 | input_filename = working_dir + 'clipped_results_pixel_non_null_'+zl+'/pois_'+img_name+'.txt' 103 | 104 | if not os.path.exists(input_filename): 105 | continue 106 | poi_data = read_poi_from_file(input_filename) 107 | 108 | merged_poi = merge_poi_by_category(poi_data) 109 | formatted_output = format_merged_poi(merged_poi) 110 | 111 | output_filename = working_dir + 'short_clipped_results_'+zl+'/pois_'+img_name+'.txt' 112 | write_poi_to_file(output_filename, formatted_output) 113 | 114 | -------------------------------------------------------------------------------- /simulate/address/interpolate_sat_coord.py: -------------------------------------------------------------------------------- 1 | # Function: Interpolate the coordinates of satellite images for zoom level 15 and 17 2 | import pandas as pd 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 8 | parser.add_argument('--work_dir', type=str, default='../../data/') 9 | args = parser.parse_args() 10 | city = args.city 11 | work_dir = args.work_dir 12 | 13 | for zl in ['zl15','zl17']: 14 | 15 | 16 | input_csv_path = work_dir + f'dev-{city}/SAT_{city}_{zl}.csv' 17 | 18 | output_csv_path = work_dir + f'dev-{city}/SAT_interpolate_{city}_{zl}.csv' 19 | 20 | 21 | if 'zl17' in input_csv_path: 22 | data = pd.read_csv(input_csv_path) 23 | 24 | img_list = [] 25 | lng_list = [] 26 | lat_list = [] 27 | 28 | for i in range(len(data)): 29 | tl_lat = data.at[i,'tl_lat'] 30 | tl_lng = data.at[i,'tl_lng'] 31 | bt_lat = data.at[i,'bt_lat'] 32 | bt_lng = data.at[i,'bt_lng'] 33 | # evenly sample 3*3 points for zoom level 17 34 | for j in range(3): 35 | for k in range(3): 36 | if j == 0 and k==0: 37 | img_list.append(data.at[i,'img_name']) 38 | lng_list.append(data.at[i,'tl_lng']) 39 | lat_list.append(data.at[i,'tl_lat']) 40 | continue 41 | if j ==2 and k==2: 42 | img_list.append(data.at[i,'img_name']) 43 | lng_list.append(data.at[i,'bt_lng']) 44 | lat_list.append(data.at[i,'bt_lat']) 45 | continue 46 | img_list.append(data.at[i,'img_name']) 47 | lng_list.append(data.at[i,'tl_lng']+j/2*(data.at[i,'bt_lng']-data.at[i,'tl_lng'])) 48 | lat_list.append(data.at[i,'tl_lat']-k/2*(data.at[i,'tl_lat']-data.at[i,'bt_lat'])) 49 | 50 | new_df = pd.DataFrame({'img_name':img_list,'lng':lng_list,'lat':lat_list}) 51 | new_df.to_csv(output_csv_path, index=False) 52 | 53 | elif 'zl15' in input_csv_path: 54 | 55 | 56 | data = pd.read_csv(input_csv_path) 57 | 58 | img_list = [] 59 | lng_list = [] 60 | lat_list = [] 61 | 62 | for i in range(len(data)): 63 | tl_lat = data.at[i,'tl_lat'] 64 | tl_lng = data.at[i,'tl_lng'] 65 | bt_lat = data.at[i,'bt_lat'] 66 | bt_lng = data.at[i,'bt_lng'] 67 | # evenly sample 5*5 points for zoom level 15 68 | for j in range(5): 69 | for k in range(5): 70 | if j == 0 and k==0: 71 | img_list.append(data.at[i,'img_name']) 72 | lng_list.append(data.at[i,'tl_lng']) 73 | lat_list.append(data.at[i,'tl_lat']) 74 | continue 75 | if j ==4 and k==4: 76 | img_list.append(data.at[i,'img_name']) 77 | lng_list.append(data.at[i,'bt_lng']) 78 | lat_list.append(data.at[i,'bt_lat']) 79 | continue 80 | img_list.append(data.at[i,'img_name']) 81 | 
lng_list.append(data.at[i,'tl_lng']+j/4*(data.at[i,'bt_lng']-data.at[i,'tl_lng'])) 82 | lat_list.append(data.at[i,'tl_lat']-k/4*(data.at[i,'tl_lat']-data.at[i,'bt_lat'])) 83 | 84 | new_df = pd.DataFrame({'img_name':img_list,'lng':lng_list,'lat':lat_list}) 85 | new_df.to_csv(output_csv_path, index=False) 86 | 87 | 88 | else: 89 | print("Please input the correct csv file path!") 90 | raise NotImplementedError 91 | 92 | 93 | # sanity check 94 | print("Before interpolation, the number of images is ",len(data)) 95 | print("After interpolation, the number of images is ",len(new_df)) 96 | print("The ratio of the number of images after interpolation to the number of images before interpolation is ",len(new_df)/len(data)) -------------------------------------------------------------------------------- /evaluate/mobility_prediction/metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from sklearn.metrics import f1_score 4 | import ast 5 | import csv 6 | import numpy as np 7 | 8 | 9 | 10 | def get_acc1_f1(df): 11 | acc1 = (df['prediction'] == df['ground_truth']).sum() / len(df) 12 | f1 = f1_score(df['ground_truth'], df['prediction'], average='weighted') 13 | return acc1, f1 14 | 15 | def get_is_correct(row): 16 | pred_list = row['prediction'] 17 | if row['ground_truth'] in pred_list: 18 | row['is_correct'] = True 19 | else: 20 | row['is_correct'] = False 21 | 22 | return row 23 | 24 | 25 | def get_is_correct10(row): 26 | pred_list = row['top10'] 27 | if row['ground_truth'] in pred_list: 28 | row['is_correct10'] = True 29 | else: 30 | row['is_correct10'] = False 31 | 32 | pred_list = row['top5'] 33 | if row['ground_truth'] in pred_list: 34 | row['is_correct5'] = True 35 | else: 36 | row['is_correct5'] = False 37 | 38 | pred = row['top1'] 39 | if pred == row['ground_truth']: 40 | row['is_correct1'] = True 41 | else: 42 | row['is_correct1'] = False 43 | 44 | return row 45 | 46 | 47 | def first_nonzero(arr, axis, invalid_val=-1): 48 | mask = arr!=0 49 | return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val) 50 | 51 | 52 | def get_ndcg(prediction, targets, k=10): 53 | """ 54 | Calculates the NDCG score for the given predictions and targets. 55 | 56 | Args: 57 | prediction (Nxk): list of lists. the softmax output of the model. 58 | targets (N): torch.LongTensor. actual target place id. 
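        (Note: with a single relevant item per row, NDCG@k reduces to
        1/log2(rank + 1), where rank is the 1-based position of the target
        inside the top-k prediction list, and contributes 0 if the target is
        absent -- which is exactly what the code below computes via
        first_nonzero and the final 1/np.log2(ranks + 1) term.)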
59 | 60 | Returns: 61 | the sum ndcg score 62 | """ 63 | for i, xi in enumerate(prediction): 64 | if len(xi) < k: 65 | # pad in place so np.array(prediction) below gets a rectangular shape 66 | prediction[i] = xi + [-5 for _ in range(k - len(xi))] 67 | elif len(xi) > k: 68 | # rebinding the loop variable (xi = xi[:k]) would not truncate the stored list, so assign back 69 | prediction[i] = xi[:k] 70 | 71 | 72 | n_sample = len(prediction) 73 | prediction = np.array(prediction) 74 | targets = np.broadcast_to(targets.reshape(-1, 1), prediction.shape) 75 | hits = first_nonzero(prediction == targets, axis=1, invalid_val=-1) 76 | hits = hits[hits>=0] 77 | ranks = hits + 1 78 | ndcg = 1 / np.log2(ranks + 1) 79 | return np.sum(ndcg) / n_sample 80 | 81 | 82 | def cal_metrics(output_dir): 83 | # Calculate the metric for all users 84 | # output_dir = 'output/Mixtral-8x22B-Instruct-v0.1_Paris_top1_wot' 85 | file_list = [file for file in os.listdir(output_dir) if file.endswith('.csv')] 86 | print(file_list) 87 | file_path_list = [os.path.join(output_dir, file) for file in file_list] 88 | 89 | df = pd.DataFrame({ 90 | 'user_id': None, 91 | 'ground_truth': None, 92 | 'prediction': None, 93 | 'reason': None 94 | }, index=[]) 95 | 96 | for file_path in file_path_list: 97 | iter_df = pd.read_csv(file_path) 98 | df = pd.concat([df, iter_df], ignore_index=True) 99 | 100 | df_cleaned = df.dropna(subset=['prediction', 'ground_truth']) 101 | df_cleaned['prediction'] = df_cleaned['prediction'].apply( 102 | # lambda x: int(x) if isinstance(x, int) else int(x.split('or')[0]) if 'or' in x else int(x) 103 | lambda x: int(x) if str(x).strip().lstrip('-').isdigit() else -100  # CSV values arrive as strings or numpy scalars, never Python ints 104 | ) 105 | df_cleaned['ground_truth'] = df_cleaned['ground_truth'].apply(lambda x: int(x)) 106 | 107 | acc1, f1 = get_acc1_f1(df_cleaned) 108 | return acc1, f1 109 | 110 | 111 | if __name__ == "__main__": 112 | # Calculate the metric for all users 113 | # TODO: Fill in the model names that you want to evaluate 114 | models = ["Llama-3-VILA1.5-8b",] 115 | cities = ["Beijing"] 116 | 117 | csv_file = 'results/prediction_results/metrics.csv' 118 | with open(csv_file, mode='w', newline='') as file: 119 | writer = csv.writer(file) 120 | writer.writerow(['Model', 'City', 'Acc@1', 'F1']) 121 | for model in models: 122 | for city in cities: 123 | output_dir = f'results/prediction_results/{model}_{city}_top1_wot' 124 | acc1, f1 = cal_metrics(output_dir=output_dir) 125 | writer.writerow([model, city, acc1, f1]) 126 | 127 | print(f"Results have been saved to {csv_file}") 128 | -------------------------------------------------------------------------------- /simulate/annotate/sat_combine_address.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI # >=1.0, test version 1.16.0 2 | import httpx 3 | import os 4 | import pandas as pd 5 | import argparse 6 | from tqdm import tqdm, trange 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | PROXY = "http://127.0.0.1:10190" 10 | 11 | API_KEY_MAPPING = { 12 | "siliconflow": "SiliconFlow_API_KEY", 13 | "DeepInfra": "DeepInfra_API_KEY", 14 | "OpenAI": "OpenAI_API_KEY" 15 | } 16 | API_URL_MAPPING = { 17 | "siliconflow": "https://api.siliconflow.cn/v1", 18 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 19 | "OpenAI": "https://api.openai.com/v1" 20 | } 21 | 22 | API_TYPE = "OpenAI" 23 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 24 | API_URL = API_URL_MAPPING[API_TYPE] 25 | 26 | def process_chunk(client, df, i, interpolate_num, model_name): 27 | string_adr = '' 28 | for j in range(i, i + interpolate_num): 29 | string_adr += str(df.at[j, 'adr']) + ', ' 30 | 31 | prompts = f''' 32 | I give you a
detailed address description of a square area. 33 | The square area is evenly divided into a {int(interpolate_num**0.5)}*{int(interpolate_num**0.5)} grid. 34 | Starting from the upper left corner and going down is the first column, 35 | and then the second column continues from top to bottom, from grid 0 to grid {interpolate_num-1}. 36 | The detailed addresses of grid 0 to grid {interpolate_num-1} are: {string_adr} 37 | Please form a general description of this area. Please describe what lies to the east, west, north and south of this area, 38 | the relative locations of the POIs and roads in the area, 39 | which POIs are adjacent to each other, which roads connect which POIs in this area, etc. 40 | Avoid including words like grid or poi in your answer and only generate one paragraph. Please make it natural and fluent. 41 | ''' 42 | 43 | dialogs = [{ 44 | "role": "user", 45 | "content": prompts 46 | }] 47 | 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0.3, 53 | ) 54 | 55 | return df.at[i, 'img_name'], completion.choices[0].message.content.strip() 56 | 57 | if __name__ == '__main__': 58 | parser = argparse.ArgumentParser() 59 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 60 | parser.add_argument('--work_dir', type=str, default='../../data/') 61 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 62 | args = parser.parse_args() 63 | city = args.city 64 | work_dir = args.work_dir 65 | model_name = args.model_name 66 | 67 | if API_TYPE == "OpenAI": 68 | client = OpenAI( 69 | base_url=API_URL, 70 | api_key=API_KEY, 71 | http_client=httpx.Client(proxies=PROXY) 72 | ) 73 | elif API_TYPE == "siliconflow": 74 | client = OpenAI( 75 | base_url=API_URL, 76 | api_key=API_KEY 77 | ) 78 | elif API_TYPE == "DeepInfra": 79 | client = OpenAI( 80 | base_url=API_URL, 81 | api_key=API_KEY, 82 | http_client=httpx.Client(proxies=PROXY), 83 | ) 84 | 85 | for zl in ['zl15', 'zl17']: 86 | if zl == 'zl15': 87 | interpolate_num = 5 * 5 88 | else: 89 | interpolate_num = 3 * 3 90 | 91 | input_file_path = work_dir + f"dev-{city}/SAT_interpolate_address_{city}_{zl}.csv" 92 | output_file_path = work_dir + f"dev-{city}/sat_address_combined_{city}_{zl}.csv" 93 | 94 | df = pd.read_csv(input_file_path) 95 | region_list = [] 96 | combined_adr_list = [] 97 | 98 | with ThreadPoolExecutor(max_workers=128) as executor: 99 | futures = [ 100 | executor.submit(process_chunk, client, df, i, interpolate_num, model_name) 101 | for i in range(0, len(df), interpolate_num) 102 | ] 103 | 104 | for future in tqdm(futures, total=len(futures)): 105 | region, combined_adr = future.result() 106 | region_list.append(region) 107 | combined_adr_list.append(combined_adr) 108 | 109 | pd_dict = pd.DataFrame({'img_name': region_list, 'combined_adr': combined_adr_list}) 110 | pd_dict.to_csv(output_file_path, index=False) 111 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a landmark-description QA into a multi-choice question for evaluation.
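# Note: the conversion mirrors sat_address_convert.py above -- each street-view landmark
# description becomes a 4-way multiple-choice item whose three distractors are sampled
# from the descriptions of other images, and the items are then split into per-zoom-level
# train/test sets.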
2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | 10 | from config import UNI_IMAGE_FOLDER, BEIJING_STV_IMAGE_FOLDER, LONDON_STV_IMAGE_FOLDER 11 | import tqdm 12 | 13 | 14 | def prompt_template(choice1, choice2, choice3, choice4): 15 | s = f""" 16 | The following is a multiple-choice question about selecting the most likely description of nearby POIs (Places of Interest) or landmarks in the region of a street view image. 17 | A. {choice1} 18 | B. {choice2} 19 | C. {choice3} 20 | D. {choice4} 21 | Please choose the most suitable one among A, B, C and D as the answer to this question. 22 | Please output the option directly. No need for explanation.\n 23 | """ 24 | 25 | return s.strip() 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 30 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | 36 | work_dir = UNI_IMAGE_FOLDER 37 | 38 | work_dir = os.path.join(work_dir, f"{city_name}/") 39 | 40 | all_train_data = [] 41 | all_test_data = [] 42 | 43 | for zl in ['zl15', 'zl17']: 44 | if city_name == "Beijing": 45 | stv_img_dir = BEIJING_STV_IMAGE_FOLDER 46 | elif city_name == "London": 47 | stv_img_dir = LONDON_STV_IMAGE_FOLDER 48 | elif city_name == "NewYork": 49 | pass  # TODO: NewYork street-view folder is not defined in config; stv_img_dir stays unset 50 | 51 | 52 | 53 | input_path = os.path.join(work_dir, "stv_poi_landmark_update.jsonl") 54 | with open(input_path, "r") as f: 55 | data = [json.loads(line) for line in f] 56 | 57 | output = [] 58 | 59 | all_choices = [d["text"] for d in data] 60 | all_choices = list(set(all_choices)) 61 | 62 | for d in tqdm.tqdm(data): 63 | # img_name is stored as an absolute path; keep only the basename 64 | img_name = d["img_name"].split('/')[-1] 65 | text = d["text"] 66 | 67 | other_choices = random.sample([t for t in all_choices if t != d["text"]], 3) 68 | choices = [text] + other_choices 69 | random.seed(data.index(d)); random.shuffle(choices)  # shuffle's random= parameter is deprecated and was removed in Python 3.11 70 | 71 | reference = chr(ord('A') + choices.index(text)) 72 | 73 | prompt = prompt_template(choices[0], choices[1], choices[2], choices[3]) 74 | 75 | output.append({ 76 | "prompt": prompt, 77 | "choices": choices, 78 | "reference": reference, 79 | "image": os.path.join(stv_img_dir, img_name) 80 | }) 81 | 82 | 83 | 84 | output_dir = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name) 85 | os.makedirs(output_dir, exist_ok=True) 86 | 87 | 88 | test = random.sample(output, min(200, len(output))) 89 | train = [d for d in output if d not in test] 90 | 91 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 92 | json.dump(test, f, indent=4, ensure_ascii=False) 93 | 94 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 95 | print("Test size:", len(test)) 96 | 97 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 98 | json.dump(train, f, indent=4, ensure_ascii=False) 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 100 | print("Train size:", len(train)) 101 | 102 | all_train_data.extend(train) 103 | all_test_data.extend(test) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to
{os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 109 | print("Total train size:", len(all_train_data)) 110 | 111 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_test.json"), "w") as f: 112 | json.dump(all_test_data, f, indent=4, ensure_ascii=False) 113 | 114 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_test.json')}") 115 | print("Total test size:", len(all_test_data)) 116 | -------------------------------------------------------------------------------- /simulate/annotate/stv_description_gpt.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI # >=1.0, test version 1.16.0 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from concurrent.futures import ThreadPoolExecutor, as_completed 10 | 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | API_KEY_MAPPING = { 14 | "siliconflow": "SiliconFlow_API_KEY", 15 | "DeepInfra": "DeepInfra_API_KEY", 16 | "OpenAI": "OpenAI_API_KEY" 17 | } 18 | API_URL_MAPPING = { 19 | "siliconflow": "https://api.siliconflow.cn/v1", 20 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 21 | "OpenAI": "https://api.openai.com/v1" 22 | } 23 | API_TYPE = "OpenAI" 24 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 25 | API_URL = API_URL_MAPPING[API_TYPE] 26 | 27 | def encode_image(image_path): 28 | with open(image_path, "rb") as image_file: 29 | return base64.b64encode(image_file.read()).decode("utf-8") 30 | 31 | def generate_description(client, img_name, image_dir, model_name): 32 | img_url = os.path.join(image_dir, img_name) 33 | if not os.path.exists(img_url): 34 | return None 35 | 36 | base64_image = encode_image(img_url) 37 | 38 | prompt = ''' 39 | Please describe in detail the given image following the principles: 40 | (1) Describe object attributes, including object quantity, color, material, shape, size; 41 | (2) Describe the spatial relationship between objects, including the relative position of objects, the distance between objects, and the direction of objects; 42 | (3) Only describe content that you are highly confident about. 43 | (4) Do not describe the contents by itemizing them in list form. 44 | (5) Make sure the description is coherent and fluent.
45 | ''' 46 | 47 | dialogs = [{ 48 | "role": "user", 49 | "content": [ 50 | {"type": "text", "text": prompt}, 51 | {"type": "image_url", "image_url": { 52 | "url": f"data:image/png;base64,{base64_image}" 53 | }} 54 | ] 55 | }] 56 | 57 | try: 58 | completion = client.chat.completions.create( 59 | model=model_name, 60 | messages=dialogs, 61 | max_tokens=1024, 62 | temperature=0 63 | ) 64 | return { 65 | "img_name": img_url, 66 | "text": completion.choices[0].message.content.strip(), 67 | } 68 | except Exception as e: 69 | print(f"Error processing {img_name}: {e}") 70 | return None 71 | 72 | if __name__ == "__main__": 73 | parser = argparse.ArgumentParser() 74 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 75 | parser.add_argument('--work_dir', type=str, default='../../data/') 76 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 77 | args = parser.parse_args() 78 | 79 | city = args.city 80 | work_dir = args.work_dir 81 | working_dir = work_dir + f"dev-{city}" 82 | # TODO: Change the following path to the actual path 83 | image_dir = f"....../ThreeCityImage/{city}/StreetView" 84 | model_name = args.model_name 85 | 86 | if API_TYPE == "OpenAI": 87 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 88 | elif API_TYPE == "siliconflow": 89 | client = OpenAI(base_url=API_URL, api_key=API_KEY) 90 | elif API_TYPE == "DeepInfra": 91 | model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct" 92 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 93 | 94 | img_set = [] 95 | for zl in ['zl15', 'zl17']: 96 | stv_in_sat_path = f"{working_dir}/stv_in_sat_{city}_{zl}.csv" 97 | assert os.path.exists(stv_in_sat_path), f"{stv_in_sat_path} does not exist." 98 | df = pd.read_csv(stv_in_sat_path) 99 | img_set.extend(df['image_name'].tolist()) 100 | 101 | img_set = list(set(img_set)) 102 | assert os.path.exists(image_dir), f"{image_dir} does not exist." 
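    # A possible resumability tweak (sketch, not in the original script): the results
    # file is opened in append mode below, so re-running this script duplicates records.
    # One way to make runs resumable is to drop images that already have an entry:
    #
    #     done_path = os.path.join(working_dir, "stv_description.jsonl")
    #     if os.path.exists(done_path):
    #         with open(done_path) as f:
    #             done = {json.loads(line)["img_name"] for line in f}
    #         img_set = [n for n in img_set
    #                    if os.path.join(image_dir, n) not in done]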
103 | print(f"Start generating descriptions for {len(img_set)} images.") 104 | 105 | with ThreadPoolExecutor(max_workers=128) as executor: 106 | futures = {executor.submit(generate_description, client, img_name, image_dir, model_name): img_name for img_name in img_set} 107 | with open(os.path.join(working_dir, "stv_description.jsonl"), "a") as fout: 108 | for future in tqdm.tqdm(as_completed(futures), total=len(futures)): 109 | result = future.result() 110 | if result: 111 | fout.write(json.dumps(result, ensure_ascii=False) + "\n") 112 | 113 | -------------------------------------------------------------------------------- /serving/vlm_serving.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import transformers 4 | 5 | from functools import partial 6 | from config import VLLM_MODEL_PATH, VLM_MODELS 7 | 8 | 9 | class VLMWrapper: 10 | def __init__(self, model_name, max_new_tokens=1000): 11 | self.model_name = model_name 12 | assert self.model_name in VLM_MODELS 13 | 14 | transformers_version_436=["VILA1.5-3b", "Llama-3-VILA1.5-8b", "VILA1.5-13b"] 15 | transformers_version_437=["cogvlm2-llama3-chat-19B", "InternVL2-40B", "llava_v1.5_7b", "Yi_VL_6B", "Yi_VL_34B", 16 | "InternVL2-2B", "InternVL2-4B", "InternVL2-8B", "InternVL2-26B"] 17 | transformers_version_440=["MiniCPM-Llama3-V-2_5"] 18 | transformers_version_444=["llava_next_yi_34b", "llava_next_llama3", "glm-4v-9b"] 19 | trainformers_version_latest = ["Qwen2-VL-7B-Instruct", "Qwen2-VL-2B-Instruct"] 20 | # The following is an example of the model name of UrbanLLaVA 21 | # TODO: Add your model name of UrbanLLaVA here 22 | transformers_version_UrbanLLaVA=["UrbanLLaVA-8b-mix-v1"] 23 | 24 | # Install the correct version of transformers 25 | if self.model_name in transformers_version_UrbanLLaVA: 26 | if transformers.__version__ != "4.36.2": 27 | os.system("pip install transformers==4.36.2") 28 | elif self.model_name in transformers_version_436: 29 | if transformers.__version__ != "4.36.2": 30 | os.system("pip install transformers==4.36.2") 31 | elif self.model_name in transformers_version_437: 32 | if transformers.__version__ != "4.37.0": 33 | os.system("pip install transformers==4.37.0") 34 | elif self.model_name in transformers_version_440: 35 | if transformers.__version__ != "4.40.0": 36 | os.system("pip install transformers==4.40.0") 37 | elif self.model_name in transformers_version_444: 38 | if transformers.__version__ != "4.44.2": 39 | os.system("pip install transformers==4.44.2") 40 | elif self.model_name in trainformers_version_latest: 41 | if transformers.__version__ != "4.45.0.dev0": 42 | os.system("pip install git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830 accelerate") 43 | else: 44 | print("no need to update transformers") 45 | 46 | # place this line after the command "pip install" 47 | try: 48 | from vlmeval.config import supported_VLM 49 | from vlmeval.vlm import VILA 50 | from functools import partial 51 | except Exception as e: 52 | print(e) 53 | print("need to run this script in vlmeval") 54 | 55 | 56 | # only update local model path 57 | for model_name in transformers_version_436 + transformers_version_437 + transformers_version_440 + transformers_version_444 + trainformers_version_latest: 58 | original_func = supported_VLM[model_name] 59 | if "glm" in model_name or "cogvlm" in model_name: 60 | supported_VLM[model_name] = partial(original_func.func, 61 | model_path=VLLM_MODEL_PATH[model_name], 62 | 
69 | try: 70 | for model_name in transformers_version_UrbanLLaVA: 71 | supported_VLM[model_name]=partial(VILA, model_path=VLLM_MODEL_PATH[model_name], max_new_tokens=max_new_tokens) 72 | except Exception as e: 73 | print(e) 74 | print("UrbanLLaVA is not supported") 75 | 76 | self.enable_proxy() 77 | self.model = supported_VLM[self.model_name]() 78 | 79 | def get_vlm_model(self): 80 | return self.model 81 | 82 | def enable_proxy(self): 83 | # set proxy for OpenAI models 84 | if self.model_name in ["GPT4o", "GPT4o_MINI"]: 85 | os.environ["http_proxy"] = 'http://127.0.0.1:10190' 86 | os.environ["https_proxy"] = 'http://127.0.0.1:10190' 87 | 88 | 89 | def clean_proxy(self): 90 | try: 91 | del os.environ["http_proxy"] 92 | del os.environ["https_proxy"] 93 | except Exception as e: 94 | print("Failed to delete the proxy environment variables") 95 | -------------------------------------------------------------------------------- /simulate/annotate/stv_landmark_gpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import json 4 | from tqdm import tqdm, trange 5 | import argparse 6 | import httpx 7 | import base64 8 | from openai import OpenAI # >=1.0, tested with version 1.16.0 9 | from concurrent.futures import ThreadPoolExecutor, as_completed 10 | 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | API_KEY_MAPPING = { 14 | "siliconflow": "SiliconFlow_API_KEY", 15 | "DeepInfra": "DeepInfra_API_KEY", 16 | "OpenAI": "OpenAI_API_KEY" 17 | } 18 | API_URL_MAPPING = { 19 | "siliconflow": "https://api.siliconflow.cn/v1", 20 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 21 | "OpenAI": "https://api.openai.com/v1" 22 | } 23 | API_TYPE = "OpenAI" 24 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 25 | API_URL = API_URL_MAPPING[API_TYPE] 26 | 27 | def encode_image(image_path): 28 | with open(image_path, "rb") as image_file: 29 | return base64.b64encode(image_file.read()).decode("utf-8") 30 | 31 | def process_row(client, df, cnt, image_dir, model_name, working_dir): 32 | image_name = df.at[cnt, 'image_name'] 33 | image_url = os.path.join(image_dir, image_name) 34 | if not os.path.exists(image_url): 35 | return None 36 | 37 | base64_image = encode_image(image_url) 38 | near_pois = df.at[cnt, 'feature_names'] 39 | 40 | prompt = ''' 41 | You are given one street view image and the nearest 10 POIs as background information. 42 | The nearest POIs are ''' + str(near_pois) + '''. 43 | Based on the given POIs and the image, please use LESS THAN FIVE WORDS to describe what the landmark in the image is. 44 | A landmark is a recognizable natural or artificial feature used for navigation, for example, a building, a statue, a bridge, etc. 45 | Please give the name of the landmark and briefly describe it if possible. For example, "Eiffel Tower" and "a tall iron tower". 46 | Keep your response short and concise, USE LESS THAN FIVE WORDS to describe the landmark.
47 | ''' 48 | 49 | dialogs = [{ 50 | "role": "user", 51 | "content": [ 52 | {"type": "text", "text": prompt}, 53 | {"type": "image_url", "image_url": { 54 | "url": f"data:image/png;base64,{base64_image}" 55 | }} 56 | ] 57 | }] 58 | 59 | try: 60 | completion = client.chat.completions.create( 61 | model=model_name, 62 | messages=dialogs, 63 | max_tokens=2048, 64 | temperature=0 65 | ) 66 | 67 | return { 68 | "img_name": image_url, 69 | "text": completion.choices[0].message.content.strip() 70 | } 71 | except Exception as e: 72 | print(f"Error processing row {cnt}: {e}") 73 | return None 74 | 75 | if __name__ == "__main__": 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 78 | parser.add_argument('--work_dir', type=str, default='../../data/') 79 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 80 | args = parser.parse_args() 81 | 82 | city = args.city 83 | work_dir = args.work_dir 84 | working_dir = work_dir + f'dev-{city}/' 85 | model_name = args.model_name 86 | 87 | if API_TYPE == "OpenAI": 88 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 89 | elif API_TYPE == "siliconflow": 90 | client = OpenAI(base_url=API_URL, api_key=API_KEY) 91 | elif API_TYPE == "DeepInfra": 92 | model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct" 93 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 94 | 95 | # TODO: Change the following path to the actual path 96 | image_dir = f"....../ThreeCityImage/{city}/StreetView" 97 | 98 | if os.path.exists(working_dir + "stv_poi_landmark_update.jsonl"): 99 | os.remove(working_dir + "stv_poi_landmark_update.jsonl") 100 | print(f"Removed {working_dir}stv_poi_landmark_update.jsonl") 101 | 102 | for zl in ['zl15', 'zl17']: 103 | df = pd.read_csv(working_dir + f'stv_in_sat_nearest_features_update_{city}_{zl}.csv') 104 | output_file = working_dir + "stv_poi_landmark_update.jsonl" 105 | 106 | with ThreadPoolExecutor(max_workers=128) as executor: 107 | futures = {executor.submit(process_row, client, df, cnt, image_dir, model_name, working_dir): cnt for cnt in range(len(df))} 108 | 109 | with open(output_file, "a") as fout: 110 | for future in tqdm(as_completed(futures), total=len(futures)): 111 | result = future.result() 112 | if result: 113 | fout.write(json.dumps(result, ensure_ascii=False) + "\n") 114 | 115 | -------------------------------------------------------------------------------- /simulate/format/uni_mc_SAT_landuse.py: -------------------------------------------------------------------------------- 1 | # This script converts land-use annotations into multiple-choice questions for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | from sklearn.model_selection import train_test_split 9 | random.seed(0) 10 | 11 | 12 | 13 | def prompt_template(choice1, choice2, choice3, choice4): 14 | s = f""" 15 | The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image. 16 | A. {choice1} 17 | B. {choice2} 18 | C. {choice3} 19 | D. {choice4} 20 | Please choose the most suitable one among A, B, C and D as the answer to this question. 21 | Please output the option directly. No need for explanation.\n 22 | """ 23 | 24 | return s.strip() 25 | 
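# For reference, an illustrative call (these choices are made up):
#     prompt_template("Retail", "Forest", "Residential", "Railway")
# renders, up to indentation, as:
#     The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image.
#     A. Retail
#     B. Forest
#     C. Residential
#     D. Railway
#     Please choose the most suitable one among A, B, C and D as the answer to this question.
#     Please output the option directly. No need for explanation.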
26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 29 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 30 | parser.add_argument('--work_dir', type=str, default='../../data/') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | work_dir = args.work_dir 36 | 37 | cur_dir = os.path.join(work_dir, f"dev-{city_name}/") 38 | 39 | all_train_data = [] 40 | for zl in ['zl15', 'zl17']: 41 | sat_img_dir = cur_dir + f"sample_sat_image_{zl}/" 42 | 43 | output = [] 44 | 45 | if os.path.exists(f"rs_landuse_description_{zl}.jsonl"): 46 | os.remove(f"rs_landuse_description_{zl}.jsonl") 47 | print(f"Removed rs_landuse_description_{zl}.jsonl") 48 | df = pd.read_csv(cur_dir + f"SAT_{city_name}_{zl}.csv") 49 | 50 | for cnt in range(len(df)): 51 | img_name = df.at[cnt,'img_name'].split('.')[0] 52 | 53 | if not os.path.exists(cur_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 54 | continue 55 | 56 | with open(cur_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 57 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 58 | lines = file.readlines() 59 | 60 | landuse_types_list = ["Retail", "Recreation_ground", "Commercial", "Residential", "Grass", "Forest", "Construction", "Meadow", "Garages", "Railway", "Brownfield", "Farmland", "Religious", "Industrial", "Recreation"] 61 | 62 | for line in lines: 63 | parts = line.split('location:') 64 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 65 | if not landuse_type in landuse_types_list: 66 | landuse_types_list.append(landuse_type) 67 | i = 0 68 | for line in lines: 69 | parts = line.split('location:') 70 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 71 | assert landuse_type in landuse_types_list, landuse_type 72 | 73 | other_choices = [d for d in landuse_types_list if d != landuse_type] 74 | other_choices = random.sample(other_choices, 3) 75 | choices = [landuse_type] + other_choices 76 | random.shuffle(choices) 77 | i += 1 78 | reference = chr(ord('A') + choices.index(landuse_type)) 79 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 80 | 81 | 82 | output.append({ 83 | "prompt": prompt, 84 | "choices": choices, 85 | "reference": reference, 86 | "image": os.path.join(sat_img_dir, img_name + '.png') 87 | }) 88 | 89 | test = random.sample(output, min(200, len(output))) 90 | train = [d for d in output if d not in test] 91 | all_train_data.extend(train) 92 | output_dir = os.path.join(cur_dir, "uni_image_data", task_name, city_name) 93 | 94 | os.makedirs(output_dir, exist_ok=True) 95 | 96 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 97 | json.dump(test, f, indent=4, ensure_ascii=False) 98 | 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 100 | print("Test size:", len(test)) 101 | 102 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 103 | json.dump(train, f, indent=4, ensure_ascii=False) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 109 | print("Total train size:", len(all_train_data)) 110 | 
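The train/test split above compares whole dicts (d not in test), which is quadratic in the number of questions and silently drops any training item whose dict happens to equal a sampled test item. A minimal index-based sketch that avoids both (illustrative only; output is the question list built above):

import random
random.seed(0)
idx = list(range(len(output)))
random.shuffle(idx)
test_idx = set(idx[:min(200, len(output))])
test = [output[i] for i in sorted(test_idx)]
train = [output[i] for i in range(len(output)) if i not in test_idx]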
-------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_convert.py: -------------------------------------------------------------------------------- 1 | # This script converts land-use annotations into multiple-choice questions for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | from sklearn.model_selection import train_test_split 9 | random.seed(0) 10 | 11 | from config import UNI_IMAGE_FOLDER 12 | 13 | 14 | def prompt_template(choice1, choice2, choice3, choice4): 15 | s = f""" 16 | The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image. 17 | A. {choice1} 18 | B. {choice2} 19 | C. {choice3} 20 | D. {choice4} 21 | Please choose the most suitable one among A, B, C and D as the answer to this question. 22 | Please output the option directly. No need for explanation.\n 23 | """ 24 | 25 | return s.strip() 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 30 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | 36 | work_dir = UNI_IMAGE_FOLDER 37 | 38 | work_dir = os.path.join(work_dir, f"{city_name}/") 39 | 40 | all_train_data = [] 41 | for zl in ['zl15', 'zl17']: 42 | sat_img_dir = work_dir + f"sample_sat_image_{zl}/" 43 | 44 | output = [] 45 | 46 | if os.path.exists(f"rs_landuse_description_{zl}.jsonl"): 47 | os.remove(f"rs_landuse_description_{zl}.jsonl") 48 | print(f"Removed rs_landuse_description_{zl}.jsonl") 49 | df = pd.read_csv(work_dir + f"SAT_{city_name}_{zl}.csv") 50 | 51 | for cnt in range(len(df)): 52 | img_name = df.at[cnt,'img_name'].split('.')[0] 53 | 54 | if not os.path.exists(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 55 | continue 56 | 57 | with open(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 58 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 59 | lines = file.readlines() 60 | 61 | landuse_types_list = ["Retail", "Recreation_ground", "Commercial", "Residential", "Grass", "Forest", "Construction", "Meadow", "Garages", "Railway", "Brownfield", "Farmland", "Religious", "Industrial", "Recreation"] 62 | 63 | for line in lines: 64 | parts = line.split('location:') 65 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 66 | if not landuse_type in landuse_types_list: 67 | landuse_types_list.append(landuse_type) 68 | i = 0 69 | for line in lines: 70 | parts = line.split('location:') 71 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 72 | assert landuse_type in landuse_types_list, landuse_type 73 | 74 | other_choices = [d for d in landuse_types_list if d != landuse_type] 75 | other_choices = random.sample(other_choices, 3) 76 | choices = [landuse_type] + other_choices 77 | random.shuffle(choices) 78 | i += 1 79 | reference = chr(ord('A') + choices.index(landuse_type)) 80 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 81 | 82 | 83 | output.append({ 84 | "prompt": prompt, 85 | "choices": choices, 86 
| "reference": reference, 87 | "image": os.path.join(sat_img_dir, img_name + '.png') 88 | }) 89 | 90 | test = random.sample(output, min(200, len(output))) 91 | train = [d for d in output if d not in test] 92 | all_train_data.extend(train) 93 | output_dir = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name) 94 | os.makedirs(output_dir, exist_ok=True) 95 | 96 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 97 | json.dump(test, f, indent=4, ensure_ascii=False) 98 | 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 100 | print("Test size:", len(test)) 101 | 102 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 103 | json.dump(train, f, indent=4, ensure_ascii=False) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 109 | print("Total train size:", len(train)) 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /simulate/advance/CoT/stv_address_cot/gen_CoT_template.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import pandas as pd 4 | from tqdm import trange 5 | import os 6 | import random 7 | random.seed(0) 8 | 9 | # Generate CoT ground truth 10 | # Three reasoning steps: 11 | # 1. Tell the city name 12 | # 2. Extract the location's pois around 13 | # 3. Tell the location's address 14 | 15 | 16 | def stv_prompt_template(city_name:str, near_feature:str, description:str, address:str): 17 | """ 18 | Generate the prompt for the street view task 19 | """ 20 | prompt = f""" 21 | Step 1: Describe the street view image: 22 | This is a street view image, in thie image, {description}. 23 | Step 2: Tell the city name: 24 | According to the street view image, this is probably in {city_name}. 25 | Step 3: Extract the location's features around: 26 | The street view image is taken in a region with the following features: {near_feature}. 27 | Step 4: Tell the location's address: 28 | Based on my observation and knowledge about this region, the address of this region is {address}. 
29 | """ 30 | prompt = str(prompt).replace('\n', ' ').strip() 31 | 32 | return prompt 33 | 34 | if __name__ == "__main__": 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 37 | parser.add_argument('--work_dir', type=str, default='../../data/') 38 | parser.add_argument('--task', type=str, default='stv-address-cot', choices=['stv-address-cot', 'sat-address-cot']) 39 | args = parser.parse_args() 40 | city = args.city 41 | work_dir = args.work_dir 42 | task = args.task 43 | work_dir = work_dir + f'dev-{city}' 44 | output_dir = os.path.join(work_dir, 'CoT', task) 45 | import os 46 | os.makedirs(output_dir, exist_ok=True) 47 | 48 | CITY_NAME = city 49 | 50 | stv_output = [] 51 | 52 | 53 | for zl in ["zl15", "zl17"]: 54 | # Streetview 55 | address_csv = os.path.join(work_dir, f'stv_in_sat_address_deploy_{zl}.csv') 56 | near_feature_csv = os.path.join(work_dir, f'stv_in_sat_nearest_features_update_{city}_{zl}.csv') 57 | stv_description_jsonl = os.path.join(work_dir, f'stv_description.jsonl') 58 | 59 | addr_df = pd.read_csv(address_csv) 60 | near_feature_df = pd.read_csv(near_feature_csv) 61 | with open(stv_description_jsonl, 'r') as f: 62 | stv_description = f.readlines() 63 | stv_description = [json.loads(x) for x in stv_description] 64 | 65 | addr_df = addr_df[:1000] 66 | 67 | for i in trange(len(addr_df)): 68 | image_name = addr_df.loc[i, 'image_name'] 69 | address = addr_df.loc[i, 'adr'] 70 | for j in range(len(stv_description)): 71 | image_name2 = stv_description[j]['img_name'].split('/')[-1] 72 | if image_name == image_name2: 73 | description = stv_description[j]['text'] 74 | for k in range(len(near_feature_df)): 75 | if near_feature_df.loc[k, 'image_name'] == image_name: 76 | near_feature = near_feature_df.loc[k, 'feature_names'] 77 | 78 | prompt = stv_prompt_template(CITY_NAME, near_feature, description, address) 79 | stv_output.append({ 80 | "img_name": image_name, 81 | "CoT": prompt, 82 | "address": address, 83 | "description": description, 84 | "near_feature": near_feature 85 | }) 86 | 87 | 88 | 89 | # for i in trange(len(addr_df)): 90 | # region_name = addr_df.loc[i, 'region_nam'] 91 | # sid = addr_df.loc[i, 'sid'] 92 | # adr = addr_df.loc[i, 'adr'] 93 | # near_feature = None 94 | # for j in range(len(near_feature_df)): 95 | # if near_feature_df.loc[j, 'sid'] == sid: 96 | # near_feature = near_feature_df.loc[j, 'feature_names'] 97 | # break 98 | 99 | # if near_feature is not None: 100 | # near_feature_lst = str(near_feature).split(',') 101 | # near_feature_lst = [x for x in near_feature_lst if x != '' and not x.isdigit()] 102 | # prompt = stv_prompt_template(CITY_NAME, near_feature_lst, adr) 103 | # stv_output.append({ 104 | # "region_name": region_name, 105 | # "sid": sid, 106 | # "adr": adr, 107 | # "near_feature": near_feature_lst, 108 | # "CoT": prompt 109 | # }) 110 | 111 | 112 | with open(os.path.join(output_dir, f'{task}_{CITY_NAME}.json'), 'w') as f: 113 | json.dump(stv_output, f, indent=4, ensure_ascii=False) 114 | 115 | print(f"Total number of CoT: {len(stv_output)}") 116 | print(f"Saving CoT to {os.path.join(output_dir, f'{task}_{CITY_NAME}.json')}") 117 | 118 | 119 | -------------------------------------------------------------------------------- /evaluate/outdoor_navigation/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import base64 6 | 7 | from config 
import NAVIGATION_IMAGE_FOLDER, NAVIGATION_URL_PATH 8 | 9 | def encode_image(image_path): 10 | with open(image_path, "rb") as image_file: 11 | return base64.b64encode(image_file.read()).decode('utf-8') 12 | 13 | def haversine_distance(lat1, lon1, lat2, lon2): 14 | lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2]) 15 | # haversine formula on a sphere of radius 6,371 km; returns meters 16 | dlat = lat2 - lat1 17 | dlon = lon2 - lon1 18 | a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2 19 | c = 2 * math.asin(math.sqrt(a)) 20 | r = 6371000 21 | return c * r 22 | 23 | def extract_coords_from_filename(city, image_filename): 24 | meta_file = os.path.join(NAVIGATION_IMAGE_FOLDER, f"{city}_StreetView_Images/combined_stitch_meta_info.csv") 25 | 26 | parts = image_filename.split('_') 27 | dataset_name = parts[0] 28 | sid_84_long = parts[1] 29 | sid_84_lat = parts[2] 30 | sid = parts[3].split('.')[0] 31 | 32 | df = pd.read_csv(meta_file) 33 | 34 | matched_row = df[(df['sid_84_long'] == float(sid_84_long)) & 35 | (df['sid_84_lat'] == float(sid_84_lat)) & 36 | (df['sid'] == sid)] 37 | 38 | return matched_row.iloc[0]['longitude_origin'], matched_row.iloc[0]['latitude_origin'] 39 | 40 | 41 | 42 | def calculate_distance(city, last_image_url, cur_image_url): 43 | url_file = NAVIGATION_URL_PATH 44 | url_df = pd.read_csv(url_file) 45 | last_image_name = url_df.loc[url_df['image_url'] == last_image_url, 'image_name'].values[0] 46 | cur_image_name = url_df.loc[url_df['image_url'] == cur_image_url, 'image_name'].values[0] 47 | 48 | meta_file = os.path.join(NAVIGATION_IMAGE_FOLDER, f"{city}_StreetView_Images/combined_stitch_meta_info.csv") 49 | meta_df = pd.read_csv(meta_file) 50 | if city not in ["Beijing", "Shanghai"]: 51 | last_image_coords = meta_df.loc[meta_df['file_name'] == last_image_name, ['query_longti', 'query_lati']].iloc[0] 52 | cur_image_coords = meta_df.loc[meta_df['file_name'] == cur_image_name, ['query_longti', 'query_lati']].iloc[0] 53 | 54 | distance = haversine_distance(last_image_coords['query_lati'], last_image_coords['query_longti'], 55 | cur_image_coords['query_lati'], cur_image_coords['query_longti']) 56 | else: 57 | last_image_lng, last_image_lat = extract_coords_from_filename(city, last_image_name) 58 | cur_image_lng, cur_image_lat = extract_coords_from_filename(city, cur_image_name) 59 | distance = haversine_distance(last_image_lat, last_image_lng, cur_image_lat, cur_image_lng) 60 | return distance 61 | 62 | 63 | def calculate_direction(current_end, next_start): 64 | dx = next_start[0] - current_end[0] 65 | dy = next_start[1] - current_end[1] 66 | 67 | if abs(dx) > abs(dy): 68 | if dx > 0: 69 | return "right" 70 | else: 71 | return "left" 72 | else: 73 | if dy > 0: 74 | return "forward" 75 | else: 76 | return "forward" # the action space has no "backward", so both vertical cases map to forward 77 | 
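# Rough sanity check for haversine_distance (illustrative coordinates only):
# Tiananmen (39.9087 N, 116.3975 E) to the Forbidden City (39.9163 N, 116.3972 E)
# spans about 0.0076 degrees of latitude, so
#     haversine_distance(39.9087, 116.3975, 39.9163, 116.3972)
# should return roughly 850 (meters).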
78 | def get_basic_prompt(): 79 | basic_prompt = f""" 80 | You are tasked with guiding a virtual traveler through a series of street view images along a specific route. With each image provided: 81 | 82 | Describe the Image: Identify and describe any prominent landmarks, features, or unique characteristics visible in the photo. This may include notable buildings, distinctive shops, interesting street art, or any other element that stands out. 83 | 84 | Action Decision: For each image, I will also provide the navigation action decision that needs to be taken at that location (e.g., turn left, go straight, turn right, or stop). You must integrate this action decision into your description, using the landmarks as reference points. For example, you might say, "At the red cafe with the large windows on your left, turn right to head towards the park with the fountain." 85 | 86 | Remember, your descriptions should not include URL links to images or the word 'image'. Instead, they should provide a clear, concise, and complete guide using landmarks that will be paired with the images I provide. And you must integrate the action decision with the image description. This ensures that anyone using your descriptions and my images can successfully navigate and reach their destination. 87 | 88 | Here are the images and action decisions for each step of the route: 89 | 90 | """ 91 | return basic_prompt 92 | 93 | def get_prompt_eval(): 94 | basic_prompt = f""" 95 | Navigate to the described target location! 96 | Action Space: forward, left, right, stop 97 | - If you choose "forward", proceed for 50 meters. 98 | - If you choose "left" or "right", make the turn at the next intersection. 99 | - If you believe you have reached the destination, please select "stop". 100 | - Format your response as follows:\n 101 | Reason: Action: 102 | 103 | Navigation Instructions: 104 | """ 105 | return basic_prompt 106 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert an address QA into a multiple-choice question for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | from tqdm import trange 10 | from config import UNI_IMAGE_FOLDER, BEIJING_STV_IMAGE_FOLDER, LONDON_STV_IMAGE_FOLDER 11 | 12 | def prompt_template(choice1, choice2, choice3, choice4): 13 | s = f""" 14 | The following is a multiple-choice question about selecting the most appropriate address for a street view image. 15 | A. {choice1} 16 | B. {choice2} 17 | C. {choice3} 18 | D. {choice4} 19 | Please choose the most suitable one among A, B, C and D as the answer to this question. 20 | Please output the option directly. 
No need for explanation.\n 21 | """ 22 | 23 | return s.strip() 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 28 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 29 | args = parser.parse_args() 30 | 31 | city_name = args.city_name 32 | task_name = args.task_name 33 | 34 | work_dir = UNI_IMAGE_FOLDER 35 | 36 | cur_dir = os.path.join(work_dir, f"{city_name}/") 37 | 38 | 39 | all_train_data = [] 40 | 41 | for zl in ['zl15', 'zl17']: 42 | sat_address_file = cur_dir + f"stv_in_sat_address_deploy_{zl}.csv" 43 | df = pd.read_csv(sat_address_file) 44 | # remove the rows with empty address 45 | df = df.dropna(subset=["adr"]) 46 | 47 | # randomly shuffle df to avoid the same addresses appearing in the same order 48 | df = df.sample(frac=1, random_state=0).reset_index(drop=True) 49 | for i in range(len(df)): 50 | df.at[i, "img_name"] = df.iloc[i]["image_name"] 51 | 52 | if city_name == "Beijing": 53 | stv_img_dir = BEIJING_STV_IMAGE_FOLDER 54 | elif city_name == "London": 55 | stv_img_dir = LONDON_STV_IMAGE_FOLDER 56 | elif city_name == "NewYork": 57 | pass # TODO: NewYork street-view folder is not configured here yet 58 | 59 | 60 | print("Input file:", sat_address_file) 61 | print("Valid records:", len(df)) 62 | 63 | output = [] 64 | 65 | for i in range(len(df)): 66 | row = df.iloc[i] 67 | img_name = row["img_name"] 68 | adr = row["adr"] 69 | 70 | # print(combined_adr) 71 | 72 | assert os.path.exists(os.path.join(stv_img_dir, img_name)), f"Image {os.path.join(stv_img_dir, img_name)} not found" 73 | 74 | # Randomly select 3 other addresses 75 | valid_choices = df[(df["img_name"] != img_name) & (df["adr"] != adr)]["adr"].unique() 76 | if len(valid_choices) < 3: 77 | raise ValueError(f"Not enough valid choices to sample for image {img_name}") 78 | 79 | # other_choices = df[(df["img_name"] != img_name) & (df["adr"] != adr)].sample(3)["adr"].tolist() 80 | other_choices = random.sample(list(valid_choices), 3) 81 | choices = [adr] + other_choices 82 | 83 | assert len(list(set(choices))) == 4 84 | 85 | random.seed(i); random.shuffle(choices) # reseed per question for a reproducible order 86 | 87 | # print(choices) 88 | 89 | reference = chr(ord('A') + choices.index(adr)) 90 | 91 | # print(reference) 92 | 93 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 94 | 95 | # print(prompt) 96 | 97 | output.append({ 98 | "prompt": prompt, 99 | "choices": choices, 100 | "reference": reference, 101 | "image": os.path.join(stv_img_dir, img_name) 102 | }) 103 | 104 | # print(output[-1]) 105 | 106 | # exit() 107 | 108 | # os.makedirs(f"./{task_name}/{city_name}", exist_ok=True) 109 | output_dir = os.path.join(work_dir, task_name, city_name) 110 | os.makedirs(output_dir, exist_ok=True) 111 | 112 | test = random.sample(output, min(200, len(output))) 113 | train = [d for d in output if d not in test] 114 | 115 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 116 | json.dump(test, f, indent=4, ensure_ascii=False) 117 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 118 | print("Test size:", len(test)) 119 | 120 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 121 | json.dump(train, f, indent=4, ensure_ascii=False) 122 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 123 | print("Train size:", len(train)) 124 | 125 | all_train_data.extend(train) 126 | 127 
| with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 128 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 129 | 130 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 131 | print("Total train size:", len(all_train_data)) 132 | 133 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_cross_stv_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | Please shorten the text to 2048 tokens or less, keeping the most important information like the reference answer. 36 | Remove redundant or less helpful information. Only keep the most important parts that can help with the task. 37 | You can make necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 38 | Don't change the meaning of the text. 39 | Only output the polished text, without any additional information or appended text. 
40 | Here is the original text: 41 | {og_text} 42 | ''' 43 | 44 | dialogs = [{ 45 | "role": "user", 46 | "content": [{"type": "text", "text": prompt}] 47 | }] 48 | 49 | try: 50 | completion = client.chat.completions.create( 51 | model=model_name, 52 | messages=dialogs, 53 | max_tokens=2048, 54 | temperature=0.1, 55 | ) 56 | return completion.choices[0].message.content 57 | except Exception as e: 58 | print(e) 59 | return "" 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 64 | parser.add_argument('--work_dir', type=str, default='../../data/') 65 | parser.add_argument('--task', type=str, default='sat-cross-stv-cot', choices=['sat-cross-stv-cot', 'sat-address-cot']) 66 | args = parser.parse_args() 67 | 68 | city = args.city 69 | work_dir = args.work_dir 70 | task = args.task 71 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 72 | 73 | model_name = "gpt-4o-2024-08-06" 74 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 75 | 76 | for dataset in ["SAT_STV_location_CoT", "SAT_STV_mapping_CoT"]: 77 | og_path = f"{dataset}_{city}.json" 78 | og_path = os.path.join(CoT_dir, og_path) 79 | 80 | with open(og_path, 'r') as f: 81 | og_data = json.load(f) 82 | 83 | output = [] 84 | with ThreadPoolExecutor(max_workers=128) as executor: 85 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 86 | 87 | for future in tqdm(futures): 88 | item = futures[future] 89 | polished_CoT = future.result() 90 | output.append({ 91 | "image": item["image"], 92 | "prompt": item["prompt"], 93 | "polished_CoT": polished_CoT, 94 | "og_CoT": item["CoT"], 95 | "reference": item["reference"] 96 | }) 97 | 98 | output_path = os.path.join(CoT_dir, f'polished_{dataset}_{city}.json') 99 | 100 | with open(output_path, 'w') as f: 101 | json.dump(output, f, indent=4, ensure_ascii=False) 102 | 103 | print(f"Polished {dataset} for {city} saved to {output_path}") 104 | print("Length of output:", len(output)) 105 | 106 | # for zl in ['zl15', 'zl17']: 107 | # og_path = f"{task}_{city}_{zl}.json" 108 | # og_path = os.path.join(CoT_dir, og_path) 109 | 110 | # with open(og_path, 'r') as f: 111 | # og_data = json.load(f) 112 | 113 | # output = [] 114 | 115 | # with ThreadPoolExecutor(max_workers=128) as executor: 116 | # futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 117 | 118 | # for future in tqdm(futures): 119 | # item = futures[future] 120 | # polished_CoT = future.result() 121 | # output.append({ 122 | # "img_name": item["img_name"], 123 | # "polished_CoT": polished_CoT, 124 | # "og_CoT": item["CoT"], 125 | # "description": item["description"], 126 | # "address": item["address"] 127 | # }) 128 | 129 | # output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 130 | 131 | # with open(output_path, 'w') as f: 132 | # json.dump(output, f, indent=4, ensure_ascii=False) 133 | -------------------------------------------------------------------------------- /simulate/advance/cross-view/generate_poi_building_count.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import re 4 | import os 5 | import glob 6 | import argparse 7 | import random 8 | import tqdm 9 | from tqdm import tqdm, trange 10 | 11 | def extract_keys_from_json_files(filenames): 12 | 13 | 14 | all_keys = set() 15 | 
for filename in filenames: 16 | with open(filename, 'r') as f: 17 | data = json.load(f) 18 | all_keys.update(data.keys()) 19 | return list(all_keys) 20 | 21 | 22 | 23 | 24 | 25 | def create_location_dict(filename): 26 | 27 | 28 | location_dict = {} 29 | with open(filename, 'r') as f: 30 | for line in f: 31 | match = re.match(r"(\w+)\s+are\s+at\s+locations:\s+\[(.*)\]", line) 32 | if match: 33 | location_type, coordinates = match.groups() 34 | num_locations = len(coordinates.split(',')) // 2 35 | location_dict[location_type] = num_locations 36 | return location_dict 37 | 38 | if __name__ == '__main__': 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 41 | parser.add_argument('--work_dir', type=str, default='../../data/') 42 | args = parser.parse_args() 43 | city_name = args.city 44 | work_dir = args.work_dir 45 | working_dir = os.path.join(work_dir, f"dev-{city_name}") 46 | # Convert the POI txt files to JSON files 47 | for zl in ['zl15','zl17']: 48 | target_dir = os.path.join(working_dir, f"poi_json_{city_name}") 49 | os.makedirs(target_dir, exist_ok=True) 50 | sat_path = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 51 | for i in range(len(sat_path)): 52 | sat_name = sat_path.at[i,'img_name'].split('.')[0] 53 | filename = os.path.join(working_dir, f"short_clipped_results_{zl}/pois_{sat_name}.txt") 54 | if not os.path.exists(filename): 55 | continue 56 | result_dict = create_location_dict(filename) 57 | with open(os.path.join(target_dir, f"pois_{sat_name}_update.json"), 'w') as f: 58 | json.dump(result_dict, f, indent=4) 59 | 60 | 61 | key_groups = { 62 | 'group_1': ['kindergartens', 'schools', 'colleges', 'research_institutes', 'universitys'], 63 | 'group_2': ['conveniences', 'malls', 'supermarkets'], 64 | 'group_3': ['restaurants', 'bakerys','foods', 'fast_foods', 'beveragess', 'food_courts', 'bars', 'cafes', 'coffees', 'vending_machines', 'nightclubs'], 65 | 'group_4': ['apartments', 'hostels', 'hotels'], 66 | 'group_5': ['attractions'] 67 | } 68 | 69 | # Count the number of POIs in each satellite image 70 | for zl in ['zl15','zl17']: 71 | sat_df = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 72 | img_name_list = list(sat_df['img_name']) 73 | json_file_list = [os.path.join(working_dir, f"poi_json_{city_name}/pois_{x.split('.')[0]}_update.json") for x in img_name_list] 74 | result_data = [] 75 | 76 | for json_file in json_file_list: 77 | file_path = json_file 78 | img_name = file_path.split('/')[-1].split('.')[0] 79 | if not os.path.exists(file_path): 80 | continue 81 | with open(file_path, 'r', encoding='utf-8') as f: 82 | json_data = json.load(f) 83 | 84 | group_sums = {group: 0 for group in key_groups} 85 | 86 | for group, keys in key_groups.items(): 87 | for key in keys: 88 | group_sums[group] += json_data.get(key, 0) 89 | 90 | result_data.append([img_name] + list(group_sums.values())) 91 | 92 | columns = ['img_name'] + list(key_groups.keys()) 93 | df = pd.DataFrame(result_data, columns=columns) 94 | df.to_csv(os.path.join(working_dir, f"POI_key_group_sums_{zl}_{city_name}.csv"), index=False) 95 | print(df) 96 | 97 | ################----------------------------------------------------------------------------------------------------- 98 | 99 | # count the number of buildings in each satellite image 100 | for zl in ['zl15','zl17']: 101 | 102 | sat_df = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 103 | img_name_list = 
list(sat_df['img_name']) 104 | sat_name_list = [] 105 | sat_building_num = [] 106 | for i in img_name_list: 107 | img_name = i.split('.')[0] 108 | if not os.path.exists(os.path.join(working_dir, f"clipped_results_{zl}/clipped_buildings_{img_name}.geojson")): 109 | continue 110 | with open(os.path.join(working_dir, f"clipped_results_{zl}/clipped_buildings_{img_name}.geojson"), 'r', encoding='utf-8') as f: 111 | json_data = json.load(f) 112 | num_features = len(json_data['features']) 113 | sat_name_list.append(img_name) 114 | sat_building_num.append(num_features) 115 | 116 | pd_dict = pd.DataFrame({'img_name':sat_name_list,'building_num':sat_building_num}) 117 | pd_dict.to_csv(os.path.join(working_dir, f"building_num_sat_{zl}_{city_name}.csv"),index=False) 118 | print(pd_dict) 119 | 120 | --------------------------------------------------------------------------------
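The POI group counts and building counts written above (POI_key_group_sums_{zl}_{city}.csv and building_num_sat_{zl}_{city}.csv) presumably feed the SAT_count_pois and SAT_count_buildings evaluation tasks. One subtlety when joining them: the POI table's img_name keeps the "pois_" prefix and "_update" suffix of the JSON filenames, while the building table stores the bare satellite image name. A minimal merge sketch (file names instantiated for Beijing at zl15; illustrative only):

import pandas as pd

poi = pd.read_csv("POI_key_group_sums_zl15_Beijing.csv")   # img_name like "pois_<sat>_update"
bld = pd.read_csv("building_num_sat_zl15_Beijing.csv")     # img_name is the bare "<sat>" name
# strip the prefix/suffix so the two tables share a key, then join per image
poi["img_name"] = poi["img_name"].str.replace(r"^pois_|_update$", "", regex=True)
merged = poi.merge(bld, on="img_name", how="inner")
print(merged.head())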