├── evaluate ├── cross_view │ ├── Readme.md │ ├── STV_compare │ │ ├── STV_compare_inference.py │ │ └── STV_compare_stats.py │ ├── STV_SAT_mapping │ │ ├── STV_SAT_mapping_inference.py │ │ └── STV_SAT_mapping_stats.py │ ├── STV_SAT_location │ │ ├── STV_SAT_location_inference.py │ │ └── STV_SAT_location_stats.py │ ├── SAT_count_pois │ │ ├── SAT_count_pois_inference.py │ │ └── SAT_count_pois_stats.py │ ├── SAT_count_buildings │ │ ├── SAT_count_buildings_inference.py │ │ └── SAT_count_buildings_stats.py │ ├── eval_inference.py │ └── eval_analysis.py ├── mobility_prediction │ ├── run_parallel.py │ └── metrics.py ├── geoqa │ └── analyse_result.py ├── uniimage │ ├── sat_address │ │ ├── sat_address_stats.py │ │ ├── sat_address_inference.py │ │ └── sat_address_convert.py │ ├── sat_landuse │ │ ├── sat_landuse_stats.py │ │ ├── sat_landuse_inference.py │ │ └── sat_landuse_convert.py │ ├── stv_address │ │ ├── stv_address_stats.py │ │ ├── stv_address_inference.py │ │ └── stv_address_convert.py │ └── stv_landmark │ │ ├── stv_landmark_stats.py │ │ ├── stv_landmark_inference.py │ │ └── stv_landmark_convert.py ├── evaluate.py └── outdoor_navigation │ └── utils.py ├── assets └── UrbanLLaVA.png ├── .gitmodules ├── simulate ├── uni_image_basic_construct.bash ├── address.bash ├── all.bash ├── uni_image_mc_construct.bash ├── streetview │ ├── process_stv_near.py │ ├── osm_address_web_stv_my.py │ ├── stv_nearest_pois.py │ └── spatial_join.py ├── STV_pipeline.bash ├── address │ ├── osm_address_web_my.py │ └── interpolate_sat_coord.py ├── CoT_construct.bash ├── multi_image_mc_construct.bash ├── advance │ ├── cross-view │ │ ├── stv_in_sat_partition.py │ │ ├── SAT_stv_corres.py │ │ └── generate_poi_building_count.py │ └── CoT │ │ ├── stv-landmark-cot │ │ └── gpt_polish.py │ │ ├── stv_address_cot │ │ ├── gpt_polish.py │ │ └── gen_CoT_template.py │ │ ├── sat_address_cot │ │ ├── gen_CoT_template.py │ │ └── gpt_polish.py │ │ ├── sat_count_cot │ │ └── gpt_polish.py │ │ └── sat_cross_stv_cot │ │ └── gpt_polish.py ├── satelite │ ├── clip_shp_point.py │ ├── make_sat_shp.py │ ├── process_landuse.py │ ├── process_driving.py │ └── process_poi.py ├── annotate.bash ├── annotate │ ├── sat_landuse_template.py │ ├── sat_combine_address.py │ ├── stv_description_gpt.py │ └── stv_landmark_gpt.py ├── format │ ├── uni_mc_format_llava.py │ └── uni_mc_SAT_landuse.py └── SAT_pipeline.bash ├── examples ├── run_eval_general_inference_stats.sh ├── geoqa.sh ├── mobility.sh ├── navigation.sh ├── run_eval_multi_image_inference_stats.sh └── run_eval_uniimage_inference_stats.sh ├── serving ├── llm_serving.sh ├── test_llm_api.py └── vlm_serving.py ├── LICENSE ├── train └── vila_train_scripts │ └── sft_mix_v1.sh └── .gitignore /evaluate/cross_view/Readme.md: -------------------------------------------------------------------------------- 1 | # Cross_view eval 2 | -------------------------------------------------------------------------------- /assets/UrbanLLaVA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tsinghua-fib-lab/UrbanLLaVA/HEAD/assets/UrbanLLaVA.png -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "train/VILA"] 2 | path = train/VILA 3 | url = https://github.com/NVlabs/VILA.git 4 | [submodule "evaluate/VLMEvalKit"] 5 | path = evaluate/VLMEvalKit 6 | url = https://github.com/open-compass/VLMEvalKit.git 7 | 
-------------------------------------------------------------------------------- /simulate/uni_image_basic_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | 5 | # Dependency: rs_osm_description_{city}_{zl}.csv 6 | # sat_address_combined_{city}_{zl}.csv 7 | # rs_grounding_selfmade_{zl}.csv 8 | # rs_landuse_description_{zl}.jsonl 9 | # stv_in_sat_address_deploy_{zl}.csv 10 | # stv_description.jsonl 11 | # stv_poi_landmark_update.jsonl 12 | 13 | for city in "${cities[@]}"; do 14 | echo "Formatting data to VILA format for $city" 15 | python ./format/uni_basic_llava.py --city $city --work_dir $work_dir 16 | done 17 | wait 18 | echo "Finish formatting data to VILA format" 19 | 20 | # Get llava/format/{city}_basic_all_data_llava.json, etc. -------------------------------------------------------------------------------- /examples/run_eval_general_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=5 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | MODELS=("GPT4o_MINI" "Llama-3-VILA1.5-8b") 10 | 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on general tasks" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | python -m evaluate.general.general_inference --model_name $MODEL --data_name $DATA_VERSION 17 | python -m evaluate.general.general_stats --model_name $MODEL 18 | done -------------------------------------------------------------------------------- /simulate/address.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | # Depend on SAT_{city}_{zl}.csv 5 | for city in "${cities[@]}"; do 6 | echo "Interpolating SAT coordinates (25 points for zl15, 9 for zl17) for $city" 7 | python ./address/interpolate_sat_coord.py --city $city --work_dir $work_dir 8 | done 9 | wait 10 | echo "Finish interpolating SAT coordinates (25 points for zl15, 9 for zl17)" 11 | # Get SAT_interpolate_{city}_{zl}.csv 12 | 13 | # Depend on SAT_interpolate_{city}_{zl}.csv 14 | for city in "${cities[@]}"; do 15 | echo "Getting addresses for interpolated SAT coordinates for $city" 16 | python ./address/osm_address_web_my.py --city $city --work_dir $work_dir 17 | done 18 | wait 19 | echo "Finish getting addresses for interpolated SAT coordinates" 20 | # Get SAT_interpolate_address_{city}_{zl}.csv -------------------------------------------------------------------------------- /examples/geoqa.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=0 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | echo "Start running geoqa" 17 | for MODEL in "${MODELS[@]}"; do 18 | echo "Current model: $MODEL" 19 | for CITY in
"${CITIES[@]}"; do 20 | echo "Current city: $CITY" 21 | python -m evaluate.geoqa.run_eval --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 22 | done 23 | done 24 | echo "Finish running geoqa" 25 | -------------------------------------------------------------------------------- /examples/mobility.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=1 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | echo "Start running mobility" 17 | for MODEL in "${MODELS[@]}"; do 18 | echo "Current model: $MODEL" 19 | for CITY in "${CITIES[@]}"; do 20 | echo "Current city: $CITY" 21 | python -m evaluate.mobility_prediction.llm_mob --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 22 | done 23 | done 24 | echo "Finish running mobility" 25 | -------------------------------------------------------------------------------- /examples/navigation.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=2 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | 10 | 11 | CITIES=('Beijing' 'NewYork' 'London') 12 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 13 | DATA_VERSION='all' 14 | 15 | 16 | DATA_VERSION='all' 17 | 18 | echo "Start running navigation" 19 | for MODEL in "${MODELS[@]}"; do 20 | echo "Current model: $MODEL" 21 | for CITY in "${CITIES[@]}"; do 22 | echo "Current city: $CITY" 23 | python -m evaluate.outdoor_navigation.eval --model_name $MODEL --data_name $DATA_VERSION --city_name $CITY 24 | done 25 | done 26 | echo "Finish running navigation" 27 | -------------------------------------------------------------------------------- /simulate/all.bash: -------------------------------------------------------------------------------- 1 | echo "Run data curation pipeline" 2 | 3 | bash ./SAT_pipeline.bash 4 | echo "Finish SAT pipeline" 5 | 6 | bash ./STV_pipeline.bash 7 | echo "Finish STV pipeline" 8 | 9 | bash ./address.bash 10 | echo "Finish address querying" 11 | 12 | bash ./annotate.bash 13 | echo "Finish annotation" 14 | 15 | echo "Finish data preparation, start uni_image_basic_construct" 16 | 17 | bash ./uni_image_basic_construct.bash 18 | echo "Finish uni_image_basic_construct" 19 | 20 | echo "Finish uni_image_basic_construct, start uni_image_mc_construct" 21 | 22 | bash ./uni_image_mc_construct.bash 23 | echo "Finish uni_image_mc_construct" 24 | 25 | echo "Finish uni_image_mc_construct, start multi_image_mc_construct" 26 | 27 | bash ./multi_image_mc_construct.bash 28 | echo "Finish multi_image_mc_construct" 29 | 30 | echo "Finish multi_image_mc_construct, start CoT_construct" 31 | 32 | bash ./CoT_construct.bash 33 | echo "Finish CoT_construct" 34 | 35 | echo "Finish CoT_construct" 36 | echo "Finish data curation pipeline" -------------------------------------------------------------------------------- 
/serving/llm_serving.sh: -------------------------------------------------------------------------------- 1 | source /usr/local/anaconda3/bin/activate vllm 2 | export CUDA_VISIBLE_DEVICES=4 3 | 4 | USER="" 5 | API_KEY="" 6 | SERVER_IP="" 7 | SERVER_PORT=23199 8 | MODEL_NAME=llama3-8B 9 | MODEL_PATH=/path/Meta-Llama-3-8B-Instruct/ 10 | 11 | exec -a "vllm-$MODEL_NAME@$USER" python -m vllm.entrypoints.openai.api_server \ 12 | --served-model-name $MODEL_NAME \ 13 | --api-key $API_KEY \ 14 | --model $MODEL_PATH \ 15 | --trust-remote-code \ 16 | --host $SERVER_IP \ 17 | --port $SERVER_PORT \ 18 | --max-model-len 4096 \ 19 | --disable-log-stats \ 20 | --tensor-parallel-size 1 \ 21 | --gpu-memory-utilization 0.95 22 | 23 | # more settings please refer to the following docs 24 | # vllm installation https://docs.vllm.ai/en/latest/getting_started/installation.html 25 | # autoAWQ https://docs.vllm.ai/en/latest/quantization/auto_awq.html 26 | # vllm engine parameters: https://docs.vllm.ai/en/latest/models/engine_args.html 27 | # vllm openai server parameters: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html -------------------------------------------------------------------------------- /simulate/uni_image_mc_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | 5 | for city in "${cities[@]}"; do 6 | echo "Making sat_addr task data for $city" 7 | python ./format/uni_mc_SAT_addr.py --city $city --work_dir $work_dir 8 | done 9 | wait 10 | echo "Finish making sat_addr task data" 11 | 12 | for city in "${cities[@]}"; do 13 | echo "Making sat_landuse task data for $city" 14 | python ./format/uni_mc_SAT_landuse.py --city $city --work_dir $work_dir 15 | done 16 | wait 17 | echo "Finish making sat_landuse task data" 18 | 19 | for city in "${cities[@]}"; do 20 | echo "Making stv_addr task data for $city" 21 | python ./format/uni_mc_STV_addr.py --city $city --work_dir $work_dir 22 | done 23 | wait 24 | echo "Finish making stv_addr task data" 25 | 26 | for city in "${cities[@]}"; do 27 | echo "Making stv_landmark task data for $city" 28 | python ./format/uni_mc_STV_landmark.py --city $city --work_dir $work_dir 29 | done 30 | wait 31 | echo "Finish making stv_landmark task data" -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 FIB LAB, Tsinghua University 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /simulate/streetview/process_stv_near.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import argparse 3 | 4 | def process_stv_near(input_file, output_file): 5 | df = pd.read_csv(input_file) 6 | 7 | result = pd.DataFrame(columns=['image_name', 'feature_names']) 8 | 9 | for image_name, group in df.groupby('image_name'): 10 | feature_names = group['nearest_feature_name'].dropna().head(10) 11 | 12 | feature_names_str = ','.join(feature_names) 13 | 14 | result_tmp = pd.DataFrame({'image_name': [image_name], 'feature_names': [feature_names_str]}) 15 | result = pd.concat([result, result_tmp], ignore_index=True) 16 | 17 | result.to_csv(output_file, index=False) 18 | 19 | if __name__ == '__main__': 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 22 | parser.add_argument('--work_dir', type=str, default='../../data/') 23 | args = parser.parse_args() 24 | city = args.city 25 | work_dir = args.work_dir 26 | for zl in ['zl15', 'zl17']: 27 | input_path = work_dir + f'dev-{city}/stv_in_sat_nearest_features_{city}_{zl}.csv' 28 | output_path = work_dir + f'dev-{city}/stv_in_sat_nearest_features_update_{city}_{zl}.csv' 29 | process_stv_near(input_path, output_path) 30 | -------------------------------------------------------------------------------- /evaluate/mobility_prediction/run_parallel.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from multiprocessing import Pool 3 | 4 | from .llm_mob import main 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--user_cnt', type=int, default=50) 11 | parser.add_argument('--traj_cnt', type=int, default=10) 12 | 13 | args = parser.parse_args() 14 | user_cnt = args.user_cnt # users 15 | sample_single_user = args.traj_cnt # trajectory for each user 16 | data_version="mini" 17 | split_path="citydata/mobility/checkin_split/" 18 | test_path="citydata/mobility/checkin_test_pk/" 19 | 20 | models = ["gpt4omini"] 21 | # models = [ 22 | # "gpt-3.5", "gpt-4", "meta-llama/Meta-Llama-3-70B-Instruct", "mistralai/Mixtral-8x22B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.2", 23 | # "meta-llama/Meta-Llama-3-8B-Instruct", "deepseek-chat" 24 | # ] 25 | cities = [ 26 | "Beijing", "Cape", "London", "Moscow", "Mumbai", "Nairobi", "NewYork" ,"Paris" ,"San", "Sao", "Shanghai", "Sydney","Tokyo" 27 | ] 28 | 29 | # main(city, model, user_cnt=50, sample_single_user=10, num_historical_stay=40, num_context_stay=5, split_path="./checkin_split/", test_path="./checkin_test_pk/", data_version="all") 30 | para_group = [] 31 | for c in cities: 32 | for m in models: 33 | para_group.append([c, m, user_cnt, sample_single_user, 40, 5, split_path, test_path, data_version]) 34 | 35 | with Pool(6) as pool: 36 | results = pool.starmap(main, para_group) -------------------------------------------------------------------------------- /simulate/STV_pipeline.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" 
"London") 2 | work_dir="../../data/" 3 | 4 | # Depend on /ThreeCityImage/{city}/StreetView/ 5 | # Depend on SAT_{city}_{zl}.csv 6 | for city in "${cities[@]}"; do 7 | echo "Finding corresponding streetview images for $city" 8 | python ./streetview/spatial_join.py --city $city --work_dir $work_dir 9 | done 10 | wait 11 | echo "Finish finding corresponding streetview images" 12 | # Get stv_in_sat_{city}_{zl}.csv 13 | # Get sampled_stv_images/ 14 | 15 | # Depend on stv_in_sat_{city}_{zl}.csv 16 | for city in "${cities[@]}"; do 17 | echo "Querying address for streetview images for $city" 18 | python ./streetview/osm_address_web_stv_my.py --city $city --work_dir $work_dir 19 | done 20 | wait 21 | echo "Finish querying address for streetview images" 22 | # Get stv_in_sat_address_deploy_{zl}.csv 23 | 24 | # Depend on stv_in_sat_{city}_{zl}.csv 25 | for city in "${cities[@]}"; do 26 | echo "Getting nearest POI for streetview images for $city" 27 | python ./streetview/stv_nearest_pois.py --city $city --work_dir $work_dir 28 | done 29 | wait 30 | echo "Finish getting nearest POI for streetview images" 31 | # Get stv_in_sat_nearest_features_{city}_{zl}.csv 32 | 33 | # Depend on stv_in_sat_nearest_features_{city}_{zl}.csv 34 | for city in "${cities[@]}"; do 35 | echo "Getting 10 POI for streetview images for $city" 36 | python ./streetview/process_stv_near.py --city $city --work_dir $work_dir 37 | done 38 | wait 39 | echo "Finish getting 10 POI for streetview images" 40 | # Get stv_in_sat_nearest_features_updated_{city}_{zl}.csv -------------------------------------------------------------------------------- /simulate/address/osm_address_web_my.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import trange 3 | 4 | import pandas as pd 5 | from geopy.geocoders import Nominatim 6 | from geopy.extra.rate_limiter import RateLimiter 7 | 8 | import argparse 9 | 10 | def reverse_geocode(lat, lon): 11 | geolocator = Nominatim(user_agent="MyGeocodingApp2",timeout=1,proxies="http://127.0.0.1:10190") 12 | geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1) 13 | location = geocode((lat, lon), exactly_one=True,language='en') 14 | return location.address if location else None 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 19 | parser.add_argument('--work_dir', type=str, default='../../data/') 20 | args = parser.parse_args() 21 | city = args.city 22 | work_dir = args.work_dir 23 | 24 | working_dir = work_dir + f'dev-{city}/' 25 | 26 | for zl in ['zl15','zl17']: 27 | input_path = working_dir + f'SAT_interpolate_{city}_{zl}.csv' 28 | assert os.path.exists(input_path) 29 | output_path = working_dir + f'SAT_interpolate_address_{city}_{zl}.csv' 30 | 31 | df = pd.read_csv(input_path) 32 | df['adr'] = 's' 33 | 34 | for i in trange(len(df)): 35 | lng = (df.at[i,'lng']) 36 | lat = (df.at[i,'lat']) 37 | 38 | try: 39 | address = reverse_geocode(lat, lng) 40 | df.at[i,'adr'] = str(address) 41 | except Exception as e: 42 | pass 43 | # continue 44 | df.to_csv(output_path, index=False) -------------------------------------------------------------------------------- /simulate/CoT_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../../data/" 3 | 4 | for city in "${cities[@]}"; do 5 | echo "Generating sat_count CoT with 
template for $city" 6 | python ./advance/CoT/sat_address_cot/gen_CoT_template.py --city $city --work_dir $work_dir 7 | echo "Using GPT to polish CoT for $city" 8 | python ./advance/CoT/sat_address_cot/gpt_polish.py --city $city --work_dir $work_dir 9 | done 10 | wait 11 | echo "Finish generating CoT for sat_address" 12 | 13 | for city in "${cities[@]}"; do 14 | echo "Generating sat_count CoT with template for $city" 15 | python ./advance/CoT/sat_count_cot/gen_CoT_template.py --city $city --work_dir $work_dir 16 | echo "Using GPT to polish CoT for $city" 17 | python ./advance/CoT/sat_count_cot/gpt_polish.py --city $city --work_dir $work_dir 18 | done 19 | wait 20 | echo "Finish generating CoT for sat_count" 21 | 22 | for city in "${cities[@]}"; do 23 | echo "Generating stv_address CoT with template for $city" 24 | python ./advance/CoT/stv_address_cot/gen_CoT_template.py --city $city --work_dir $work_dir 25 | echo "Using GPT to polish CoT for $city" 26 | python ./advance/CoT/stv_address_cot/gpt_polish.py --city $city --work_dir $work_dir 27 | done 28 | wait 29 | echo "Finish generating CoT for stv_address" 30 | 31 | for city in "${cities[@]}"; do 32 | echo "Generating sat_cross_stv CoT with template for $city" 33 | python ./advance/CoT/sat_cross_stv_cot/gen_CoT_template.py --city $city --work_dir $work_dir 34 | echo "Using GPT to polish CoT for $city" 35 | python ./advance/CoT/sat_cross_stv_cot/gpt_polish.py --city $city --work_dir $work_dir 36 | done 37 | wait 38 | echo "Finish generating CoT for sat_cross_stv" -------------------------------------------------------------------------------- /simulate/multi_image_mc_construct.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../../data/" 3 | 4 | for city in "${cities[@]}"; do 5 | echo "Getting POIs and buildings number for $city" 6 | python ./advance/cross-view/generate_poi_building_count.py --city $city --work_dir $work_dir 7 | done 8 | wait 9 | echo "Finish getting POIs and buildings number" 10 | 11 | for city in "${cities[@]}"; do 12 | echo "Generating SAT-Count data for $city" 13 | python ./format/multi_SAT_count_llava.py --city $city --work_dir $work_dir 14 | done 15 | wait 16 | echo "Finish generating SAT-Count data" 17 | 18 | for city in "${cities[@]}"; do 19 | echo "Getting street view images and corresponding satellite images for $city" 20 | python ./advance/cross-view/SAT_stv_corres.py --city $city --work_dir $work_dir 21 | done 22 | wait 23 | echo "Finish getting street view images and corresponding satellite images" 24 | 25 | for city in "${cities[@]}"; do 26 | echo "Getting partition information between street view images and satellite images for $city" 27 | python ./advance/cross-view/stv_in_sat_partition.py --city $city --work_dir $work_dir 28 | done 29 | wait 30 | echo "Finish getting partition information between street view images and satellite images" 31 | 32 | for city in "${cities[@]}"; do 33 | echo "Generating cross SAT-STV data for $city" 34 | python ./format/multi_SAT_cross_STV_llava.py --city $city --work_dir $work_dir 35 | done 36 | wait 37 | echo "Finish generating cross SAT-STV data" 38 | 39 | for city in "${cities[@]}"; do 40 | echo "GeneratinG STV_compare data for $city" 41 | python ./format/multi_STV_compare_llava.py --city $city --work_dir $work_dir 42 | done 43 | wait 44 | echo "Finish generating STV_compare data" -------------------------------------------------------------------------------- 
/simulate/advance/cross-view/stv_in_sat_partition.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import argparse 4 | import tqdm 5 | from tqdm import trange 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 10 | parser.add_argument('--work_dir', type=str, default='../../data/') 11 | args = parser.parse_args() 12 | city = args.city 13 | work_dir = args.work_dir 14 | working_dir = work_dir + f"dev-{city}/" 15 | 16 | for zl in ['zl15','zl17']: 17 | 18 | # df = pd.read_csv('sat_stv_corr_'+zl+'_'+area+'.csv') 19 | df = pd.read_csv(working_dir + f'sat_stv_corr_{city}_{zl}.csv') 20 | 21 | df['partition'] = 's' 22 | df['x_min'] = 0 23 | df['x_max'] = 0 24 | df['y_min'] = 0 25 | df['y_max'] = 0 26 | 27 | for i in trange(len(df)): 28 | x_pixel = df.at[i,'x_pixel'] 29 | y_pixel = df.at[i,'y_pixel'] 30 | if x_pixel<=127: 31 | if y_pixel<=127: 32 | df.at[i,'partition'] = 'Top_left' 33 | else: 34 | df.at[i,'partition'] = 'Bottom_left' 35 | else: 36 | if y_pixel<=127: 37 | df.at[i,'partition'] = 'Top_right' 38 | else: 39 | df.at[i,'partition'] = 'Bottom_right' 40 | df.at[i,'x_min'] = max(0,x_pixel-10) 41 | df.at[i,'x_max'] = min(255,x_pixel+10) 42 | df.at[i,'y_min'] = max(0,y_pixel-10) 43 | df.at[i,'y_max'] = min(255,y_pixel+10) 44 | 45 | # df.to_csv('sat_stv_corr_'+zl+'_'+area+'_partition.csv',index=False) 46 | df.to_csv(working_dir + f'sat_stv_corr_{city}_{zl}_partition.csv',index=False) 47 | print(f'{working_dir}sat_stv_corr_{city}_{zl}_partition.csv saved. {len(df)} records processed.') 48 | 49 | -------------------------------------------------------------------------------- /simulate/streetview/osm_address_web_stv_my.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import trange 3 | 4 | import pandas as pd 5 | from geopy.geocoders import Nominatim 6 | from geopy.extra.rate_limiter import RateLimiter 7 | 8 | import argparse 9 | 10 | def reverse_geocode(lat, lon): 11 | geolocator = Nominatim(user_agent="MyGeocodingApp2",timeout=1,proxies="http://127.0.0.1:10190") 12 | geocode = RateLimiter(geolocator.reverse, min_delay_seconds=1) 13 | location = geocode((lat, lon), exactly_one=True,language='en') 14 | return location.address if location else None 15 | 16 | if __name__ == '__main__': 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 19 | parser.add_argument('--work_dir', type=str, default='../../data/') 20 | args = parser.parse_args() 21 | city = args.city 22 | work_dir = args.work_dir 23 | 24 | for zl in ['zl15','zl17']: 25 | working_dir = work_dir + f'dev-{city}/' 26 | 27 | input_path= working_dir + f'stv_in_sat_{city}_{zl}.csv' 28 | assert os.path.exists(input_path) 29 | 30 | output_path = working_dir + f'stv_in_sat_address_deploy_{zl}.csv' 31 | 32 | df = pd.read_csv(input_path) 33 | df['adr'] = 's' 34 | 35 | for i in trange(len(df)): 36 | # lng = (df.at[i,'tl_lng']+df.at[i,'bt_lng'])/2 37 | # lat = (df.at[i,'tl_lat']+df.at[i,'bt_lat'])/2 38 | # lng = (df.at[i,'lng']), 39 | # lat = (df.at[i,'lat']) 40 | lng = (df.at[i,'longitude']) 41 | lat = (df.at[i,'latitude']) 42 | 43 | # lat, lng= 51.58425973969619,0.13408350251072 # 39.882027527944864, 116.38185151260446 44 | try: 45 | address = reverse_geocode(lat, lng) 46 | # print(address) 47 | 
df.at[i,'adr'] = str(address) 48 | except Exception as e: 49 | pass 50 | # continue 51 | df.to_csv(output_path, index=False) -------------------------------------------------------------------------------- /serving/test_llm_api.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | 5 | #### Register your API key in the environment; do not write your key directly in the code 6 | #export SiliconFlow_API_KEY="xx" 7 | #export DeepInfra_API_KEY="xx" 8 | #export OpenAI_API_KEY="xx" 9 | 10 | #### Define your proxy 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | #### Select a platform 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", # https://siliconflow.cn/models 16 | "DeepInfra": "DeepInfra_API_KEY", # https://deepinfra.com/models 17 | "OpenAI": "OpenAI_API_KEY", # https://openai.com/api/pricing/ 18 | "vllm": "vllm_KEY" 19 | } 20 | API_URL_MAPPING = { 21 | "siliconflow": "https://api.siliconflow.cn/v1", 22 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 23 | "OpenAI": "https://api.openai.com/v1", 24 | "vllm": "http://your_server_ip:port/v1", 25 | } 26 | 27 | 28 | API_TYPE = "OpenAI" 29 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 30 | API_URL = API_URL_MAPPING[API_TYPE] 31 | model_name = "google/gemma-2-9b-it" 32 | 33 | 34 | #### OpenAI client 35 | if API_TYPE == "OpenAI": 36 | model_name = "gpt-3.5-turbo-0125" 37 | client = OpenAI( 38 | base_url=API_URL, 39 | api_key=API_KEY, 40 | http_client=httpx.Client(proxies=PROXY) 41 | ) 42 | elif API_TYPE == "siliconflow": 43 | client = OpenAI( 44 | base_url=API_URL, 45 | api_key=API_KEY 46 | ) 47 | elif API_TYPE=="DeepInfra": 48 | client = OpenAI( 49 | base_url=API_URL, 50 | api_key=API_KEY, 51 | http_client=httpx.Client(proxies=PROXY), 52 | ) 53 | elif API_TYPE=="vllm": 54 | client = OpenAI( 55 | base_url=API_URL, 56 | api_key=API_KEY 57 | ) 58 | 59 | 60 | #### One example 61 | dialogs = [{ 62 | "role": "user", 63 | "content": "Who are you? Please output your name with JSON format."
64 | }] 65 | 66 | completion = client.chat.completions.create( 67 | model=model_name, 68 | messages=dialogs, 69 | max_tokens=100, 70 | temperature=0 71 | ) 72 | 73 | print(completion.choices[0].message.content) 74 | -------------------------------------------------------------------------------- /simulate/advance/cross-view/SAT_stv_corres.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import argparse 3 | from tqdm import tqdm, trange 4 | 5 | x_pi = 3.14159265358979324 * 3000.0 / 180.0 6 | pi = 3.1415926535897932384626 # π 7 | a = 6378245.0 # Semi-major axis of the Krasovsky 1940 ellipsoid (GCJ-02) 8 | ee = 0.00669342162296594323 # Square of eccentricity (GCJ-02 constants; not used below) 9 | 10 | 11 | 12 | if __name__ == "__main__": 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 15 | parser.add_argument('--work_dir', type=str, default='../../data/') 16 | args = parser.parse_args() 17 | city = args.city 18 | work_dir = args.work_dir 19 | working_dir = work_dir + f"dev-{city}/" 20 | 21 | for zl in ['zl15','zl17']: 22 | df_stv = pd.read_csv(working_dir + f'stv_in_sat_{city}_{zl}.csv') 23 | df_sat = pd.read_csv(working_dir + f'SAT_{city}_{zl}.csv') 24 | 25 | df_stv['x_pixel'] = 0 26 | df_stv['y_pixel'] = 0 27 | df_stv['sat'] = 's' 28 | 29 | for i_stv in trange(len(df_stv)): 30 | lng = df_stv.at[i_stv,'longitude'] 31 | lat = df_stv.at[i_stv,'latitude'] 32 | 33 | for i_sat in range(len(df_sat)): 34 | sat_tl_lat = df_sat.at[i_sat,'tl_lat'] #tl_lat,tl_lng,bt_lat,bt_lng 35 | sat_tl_lng = df_sat.at[i_sat,'tl_lng'] 36 | sat_bt_lat = df_sat.at[i_sat,'bt_lat'] 37 | sat_bt_lng = df_sat.at[i_sat,'bt_lng'] 38 | 39 | y_pixel = int(255*((sat_tl_lat-lat)/(sat_tl_lat-sat_bt_lat))) 40 | x_pixel = int(255*((lng-sat_tl_lng)/(sat_bt_lng-sat_tl_lng))) 41 | # print(x_pixel, y_pixel) 42 | if 0<=x_pixel and x_pixel<=255 and 0<=y_pixel and y_pixel<=255: 43 | df_stv.at[i_stv,'x_pixel'] = x_pixel 44 | df_stv.at[i_stv,'y_pixel'] = y_pixel 45 | df_stv.at[i_stv,'sat_img_name'] = df_sat.at[i_sat,'img_name'] 46 | break 47 | df_stv.to_csv(working_dir + f'sat_stv_corr_{city}_{zl}.csv', index=False) 48 | print(f'{working_dir}sat_stv_corr_{city}_{zl}.csv saved. {len(df_stv)} records processed.') 49 | 50 | -------------------------------------------------------------------------------- /simulate/satelite/clip_shp_point.py: -------------------------------------------------------------------------------- 1 | # Function: Clip POI/driving/landuse/natural/buildings data from a GeoJSON file based on the shapefile of the region.
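# Note: the clipping below uses geometry.intersects(), which keeps any feature that
# touches a tile, so a road or landuse polygon crossing a tile boundary will appear
# in every tile it intersects rather than being cut at the tile edge.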
2 | 3 | import geopandas as gpd 4 | import os 5 | from tqdm import tqdm 6 | import argparse 7 | 8 | def clip(shp_file, geojson_file, output_dir, typ): 9 | 10 | shp_gdf = gpd.read_file(shp_file) 11 | geojson_gdf = gpd.read_file(geojson_file) 12 | 13 | if shp_gdf.crs is None: 14 | shp_gdf = shp_gdf.set_crs(epsg=4326) 15 | 16 | if shp_gdf.crs != geojson_gdf.crs: 17 | geojson_gdf = geojson_gdf.to_crs(shp_gdf.crs) 18 | 19 | os.makedirs(output_dir, exist_ok=True) 20 | 21 | for index, polygon in shp_gdf.iterrows(): 22 | clipped_gdf = geojson_gdf[geojson_gdf.geometry.intersects(polygon.geometry)] 23 | 24 | # output_filename = os.path.join(output_dir, f"clipped_part_{index}.geojson") 25 | output_filename = os.path.join(output_dir, f"clipped_{typ}_{polygon['region_nam'].split('.')[0]}.geojson") 26 | 27 | if not clipped_gdf.empty: 28 | clipped_gdf.to_file(output_filename, driver="GeoJSON") 29 | print(f"Saved clipped data to {output_filename}") 30 | else: 31 | print(f"No intersecting features for Polygon {index}") 32 | 33 | 34 | if __name__ == "__main__": 35 | 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 38 | parser.add_argument('--work_dir', type=str, default='../../data/') 39 | args = parser.parse_args() 40 | city = args.city 41 | work_dir = args.work_dir 42 | 43 | for zl in ["zl15", "zl17"]: 44 | shp_file = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.shp') 45 | output_dir = os.path.join(work_dir, f'dev-{city}/clipped_results_{zl}') 46 | 47 | for typ in ['buildings','pois','landuse','natural','driving']: 48 | # TODO: Change the path to the actual geojson file 49 | geojson_dir = "....../ThreeCityImage/city_geojson_three_cities" 50 | geojson_file = os.path.join(geojson_dir, f'{city}_{typ}.geojson') 51 | clip(shp_file, geojson_file, output_dir, typ) -------------------------------------------------------------------------------- /evaluate/cross_view/STV_compare/STV_compare_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_compare multiple-choice task.
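# Illustrative invocation, following the `python -m evaluate.cross_view.<task>.<task>_inference`
# pattern used in examples/run_eval_multi_image_inference_stats.sh (STV_compare itself is not
# wired into that driver script, so this exact command is an assumption):
# python -m evaluate.cross_view.STV_compare.STV_compare_inference --city_name Beijing --model_name GPT4o_MINI --data_name mini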
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_compare', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_mapping/STV_SAT_mapping_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_SAT_mapping multiple-choice task.
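# Responses are written to MULTI_IMAGE_FOLDER/<task_name>/<city_name>/<model_name>/
# {city_name}_{task_name}_response.json and scored afterwards by STV_SAT_mapping_stats.py.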
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_mapping', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_location/STV_SAT_location_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the STV_SAT_location multiple-choice task.
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_location', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | 30 | print("Load the image list") 31 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 32 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_test.json") 33 | with open(path, "r") as f: 34 | data = json.load(f) 35 | 36 | if args.data_name == "mini": 37 | data = data[:10] 38 | 39 | response = [] 40 | for d in tqdm(data): 41 | prompt = d["prompt"] 42 | reference = d["reference"] 43 | img_path = d["image"] 44 | 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /simulate/annotate.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | model="gpt-4o-mini-2024-07-18" 4 | 5 | # Depend on SAT_interpolate_address_{city}_{zl}.csv 6 | for city in "${cities[@]}"; do 7 | echo "Getting SAT combined address for $city" 8 | python ./annotate/sat_combine_address.py --city $city --work_dir $work_dir --model_name $model 9 | done 10 | wait 11 | echo "Finish getting SAT combined address" 12 | # Get sat_address_combined_{city}_{zl}.csv 13 | 14 | # Depend on SAT_interpolate_address_{city}_{zl}.csv 15 | # Depend on short_clipped_results_{zl}/driving_{img_name}.txt 16 | # Depend on short_clipped_results_{zl}/pois_{img_name}.txt 17 | for city in "${cities[@]}"; do 18 | echo "Getting SAT scene description for $city" 19 | python ./annotate/sat_scene_description.py --city $city --work_dir $work_dir --model_name $model 20 | done 21 | wait 22 | echo "Finish getting SAT scene description" 23 | # Get rs_osm_description_{city}_{zl}.csv 24 | 25 | # Depend on SAT_{city}_{zl}.csv 26 | for city in "${cities[@]}"; do 27 | echo "Getting SAT grounding description for $city" 28 | python ./annotate/sat_generate_grounding_template.py --city $city --work_dir $work_dir 29 | done 30 | wait 31 | echo "Finish getting SAT grounding description" 32 | # Get rs_grounding_selfmade_{zl}.csv 33 | 34 | # Depend on SAT_{city}_{zl}.csv 35 | # Depend on 
short_clipped_results_{zl}/landuse_{img_name}.txt 36 | for city in "${cities[@]}"; do 37 | echo "Getting SAT landuse for $city" 38 | python ./annotate/sat_landuse_template.py --city $city --work_dir $work_dir 39 | done 40 | wait 41 | echo "Finish getting SAT landuse" 42 | # Get rs_landuse_description_{zl}.jsonl 43 | 44 | # Depend on stv_in_sat_{city}_{zl}.csv 45 | for city in "${cities[@]}"; do 46 | echo "Getting STV scene description for $city" 47 | python ./annotate/stv_description_gpt.py --city $city --work_dir $work_dir --model_name $model 48 | done 49 | wait 50 | echo "Finish getting STV scene description" 51 | # Get stv_description.jsonl 52 | 53 | # Depend on stv_in_sat_nearest_features_update_{city}_{zl}.csv 54 | for city in "${cities[@]}"; do 55 | echo "Getting STV landmark for $city" 56 | python ./annotate/stv_landmark_gpt.py --city $city --work_dir $work_dir --model_name $model 57 | done 58 | wait 59 | echo "Finish getting STV landmark" 60 | # Get stv_poi_landmark_update.jsonl -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_pois/SAT_count_pois_inference.py: -------------------------------------------------------------------------------- 1 | # This script runs model inference on the SAT_count_pois multiple-choice task. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import MULTI_IMAGE_FOLDER 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_pois', help='task name') 18 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 19 | args = parser.parse_args() 20 | 21 | model_name = args.model_name 22 | city_name = args.city_name 23 | task_name = args.task_name 24 | 25 | print("Load the model") 26 | model_wrapper = VLMWrapper(args.model_name) 27 | model = model_wrapper.get_vlm_model() 28 | 29 | for zl in ["zl15", "zl17"]: 30 | 31 | print("Load the image list") 32 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 33 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}_test.json") 34 | with open(path, "r") as f: 35 | data = json.load(f) 36 | 37 | if args.data_name == "mini": 38 | data = data[:10] 39 | 40 | response = [] 41 | for d in tqdm(data): 42 | prompt = d["prompt"] 43 | reference = d["reference"] 44 | img_path = d["image"] 45 | 46 | ret = model.generate(img_path + [prompt]) 47 | response.append({ 48 | "image": img_path, 49 | "prompt": prompt, 50 | "reference": reference, 51 | "response": ret 52 | }) 53 | 54 | print("Save the response") 55 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 56 | os.makedirs(output_path, exist_ok=True) 57 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 58 | json.dump(response, f, indent=4, ensure_ascii=False) 59 | 60 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /simulate/annotate/sat_landuse_template.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2
| import os 3 | import json 4 | import argparse 5 | 6 | if __name__ == '__main__': 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 9 | parser.add_argument('--work_dir', type=str, default='../../data/') 10 | args = parser.parse_args() 11 | city = args.city 12 | work_dir = args.work_dir 13 | 14 | # work_dir = "../../data/dev-Beijing/" 15 | work_dir = work_dir + f"dev-{city}/" 16 | for zl in ["zl15", "zl17"]: 17 | if os.path.exists(work_dir + f"rs_landuse_description_{zl}.jsonl"):  # remove stale output from the data dir (not the CWD) before appending 18 | os.remove(work_dir + f"rs_landuse_description_{zl}.jsonl") 19 | print(f"Removed rs_landuse_description_{zl}.jsonl") 20 | df = pd.read_csv(work_dir + f"SAT_{city}_{zl}.csv") 21 | for cnt in range(len(df)): 22 | img_name = df.at[cnt,'img_name'].split('.')[0] 23 | 24 | if not os.path.exists(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 25 | continue 26 | 27 | with open(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 28 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 29 | lines = file.readlines() 30 | 31 | for line in lines: 32 | parts = line.split('location:') 33 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 34 | coordinates = parts[1].strip() 35 | 36 | question = f"You are provided a 256*256 satellite image. What is the landuse type in region {coordinates}?" 37 | answer = f"{landuse_type}" 38 | 39 | print(f"Q: {question}") 40 | print(f"A: {answer}") 41 | with open(work_dir + f"rs_landuse_description_{zl}.jsonl", "a") as fout: 42 | value = { 43 | "img_name": img_name, 44 | "Q": question, 45 | "A": answer 46 | } 47 | fout.write(json.dumps(value, ensure_ascii=False) + "\n") 48 | print(f"Finished generating rs_landuse_description_{zl}.jsonl") -------------------------------------------------------------------------------- /simulate/satelite/make_sat_shp.py: -------------------------------------------------------------------------------- 1 | # Function: Generate a shapefile for satellite images in order to visualize them in GIS.
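# Note: dBase (.dbf) field names are limited to 10 characters, so the
# 'region_name' field created below ends up stored as 'region_nam' --
# the key that clip_shp_point.py reads back from these shapefiles.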
2 | 3 | import shapefile # Using pyshp 4 | import pandas as pd 5 | import argparse 6 | import os 7 | 8 | def make_sat_shp(data_address, csv_address) -> None: 9 | sat = pd.read_csv(csv_address, header=0, sep=',') 10 | file = shapefile.Writer(data_address) 11 | file.field('num') 12 | file.field('region_name') 13 | file.field('type', 'C', '40') 14 | 15 | for i in range(len(sat)): 16 | # Extract image name 17 | img_name = sat.at[i, 'img_name'] 18 | 19 | # Define the polygon coordinates 20 | polygon = [ 21 | [sat.at[i, 'tl_lng'], sat.at[i, 'tl_lat']], 22 | [sat.at[i, 'bt_lng'], sat.at[i, 'tl_lat']], 23 | [sat.at[i, 'bt_lng'], sat.at[i, 'bt_lat']], 24 | [sat.at[i, 'tl_lng'], sat.at[i, 'bt_lat']], 25 | [sat.at[i, 'tl_lng'], sat.at[i, 'tl_lat']] # Close the polygon 26 | ] 27 | 28 | # Add the polygon to the shapefile 29 | file.poly([polygon]) 30 | file.record(str(i), img_name, 'Polygon') 31 | 32 | file.close() 33 | 34 | # Write the projection file with WKT for EPSG:4326 35 | wkt = """GEOGCS["WGS 84", 36 | DATUM["WGS_1984", 37 | SPHEROID["WGS 84",6378137,298.257223563, 38 | AUTHORITY["EPSG","7030"]], 39 | AUTHORITY["EPSG","6326"]], 40 | PRIMEM["Greenwich",0, 41 | AUTHORITY["EPSG","8901"]], 42 | UNIT["degree",0.0174532925199433, 43 | AUTHORITY["EPSG","9122"]], 44 | AUTHORITY["EPSG","4326"]]""" 45 | 46 | # Write the WKT to the .prj file 47 | with open(data_address.replace(".shp", ".prj"), 'w') as f: 48 | f.write(wkt) 49 | 50 | print(f"Shapefile and projection file have been created at {data_address}") 51 | 52 | if __name__ == '__main__': 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 55 | parser.add_argument('--work_dir', type=str, default='../../data/') 56 | args = parser.parse_args() 57 | 58 | city = args.city 59 | work_dir = args.work_dir 60 | 61 | for zl in ["zl15", "zl17"]: 62 | csv_path = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.csv') 63 | shp_path = os.path.join(work_dir, f'dev-{city}/SAT_{city}_{zl}.shp') 64 | make_sat_shp(shp_path, csv_path) -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_buildings/SAT_count_buildings_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | from tqdm import tqdm 5 | import json 6 | 7 | from config import MULTI_IMAGE_FOLDER 8 | from serving.vlm_serving import VLMWrapper 9 | 10 | if __name__ == '__main__': 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 13 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 14 | parser.add_argument('--task_name', type=str, default='SAT_count_buildings', help='task name') 15 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 16 | args = parser.parse_args() 17 | 18 | model_name = args.model_name 19 | city_name = args.city_name 20 | task_name = args.task_name 21 | 22 | print("Load the model") 23 | model_wrapper = VLMWrapper(args.model_name) 24 | model = model_wrapper.get_vlm_model() 25 | 26 | for zl in ["zl15", "zl17"]: 27 | 28 | print("Load the image list") 29 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 30 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}_test.json") 31 | with open(path, "r") as f: 
32 | data = json.load(f) 33 | 34 | if args.data_name == "mini": 35 | data = data[:10] 36 | 37 | response = [] 38 | for d in tqdm(data): 39 | prompt = d["prompt"] 40 | reference = d["reference"] 41 | img_path = d["image"] 42 | # img_name = img_path.split("/")[-1] 43 | 44 | # assert os.path.exists(img_path), f"Image {img_path} not found" 45 | ret = model.generate(img_path + [prompt]) 46 | response.append({ 47 | "image": img_path, 48 | "prompt": prompt, 49 | "reference": reference, 50 | "response": ret 51 | }) 52 | 53 | print("Save the response") 54 | output_path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city_name, model_name) 55 | os.makedirs(output_path, exist_ok=True) 56 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 57 | json.dump(response, f, indent=4, ensure_ascii=False) 58 | 59 | model_wrapper.clean_proxy() -------------------------------------------------------------------------------- /evaluate/cross_view/STV_compare/STV_compare_stats.py: -------------------------------------------------------------------------------- 1 | # This script computes accuracy statistics for the STV_compare multiple-choice responses. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_compare', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/cross_view/eval_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import pandas as pd 4 | # from setproctitle import
setproctitle 5 | import jsonlines 6 | 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import CROSS_VIEW_PATH, CROSS_VIEW_RESULTS_PATH 11 | from serving.vlm_serving import VLMWrapper 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') # InternVL2-40B, GPT4o_MINI, Qwen2-VL-2B-Instruct 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') # Beijing, London, NewYork 17 | parser.add_argument('--data_name', type=str, default="mini", help='dataset size') 18 | parser.add_argument('--task_name', type=str, default='IR', help='task name', choices=["IR", "CL","SC_Buildings","SC_POIs"]) 19 | # Task names: IR = Image Retrieval, CL = Camera Localization, SC_Buildings = Scene Comparison (buildings), SC_POIs = Scene Comparison (POIs: restaurant, education, shopping) 20 | 21 | args = parser.parse_args() 22 | 23 | print("Load the model") 24 | model_wrapper = VLMWrapper(args.model_name) 25 | model = model_wrapper.get_vlm_model() 26 | 27 | print("Load the test data jsonl") 28 | 29 | with jsonlines.open(os.path.join(CROSS_VIEW_PATH, f"{args.city_name}_{args.task_name}_eval.jsonl")) as reader: 30 | eval_data = list(reader) 31 | 32 | if args.data_name == 'mini': 33 | eval_data = eval_data[:int(0.1*len(eval_data))] 34 | 35 | 36 | response_list = [] 37 | 38 | ### Model inference 39 | for obj in eval_data: 40 | img_names = obj['image'] 41 | prompt = obj['conversations'][0]['value'].replace("<image>", "") 42 | GT = obj['conversations'][1]['value'] 43 | ret = model.generate(img_names+[prompt]) 44 | response_list.append([img_names, ret, GT]) 45 | 46 | os.makedirs(CROSS_VIEW_RESULTS_PATH, exist_ok=True) 47 | # Save the response 48 | with open(os.path.join(CROSS_VIEW_RESULTS_PATH, f"{args.city_name}_{args.model_name}_{args.task_name}_eval.jsonl"), "w") as fout: 49 | 50 | for i in range(len(response_list)): 51 | value = { 52 | "img_name": response_list[i][0], 53 | "text": response_list[i][1], 54 | "GT": response_list[i][2], ## saving GT for quick human evaluation 55 | } 56 | fout.write(json.dumps(value) + "\n") 57 | 58 | model_wrapper.clean_proxy() 59 | -------------------------------------------------------------------------------- /examples/run_eval_multi_image_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=3 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | CITIES=('Beijing' 'NewYork' 'London') 10 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on SAT_count_buildings task" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | for CITY in "${CITIES[@]}"; do 17 | echo "Current city: $CITY" 18 | python -m evaluate.cross_view.SAT_count_buildings.SAT_count_buildings_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 19 | python -m evaluate.cross_view.SAT_count_buildings.SAT_count_buildings_stats --city_name $CITY --model_name $MODEL 20 | done 21 | done 22 | 23 | echo "Start running evaluation on SAT_count_pois task" 24 | for MODEL in "${MODELS[@]}"; do 25 | echo "Current model: $MODEL" 26 | for CITY in "${CITIES[@]}"; do 27 | echo "Current
city: $CITY" 28 | python -m evaluate.cross_view.SAT_count_pois.SAT_count_pois_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 29 | python -m evaluate.cross_view.SAT_count_pois.SAT_count_pois_stats --city_name $CITY --model_name $MODEL 30 | done 31 | done 32 | 33 | echo "Start running evaluation on STV_SAT_location task" 34 | for MODEL in "${MODELS[@]}"; do 35 | echo "Current model: $MODEL" 36 | for CITY in "${CITIES[@]}"; do 37 | echo "Current city: $CITY" 38 | python -m evaluate.cross_view.STV_SAT_location.STV_SAT_location_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 39 | python -m evaluate.cross_view.STV_SAT_location.STV_SAT_location_stats --city_name $CITY --model_name $MODEL 40 | done 41 | done 42 | 43 | echo "Start running evaluation on STV_SAT_mapping task" 44 | for MODEL in "${MODELS[@]}"; do 45 | echo "Current model: $MODEL" 46 | for CITY in "${CITIES[@]}"; do 47 | echo "Current city: $CITY" 48 | python -m evaluate.cross_view.STV_SAT_mapping.STV_SAT_mapping_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION 49 | python -m evaluate.cross_view.STV_SAT_mapping.STV_SAT_mapping_stats --city_name $CITY --model_name $MODEL 50 | done 51 | done -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_location/STV_SAT_location_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_location', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": 
num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/cross_view/STV_SAT_mapping/STV_SAT_mapping_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='STV_SAT_mapping', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_response.json") 25 | 26 | with open(path, "r") as f: 27 | data = json.load(f) 28 | 29 | correct = 0 30 | num_A = 0 31 | num_B = 0 32 | num_C = 0 33 | num_D = 0 34 | 35 | for d in data: 36 | prompt = d["prompt"] 37 | reference = d["reference"] 38 | response = d["response"] 39 | img_name = d["image"] 40 | 41 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 42 | 43 | if model_choice == reference: 44 | correct += 1 45 | 46 | if model_choice == "A": 47 | num_A += 1 48 | elif model_choice == "B": 49 | num_B += 1 50 | elif model_choice == "C": 51 | num_C += 1 52 | elif model_choice == "D": 53 | num_D += 1 54 | 55 | 56 | print("For Response file:", path) 57 | print("Accuracy:", correct / len(data)) 58 | print("Num A:", num_A) 59 | print("Num B:", num_B) 60 | print("Num C:", num_C) 61 | print("Num D:", num_D) 62 | print() 63 | 64 | # save the stats 65 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 66 | 67 | os.makedirs(stats_folder, exist_ok=True) 68 | 69 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}.json"), "w") as f: 70 | json.dump({ 71 | "Length of Data": len(data), 72 | "Accuracy": correct / len(data), 73 | "Num A": num_A, 74 | "Num B": num_B, 75 | "Num C": num_C, 76 | "Num D": num_D 77 | }, f, indent=4) -------------------------------------------------------------------------------- /simulate/format/uni_mc_format_llava.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import random 4 | import argparse 5 | random.seed(0) 6 | 7 | if __name__ == '__main__': 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 10 | parser.add_argument('--work_dir', type=str, default='../../data/') 11 | 12 | args = parser.parse_args() 13 | city_name = args.city_name 14 | work_dir = args.work_dir 15 | 16 | cur_dir = os.path.join(work_dir, f"dev-{city_name}/") 17 | 18 | output_dir = os.path.join(cur_dir, "llava_uniimage_mc_train") 19 | os.makedirs(output_dir, exist_ok=True) 20 | 21 | task_list = ['sat_address_mc', 'sat_landuse_mc', 'stv_address_mc', 'stv_landmark_mc'] 22 | summary_info = {} 23 | 24 | for task_name in task_list: 25 | unformatted_file_path = os.path.join(cur_dir, "uni_image_data", task_name, city_name, 
f"{city_name}_{task_name}_train.json") 26 | 27 | with open(unformatted_file_path, 'r') as f: 28 | unformatted_data = json.load(f) 29 | 30 | if len(unformatted_data) > 20000: 31 | print(f"Length of {task_name} is {len(unformatted_data)}, truncated to 20000") 32 | unformatted_data = random.sample(unformatted_data, 20000) 33 | 34 | formatted_data = [] 35 | for item in unformatted_data: 36 | formatted_item = {} 37 | image_name = item['image'].split('/')[-1] 38 | formatted_item['id'] = image_name 39 | formatted_item['image'] = item['image'] 40 | formatted_item['conversations'] = [ 41 | { 42 | "from": "human", 43 | 'value': item['prompt'] 44 | }, 45 | { 46 | "from": "gpt", 47 | "value": item['reference'] 48 | } 49 | ] 50 | 51 | formatted_data.append(formatted_item) 52 | 53 | output_file_path = os.path.join(output_dir, f"{city_name}_{task_name}_train_llava.json") 54 | with open(output_file_path, 'w') as f: 55 | json.dump(formatted_data, f, indent=4, ensure_ascii=False) 56 | print(f"Formatted data for {task_name} saved to {output_file_path}") 57 | summary_info[task_name] = len(formatted_data) 58 | 59 | summary_file_path = os.path.join(output_dir, "summary.json") 60 | with open(summary_file_path, 'w') as f: 61 | json.dump(summary_info, f, indent=4, ensure_ascii=False) 62 | print(f"Summary info saved to {summary_file_path}") -------------------------------------------------------------------------------- /train/vila_train_scripts/sft_mix_v1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Set the master address to localhost for single node 4 | export MASTER_ADDR="127.0.0.1" 5 | export CURRENT_RANK=0 6 | 7 | # Since it's single node, we don't need worker_list or SLURM_JOB_NODELIST 8 | n_node=1 9 | 10 | echo "MASTER_ADDR="$MASTER_ADDR 11 | echo "Single node setup, no SLURM required." 
12 | 13 | 14 | export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 15 | # TODO: Set the output directory 16 | OUTPUT_DIR="" 17 | mkdir -p $OUTPUT_DIR 18 | # TODO: Set the path to the training script 19 | CODE_PATH=//train/VILA/llava/train/train_mem.py 20 | # TODO: Make sure the actual data mixture is correct 21 | DATA_MIX=llava_instruct+sharegpt4v_gpt4_100k+UrbanLLaVA_multi+UrbanLLaVA_single+UrbanLLaVA_text2img2text+UrbanLLaVA_img2text2img+UrbanLLaVA_citywalk_vison 22 | MODEL_MAX_LENGTH=2048 23 | bs=8 # Adjust batch size as needed for your single GPU 24 | echo "number of nodes:" $n_node 25 | echo "per device batch size:" $bs 26 | echo "node rank:" $CURRENT_RANK 27 | NUM_GPUS=$(echo $CUDA_VISIBLE_DEVICES | tr ',' ' ' | wc -w) 28 | 29 | # TODO: Set the path to the model and the vision tower 30 | torchrun --nnodes=$n_node --nproc_per_node=$NUM_GPUS --master_port=25001 \ 31 | --master_addr $MASTER_ADDR --node_rank=$CURRENT_RANK \ 32 | $CODE_PATH \ 33 | --deepspeed ./zero3.json \ 34 | --model_name_or_path //Llama-3-VILA1.5-8B \ 35 | --version llama_3 \ 36 | --data_mixture $DATA_MIX \ 37 | --vision_tower //siglip-so400m-patch14-384 \ 38 | --mm_vision_select_feature cls_patch \ 39 | --mm_projector mlp_downsample \ 40 | --tune_vision_tower False \ 41 | --tune_mm_projector True \ 42 | --tune_language_model True \ 43 | --mm_vision_select_layer -2 \ 44 | --mm_use_im_start_end False \ 45 | --mm_use_im_patch_token False \ 46 | --image_aspect_ratio resize \ 47 | --bf16 True \ 48 | --output_dir $OUTPUT_DIR \ 49 | --num_train_epochs 1 \ 50 | --per_device_train_batch_size $bs \ 51 | --per_device_eval_batch_size 4 \ 52 | --gradient_accumulation_steps 2 \ 53 | --evaluation_strategy "no" \ 54 | --save_strategy "steps" \ 55 | --save_steps 500 \ 56 | --save_total_limit 1 \ 57 | --learning_rate 1e-4 \ 58 | --weight_decay 0. 
\ 59 | --warmup_ratio 0.03 \ 60 | --lr_scheduler_type "cosine" \ 61 | --logging_steps 1 \ 62 | --tf32 True \ 63 | --model_max_length $MODEL_MAX_LENGTH \ 64 | --gradient_checkpointing True \ 65 | --dataloader_num_workers 16 \ 66 | --lazy_preprocess True \ 67 | --vflan_no_system_prompt True \ 68 | --report_to tensorboard 69 | -------------------------------------------------------------------------------- /evaluate/geoqa/analyse_result.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import numpy as np 4 | 5 | def get_result(result_files): 6 | final_result = {} 7 | for result_file in result_files: 8 | result = pd.read_csv(os.path.join(file_path,result_file)) 9 | for _, row in result.iterrows(): 10 | # print(result_file) 11 | task = row['task_name'] 12 | model = row['model_name'] 13 | acc = row['accuracy'] 14 | if model not in final_result: 15 | final_result[model] = {} 16 | for cat,tasks in map_task.items(): 17 | if cat not in final_result[model]: 18 | final_result[model][cat] = [] 19 | if task in tasks: 20 | final_result[model][cat].append(acc) 21 | break 22 | 23 | final_result2 = {} 24 | for model, sub_dict in final_result.items(): 25 | if model not in final_result2: 26 | final_result2[model]={} 27 | for cat, acc_list in sub_dict.items(): 28 | if cat not in final_result2[model]: 29 | final_result2[model][cat] = {'mean':0,'var':0} 30 | final_result2[model][cat]['result'] = f"{format(np.mean(acc_list),'.4f')}" 31 | # ±{format(np.var(acc_list),'.4f')} 32 | 33 | # columns = ['node', 'landmark', 'path', 'districts', 'boundary', 'others'] 34 | columns = ['model_name', 'node', 'landmark', 'path', 'districts', 'boundary', 'others'] 35 | 36 | index = list(final_result2.keys()) 37 | df = pd.DataFrame(index=index, columns=columns) 38 | for model, features in final_result2.items(): 39 | df.at[model, 'model_name'] = model 40 | for feature, results in features.items(): 41 | df.at[model, feature] = results['result'] 42 | return df 43 | 44 | 45 | if __name__ == '__main__': 46 | file_path = "results/geo_knowledge_result" 47 | output_path = "results/geo_knowledge_result" 48 | result_files = os.listdir(file_path) 49 | map_task = { 50 | "node": ["poi2coor", "AOI_POI_road4", "poi2addr", "poi2type", "type2poi" ], 51 | "landmark": ["landmark_env", "landmark_path"], 52 | "path": ["road_link", "road_od", "road_length", "road_arrived_pois"], 53 | "districts": ["aoi2addr", "AOI_POI5", "AOI_POI6", "aoi_group", "aoi2type", "type2aoi", "aoi_poi", "poi_aoi", "districts_poi_type"], 54 | "boundary": ["aoi_boundary_poi", "AOI_POI_road1", "AOI_POI_road2", "AOI_POI_road3", "boundary_road"], 55 | "others": ["AOI_POI", "AOI_POI2", "AOI_POI3", "AOI_POI4"] 56 | } 57 | result = get_result(result_files) 58 | result.to_csv(os.path.join(output_path,"geoqa_benchmark_result.csv")) 59 | 60 | -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_pois/SAT_count_pois_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
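# [Illustrative sketch, not part of the original repository] Every stats
# script in this dump relies on `extract_choice` from serving.llm_api, which
# the dump does not include. A minimal plausible implementation, assuming it
# returns the first option letter (in option order) that appears as a
# standalone token in the response, might look like this; the real helper may
# parse more strictly or fall back to an LLM:
import re

def extract_choice(response, options):
    # Return the first option in `options` that occurs as a standalone
    # token in the model response (e.g. "A", "(B)", "Answer: C"), else None.
    for opt in options:
        if re.search(rf"\b{re.escape(opt)}\b", response):
            return opt
    return None
# (The real import from serving.llm_api below supersedes this sketch at runtime.)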
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_pois', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | for zl in ["zl15", "zl17"]: 25 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 26 | 27 | with open(path, "r") as f: 28 | data = json.load(f) 29 | 30 | correct = 0 31 | num_A = 0 32 | num_B = 0 33 | num_C = 0 34 | num_D = 0 35 | 36 | for d in data: 37 | prompt = d["prompt"] 38 | reference = d["reference"] 39 | response = d["response"] 40 | img_name = d["image"] 41 | 42 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 43 | 44 | if model_choice == reference: 45 | correct += 1 46 | 47 | if model_choice == "A": 48 | num_A += 1 49 | elif model_choice == "B": 50 | num_B += 1 51 | elif model_choice == "C": 52 | num_C += 1 53 | elif model_choice == "D": 54 | num_D += 1 55 | 56 | 57 | print("For Response file:", path) 58 | print("Accuracy:", correct / len(data)) 59 | print("Num A:", num_A) 60 | print("Num B:", num_B) 61 | print("Num C:", num_C) 62 | print("Num D:", num_D) 63 | print() 64 | 65 | # save the stats 66 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 67 | 68 | os.makedirs(stats_folder, exist_ok=True) 69 | 70 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 71 | json.dump({ 72 | "Length of Data": len(data), 73 | "Accuracy": correct / len(data), 74 | "Num A": num_A, 75 | "Num B": num_B, 76 | "Num C": num_C, 77 | "Num D": num_D 78 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | # It aggregates per-model accuracy for the zl17 street-view address task into a shared summary CSV. 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | # NOTE: csv_path_zl17 was referenced but never defined in the original script; the summary-CSV location below is an assumed fix. 26 | csv_path_zl17 = os.path.join(RESULTS_PATH, task_name, city, f"{task_name}_{city}_zl17_summary.csv") 27 | 28 | 29 | headers_needed_zl17 = not os.path.exists(csv_path_zl17) 30 | 31 | with open(csv_path_zl17, mode='a', newline='') as file_zl17: 32 | writer_zl17 = csv.writer(file_zl17) 33 | if headers_needed_zl17: 34 | writer_zl17.writerow(["model_name", "city", "Accuracy"]) 35 | 36 | for zl in ["zl17"]: 37 | 38 | path = os.path.join(RESULTS_PATH, task_name, city, f"{city}_{task_name}_{zl}_{args.model_name}_response.json") 39 | 40 | with open(path, "r") as f: 41 | data = json.load(f) 42 | 43 | correct = 0 44 | num_A = 0 45 | num_B = 0 46 | num_C = 0 47 | num_D = 0 48 | 49 | for d in data: 50 | prompt = d["prompt"] 51 | choices = d["choices"] 52 | reference = d["reference"] 53 | response = d["response"] 54 | img_name = d["image"] 55 | 56 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 57 | 58 | if model_choice == reference: 59 | correct += 1 60 | 61 | if model_choice == "A": 62 | num_A += 1 63 | elif model_choice == "B": 64 | num_B += 1 65 | elif model_choice == "C": 66 | num_C += 1 67 | elif model_choice == "D": 68 | num_D += 1 69 | 70 | accuracy = correct / len(data) 71 | writer_zl17.writerow([model_name, city, accuracy]) 72 | print("For Response file:", path) 73 | print("Accuracy:", accuracy) 74 | -------------------------------------------------------------------------------- /evaluate/cross_view/SAT_count_buildings/SAT_count_buildings_stats.py: -------------------------------------------------------------------------------- 1 | # This script computes accuracy statistics for the SAT_count_buildings multi-choice evaluation task. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | 9 | from config import MULTI_IMAGE_FOLDER 10 | from serving.llm_api import extract_choice 11 | 12 | 13 | if __name__ == '__main__': 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 16 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 17 | parser.add_argument('--task_name', type=str, default='SAT_count_buildings', help='task name') 18 | args = parser.parse_args() 19 | 20 | city = args.city_name 21 | model_name = args.model_name 22 | task_name = args.task_name 23 | 24 | for zl in ["zl15", "zl17"]: 25 | path = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 26 | 27 | with open(path, "r") as f: 28 | data = json.load(f) 29 | 30 | correct = 0 31 | num_A = 0 32 | num_B = 0 33 | num_C = 0 34 | num_D = 0 35 | 36 | for d in data: 37 | prompt = d["prompt"] 38 | reference = d["reference"] 39 | response = d["response"] 40 | img_name = d["image"] 41 | 42 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 43 | 44 | if model_choice == reference: 45 | correct += 1 46 | 47 | if model_choice == "A": 48 | num_A += 1 49 | elif model_choice == "B": 50 | num_B += 1 51 | elif model_choice == "C": 52 | num_C += 1 53 | elif model_choice == "D": 54 | num_D += 1 55 | 56 | 57 | print("For Response file:", path) 58 | print("Accuracy:", correct / len(data)) 59 | print("Num A:", num_A) 60 | print("Num B:", num_B) 61 | print("Num C:", num_C) 62 | print("Num D:", num_D) 63 | print() 64 | 65 | # save the stats 66 | stats_folder = os.path.join(MULTI_IMAGE_FOLDER, task_name, city, "stats") 67 | 68 | os.makedirs(stats_folder, exist_ok=True) 69 | 70 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 71 | json.dump({ 72 | "Length of Data": len(data), 73 | "Accuracy": correct / len(data), 74 | "Num A": num_A, 75 | "Num B": num_B, 76 | "Num C": num_C, 77 | "Num D": num_D 78 | }, f, indent=4) -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_stats.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from tqdm import tqdm 7 | import json 8 | import csv 9 | 10 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 11 | from serving.llm_api import extract_choice 12 | 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 19 | args = parser.parse_args() 20 | 21 | city = args.city_name 22 | model_name = args.model_name 23 | task_name = args.task_name 24 | 25 | for zl in ["zl15", "zl17"]: 26 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city, model_name, f"{city}_{task_name}_{zl}_response.json") 27 | 28 | with open(path, "r") as f: 29 | data = json.load(f) 30 | 31 | correct = 0 32 | num_A = 0 33 | num_B = 0 34 | num_C = 0 35 | num_D = 0 36 | 37 | for d in data: 38 | prompt = d["prompt"] 39 | choices = d["choices"] 40 | reference = d["reference"] 41 | response = d["response"] 42 | img_name = d["image"] 43 | 44 | model_choice = extract_choice(response, ["A", "B", "C", "D"]) 45 | 46 | if model_choice == reference: 47 | correct += 1 48 | 49 | if model_choice == "A": 50 | num_A += 1 51 | elif model_choice == "B": 52 | num_B += 1 53 | elif model_choice == "C": 54 | num_C += 1 55 | elif model_choice == "D": 56 | num_D += 1 57 | 58 | 59 | print("For Response file:", path) 60 | print("Accuracy:", correct / len(data)) 61 | print("Num A:", num_A) 62 | print("Num B:", num_B) 63 | print("Num C:", num_C) 64 | print("Num D:", num_D) 65 | print() 66 | 67 | # save the stats 68 | stats_folder = os.path.join(UNI_IMAGE_FOLDER, task_name, city, "stats") 69 | 70 | os.makedirs(stats_folder, exist_ok=True) 71 | 72 | with open(os.path.join(stats_folder, f"{task_name}_{city}_{model_name}_{zl}.json"), "w") as f: 73 | json.dump({ 74 | "Accuracy": correct / len(data), 75 | "Num A": num_A, 76 | "Num B": num_B, 77 | "Num C": num_C, 78 | "Num D": num_D 79 | }, f, indent=4) 80 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
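# [Illustrative sketch, not part of the original repository] All inference
# scripts in this dump assume the same minimal contract from
# serving.vlm_serving, which the dump does not include. A stub capturing the
# assumed interface (names taken from the call sites below):
class _VLMWrapperSketch:
    """Sketch of the VLMWrapper contract; the real class loads an actual model or API client."""

    def __init__(self, model_name):
        self.model_name = model_name  # e.g. "GPT4o_MINI" or "Llama-3-VILA1.5-8b"

    def get_vlm_model(self):
        # Returns an object whose generate() takes a flat list of image paths
        # followed by one text prompt and returns the response string.
        raise NotImplementedError("sketch only")

    def clean_proxy(self):
        # Tears down any proxy environment configured for API-backed models.
        pass
# (The real import from serving.vlm_serving below supersedes this sketch at runtime.)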
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | args = parser.parse_args() 21 | 22 | model_name = args.model_name 23 | city_name = args.city_name 24 | task_name = args.task_name 25 | 26 | print("Load the model") 27 | model_wrapper = VLMWrapper(args.model_name) 28 | model = model_wrapper.get_vlm_model() 29 | 30 | for zl in ["zl15", "zl17"]: 31 | 32 | print("Load the image list") 33 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 34 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 35 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 36 | output_dir = os.path.dirname(output_path) 37 | if not os.path.exists(output_dir): 38 | os.makedirs(output_dir, exist_ok=True) 39 | with open(path, "r") as f: 40 | data = json.load(f) 41 | 42 | if args.data_name == "mini": 43 | data = data[:10] 44 | 45 | response = [] 46 | for d in tqdm(data): 47 | prompt = d["prompt"] 48 | choices = d["choices"] 49 | reference = d["reference"] 50 | img_path = d["image"] 51 | img_name = img_path.split("/")[-1] 52 | 53 | assert os.path.exists(img_path), f"Image {img_path} not found" 54 | ret = model.generate([img_path, prompt]) 55 | response.append({ 56 | "image": img_name, 57 | "prompt": prompt, 58 | "choices": choices, 59 | "reference": reference, 60 | "response": ret 61 | }) 62 | 63 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 64 | os.makedirs(output_path, exist_ok=True) 65 | print("Save the response in" + output_path) 66 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 67 | json.dump(response, f, indent=4, ensure_ascii=False) 68 | 69 | model_wrapper.clean_proxy() 70 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | 21 | args = parser.parse_args() 22 | 23 | model_name = args.model_name 24 | city_name = args.city_name 25 | task_name = args.task_name 26 | 27 | print("Load the model") 28 | model_wrapper = VLMWrapper(args.model_name) 29 | model = model_wrapper.get_vlm_model() 30 | 31 | for zl in ["zl17"]: 32 | 33 | print("Load the image list") 34 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 35 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 36 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 37 | output_dir = os.path.dirname(output_path) 38 | if not os.path.exists(output_dir): 39 | os.makedirs(output_dir, exist_ok=True) 40 | 41 | with open(path, "r") as f: 42 | data = json.load(f) 43 | 44 | if args.data_name == "mini": 45 | data = data[:10] 46 | 47 | response = [] 48 | for d in tqdm(data): 49 | prompt = d["prompt"] 50 | choices = d["choices"] 51 | reference = d["reference"] 52 | img_path = d["image"] 53 | img_name = img_path.split("/")[-1] 54 | 55 | assert os.path.exists(img_path), f"Image {img_path} not found" 56 | ret = model.generate([img_path, prompt]) 57 | response.append({ 58 | "image": img_name, 59 | "prompt": prompt, 60 | "choices": choices, 61 | "reference": reference, 62 | "response": ret 63 | }) 64 | 65 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 66 | os.makedirs(output_path, exist_ok=True) 67 | print("Save the response in" + output_path) 68 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 69 | json.dump(response, f, indent=4, ensure_ascii=False) 70 | 71 | model_wrapper.clean_proxy() 72 | -------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | 21 | args = parser.parse_args() 22 | 23 | model_name = args.model_name 24 | city_name = args.city_name 25 | task_name = args.task_name 26 | 27 | print("Load the model") 28 | model_wrapper = VLMWrapper(args.model_name) 29 | model = model_wrapper.get_vlm_model() 30 | 31 | for zl in ["zl15", "zl17"]: 32 | 33 | print("Load the image list") 34 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 35 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 36 | 37 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 38 | output_dir = os.path.dirname(output_path) 39 | if not os.path.exists(output_dir): 40 | os.makedirs(output_dir, exist_ok=True) 41 | 42 | with open(path, "r") as f: 43 | data = json.load(f) 44 | 45 | if args.data_name == "mini": 46 | data = data[:10] 47 | 48 | response = [] 49 | for d in tqdm(data): 50 | prompt = d["prompt"] 51 | choices = d["choices"] 52 | reference = d["reference"] 53 | img_path = d["image"] 54 | img_name = img_path.split("/")[-1] 55 | 56 | assert os.path.exists(img_path), f"Image {img_path} not found" 57 | ret = model.generate([img_path, prompt]) 58 | response.append({ 59 | "image": img_name, 60 | "prompt": prompt, 61 | "choices": choices, 62 | "reference": reference, 63 | "response": ret 64 | }) 65 | 66 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 67 | os.makedirs(output_path, exist_ok=True) 68 | print("Save the response in" + output_path) 69 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 70 | json.dump(response, f, indent=4, ensure_ascii=False) 71 | 72 | model_wrapper.clean_proxy() 73 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_inference.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a address QA into a multi-choice question for evaluation. 
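# [Illustrative example, not part of the original repository] Each
# {city}_{task_name}_{zl}.json file consumed by these uniimage inference
# scripts is a JSON list of records with exactly the fields read in the loop
# below; a single record might look like this (all values hypothetical):
_EXAMPLE_RECORD = {
    "image": "/data/dev-Beijing/stv_images/000123.jpg",  # hypothetical path
    "prompt": "Which landmark appears in this street view? A. ... B. ... C. ... D. ... Answer with the option letter.",
    "choices": ["A", "B", "C", "D"],
    "reference": "B",
}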
2 | 3 | import os 4 | import argparse 5 | import pandas as pd 6 | from setproctitle import setproctitle 7 | 8 | from tqdm import tqdm 9 | import json 10 | 11 | from config import UNI_IMAGE_FOLDER, RESULTS_PATH 12 | from serving.vlm_serving import VLMWrapper 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name') 17 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 18 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 19 | parser.add_argument('--data_name', type=str, default='all', help='data name', choices=["all", "mini"]) 20 | args = parser.parse_args() 21 | 22 | model_name = args.model_name 23 | city_name = args.city_name 24 | task_name = args.task_name 25 | 26 | print("Load the model") 27 | model_wrapper = VLMWrapper(args.model_name) 28 | model = model_wrapper.get_vlm_model() 29 | 30 | for zl in ["zl17"]: 31 | 32 | print("Load the image list") 33 | # path = os.path.join(f"./{task_name}/{city_name}", f"{task_name}_{city_name}_{zl}.json") 34 | path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, f"{city_name}_{task_name}_{zl}.json") 35 | 36 | output_path = os.path.join(RESULTS_PATH, task_name, city_name, f"{city_name}_{task_name}_{zl}_{args.model_name}_response.json") 37 | output_dir = os.path.dirname(output_path) 38 | if not os.path.exists(output_dir): 39 | os.makedirs(output_dir, exist_ok=True) 40 | 41 | with open(path, "r") as f: 42 | data = json.load(f) 43 | 44 | if args.data_name == "mini": 45 | data = data[:10] 46 | 47 | response = [] 48 | for d in tqdm(data): 49 | prompt = d["prompt"] 50 | choices = d["choices"] 51 | reference = d["reference"] 52 | img_path = d["image"] 53 | img_name = img_path.split("/")[-1] 54 | 55 | assert os.path.exists(img_path), f"Image {img_path} not found" 56 | ret = model.generate([img_path, prompt]) 57 | response.append({ 58 | "image": img_name, 59 | "prompt": prompt, 60 | "choices": choices, 61 | "reference": reference, 62 | "response": ret 63 | }) 64 | 65 | output_path = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name, model_name) 66 | os.makedirs(output_path, exist_ok=True) 67 | print("Save the response in" + output_path) 68 | with open(os.path.join(output_path, f"{city_name}_{task_name}_{zl}_response.json"), "w") as f: 69 | json.dump(response, f, indent=4, ensure_ascii=False) 70 | 71 | 72 | model_wrapper.clean_proxy() 73 | -------------------------------------------------------------------------------- /simulate/satelite/process_landuse.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | def parse_input_txt(input_txt): 4 | region_data = [] 5 | with open(input_txt, 'r', encoding='utf-8') as f: 6 | for line in f: 7 | if "is at location:" in line: 8 | region_type = line.split("is at location:")[0].strip() 9 | try: 10 | coordinates = ast.literal_eval(line.split("is at location:")[1].strip()) 11 | region_data.append((region_type, coordinates)) 12 | except (SyntaxError, ValueError): 13 | print(f"Error parsing coordinates in line: {line}") 14 | return region_data 15 | 16 | def is_valid_polygon(coordinates): 17 | if len(coordinates) < 4: 18 | return False 19 | if coordinates[0] == coordinates[-1]: 20 | if len(coordinates) == 3: 21 | return False 22 | return True 23 | 24 | def filter_invalid_regions(region_data): 25 | valid_regions = [] 26 | for region_type, coordinates in region_data: 27 | for polygon in 
coordinates: 28 | if is_valid_polygon(polygon): 29 | valid_regions.append((region_type, polygon)) 30 | return valid_regions 31 | 32 | def write_output_txt(output_txt, region_data): 33 | with open(output_txt, 'w', encoding='utf-8') as f: 34 | for region_type, coordinates in region_data: 35 | f.write(f"{region_type} is at location: {coordinates}\n") 36 | 37 | 38 | import pandas as pd 39 | import os 40 | import argparse 41 | 42 | if __name__ == "__main__": 43 | parser = argparse.ArgumentParser() 44 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 45 | parser.add_argument('--work_dir', type=str, default='../../data/') 46 | args = parser.parse_args() 47 | city = args.city 48 | work_dir = args.work_dir 49 | working_dir = work_dir + f"dev-{city}/" 50 | for zl in ['zl17','zl15']: 51 | df = pd.read_csv( working_dir + f'SAT_{city}_'+zl+'.csv') 52 | for cnt in range(len(df)): 53 | img_name = df.at[cnt,'img_name'].split('.')[0] 54 | output_dir = working_dir +"short_clipped_results_"+zl 55 | os.makedirs(output_dir, exist_ok=True) 56 | # input_txt = "clipped_results_wudaokou_pixel_non_null/landuse_"+img_name+".txt" 57 | input_txt = working_dir + "clipped_results_pixel_non_null_"+zl+"/landuse_"+img_name+".txt" 58 | 59 | 60 | if not os.path.exists(input_txt): 61 | continue 62 | # output_txt = "short_clipped_results_wudaokou/landuse_"+img_name+".txt" 63 | output_txt = working_dir + "short_clipped_results_"+zl+"/landuse_"+img_name+".txt" 64 | 65 | region_data = parse_input_txt(input_txt) 66 | 67 | try: 68 | valid_region_data = filter_invalid_regions(region_data) 69 | except: 70 | continue 71 | 72 | write_output_txt(output_txt, valid_region_data) 73 | 74 | # print(f"Data has been processed and saved to {output_txt}") 75 | -------------------------------------------------------------------------------- /examples/run_eval_uniimage_inference_stats.sh: -------------------------------------------------------------------------------- 1 | # source /usr/local/miniconda3/bin/activate vila-vlmeval 2 | export CUDA_VISIBLE_DEVICES=4 3 | export DeepInfra_API_KEY="" 4 | export SiliconFlow_API_KEY="" 5 | export OpenAI_API_KEY="" 6 | export OPENAI_API_KEY="$OpenAI_API_KEY" 7 | export OPENAI_API_BASE="https://api.openai.com/v1/chat/completions" 8 | export DASHSCOPE_API_KEY="" 9 | CITIES=('Beijing' 'London' 'NewYork') 10 | MODELS=("Llama-3-VILA1.5-8b" "GPT4o_MINI") 11 | DATA_VERSION='all' 12 | 13 | echo "Start running evaluation on street view address task" 14 | for MODEL in "${MODELS[@]}"; do 15 | echo "Current model: $MODEL" 16 | for CITY in "${CITIES[@]}"; do 17 | echo "Current city: $CITY" 18 | # python -m evaluate.uniimage.stv_address.stv_address_convert --city_name $CITY --task_name stv_address_mc 19 | python -m evaluate.uniimage.stv_address.stv_address_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name stv_address_mc 20 | python -m evaluate.uniimage.stv_address.stv_address_stats --city_name $CITY --model_name $MODEL --task_name stv_address_mc 21 | done 22 | done 23 | 24 | echo "Start running evaluation on street view landmark task" 25 | for MODEL in "${MODELS[@]}"; do 26 | echo "Current model: $MODEL" 27 | for CITY in "${CITIES[@]}"; do 28 | echo "Current city: $CITY" 29 | # python -m evaluate.uniimage.stv_landmark.stv_landmark_convert --city_name $CITY --task_name stv_landmark_mc 30 | python -m evaluate.uniimage.stv_landmark.stv_landmark_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name 
stv_landmark_mc 31 | python -m evaluate.uniimage.stv_landmark.stv_landmark_stats --city_name $CITY --model_name $MODEL --task_name stv_landmark_mc 32 | done 33 | done 34 | 35 | echo "Start running evaluation on satellite address task" 36 | for MODEL in "${MODELS[@]}"; do 37 | echo "Current model: $MODEL" 38 | for CITY in "${CITIES[@]}"; do 39 | echo "Current city: $CITY" 40 | # python -m evaluate.uniimage.sat_address.sat_address_convert --city_name $CITY --task_name sat_address_mc 41 | python -m evaluate.uniimage.sat_address.sat_address_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name sat_address_mc 42 | python -m evaluate.uniimage.sat_address.sat_address_stats --city_name $CITY --model_name $MODEL --task_name sat_address_mc 43 | done 44 | done 45 | 46 | echo "Start running evaluation on satellite landuse task" 47 | for MODEL in "${MODELS[@]}"; do 48 | echo "Current model: $MODEL" 49 | for CITY in "${CITIES[@]}"; do 50 | echo "Current city: $CITY" 51 | # python -m evaluate.uniimage.sat_landuse.sat_landuse_convert --city_name $CITY --task_name sat_landuse_mc 52 | python -m evaluate.uniimage.sat_landuse.sat_landuse_inference --city_name $CITY --model_name $MODEL --data_name $DATA_VERSION --task_name sat_landuse_mc 53 | python -m evaluate.uniimage.sat_landuse.sat_landuse_stats --city_name $CITY --model_name $MODEL --task_name sat_landuse_mc 54 | done 55 | done -------------------------------------------------------------------------------- /evaluate/evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tqdm 3 | import argparse 4 | import pandas as pd 5 | from multiprocessing import Pool 6 | 7 | from config import CITY_BOUNDARY, VLM_MODELS, LLM_MODELS, TASK_DEST_MAPPING 8 | 9 | 10 | class Evaluator: 11 | def __init__(self, city_name, model_name, data_name, task_name, workers=1) -> None: 12 | self.city_list = list(CITY_BOUNDARY.keys()) 13 | self.model_list = {"vlm": VLM_MODELS, "llm": LLM_MODELS} 14 | self.task_list = list(TASK_DEST_MAPPING.keys()) 15 | self.workers = workers 16 | 17 | self.city_name_list = city_name.split(",") 18 | self.model_name_list = model_name.split(",") 19 | self.task_name_list = task_name.split(",") 20 | self.data_name = data_name 21 | 22 | def evaluate(self): 23 | # TODO: run single task or run task sets 24 | self.multiple_task_wrapper(self.task_name_list, self.model_name_list, self.city_name_list) 25 | 26 | def valid_inputs(self): 27 | # TODO: check if the inputs are valid 28 | pass 29 | 30 | @staticmethod 31 | def single_task_wrapper(task_name, model_name, city_name, data_name): 32 | # run single task 33 | task_desc = TASK_DEST_MAPPING[task_name] 34 | if task_name in ["population", "objects"]: 35 | eval_scipt = "python -m {} --city_name={} --data_name={} --model_name={} --task_name={}".format(task_desc, city_name, data_name, model_name, task_name) 36 | else: 37 | eval_scipt = "python -m {} --city_name={} --data_name={} --model_name={}".format(task_desc, city_name, data_name, model_name) 38 | 39 | return os.system(eval_scipt) 40 | 41 | # TODO: run multiple tasks 42 | def multiple_task_wrapper(self, task_list, model_list, city_list): 43 | # TODO running multi tasks efficiently 44 | para_group = [] 45 | for task in task_list: 46 | for model in model_list: 47 | for city in city_list: 48 | para_group.append([task, model, city, self.data_name]) 49 | 50 | if self.workers==1: 51 | for para in para_group: 52 | self.single_task_wrapper(*para) 53 | else: 54 | with 
Pool(self.workers) as pool:  # use the worker count stored on the instance rather than the module-level args 55 | pool.starmap(self.single_task_wrapper, para_group) 56 | 57 | 58 | def analyze_results(self): 59 | # TODO: analyze the results 60 | pass 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--city_name', type=str, default="Beijing") 66 | parser.add_argument('--task_name', type=str, default='traffic_signal') 67 | parser.add_argument('--data_name', type=str, default='mini') 68 | parser.add_argument('--model_name', type=str, default="GPT4o") 69 | args = parser.parse_args() 70 | 71 | # Evaluator Initialization 72 | Eval = Evaluator( 73 | city_name=args.city_name, 74 | model_name=args.model_name, 75 | data_name=args.data_name, 76 | task_name=args.task_name) 77 | # Running Evaluator 78 | Eval.evaluate() 79 | -------------------------------------------------------------------------------- /simulate/satelite/process_driving.py: -------------------------------------------------------------------------------- 1 | import re 2 | from tqdm import trange 3 | import argparse 4 | import ast 5 | def contains_chinese(text): 6 | return bool(re.search(r'[\u4e00-\u9fff]', text)) 7 | 8 | def merge_segments(road_segments): 9 | merged_segments = [] 10 | for segment in road_segments: 11 | if not merged_segments: 12 | merged_segments.append(segment) 13 | else: 14 | if merged_segments[-1][-1] == segment[0]: 15 | merged_segments[-1].extend(segment[1:]) 16 | else: 17 | merged_segments.append(segment) 18 | 19 | simplified_segments = [] 20 | for seg in merged_segments: 21 | if len(seg) > 1: 22 | simplified_segments.append([seg[0], seg[-1]]) 23 | 24 | return simplified_segments 25 | 26 | def process_road_data(road_data): 27 | compressed_data = {} 28 | 29 | for road_name, segments in road_data.items(): 30 | merged_segments = merge_segments(segments) 31 | 32 | if merged_segments: 33 | compressed_data[road_name] = merged_segments 34 | 35 | return compressed_data 36 | 37 | def parse_input_txt(input_txt): 38 | road_data = {} 39 | with open(input_txt, 'r', encoding='utf-8') as f: 40 | for line in f: 41 | if "is at location:" in line: 42 | road_name = line.split("is at location:")[0].strip() 43 | coordinates = ast.literal_eval(line.split("is at location:")[1].strip()) # literal_eval is safer than eval for parsing coordinate lists 44 | road_data[road_name] = coordinates 45 | return road_data 46 | 47 | def write_output_txt(output_txt, compressed_data): 48 | with open(output_txt, 'w', encoding='utf-8') as f: 49 | for road_name, segments in compressed_data.items(): 50 | for seg in segments: 51 | f.write(f"{road_name} from {seg[0]} to {seg[1]}\n") 52 | 53 | import pandas as pd 54 | import os 55 | 56 | if __name__ == "__main__": 57 | parser = argparse.ArgumentParser() 58 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 59 | parser.add_argument('--work_dir', type=str, default='../../data/') 60 | args = parser.parse_args() 61 | city = args.city 62 | work_dir = args.work_dir 63 | working_dir = work_dir + f"dev-{city}/" 64 | for zl in ['zl17','zl15']: 65 | # df = pd.read_csv('../SAT_BJ_wudaokou_zl15.csv') 66 | df = pd.read_csv(working_dir + f'SAT_{city}_'+zl+'.csv') 67 | for cnt in trange(len(df)): 68 | img_name = df.at[cnt,'img_name'].split('.')[0] 69 | 70 | output_dir = working_dir + "short_clipped_results_"+zl 71 | os.makedirs(output_dir, exist_ok=True) 72 | input_txt = working_dir + "clipped_results_pixel_non_null_"+zl+"/driving_"+img_name+".txt" 73 | if not os.path.exists(input_txt): 74 | continue 75 | 76 | output_txt = working_dir + 
"short_clipped_results_"+zl+"/driving_"+img_name+".txt" 77 | 78 | road_data = parse_input_txt(input_txt) 79 | 80 | compressed_data = process_road_data(road_data) 81 | 82 | write_output_txt(output_txt, compressed_data) 83 | 84 | # print(f"Data has been processed and saved to {output_txt}") 85 | -------------------------------------------------------------------------------- /simulate/SAT_pipeline.bash: -------------------------------------------------------------------------------- 1 | cities=("Beijing" "NewYork" "London") 2 | work_dir="../../data/" 3 | 4 | # Remove the work_dir if it exists 5 | # rm -rf $work_dir 6 | 7 | # Depend on /ThreeCityImage/{city}/Sat_{zl}/ 8 | for city in "${cities[@]}"; do 9 | echo "Filtering images for $city" 10 | python ./satelite/make_image_list_sat.py --city $city --work_dir $work_dir 11 | done 12 | wait 13 | echo "Finish filtering images" 14 | # Get /sample_sat_image_{zl}/ 15 | # Get SAT_{city}_{zl}.csv 16 | 17 | # Depend on SAT_{city}_{zl}.csv 18 | for city in "${cities[@]}"; do 19 | echo "Creating shp file for $city" 20 | python ./satelite/make_sat_shp.py --city $city --work_dir $work_dir 21 | done 22 | wait 23 | echo "Finish creating shp file" 24 | # Get SAT_{city}_{zl}.shp, SAT_{city}_{zl}.dbf, SAT_{city}_{zl}.shx, SAT_{city}_{zl}.prj 25 | 26 | # Depend on SAT_{city}_{zl}.shp, /ThreeCityImage/city_geojson_three_cities/{city}_{typ}.geojson 27 | for city in "${cities[@]}"; do 28 | echo "Clip shp point data for $city" 29 | python ./satelite/clip_shp_point.py --city $city --work_dir $work_dir 30 | done 31 | wait 32 | echo "Finish clipping shp point data" 33 | # Get clipped_results_{zl}/clipped_{typ}_{polygon['region_nam'].split('.')[0]}.geojson 34 | 35 | # Depend on SAT_{city}_{zl}.csv 36 | # Depend on clipped_results_{zl}/clipped_{typ}_{img_name}.geojson 37 | for city in "${cities[@]}"; do 38 | echo "Change OSM's lat and lon to SAT's pixel for $city" 39 | python ./satelite/coord_to_pixel.py --city $city --work_dir $work_dir 40 | done 41 | wait 42 | echo "Finish changing OSM's lat and lon to SAT's pixel" 43 | # Get clipped_results_{zl}_updated/clipped_{typ}_{img_name}_updated.geojson 44 | # Get clipped_results_pixel_{zl}/clipped_{typ}_{img_name}_pixel.geojson 45 | 46 | # Depend on clipped_results_pixel_{zl}/clipped_{typ}_{img_name}_pixel.geojson 47 | for city in "${cities[@]}"; do 48 | echo "Filter out none valid data for $city" 49 | python ./satelite/extract_non_null_values.py --city $city --work_dir $work_dir 50 | done 51 | wait 52 | echo "Finish filtering out none valid data" 53 | # Get clipped_results_pixel_non_null_{zl}/{typ}_{img_name}.txt 54 | 55 | # Depend on SAT_{city}_{zl}.csv 56 | # Depend on clipped_results_pixel_non_null_{zl}/driving_{img_name}.txt 57 | for city in "${cities[@]}"; do 58 | echo "Process driving data for $city" 59 | python ./satelite/process_driving.py --city $city --work_dir $work_dir 60 | done 61 | wait 62 | echo "Finish processing driving data" 63 | # Get "short_clipped_results_{zl}/driving_{img_name}.txt" 64 | 65 | # Depend on SAT_{city}_{zl}.csv 66 | # Depend on clipped_results_pixel_non_null_{zl}/landuse_{img_name}.txt 67 | for city in "${cities[@]}"; do 68 | echo "Process landuse data for $city" 69 | python ./satelite/process_landuse.py --city $city --work_dir $work_dir 70 | done 71 | wait 72 | echo "Finish processing landuse data" 73 | # Get "short_clipped_results_{zl}/landuse_{img_name}.txt" 74 | 75 | # Depend on SAT_{city}_{zl}.csv 76 | # Depend on clipped_results_pixel_non_null_{zl}/poi_{img_name}.txt 77 | for city in 
"${cities[@]}"; do 78 | echo "Process POI data for $city" 79 | python ./satelite/process_poi.py --city $city --work_dir $work_dir 80 | done 81 | wait 82 | echo "Finish processing POI data" 83 | # Get "short_clipped_results_{zl}/poi_{img_name}.txt" -------------------------------------------------------------------------------- /evaluate/cross_view/eval_analysis.py: -------------------------------------------------------------------------------- 1 | 2 | import jsonlines 3 | import os 4 | import argparse 5 | import pandas as pd 6 | # from setproctitle import setproctitle 7 | from tqdm import tqdm 8 | import json 9 | 10 | from config import CROSS_VIEW_PATH, CROSS_VIEW_RESULTS_PATH 11 | 12 | 13 | def calculate_acc(city_name_list, model_name_list,task_name,save_name): 14 | all_acc_list = [] 15 | all_city_list = [] 16 | all_model_name_list = [] 17 | for model_name in model_name_list: 18 | for city_name in city_name_list: 19 | city_pred_list = [] 20 | city_GT_list = [] 21 | print(model_name, city_name) 22 | try: 23 | json_file_path = os.path.join(CROSS_VIEW_RESULTS_PATH, city_name+'_'+model_name+'_'+task_name+'_eval.jsonl') 24 | with jsonlines.open(json_file_path) as reader: 25 | for obj in reader: 26 | city_pred_list.append(obj['text']) 27 | city_GT_list.append(obj['GT']) 28 | except FileNotFoundError as e: 29 | print("File not found! City:{} Model:{}".format(city_name, model_name)) 30 | continue 31 | 32 | if len(city_pred_list) != len(city_GT_list): 33 | raise ValueError("different length") 34 | 35 | city_pred_list_lower = [item.lower() for item in city_pred_list] 36 | city_GT_list_lower = [item.lower() for item in city_GT_list] 37 | 38 | count = sum(item1 == item2 for item1, item2 in zip(city_pred_list_lower, city_GT_list_lower)) 39 | total = len(city_GT_list_lower) 40 | 41 | acc = count / total * 100 42 | 43 | all_acc_list.append(acc) 44 | all_city_list.append(city_name) 45 | all_model_name_list.append(model_name) 46 | 47 | df = pd.DataFrame({'city': all_city_list, 'model': all_model_name_list, 'acc': all_acc_list}) 48 | df.to_csv(os.path.join(CROSS_VIEW_RESULTS_PATH, save_name), index=False) 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument('--model_name', type=str, default='InternVL2-40B', help='model name')#InternVL2-40B GPT4o_MINI Qwen2-VL-2B-Instruct 54 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') #Beijing, London, NewYork 55 | parser.add_argument('--eval_all_city_all_model', type=str, default="no", help='If yes, automatically evaluate data from all cities on all models.') 56 | parser.add_argument('--task_name', type=str, default='IR', help='task name', choices=["IR", "CL","SC_Buildings","SC_POIs"]) 57 | #task_name include: Image Retrieval, Camera Localization, Scene Comparision Buildings, Scene Comparison POIs(restaurant, education, shopping) 58 | 59 | args = parser.parse_args() 60 | 61 | if args.eval_all_city_all_model == 'yes': 62 | args.city_name="Beijing,London,NewYork" 63 | args.model_name="InternVL2-40B,GPT4o_MINI" 64 | 65 | city_name_list = args.city_name.split(",") 66 | model_name_list = args.model_name.split(",") 67 | 68 | else: 69 | city_name_list = [args.city_name] 70 | model_name_list = [args.model_name] 71 | 72 | save_name = 'summary_all_models_all_cities_{}.csv'.format(args.task_name) 73 | 74 | calculate_acc(city_name_list,model_name_list,args.task_name,save_name) 75 | -------------------------------------------------------------------------------- 
/simulate/advance/CoT/stv-landmark-cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | args = parser.parse_args() 64 | 65 | city = args.city 66 | work_dir = args.work_dir 67 | 68 | model_name = "gpt-4o-mini-2024-07-18" 69 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 70 | 71 | for zl in ["zl15", "zl17"]: 72 | og_path = f"stv_landmark_{city}_{zl}.json" 73 | with open(og_path, 'r') as f: 74 | og_data = json.load(f) 75 | 76 | output = [] 77 | 78 | with ThreadPoolExecutor(max_workers=32) as executor: 79 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 80 | 81 | for future in tqdm(futures): 82 | item = futures[future] 83 | polished_CoT = future.result() 84 | output.append({ 85 | "image_name": item['image_name'], 86 | "near_pois": item['near_pois'], 87 | "landmark": item['landmark'], 88 | "CoT": item['CoT'], 89 | "polished_CoT": polished_CoT 90 | }) 91 | 92 | with open(f"polished_stv_landmark_{city}_{zl}.json", 'w') as f: 93 | json.dump(output, f, indent=4, ensure_ascii=False) 94 | -------------------------------------------------------------------------------- /simulate/streetview/stv_nearest_pois.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import pandas as pd 3 | from shapely.geometry import Point 4 | from math import radians, sin, cos, asin, sqrt 5 | import argparse 6 | 7 | 8 | 9 | import pandas 
as pd 12 | from scipy.spatial import KDTree 13 | 14 | if __name__ == '__main__': 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 17 | parser.add_argument('--work_dir', type=str, default='../../data/') 18 | args = parser.parse_args() 19 | city = args.city 20 | work_dir = args.work_dir 21 | for zl in ['zl15', 'zl17']: 22 | csv_file = work_dir + f"dev-{city}/stv_in_sat_{city}_{zl}.csv" 23 | 24 | output_dir = work_dir + f"dev-{city}/" 25 | 26 | csv_data = pd.read_csv(csv_file) 27 | 28 | csv_data['geometry'] = csv_data.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1) 29 | csv_gdf = gpd.GeoDataFrame(csv_data, geometry='geometry', crs="EPSG:4326") 30 | 31 | # TODO: Change the following paths to the actual paths 32 | if city == "Beijing": 33 | geojson_file = "....../MLLM-wudaokou_new/make_dataset/beijing_pois_five_ring.geojson" 34 | elif city == "London": 35 | geojson_file = "....../ThreeCityImage/city_geojson/London_geojson/London_pois_five_ring.geojson" 36 | elif city == "NewYork": 37 | geojson_file = "....../ThreeCityImage/city_geojson/NewYork_geojson/NewYork_pois_five_ring.geojson" 38 | 39 | assert geojson_file is not None, "Please specify the path to the GeoJSON file." 40 | 41 | geojson_gdf = gpd.read_file(geojson_file) 42 | 43 | points_gdf = geojson_gdf[geojson_gdf.geometry.type == 'Point'].copy() 44 | polygons_gdf = geojson_gdf[geojson_gdf.geometry.type == 'Polygon'].copy() 45 | 46 | polygons_gdf['geometry'] = polygons_gdf['geometry'].centroid 47 | 48 | combined_gdf = pd.concat([points_gdf, polygons_gdf]) 49 | 50 | geojson_coords = [(geom.x, geom.y) for geom in combined_gdf.geometry] 51 | 52 | tree = KDTree(geojson_coords) 53 | 54 | results = [] 55 | 56 | for idx, row in csv_gdf.iterrows(): 57 | point_coords = (row.geometry.x, row.geometry.y) 58 | # KDTree distances here are Euclidean in degrees of lon/lat, not metres 59 | distances, indices = tree.query(point_coords, k=20) 60 | 61 | for distance, index in zip(distances, indices): 62 | nearest_feature = combined_gdf.iloc[index] 63 | feature_name = nearest_feature.get('name', 'Unknown') 64 | 65 | results.append({ 66 | 'image_name': row.image_name, 67 | 'csv_latitude': row.geometry.y, 68 | 'csv_longitude': row.geometry.x, 69 | 'nearest_feature_name': feature_name, 70 | 'nearest_feature_type': nearest_feature.geometry.type, 71 | 'distance': distance 72 | }) 73 | 74 | output_df = pd.DataFrame(results) 75 | # output_df.to_csv(output_dir + "stv_in_sat_zl17_wudaokou_nearest_features.csv", index=False) 76 | output_df.to_csv(output_dir + f"stv_in_sat_nearest_features_{city}_{zl}.csv", index=False) 77 | 78 | print("Results saved to:", output_dir + f"stv_in_sat_nearest_features_{city}_{zl}.csv") -------------------------------------------------------------------------------- /simulate/streetview/spatial_join.py: -------------------------------------------------------------------------------- 1 | import geopandas as gpd 2 | import pandas as pd 3 | import argparse 4 | import os 5 | from tqdm import trange 6 | 7 | 8 | # Original Implementation assumes that the shapefile and the CSV file are in the same CRS. 9 | # This implementation checks the CRS of both files and reprojects them if necessary.
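# Note on the reprojection below: it follows the usual geopandas pattern of assuming
# EPSG:4326 (WGS84 lon/lat) whenever a layer carries no CRS metadata, then bringing the
# point layer into the polygon layer's CRS before gpd.sjoin, since a spatial join across
# mismatched coordinate systems silently yields empty or incorrect matches.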
10 | import shutil 11 | 12 | 13 | 14 | def process_spatial_join(shp_file, stv_index_path, output_file): 15 | gdf_polygon = gpd.read_file(shp_file) 16 | 17 | if gdf_polygon.crs is None: 18 | gdf_polygon.set_crs(epsg=4326, inplace=True) # Assuming WGS84 as default 19 | 20 | 21 | df_points = pd.read_csv(stv_index_path) 22 | geometry = gpd.points_from_xy(df_points['longitude'], df_points['latitude']) 23 | gdf_points = gpd.GeoDataFrame(df_points, geometry=geometry) 24 | 25 | # Set CRS for points if not already set (assuming WGS84) 26 | if gdf_points.crs is None: 27 | gdf_points.set_crs(epsg=4326, inplace=True) 28 | 29 | 30 | if gdf_polygon.crs != gdf_points.crs: 31 | gdf_points = gdf_points.to_crs(gdf_polygon.crs) 32 | 33 | result = gpd.sjoin(gdf_polygon, gdf_points, how='inner') 34 | 35 | print(result.head()) 36 | 37 | # result[['region_nam', 'sid', 'sid_84_long', 'sid_84_lat']].to_csv(output_file, index=False) 38 | result[['image_name', 'longitude', 'latitude']].to_csv(output_file, index=False) 39 | print(f"Saved result to {output_file}") 40 | print(f"Total records: {len(result)}") 41 | 42 | 43 | if __name__ == "__main__": 44 | parser = argparse.ArgumentParser() 45 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 46 | parser.add_argument('--work_dir', type=str, default='../../data/') 47 | args = parser.parse_args() 48 | city = args.city 49 | work_dir = args.work_dir 50 | 51 | # TODO: Change the path to the actual streetview index file 52 | stv_index_path = f"....../Tricity/index_{city}.csv" 53 | 54 | for zl in ['zl15', 'zl17']: 55 | process_spatial_join(f'{work_dir}dev-{city}/SAT_{city}_{zl}.shp', stv_index_path, f'{work_dir}dev-{city}/stv_in_sat_{city}_{zl}.csv') 56 | print(f"Processed {zl} for {city}") 57 | 58 | # sanity check and copy the stv images 59 | print(f"Copying streetview images for {city} {zl}") 60 | # TODO: Change the path to the actual streetview image directory 61 | stv_img_all_dir = f'....../ThreeCityImage/{city}/StreetView/' 62 | 63 | df = pd.read_csv(f'{work_dir}dev-{city}/stv_in_sat_{city}_{zl}.csv') 64 | print(f"Total records: {len(df)}") 65 | target_dir = f'{work_dir}dev-{city}/sampled_stv_images/' 66 | os.makedirs(target_dir, exist_ok=True) 67 | 68 | for i in trange(len(df)): 69 | row = df.iloc[i] 70 | img_name = row["image_name"] 71 | # shutil handles spaces and shell-special characters in paths, unlike os.system("cp ...") with hand-quoted arguments 72 | og_path = os.path.join(stv_img_all_dir, img_name) 73 | assert os.path.exists(og_path), f"Image {og_path} not found" 74 | new_image_path = os.path.join(target_dir, img_name) 75 | shutil.copy(og_path, new_image_path) 76 | 77 | 78 | # assert target_dir is not empty 79 | assert len(os.listdir(target_dir)) > 0, f"No images copied to {target_dir}" 80 | print(f"Streetview images copied for {city} {zl}") 81 | print(f"Total images: {len(os.listdir(target_dir))} in {target_dir}") 82 | -------------------------------------------------------------------------------- /simulate/advance/CoT/stv_address_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 |
API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='stv-address-cot', choices=['stv-address-cot']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | 70 | model_name = "gpt-4o" 71 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 72 | 73 | og_path = os.path.join(work_dir, f'dev-{city}/CoT/{task}/{task}_{city}.json') 74 | with open(og_path, 'r') as f: 75 | og_data = json.load(f) 76 | 77 | output = [] 78 | 79 | with ThreadPoolExecutor(max_workers=128) as executor: 80 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 81 | 82 | for future in tqdm(futures): 83 | item = futures[future] 84 | polished_CoT = future.result() 85 | output.append({ 86 | "img_name": item['img_name'], 87 | "CoT": item['CoT'], 88 | "polished_CoT": polished_CoT, 89 | "address": item['address'], 90 | "description": item['description'], 91 | "near_feature": item['near_feature'], 92 | }) 93 | 94 | output_path = f'polished_{task}_{city}.json' 95 | output_path = os.path.join(work_dir, f'dev-{city}/CoT/{task}', output_path) 96 | 97 | with open(output_path, 'w') as f: 98 | json.dump(output, f, indent=4, ensure_ascii=False) 99 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_address_cot/gen_CoT_template.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import pandas as pd 4 | from tqdm import trange 5 | 6 | # Generate CoT ground truth 7 | # Three reasoning steps: 8 | # 1. Tell the city name 9 | # 2. Extract the location's pois around 10 | # 3. 
Tell the location's address 11 | 12 | 13 | def sat_adr_prompt_template(city_name:str, description:str, address:str): 14 | """ 15 | Generate the prompt for the satellite view image addressing task 16 | """ 17 | prompt = f""" 18 | Step 1: Tell the city name 19 | According to the satellite view image, this image is taken in {city_name}. 20 | Step 2: Extract the location's features around 21 | From the image, I can see the following features: {description}. 22 | Step 3: Tell the location's address 23 | Based on my observation and knowledge about this region, the address of this region is {address}. 24 | """ 25 | prompt = str(prompt).replace('\n', ' ').strip() 26 | 27 | return prompt 28 | 29 | if __name__ == "__main__": 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 32 | parser.add_argument('--work_dir', type=str, default='../../data/') 33 | parser.add_argument('--task', type=str, default='sat-address-cot', choices=['sat-address-cot', 'street-view-address']) 34 | args = parser.parse_args() 35 | city = args.city 36 | work_dir = args.work_dir 37 | task = args.task 38 | work_dir = work_dir + f'dev-{city}/' 39 | 40 | output_dir = work_dir + f'CoT/{task}/' 41 | import os 42 | os.makedirs(output_dir, exist_ok=True) 43 | 44 | CITY_NAME = city 45 | 46 | # Satellite view 47 | for zl in ["zl15", "zl17"]: 48 | description_csv = work_dir + f'rs_osm_description_{CITY_NAME}_{zl}.csv' 49 | 50 | address_csv = work_dir + f'sat_address_combined_{CITY_NAME}_{zl}.csv' 51 | 52 | df_description = pd.read_csv(description_csv) 53 | df_address = pd.read_csv(address_csv) 54 | 55 | df_address = df_address.dropna(subset=["combined_adr"]).reset_index(drop=True) 56 | df_description = df_description.dropna(subset=["text"]).reset_index(drop=True) 57 | 58 | # assert len(df_description) == len(df_address) 59 | print("After dropping NaN values:") 60 | print(f"Number of descriptions: {len(df_description)}") 61 | print(f"Number of addresses: {len(df_address)}") 62 | output = [] 63 | print("Generating CoT...") 64 | 65 | for i in trange(len(df_description)): 66 | img_name_1 = df_description.loc[i, 'img_name'] 67 | # O(n^2) img_name matching; acceptable at this scale, though a pandas merge on img_name would be faster 68 | for j in range(len(df_address)): 69 | img_name_2 = df_address.loc[j, 'img_name'] 70 | if img_name_1 == img_name_2: 71 | 72 | description = df_description.loc[i, 'text'] 73 | 74 | address = df_address.loc[j, 'combined_adr'] 75 | 76 | prompt = sat_adr_prompt_template(CITY_NAME, description, address) 77 | 78 | df_description.loc[i, 'CoT'] = prompt 79 | 80 | output.append({ 81 | "img_name": img_name_1, 82 | "CoT": prompt, 83 | "description": description, 84 | "address": address 85 | }) 86 | 87 | # output_path = f'sat_address_cot_{city}_{zl}.json' 88 | output_path = output_dir + f'{task}_{CITY_NAME}_{zl}.json' 89 | print(f"Saving CoT to {output_path}") 90 | print("Total number of CoT:", len(output)) 91 | 92 | with open(output_path, 'w') as f: 93 | json.dump(output, f, indent=4, ensure_ascii=False) 94 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_address_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 |
API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=2048, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='sat-address-cot', choices=['sat-address-cot', 'street-view-address']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 70 | 71 | model_name = "gpt-4o" 72 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 73 | 74 | for zl in ['zl15', 'zl17']: 75 | og_path = f"{task}_{city}_{zl}.json" 76 | og_path = os.path.join(CoT_dir, og_path) 77 | 78 | with open(og_path, 'r') as f: 79 | og_data = json.load(f) 80 | 81 | output = [] 82 | 83 | with ThreadPoolExecutor(max_workers=128) as executor: 84 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 85 | 86 | for future in tqdm(futures): 87 | item = futures[future] 88 | polished_CoT = future.result() 89 | output.append({ 90 | "img_name": item["img_name"], 91 | "polished_CoT": polished_CoT, 92 | "og_CoT": item["CoT"], 93 | "description": item["description"], 94 | "address": item["address"] 95 | }) 96 | 97 | output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 98 | 99 | with open(output_path, 'w') as f: 100 | json.dump(output, f, indent=4, ensure_ascii=False) 101 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_count_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | 
"siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | You can make any necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 36 | Don't change the meaning of the text. 37 | Only output the polished text, without any additional information or appending text. 38 | Here is the original text: 39 | {og_text} 40 | ''' 41 | 42 | dialogs = [{ 43 | "role": "user", 44 | "content": [{"type": "text", "text": prompt}] 45 | }] 46 | 47 | try: 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0 53 | ) 54 | return completion.choices[0].message.content 55 | except Exception as e: 56 | print(e) 57 | return "" 58 | 59 | if __name__ == "__main__": 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 62 | parser.add_argument('--work_dir', type=str, default='../../data/') 63 | parser.add_argument('--task', type=str, default='sat-count-cot', choices=['sat-address-cot', 'street-view-address']) 64 | args = parser.parse_args() 65 | 66 | city = args.city 67 | work_dir = args.work_dir 68 | task = args.task 69 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 70 | 71 | model_name = "gpt-4o" 72 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 73 | 74 | for zl in ['zl15', 'zl17']: 75 | og_path = f"{task}_{city}_{zl}.json" 76 | og_path = os.path.join(CoT_dir, og_path) 77 | 78 | with open(og_path, 'r') as f: 79 | og_data = json.load(f) 80 | 81 | output = [] 82 | 83 | with ThreadPoolExecutor(max_workers=128) as executor: 84 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 85 | 86 | for future in tqdm(futures): 87 | item = futures[future] 88 | polished_CoT = future.result() 89 | output.append({ 90 | "img_name": item["img_name"], 91 | "polished_CoT": polished_CoT, 92 | "og_CoT": item["CoT"], 93 | "description": item["description"], 94 | "address": item["address"] 95 | }) 96 | 97 | output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 98 | 99 | with open(output_path, 'w') as f: 100 | json.dump(output, f, indent=4, ensure_ascii=False) 101 | 102 | print(f"Polished CoT saved to {output_path}") 103 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | 
*.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | 164 | data/* 165 | .vscode/ 166 | evaluate/citydata/ 167 | citydata/ 168 | results/ -------------------------------------------------------------------------------- /evaluate/uniimage/sat_address/sat_address_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert an address QA into a multi-choice question for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | 10 | from config import UNI_IMAGE_FOLDER 11 | 12 | def prompt_template(choice1, choice2, choice3, choice4): 13 | s = f""" 14 | The following is a multiple-choice question about selecting the most appropriate address for a satellite image. 15 | A. {choice1} 16 | B. {choice2} 17 | C. {choice3} 18 | D. {choice4} 19 | Please choose the most suitable one among A, B, C and D as the answer to this question. 20 | Please output the option directly. No need for explanation.\n 21 | """ 22 | 23 | return s.strip() 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 28 | parser.add_argument('--task_name', type=str, default='sat_address_mc', help='task name') 29 | args = parser.parse_args() 30 | 31 | city_name = args.city_name 32 | task_name = args.task_name 33 | 34 | work_dir = UNI_IMAGE_FOLDER 35 | 36 | cur_dir = os.path.join(work_dir, f"{city_name}/") 37 | 38 | all_train_data = [] 39 | 40 | for zl in ['zl15', 'zl17']: 41 | sat_img_dir = cur_dir + f"sample_sat_image_{zl}/" 42 | sat_address_file = cur_dir + f"sat_address_combined_{city_name}_{zl}.csv" 43 | df = pd.read_csv(sat_address_file) 44 | # remove the rows with empty address 45 | df = df.dropna(subset=["combined_adr"]) 46 | 47 | 48 | print("Input file:", sat_address_file) 49 | print("Valid records:", len(df)) 50 | 51 | output = [] 52 | 53 | for i in range(len(df)): 54 | row = df.iloc[i] 55 | img_name = row["img_name"] 56 | combined_adr = row["combined_adr"] 57 | 58 | # print(combined_adr) 59 | 60 | assert os.path.exists(sat_img_dir + img_name), f"Image {img_name} not found" 61 | 62 | # Randomly select 3 other addresses 63 | other_choices = df[df["img_name"] != img_name].sample(3)["combined_adr"].tolist() 64 | choices = [combined_adr] + other_choices 65 | random.seed(i); random.shuffle(choices)  # shuffle's random= parameter is deprecated and was removed in Python 3.11 66 | 67 | # print(choices) 68 | 69 | reference = chr(ord('A') + choices.index(combined_adr)) 70 | 71 | # print(reference) 72 | 73 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 74 | 75 | # print(prompt) 76 | 77 | output.append({ 78 | "prompt": prompt, 79 | "choices": choices, 80 | "reference": reference, 81 | "image": sat_img_dir + img_name 82 | }) 83 | 84 | # print(output[-1]) 85 | 86 | # exit() 87 | 88 | output_dir = os.path.join(work_dir, task_name, city_name) 89 | 90 | os.makedirs(output_dir, exist_ok=True) 91 | 92 | test = random.sample(output, min(200, len(output))) 93 | train = [x for x in output if x not in test] 94 | 95 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 96 | json.dump(test, f, indent=4, ensure_ascii=False) 97 | 98 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 99 | json.dump(train, f, indent=4, ensure_ascii=False) 100 | 101 |
all_train_data.extend(train) 102 | 103 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 104 | print("Test size:", len(test)) 105 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 106 | print("Train size:", len(train)) 107 | 108 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 109 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) -------------------------------------------------------------------------------- /simulate/satelite/process_poi.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import ast 3 | def remove_duplicates(points): 4 | # drop duplicate coordinate pairs while preserving order 5 | seen = set() 6 | result = [] 7 | for point in points: 8 | if tuple(point) not in seen: 9 | seen.add(tuple(point)) 10 | result.append(point) 11 | return result 12 | 13 | def calculate_centroid(polygon): 14 | # average the (deduplicated) vertex coordinates to get an integer pixel centroid 15 | # print(polygon) 16 | polygon = remove_duplicates(polygon) 17 | 18 | x_coords = [point[0] for point in polygon] 19 | y_coords = [point[1] for point in polygon] 20 | 21 | cx = sum(x_coords) / len(polygon) 22 | cy = sum(y_coords) / len(polygon) 23 | return [int(cx), int(cy)] 24 | 25 | def is_near(p1, p2, threshold=3): 26 | return abs(p1[0] - p2[0]) <= threshold and abs(p1[1] - p2[1]) <= threshold 27 | 28 | def merge_nearby_points(locations): 29 | merged = calculate_centroid(locations) 30 | 31 | return merged 32 | 33 | def merge_poi_by_category(poi_data): 34 | merged_poi = {} 35 | 36 | for poi in poi_data: 37 | name, location = poi.split(" is at location: ") 38 | category = name.split(" ")[-1] 39 | location = ast.literal_eval(location.strip())  # safer than eval for parsing coordinate literals 40 | 41 | processed_locations = [] 42 | # print(location) 43 | coord = location 44 | if isinstance(coord[0], list) and len(coord[0])!=2:# and len(coord[0][0]) > 2 and all(isinstance(i, list) and len(i) == 2 for i in coord): 45 | loc = location[0] 46 | if isinstance(loc, list) and len(loc) > 1: 47 | processed_locations.append(loc) 48 | 49 | if processed_locations: 50 | # print(processed_locations) 51 | merged_points = merge_nearby_points(processed_locations[0]) 52 | # print(merged_points) 53 | 54 | if category not in merged_poi: 55 | merged_poi[category] = [] 56 | merged_poi[category].extend([merged_points]) 57 | else: 58 | if category not in merged_poi: 59 | merged_poi[category] = [] 60 | merged_poi[category].append(location) 61 | else: 62 | if category not in merged_poi: 63 | merged_poi[category] = [] 64 | merged_poi[category].append(location) 65 | 66 | 67 | return merged_poi 68 | 69 | def format_merged_poi(merged_poi): 70 | output = [] 71 | for category, locations in merged_poi.items(): 72 | output.append(f"{category}s are at locations: {', '.join(str(loc) for loc in locations)}") 73 | return output 74 | 75 | def read_poi_from_file(filename): 76 | with open(filename, 'r', encoding='utf-8') as file: 77 | return file.readlines() 78 | 79 | def write_poi_to_file(filename, formatted_output): 80 | with open(filename, 'w', encoding='utf-8') as file: 81 | for line in formatted_output: 82 | file.write(line + '\n') 83 | 84 | import os 85 | import argparse 86 | 87 | if __name__ == "__main__": 88 | parser = argparse.ArgumentParser() 89 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 90 | parser.add_argument('--work_dir', type=str, default='../../data/') 91 | args = parser.parse_args() 92 | city = args.city 93 | work_dir = args.work_dir 94 | working_dir = work_dir + f"dev-{city}/" 95 | 96 | for zl
in ['zl17','zl15']: 97 | df = pd.read_csv(working_dir + f'SAT_{city}_'+zl+'.csv') 98 | for cnt in range(len(df)): 99 | img_name = df.at[cnt,'img_name'].split('.')[0] 100 | output_dir = working_dir + "short_clipped_results_"+zl 101 | os.makedirs(output_dir, exist_ok=True) 102 | input_filename = working_dir + 'clipped_results_pixel_non_null_'+zl+'/pois_'+img_name+'.txt' 103 | 104 | if not os.path.exists(input_filename): 105 | continue 106 | poi_data = read_poi_from_file(input_filename) 107 | 108 | merged_poi = merge_poi_by_category(poi_data) 109 | formatted_output = format_merged_poi(merged_poi) 110 | 111 | output_filename = working_dir + 'short_clipped_results_'+zl+'/pois_'+img_name+'.txt' 112 | write_poi_to_file(output_filename, formatted_output) 113 | 114 | -------------------------------------------------------------------------------- /simulate/address/interpolate_sat_coord.py: -------------------------------------------------------------------------------- 1 | # Function: Interpolate the coordinates of satellite images for zoom level 15 and 17 2 | import pandas as pd 3 | import argparse 4 | 5 | if __name__ == '__main__': 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 8 | parser.add_argument('--work_dir', type=str, default='../../data/') 9 | args = parser.parse_args() 10 | city = args.city 11 | work_dir = args.work_dir 12 | 13 | for zl in ['zl15','zl17']: 14 | 15 | 16 | input_csv_path = work_dir + f'dev-{city}/SAT_{city}_{zl}.csv' 17 | 18 | output_csv_path = work_dir + f'dev-{city}/SAT_interpolate_{city}_{zl}.csv' 19 | 20 | 21 | if 'zl17' in input_csv_path: 22 | data = pd.read_csv(input_csv_path) 23 | 24 | img_list = [] 25 | lng_list = [] 26 | lat_list = [] 27 | 28 | for i in range(len(data)): 29 | tl_lat = data.at[i,'tl_lat'] 30 | tl_lng = data.at[i,'tl_lng'] 31 | bt_lat = data.at[i,'bt_lat'] 32 | bt_lng = data.at[i,'bt_lng'] 33 | # evenly sample 3*3 points for zoom level 17 34 | for j in range(3): 35 | for k in range(3): 36 | if j == 0 and k==0: 37 | img_list.append(data.at[i,'img_name']) 38 | lng_list.append(data.at[i,'tl_lng']) 39 | lat_list.append(data.at[i,'tl_lat']) 40 | continue 41 | if j ==2 and k==2: 42 | img_list.append(data.at[i,'img_name']) 43 | lng_list.append(data.at[i,'bt_lng']) 44 | lat_list.append(data.at[i,'bt_lat']) 45 | continue 46 | img_list.append(data.at[i,'img_name']) 47 | lng_list.append(data.at[i,'tl_lng']+j/2*(data.at[i,'bt_lng']-data.at[i,'tl_lng'])) 48 | lat_list.append(data.at[i,'tl_lat']-k/2*(data.at[i,'tl_lat']-data.at[i,'bt_lat'])) 49 | 50 | new_df = pd.DataFrame({'img_name':img_list,'lng':lng_list,'lat':lat_list}) 51 | new_df.to_csv(output_csv_path, index=False) 52 | 53 | elif 'zl15' in input_csv_path: 54 | 55 | 56 | data = pd.read_csv(input_csv_path) 57 | 58 | img_list = [] 59 | lng_list = [] 60 | lat_list = [] 61 | 62 | for i in range(len(data)): 63 | tl_lat = data.at[i,'tl_lat'] 64 | tl_lng = data.at[i,'tl_lng'] 65 | bt_lat = data.at[i,'bt_lat'] 66 | bt_lng = data.at[i,'bt_lng'] 67 | # evenly sample 5*5 points for zoom level 15 68 | for j in range(5): 69 | for k in range(5): 70 | if j == 0 and k==0: 71 | img_list.append(data.at[i,'img_name']) 72 | lng_list.append(data.at[i,'tl_lng']) 73 | lat_list.append(data.at[i,'tl_lat']) 74 | continue 75 | if j ==4 and k==4: 76 | img_list.append(data.at[i,'img_name']) 77 | lng_list.append(data.at[i,'bt_lng']) 78 | lat_list.append(data.at[i,'bt_lat']) 79 | continue 80 | img_list.append(data.at[i,'img_name']) 81 | 
lng_list.append(data.at[i,'tl_lng']+j/4*(data.at[i,'bt_lng']-data.at[i,'tl_lng'])) 82 | lat_list.append(data.at[i,'tl_lat']-k/4*(data.at[i,'tl_lat']-data.at[i,'bt_lat'])) 83 | 84 | new_df = pd.DataFrame({'img_name':img_list,'lng':lng_list,'lat':lat_list}) 85 | new_df.to_csv(output_csv_path, index=False) 86 | 87 | 88 | else: 89 | print("Please input the correct csv file path!") 90 | raise NotImplementedError 91 | 92 | 93 | # sanity check 94 | print("Before interpolation, the number of images is ",len(data)) 95 | print("After interpolation, the number of images is ",len(new_df)) 96 | print("The ratio of the number of images after interpolation to the number of images before interpolation is ",len(new_df)/len(data)) -------------------------------------------------------------------------------- /evaluate/mobility_prediction/metrics.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from sklearn.metrics import f1_score 4 | import ast 5 | import csv 6 | import numpy as np 7 | 8 | 9 | 10 | def get_acc1_f1(df): 11 | acc1 = (df['prediction'] == df['ground_truth']).sum() / len(df) 12 | f1 = f1_score(df['ground_truth'], df['prediction'], average='weighted') 13 | return acc1, f1 14 | 15 | def get_is_correct(row): 16 | pred_list = row['prediction'] 17 | if row['ground_truth'] in pred_list: 18 | row['is_correct'] = True 19 | else: 20 | row['is_correct'] = False 21 | 22 | return row 23 | 24 | 25 | def get_is_correct10(row): 26 | pred_list = row['top10'] 27 | if row['ground_truth'] in pred_list: 28 | row['is_correct10'] = True 29 | else: 30 | row['is_correct10'] = False 31 | 32 | pred_list = row['top5'] 33 | if row['ground_truth'] in pred_list: 34 | row['is_correct5'] = True 35 | else: 36 | row['is_correct5'] = False 37 | 38 | pred = row['top1'] 39 | if pred == row['ground_truth']: 40 | row['is_correct1'] = True 41 | else: 42 | row['is_correct1'] = False 43 | 44 | return row 45 | 46 | 47 | def first_nonzero(arr, axis, invalid_val=-1): 48 | mask = arr!=0 49 | return np.where(mask.any(axis=axis), mask.argmax(axis=axis), invalid_val) 50 | 51 | 52 | def get_ndcg(prediction, targets, k=10): 53 | """ 54 | Calculates the NDCG score for the given predictions and targets. 55 | 56 | Args: 57 | prediction (Nxk): list of lists. the softmax output of the model. 58 | targets (N): torch.LongTensor. actual target place id. 
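        (Note: with a single relevant item per row, NDCG@k reduces to
        1/log2(rank + 1), where rank is the 1-based position of the target
        inside the top-k prediction list, and contributes 0 if the target is
        absent -- which is exactly what the code below computes via
        first_nonzero and the final 1/np.log2(ranks + 1) term.)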
59 | 60 | Returns: 61 | the sum ndcg score 62 | """ 63 | for i, xi in enumerate(prediction): 64 | if len(xi) < k: 65 | # pad in place so np.array(prediction) below gets a rectangular shape 66 | prediction[i] = xi + [-5 for _ in range(k - len(xi))] 67 | elif len(xi) > k: 68 | # rebinding the loop variable (xi = xi[:k]) would not truncate the stored list, so assign back 69 | prediction[i] = xi[:k] 70 | 71 | 72 | n_sample = len(prediction) 73 | prediction = np.array(prediction) 74 | targets = np.broadcast_to(targets.reshape(-1, 1), prediction.shape) 75 | hits = first_nonzero(prediction == targets, axis=1, invalid_val=-1) 76 | hits = hits[hits>=0] 77 | ranks = hits + 1 78 | ndcg = 1 / np.log2(ranks + 1) 79 | return np.sum(ndcg) / n_sample 80 | 81 | 82 | def cal_metrics(output_dir): 83 | # Calculate the metric for all users 84 | # output_dir = 'output/Mixtral-8x22B-Instruct-v0.1_Paris_top1_wot' 85 | file_list = [file for file in os.listdir(output_dir) if file.endswith('.csv')] 86 | print(file_list) 87 | file_path_list = [os.path.join(output_dir, file) for file in file_list] 88 | 89 | df = pd.DataFrame({ 90 | 'user_id': None, 91 | 'ground_truth': None, 92 | 'prediction': None, 93 | 'reason': None 94 | }, index=[]) 95 | 96 | for file_path in file_path_list: 97 | iter_df = pd.read_csv(file_path) 98 | df = pd.concat([df, iter_df], ignore_index=True) 99 | 100 | df_cleaned = df.dropna(subset=['prediction', 'ground_truth']) 101 | df_cleaned['prediction'] = df_cleaned['prediction'].apply( 102 | # lambda x: int(x) if isinstance(x, int) else int(x.split('or')[0]) if 'or' in x else int(x) 103 | lambda x: int(x) if str(x).strip().lstrip('-').isdigit() else -100  # CSV values arrive as strings or numpy scalars, never Python ints 104 | ) 105 | df_cleaned['ground_truth'] = df_cleaned['ground_truth'].apply(lambda x: int(x)) 106 | 107 | acc1, f1 = get_acc1_f1(df_cleaned) 108 | return acc1, f1 109 | 110 | 111 | if __name__ == "__main__": 112 | # Calculate the metric for all users 113 | # TODO: Fill in the model names that you want to evaluate 114 | models = ["Llama-3-VILA1.5-8b",] 115 | cities = ["Beijing"] 116 | 117 | csv_file = 'results/prediction_results/metrics.csv' 118 | with open(csv_file, mode='w', newline='') as file: 119 | writer = csv.writer(file) 120 | writer.writerow(['Model', 'City', 'Acc@1', 'F1']) 121 | for model in models: 122 | for city in cities: 123 | output_dir = f'results/prediction_results/{model}_{city}_top1_wot' 124 | acc1, f1 = cal_metrics(output_dir=output_dir) 125 | writer.writerow([model, city, acc1, f1]) 126 | 127 | print(f"Results have been saved to {csv_file}") 128 | -------------------------------------------------------------------------------- /simulate/annotate/sat_combine_address.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI # >=1.0, test version 1.16.0 2 | import httpx 3 | import os 4 | import pandas as pd 5 | import argparse 6 | from tqdm import tqdm, trange 7 | from concurrent.futures import ThreadPoolExecutor 8 | 9 | PROXY = "http://127.0.0.1:10190" 10 | 11 | API_KEY_MAPPING = { 12 | "siliconflow": "SiliconFlow_API_KEY", 13 | "DeepInfra": "DeepInfra_API_KEY", 14 | "OpenAI": "OpenAI_API_KEY" 15 | } 16 | API_URL_MAPPING = { 17 | "siliconflow": "https://api.siliconflow.cn/v1", 18 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 19 | "OpenAI": "https://api.openai.com/v1" 20 | } 21 | 22 | API_TYPE = "OpenAI" 23 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 24 | API_URL = API_URL_MAPPING[API_TYPE] 25 | 26 | def process_chunk(client, df, i, interpolate_num, model_name): 27 | string_adr = '' 28 | for j in range(i, i + interpolate_num): 29 | string_adr += str(df.at[j, 'adr']) + ', ' 30 | 31 | prompts = f''' 32 | I give you a
detailed address description of a square area. 33 | The square area is evenly divided into a {int(interpolate_num**0.5)}*{int(interpolate_num**0.5)} grid. 34 | Starting from the upper left corner and going down is the first column, 35 | and then the second column continues from top to bottom, from grid 0 to grid {interpolate_num-1}. 36 | The detailed addresses of grid 0 to grid {interpolate_num-1} are: {string_adr} 37 | Please form a general description of this area. Please describe what lies to the east, west, north and south of this area, 38 | the relative locations of the POIs and roads in the area, 39 | which POIs are adjacent to each other, which roads connect which POIs in this area, etc. 40 | Avoid including words like grid or poi in your answer and only generate one paragraph. Please make it natural and fluent. 41 | ''' 42 | 43 | dialogs = [{ 44 | "role": "user", 45 | "content": prompts 46 | }] 47 | 48 | completion = client.chat.completions.create( 49 | model=model_name, 50 | messages=dialogs, 51 | max_tokens=1024, 52 | temperature=0.3, 53 | ) 54 | 55 | return df.at[i, 'img_name'], completion.choices[0].message.content.strip() 56 | 57 | if __name__ == '__main__': 58 | parser = argparse.ArgumentParser() 59 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 60 | parser.add_argument('--work_dir', type=str, default='../../data/') 61 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 62 | args = parser.parse_args() 63 | city = args.city 64 | work_dir = args.work_dir 65 | model_name = args.model_name 66 | 67 | if API_TYPE == "OpenAI": 68 | client = OpenAI( 69 | base_url=API_URL, 70 | api_key=API_KEY, 71 | http_client=httpx.Client(proxies=PROXY) 72 | ) 73 | elif API_TYPE == "siliconflow": 74 | client = OpenAI( 75 | base_url=API_URL, 76 | api_key=API_KEY 77 | ) 78 | elif API_TYPE == "DeepInfra": 79 | client = OpenAI( 80 | base_url=API_URL, 81 | api_key=API_KEY, 82 | http_client=httpx.Client(proxies=PROXY), 83 | ) 84 | 85 | for zl in ['zl15', 'zl17']: 86 | if zl == 'zl15': 87 | interpolate_num = 5 * 5 88 | else: 89 | interpolate_num = 3 * 3 90 | 91 | input_file_path = work_dir + f"dev-{city}/SAT_interpolate_address_{city}_{zl}.csv" 92 | output_file_path = work_dir + f"dev-{city}/sat_address_combined_{city}_{zl}.csv" 93 | 94 | df = pd.read_csv(input_file_path) 95 | region_list = [] 96 | combined_adr_list = [] 97 | 98 | with ThreadPoolExecutor(max_workers=128) as executor: 99 | futures = [ 100 | executor.submit(process_chunk, client, df, i, interpolate_num, model_name) 101 | for i in range(0, len(df), interpolate_num) 102 | ] 103 | 104 | for future in tqdm(futures, total=len(futures)): 105 | region, combined_adr = future.result() 106 | region_list.append(region) 107 | combined_adr_list.append(combined_adr) 108 | 109 | pd_dict = pd.DataFrame({'img_name': region_list, 'combined_adr': combined_adr_list}) 110 | pd_dict.to_csv(output_file_path, index=False) 111 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_landmark/stv_landmark_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert a landmark-description QA into a multi-choice question for evaluation.
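# Note: the conversion mirrors sat_address_convert.py above -- each street-view landmark
# description becomes a 4-way multiple-choice item whose three distractors are sampled
# from the descriptions of other images, and the items are then split into per-zoom-level
# train/test sets.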
2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | 10 | from config import UNI_IMAGE_FOLDER, BEIJING_STV_IMAGE_FOLDER, LONDON_STV_IMAGE_FOLDER 11 | import tqdm 12 | 13 | 14 | def prompt_template(choice1, choice2, choice3, choice4): 15 | s = f""" 16 | The following is a multiple-choice question about selecting the most likely description of nearby POIs (Places of Interest) or landmarks in the region of a street view image. 17 | A. {choice1} 18 | B. {choice2} 19 | C. {choice3} 20 | D. {choice4} 21 | Please choose the most suitable one among A, B, C and D as the answer to this question. 22 | Please output the option directly. No need for explanation.\n 23 | """ 24 | 25 | return s.strip() 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 30 | parser.add_argument('--task_name', type=str, default='stv_landmark_mc', help='task name') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | 36 | work_dir = UNI_IMAGE_FOLDER 37 | 38 | work_dir = os.path.join(work_dir, f"{city_name}/") 39 | 40 | all_train_data = [] 41 | all_test_data = [] 42 | 43 | for zl in ['zl15', 'zl17']: 44 | if city_name == "Beijing": 45 | stv_img_dir = BEIJING_STV_IMAGE_FOLDER 46 | elif city_name == "London": 47 | stv_img_dir = LONDON_STV_IMAGE_FOLDER 48 | elif city_name == "NewYork": 49 | pass  # TODO: NewYork street-view folder is not defined in config; stv_img_dir stays unset 50 | 51 | 52 | 53 | input_path = os.path.join(work_dir, "stv_poi_landmark_update.jsonl") 54 | with open(input_path, "r") as f: 55 | data = [json.loads(line) for line in f] 56 | 57 | output = [] 58 | 59 | all_choices = [d["text"] for d in data] 60 | all_choices = list(set(all_choices)) 61 | 62 | for d in tqdm.tqdm(data): 63 | # img_name is stored as an absolute path; keep only the basename 64 | img_name = d["img_name"].split('/')[-1] 65 | text = d["text"] 66 | 67 | other_choices = random.sample([t for t in all_choices if t != d["text"]], 3) 68 | choices = [text] + other_choices 69 | random.seed(data.index(d)); random.shuffle(choices)  # shuffle's random= parameter is deprecated and was removed in Python 3.11 70 | 71 | reference = chr(ord('A') + choices.index(text)) 72 | 73 | prompt = prompt_template(choices[0], choices[1], choices[2], choices[3]) 74 | 75 | output.append({ 76 | "prompt": prompt, 77 | "choices": choices, 78 | "reference": reference, 79 | "image": os.path.join(stv_img_dir, img_name) 80 | }) 81 | 82 | 83 | 84 | output_dir = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name) 85 | os.makedirs(output_dir, exist_ok=True) 86 | 87 | 88 | test = random.sample(output, min(200, len(output))) 89 | train = [d for d in output if d not in test] 90 | 91 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 92 | json.dump(test, f, indent=4, ensure_ascii=False) 93 | 94 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 95 | print("Test size:", len(test)) 96 | 97 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 98 | json.dump(train, f, indent=4, ensure_ascii=False) 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 100 | print("Train size:", len(train)) 101 | 102 | all_train_data.extend(train) 103 | all_test_data.extend(test) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to
{os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 109 | print("Total train size:", len(all_train_data)) 110 | 111 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_test.json"), "w") as f: 112 | json.dump(all_test_data, f, indent=4, ensure_ascii=False) 113 | 114 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_test.json')}") 115 | print("Total test size:", len(all_test_data)) 116 | -------------------------------------------------------------------------------- /simulate/annotate/stv_description_gpt.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI # >=1.0, test version 1.16.0 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | import tqdm 9 | from concurrent.futures import ThreadPoolExecutor, as_completed 10 | 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | API_KEY_MAPPING = { 14 | "siliconflow": "SiliconFlow_API_KEY", 15 | "DeepInfra": "DeepInfra_API_KEY", 16 | "OpenAI": "OpenAI_API_KEY" 17 | } 18 | API_URL_MAPPING = { 19 | "siliconflow": "https://api.siliconflow.cn/v1", 20 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 21 | "OpenAI": "https://api.openai.com/v1" 22 | } 23 | API_TYPE = "OpenAI" 24 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 25 | API_URL = API_URL_MAPPING[API_TYPE] 26 | 27 | def encode_image(image_path): 28 | with open(image_path, "rb") as image_file: 29 | return base64.b64encode(image_file.read()).decode("utf-8") 30 | 31 | def generate_description(client, img_name, image_dir, model_name): 32 | img_url = os.path.join(image_dir, img_name) 33 | if not os.path.exists(img_url): 34 | return None 35 | 36 | base64_image = encode_image(img_url) 37 | 38 | prompt = ''' 39 | Please describe in detail the given image following the principles: 40 | (1) Describe object attributes, including object quantity, color, material, shape, size; 41 | (2) Describe the spatial relationship between objects, including the relative position of objects, the distance between objects, and the direction of objects; 42 | (3) Only describe content that you are highly confident about. 43 | (4) Do not describe the contents by itemizing them in list form. 44 | (5) Make sure the description is coherent and fluent.
45 | ''' 46 | 47 | dialogs = [{ 48 | "role": "user", 49 | "content": [ 50 | {"type": "text", "text": prompt}, 51 | {"type": "image_url", "image_url": { 52 | "url": f"data:image/png;base64,{base64_image}" 53 | }} 54 | ] 55 | }] 56 | 57 | try: 58 | completion = client.chat.completions.create( 59 | model=model_name, 60 | messages=dialogs, 61 | max_tokens=1024, 62 | temperature=0 63 | ) 64 | return { 65 | "img_name": img_url, 66 | "text": completion.choices[0].message.content.strip(), 67 | } 68 | except Exception as e: 69 | print(f"Error processing {img_name}: {e}") 70 | return None 71 | 72 | if __name__ == "__main__": 73 | parser = argparse.ArgumentParser() 74 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 75 | parser.add_argument('--work_dir', type=str, default='../../data/') 76 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 77 | args = parser.parse_args() 78 | 79 | city = args.city 80 | work_dir = args.work_dir 81 | working_dir = work_dir + f"dev-{city}" 82 | # TODO: Change the following path to the actual path 83 | image_dir = f"....../ThreeCityImage/{city}/StreetView" 84 | model_name = args.model_name 85 | 86 | if API_TYPE == "OpenAI": 87 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 88 | elif API_TYPE == "siliconflow": 89 | client = OpenAI(base_url=API_URL, api_key=API_KEY) 90 | elif API_TYPE == "DeepInfra": 91 | model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct" 92 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 93 | 94 | img_set = [] 95 | for zl in ['zl15', 'zl17']: 96 | stv_in_sat_path = f"{working_dir}/stv_in_sat_{city}_{zl}.csv" 97 | assert os.path.exists(stv_in_sat_path), f"{stv_in_sat_path} does not exist." 98 | df = pd.read_csv(stv_in_sat_path) 99 | img_set.extend(df['image_name'].tolist()) 100 | 101 | img_set = list(set(img_set)) 102 | assert os.path.exists(image_dir), f"{image_dir} does not exist." 
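    # A possible resumability tweak (sketch, not in the original script): the results
    # file is opened in append mode below, so re-running this script duplicates records.
    # One way to make runs resumable is to drop images that already have an entry:
    #
    #     done_path = os.path.join(working_dir, "stv_description.jsonl")
    #     if os.path.exists(done_path):
    #         with open(done_path) as f:
    #             done = {json.loads(line)["img_name"] for line in f}
    #         img_set = [n for n in img_set
    #                    if os.path.join(image_dir, n) not in done]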
103 | print(f"Start generating descriptions for {len(img_set)} images.") 104 | 105 | with ThreadPoolExecutor(max_workers=128) as executor: 106 | futures = {executor.submit(generate_description, client, img_name, image_dir, model_name): img_name for img_name in img_set} 107 | with open(os.path.join(working_dir, "stv_description.jsonl"), "a") as fout: 108 | for future in tqdm.tqdm(as_completed(futures), total=len(futures)): 109 | result = future.result() 110 | if result: 111 | fout.write(json.dumps(result, ensure_ascii=False) + "\n") 112 | 113 | -------------------------------------------------------------------------------- /serving/vlm_serving.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import transformers 4 | 5 | from functools import partial 6 | from config import VLLM_MODEL_PATH, VLM_MODELS 7 | 8 | 9 | class VLMWrapper: 10 | def __init__(self, model_name, max_new_tokens=1000): 11 | self.model_name = model_name 12 | assert self.model_name in VLM_MODELS 13 | 14 | transformers_version_436=["VILA1.5-3b", "Llama-3-VILA1.5-8b", "VILA1.5-13b"] 15 | transformers_version_437=["cogvlm2-llama3-chat-19B", "InternVL2-40B", "llava_v1.5_7b", "Yi_VL_6B", "Yi_VL_34B", 16 | "InternVL2-2B", "InternVL2-4B", "InternVL2-8B", "InternVL2-26B"] 17 | transformers_version_440=["MiniCPM-Llama3-V-2_5"] 18 | transformers_version_444=["llava_next_yi_34b", "llava_next_llama3", "glm-4v-9b"] 19 | trainformers_version_latest = ["Qwen2-VL-7B-Instruct", "Qwen2-VL-2B-Instruct"] 20 | # The following is an example of the model name of UrbanLLaVA 21 | # TODO: Add your model name of UrbanLLaVA here 22 | transformers_version_UrbanLLaVA=["UrbanLLaVA-8b-mix-v1"] 23 | 24 | # Install the correct version of transformers 25 | if self.model_name in transformers_version_UrbanLLaVA: 26 | if transformers.__version__ != "4.36.2": 27 | os.system("pip install transformers==4.36.2") 28 | elif self.model_name in transformers_version_436: 29 | if transformers.__version__ != "4.36.2": 30 | os.system("pip install transformers==4.36.2") 31 | elif self.model_name in transformers_version_437: 32 | if transformers.__version__ != "4.37.0": 33 | os.system("pip install transformers==4.37.0") 34 | elif self.model_name in transformers_version_440: 35 | if transformers.__version__ != "4.40.0": 36 | os.system("pip install transformers==4.40.0") 37 | elif self.model_name in transformers_version_444: 38 | if transformers.__version__ != "4.44.2": 39 | os.system("pip install transformers==4.44.2") 40 | elif self.model_name in trainformers_version_latest: 41 | if transformers.__version__ != "4.45.0.dev0": 42 | os.system("pip install git+https://github.com/huggingface/transformers@21fac7abba2a37fae86106f87fcf9974fd1e3830 accelerate") 43 | else: 44 | print("no need to update transformers") 45 | 46 | # place this line after the command "pip install" 47 | try: 48 | from vlmeval.config import supported_VLM 49 | from vlmeval.vlm import VILA 50 | from functools import partial 51 | except Exception as e: 52 | print(e) 53 | print("need to run this script in vlmeval") 54 | 55 | 56 | # only update local model path 57 | for model_name in transformers_version_436 + transformers_version_437 + transformers_version_440 + transformers_version_444 + trainformers_version_latest: 58 | original_func = supported_VLM[model_name] 59 | if "glm" in model_name or "cogvlm" in model_name: 60 | supported_VLM[model_name] = partial(original_func.func, 61 | model_path=VLLM_MODEL_PATH[model_name], 62 | 
69 | try: 70 | for model_name in transformers_version_UrbanLLaVA: 71 | supported_VLM[model_name]=partial(VILA, model_path=VLLM_MODEL_PATH[model_name], max_new_tokens=max_new_tokens) 72 | except Exception as e: 73 | print(e) 74 | print("UrbanLLaVA is not supported") 75 | 76 | self.enable_proxy() 77 | self.model = supported_VLM[self.model_name]() 78 | 79 | def get_vlm_model(self): 80 | return self.model 81 | 82 | def enable_proxy(self): 83 | # set proxy for OpenAI models 84 | if self.model_name in ["GPT4o", "GPT4o_MINI"]: 85 | os.environ["http_proxy"] = 'http://127.0.0.1:10190' 86 | os.environ["https_proxy"] = 'http://127.0.0.1:10190' 87 | 88 | 89 | def clean_proxy(self): 90 | try: 91 | del os.environ["http_proxy"] 92 | del os.environ["https_proxy"] 93 | except Exception as e: 94 | print("Failed to delete the proxy environment variables") 95 | -------------------------------------------------------------------------------- /simulate/annotate/stv_landmark_gpt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import json 4 | from tqdm import tqdm, trange 5 | import argparse 6 | import httpx 7 | import base64 8 | from openai import OpenAI # >=1.0, tested with version 1.16.0 9 | from concurrent.futures import ThreadPoolExecutor, as_completed 10 | 11 | PROXY = "http://127.0.0.1:10190" 12 | 13 | API_KEY_MAPPING = { 14 | "siliconflow": "SiliconFlow_API_KEY", 15 | "DeepInfra": "DeepInfra_API_KEY", 16 | "OpenAI": "OpenAI_API_KEY" 17 | } 18 | API_URL_MAPPING = { 19 | "siliconflow": "https://api.siliconflow.cn/v1", 20 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 21 | "OpenAI": "https://api.openai.com/v1" 22 | } 23 | API_TYPE = "OpenAI" 24 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 25 | API_URL = API_URL_MAPPING[API_TYPE] 26 | 27 | def encode_image(image_path): 28 | with open(image_path, "rb") as image_file: 29 | return base64.b64encode(image_file.read()).decode("utf-8") 30 | 31 | def process_row(client, df, cnt, image_dir, model_name, working_dir): 32 | image_name = df.at[cnt, 'image_name'] 33 | image_url = os.path.join(image_dir, image_name) 34 | if not os.path.exists(image_url): 35 | return None 36 | 37 | base64_image = encode_image(image_url) 38 | near_pois = df.at[cnt, 'feature_names'] 39 | 40 | prompt = ''' 41 | You are given one street view image and the nearest 10 POIs as background information. 42 | The nearest POIs are ''' + str(near_pois) + '''. 43 | Based on the given POIs and the image, please use LESS THAN FIVE WORDS to describe what the landmark in the image is. 44 | A landmark is a recognizable natural or artificial feature used for navigation, for example, a building, a statue, a bridge, etc. 45 | Please give the name of the landmark and briefly describe it if possible. For example, "Eiffel Tower" and "a tall iron tower". 46 | Keep your response short and concise, USE LESS THAN FIVE WORDS to describe the landmark.
47 | ''' 48 | 49 | dialogs = [{ 50 | "role": "user", 51 | "content": [ 52 | {"type": "text", "text": prompt}, 53 | {"type": "image_url", "image_url": { 54 | "url": f"data:image/png;base64,{base64_image}" 55 | }} 56 | ] 57 | }] 58 | 59 | try: 60 | completion = client.chat.completions.create( 61 | model=model_name, 62 | messages=dialogs, 63 | max_tokens=2048, 64 | temperature=0 65 | ) 66 | 67 | return { 68 | "img_name": image_url, 69 | "text": completion.choices[0].message.content.strip() 70 | } 71 | except Exception as e: 72 | print(f"Error processing row {cnt}: {e}") 73 | return None 74 | 75 | if __name__ == "__main__": 76 | parser = argparse.ArgumentParser() 77 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 78 | parser.add_argument('--work_dir', type=str, default='../../data/') 79 | parser.add_argument('--model_name', type=str, default='gpt-4o-mini-2024-07-18') 80 | args = parser.parse_args() 81 | 82 | city = args.city 83 | work_dir = args.work_dir 84 | working_dir = work_dir + f'dev-{city}/' 85 | model_name = args.model_name 86 | 87 | if API_TYPE == "OpenAI": 88 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 89 | elif API_TYPE == "siliconflow": 90 | client = OpenAI(base_url=API_URL, api_key=API_KEY) 91 | elif API_TYPE == "DeepInfra": 92 | model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct" 93 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 94 | 95 | # TODO: Change the following path to the actual path 96 | image_dir = f"....../ThreeCityImage/{city}/StreetView" 97 | 98 | if os.path.exists(working_dir + "stv_poi_landmark_update.jsonl"): 99 | os.remove(working_dir + "stv_poi_landmark_update.jsonl") 100 | print(f"Removed {working_dir}stv_poi_landmark_update.jsonl") 101 | 102 | for zl in ['zl15', 'zl17']: 103 | df = pd.read_csv(working_dir + f'stv_in_sat_nearest_features_update_{city}_{zl}.csv') 104 | output_file = working_dir + "stv_poi_landmark_update.jsonl" 105 | 106 | with ThreadPoolExecutor(max_workers=128) as executor: 107 | futures = {executor.submit(process_row, client, df, cnt, image_dir, model_name, working_dir): cnt for cnt in range(len(df))} 108 | 109 | with open(output_file, "a") as fout: 110 | for future in tqdm(as_completed(futures), total=len(futures)): 111 | result = future.result() 112 | if result: 113 | fout.write(json.dumps(result, ensure_ascii=False) + "\n") 114 | 115 | -------------------------------------------------------------------------------- /simulate/format/uni_mc_SAT_landuse.py: -------------------------------------------------------------------------------- 1 | # This script converts land-use annotations into multiple-choice questions for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | from sklearn.model_selection import train_test_split 9 | random.seed(0) 10 | 11 | 12 | 13 | def prompt_template(choice1, choice2, choice3, choice4): 14 | s = f""" 15 | The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image. 16 | A. {choice1} 17 | B. {choice2} 18 | C. {choice3} 19 | D. {choice4} 20 | Please choose the most suitable one among A, B, C and D as the answer to this question. 21 | Please output the option directly. No need for explanation.\n 22 | """ 23 | 24 | return s.strip() 25 | 
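# For reference, an illustrative call (these choices are made up):
#     prompt_template("Retail", "Forest", "Residential", "Railway")
# renders, up to indentation, as:
#     The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image.
#     A. Retail
#     B. Forest
#     C. Residential
#     D. Railway
#     Please choose the most suitable one among A, B, C and D as the answer to this question.
#     Please output the option directly. No need for explanation.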
26 | if __name__ == '__main__': 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 29 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 30 | parser.add_argument('--work_dir', type=str, default='../../data/') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | work_dir = args.work_dir 36 | 37 | cur_dir = os.path.join(work_dir, f"dev-{city_name}/") 38 | 39 | all_train_data = [] 40 | for zl in ['zl15', 'zl17']: 41 | sat_img_dir = cur_dir + f"sample_sat_image_{zl}/" 42 | 43 | output = [] 44 | 45 | if os.path.exists(f"rs_landuse_description_{zl}.jsonl"): 46 | os.remove(f"rs_landuse_description_{zl}.jsonl") 47 | print(f"Removed rs_landuse_description_{zl}.jsonl") 48 | df = pd.read_csv(cur_dir + f"SAT_{city_name}_{zl}.csv") 49 | 50 | for cnt in range(len(df)): 51 | img_name = df.at[cnt,'img_name'].split('.')[0] 52 | 53 | if not os.path.exists(cur_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 54 | continue 55 | 56 | with open(cur_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 57 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 58 | lines = file.readlines() 59 | 60 | landuse_types_list = ["Retail", "Recreation_ground", "Commercial", "Residential", "Grass", "Forest", "Construction", "Meadow", "Garages", "Railway", "Brownfield", "Farmland", "Religious", "Industrial", "Recreation"] 61 | 62 | for line in lines: 63 | parts = line.split('location:') 64 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 65 | if not landuse_type in landuse_types_list: 66 | landuse_types_list.append(landuse_type) 67 | i = 0 68 | for line in lines: 69 | parts = line.split('location:') 70 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 71 | assert landuse_type in landuse_types_list, landuse_type 72 | 73 | other_choices = [d for d in landuse_types_list if d != landuse_type] 74 | other_choices = random.sample(other_choices, 3) 75 | choices = [landuse_type] + other_choices 76 | random.shuffle(choices) 77 | i += 1 78 | reference = chr(ord('A') + choices.index(landuse_type)) 79 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 80 | 81 | 82 | output.append({ 83 | "prompt": prompt, 84 | "choices": choices, 85 | "reference": reference, 86 | "image": os.path.join(sat_img_dir, img_name + '.png') 87 | }) 88 | 89 | test = random.sample(output, min(200, len(output))) 90 | train = [d for d in output if d not in test] 91 | all_train_data.extend(train) 92 | output_dir = os.path.join(cur_dir, "uni_image_data", task_name, city_name) 93 | 94 | os.makedirs(output_dir, exist_ok=True) 95 | 96 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 97 | json.dump(test, f, indent=4, ensure_ascii=False) 98 | 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 100 | print("Test size:", len(test)) 101 | 102 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 103 | json.dump(train, f, indent=4, ensure_ascii=False) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 109 | print("Total train size:", len(all_train_data)) 110 | 
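The train/test split above compares whole dicts (d not in test), which is quadratic in the number of questions and silently drops any training item whose dict happens to equal a sampled test item. A minimal index-based sketch that avoids both (illustrative only; output is the question list built above):

import random
random.seed(0)
idx = list(range(len(output)))
random.shuffle(idx)
test_idx = set(idx[:min(200, len(output))])
test = [output[i] for i in sorted(test_idx)]
train = [output[i] for i in range(len(output)) if i not in test_idx]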
-------------------------------------------------------------------------------- /evaluate/uniimage/sat_landuse/sat_landuse_convert.py: -------------------------------------------------------------------------------- 1 | # This script converts land-use annotations into multiple-choice questions for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | from sklearn.model_selection import train_test_split 9 | random.seed(0) 10 | 11 | from config import UNI_IMAGE_FOLDER 12 | 13 | 14 | def prompt_template(choice1, choice2, choice3, choice4): 15 | s = f""" 16 | The following is a multiple-choice question about selecting the most likely landuse type in the region of a satellite image. 17 | A. {choice1} 18 | B. {choice2} 19 | C. {choice3} 20 | D. {choice4} 21 | Please choose the most suitable one among A, B, C and D as the answer to this question. 22 | Please output the option directly. No need for explanation.\n 23 | """ 24 | 25 | return s.strip() 26 | 27 | if __name__ == '__main__': 28 | parser = argparse.ArgumentParser() 29 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 30 | parser.add_argument('--task_name', type=str, default='sat_landuse_mc', help='task name') 31 | args = parser.parse_args() 32 | 33 | city_name = args.city_name 34 | task_name = args.task_name 35 | 36 | work_dir = UNI_IMAGE_FOLDER 37 | 38 | work_dir = os.path.join(work_dir, f"{city_name}/") 39 | 40 | all_train_data = [] 41 | for zl in ['zl15', 'zl17']: 42 | sat_img_dir = work_dir + f"sample_sat_image_{zl}/" 43 | 44 | output = [] 45 | 46 | if os.path.exists(f"rs_landuse_description_{zl}.jsonl"): 47 | os.remove(f"rs_landuse_description_{zl}.jsonl") 48 | print(f"Removed rs_landuse_description_{zl}.jsonl") 49 | df = pd.read_csv(work_dir + f"SAT_{city_name}_{zl}.csv") 50 | 51 | for cnt in range(len(df)): 52 | img_name = df.at[cnt,'img_name'].split('.')[0] 53 | 54 | if not os.path.exists(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt'): 55 | continue 56 | 57 | with open(work_dir + f'short_clipped_results_{zl}/landuse_'+img_name +'.txt', 'r') as file: 58 | # with open('short_clipped_results_wudaokou_zl17/landuse_'+img_name +'.txt', 'r') as file: 59 | lines = file.readlines() 60 | 61 | landuse_types_list = ["Retail", "Recreation_ground", "Commercial", "Residential", "Grass", "Forest", "Construction", "Meadow", "Garages", "Railway", "Brownfield", "Farmland", "Religious", "Industrial", "Recreation"] 62 | 63 | for line in lines: 64 | parts = line.split('location:') 65 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 66 | if not landuse_type in landuse_types_list: 67 | landuse_types_list.append(landuse_type) 68 | i = 0 69 | for line in lines: 70 | parts = line.split('location:') 71 | landuse_type = line.split('region')[0].strip().split()[-1].capitalize() 72 | assert landuse_type in landuse_types_list, landuse_type 73 | 74 | other_choices = [d for d in landuse_types_list if d != landuse_type] 75 | other_choices = random.sample(other_choices, 3) 76 | choices = [landuse_type] + other_choices 77 | random.shuffle(choices) 78 | i += 1 79 | reference = chr(ord('A') + choices.index(landuse_type)) 80 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 81 | 82 | 83 | output.append({ 84 | "prompt": prompt, 85 | "choices": choices, 86 
| "reference": reference, 87 | "image": os.path.join(sat_img_dir, img_name + '.png') 88 | }) 89 | 90 | test = random.sample(output, min(200, len(output))) 91 | train = [d for d in output if d not in test] 92 | all_train_data.extend(train) 93 | output_dir = os.path.join(UNI_IMAGE_FOLDER, task_name, city_name) 94 | os.makedirs(output_dir, exist_ok=True) 95 | 96 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 97 | json.dump(test, f, indent=4, ensure_ascii=False) 98 | 99 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 100 | print("Test size:", len(test)) 101 | 102 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 103 | json.dump(train, f, indent=4, ensure_ascii=False) 104 | 105 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 106 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 107 | 108 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 109 | print("Total train size:", len(train)) 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /simulate/advance/CoT/stv_address_cot/gen_CoT_template.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | import pandas as pd 4 | from tqdm import trange 5 | import os 6 | import random 7 | random.seed(0) 8 | 9 | # Generate CoT ground truth 10 | # Three reasoning steps: 11 | # 1. Tell the city name 12 | # 2. Extract the location's pois around 13 | # 3. Tell the location's address 14 | 15 | 16 | def stv_prompt_template(city_name:str, near_feature:str, description:str, address:str): 17 | """ 18 | Generate the prompt for the street view task 19 | """ 20 | prompt = f""" 21 | Step 1: Describe the street view image: 22 | This is a street view image, in thie image, {description}. 23 | Step 2: Tell the city name: 24 | According to the street view image, this is probably in {city_name}. 25 | Step 3: Extract the location's features around: 26 | The street view image is taken in a region with the following features: {near_feature}. 27 | Step 4: Tell the location's address: 28 | Based on my observation and knowledge about this region, the address of this region is {address}. 
29 | """ 30 | prompt = str(prompt).replace('\n', ' ').strip() 31 | 32 | return prompt 33 | 34 | if __name__ == "__main__": 35 | parser = argparse.ArgumentParser() 36 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 37 | parser.add_argument('--work_dir', type=str, default='../../data/') 38 | parser.add_argument('--task', type=str, default='stv-address-cot', choices=['stv-address-cot', 'sat-address-cot']) 39 | args = parser.parse_args() 40 | city = args.city 41 | work_dir = args.work_dir 42 | task = args.task 43 | work_dir = work_dir + f'dev-{city}' 44 | output_dir = os.path.join(work_dir, 'CoT', task) 45 | import os 46 | os.makedirs(output_dir, exist_ok=True) 47 | 48 | CITY_NAME = city 49 | 50 | stv_output = [] 51 | 52 | 53 | for zl in ["zl15", "zl17"]: 54 | # Streetview 55 | address_csv = os.path.join(work_dir, f'stv_in_sat_address_deploy_{zl}.csv') 56 | near_feature_csv = os.path.join(work_dir, f'stv_in_sat_nearest_features_update_{city}_{zl}.csv') 57 | stv_description_jsonl = os.path.join(work_dir, f'stv_description.jsonl') 58 | 59 | addr_df = pd.read_csv(address_csv) 60 | near_feature_df = pd.read_csv(near_feature_csv) 61 | with open(stv_description_jsonl, 'r') as f: 62 | stv_description = f.readlines() 63 | stv_description = [json.loads(x) for x in stv_description] 64 | 65 | addr_df = addr_df[:1000] 66 | 67 | for i in trange(len(addr_df)): 68 | image_name = addr_df.loc[i, 'image_name'] 69 | address = addr_df.loc[i, 'adr'] 70 | for j in range(len(stv_description)): 71 | image_name2 = stv_description[j]['img_name'].split('/')[-1] 72 | if image_name == image_name2: 73 | description = stv_description[j]['text'] 74 | for k in range(len(near_feature_df)): 75 | if near_feature_df.loc[k, 'image_name'] == image_name: 76 | near_feature = near_feature_df.loc[k, 'feature_names'] 77 | 78 | prompt = stv_prompt_template(CITY_NAME, near_feature, description, address) 79 | stv_output.append({ 80 | "img_name": image_name, 81 | "CoT": prompt, 82 | "address": address, 83 | "description": description, 84 | "near_feature": near_feature 85 | }) 86 | 87 | 88 | 89 | # for i in trange(len(addr_df)): 90 | # region_name = addr_df.loc[i, 'region_nam'] 91 | # sid = addr_df.loc[i, 'sid'] 92 | # adr = addr_df.loc[i, 'adr'] 93 | # near_feature = None 94 | # for j in range(len(near_feature_df)): 95 | # if near_feature_df.loc[j, 'sid'] == sid: 96 | # near_feature = near_feature_df.loc[j, 'feature_names'] 97 | # break 98 | 99 | # if near_feature is not None: 100 | # near_feature_lst = str(near_feature).split(',') 101 | # near_feature_lst = [x for x in near_feature_lst if x != '' and not x.isdigit()] 102 | # prompt = stv_prompt_template(CITY_NAME, near_feature_lst, adr) 103 | # stv_output.append({ 104 | # "region_name": region_name, 105 | # "sid": sid, 106 | # "adr": adr, 107 | # "near_feature": near_feature_lst, 108 | # "CoT": prompt 109 | # }) 110 | 111 | 112 | with open(os.path.join(output_dir, f'{task}_{CITY_NAME}.json'), 'w') as f: 113 | json.dump(stv_output, f, indent=4, ensure_ascii=False) 114 | 115 | print(f"Total number of CoT: {len(stv_output)}") 116 | print(f"Saving CoT to {os.path.join(output_dir, f'{task}_{CITY_NAME}.json')}") 117 | 118 | 119 | -------------------------------------------------------------------------------- /evaluate/outdoor_navigation/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import math 5 | import base64 6 | 7 | from config 
import NAVIGATION_IMAGE_FOLDER, NAVIGATION_URL_PATH 8 | 9 | def encode_image(image_path): 10 | with open(image_path, "rb") as image_file: 11 | return base64.b64encode(image_file.read()).decode('utf-8') 12 | 13 | def haversine_distance(lat1, lon1, lat2, lon2): 14 | lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2]) 15 | # haversine formula on a sphere of radius 6,371 km; returns meters 16 | dlat = lat2 - lat1 17 | dlon = lon2 - lon1 18 | a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2 19 | c = 2 * math.asin(math.sqrt(a)) 20 | r = 6371000 21 | return c * r 22 | 23 | def extract_coords_from_filename(city, image_filename): 24 | meta_file = os.path.join(NAVIGATION_IMAGE_FOLDER, f"{city}_StreetView_Images/combined_stitch_meta_info.csv") 25 | 26 | parts = image_filename.split('_') 27 | dataset_name = parts[0] 28 | sid_84_long = parts[1] 29 | sid_84_lat = parts[2] 30 | sid = parts[3].split('.')[0] 31 | 32 | df = pd.read_csv(meta_file) 33 | 34 | matched_row = df[(df['sid_84_long'] == float(sid_84_long)) & 35 | (df['sid_84_lat'] == float(sid_84_lat)) & 36 | (df['sid'] == sid)] 37 | 38 | return matched_row.iloc[0]['longitude_origin'], matched_row.iloc[0]['latitude_origin'] 39 | 40 | 41 | 42 | def calculate_distance(city, last_image_url, cur_image_url): 43 | url_file = NAVIGATION_URL_PATH 44 | url_df = pd.read_csv(url_file) 45 | last_image_name = url_df.loc[url_df['image_url'] == last_image_url, 'image_name'].values[0] 46 | cur_image_name = url_df.loc[url_df['image_url'] == cur_image_url, 'image_name'].values[0] 47 | 48 | meta_file = os.path.join(NAVIGATION_IMAGE_FOLDER, f"{city}_StreetView_Images/combined_stitch_meta_info.csv") 49 | meta_df = pd.read_csv(meta_file) 50 | if city not in ["Beijing", "Shanghai"]: 51 | last_image_coords = meta_df.loc[meta_df['file_name'] == last_image_name, ['query_longti', 'query_lati']].iloc[0] 52 | cur_image_coords = meta_df.loc[meta_df['file_name'] == cur_image_name, ['query_longti', 'query_lati']].iloc[0] 53 | 54 | distance = haversine_distance(last_image_coords['query_lati'], last_image_coords['query_longti'], 55 | cur_image_coords['query_lati'], cur_image_coords['query_longti']) 56 | else: 57 | last_image_lng, last_image_lat = extract_coords_from_filename(city, last_image_name) 58 | cur_image_lng, cur_image_lat = extract_coords_from_filename(city, cur_image_name) 59 | distance = haversine_distance(last_image_lat, last_image_lng, cur_image_lat, cur_image_lng) 60 | return distance 61 | 62 | 63 | def calculate_direction(current_end, next_start): 64 | dx = next_start[0] - current_end[0] 65 | dy = next_start[1] - current_end[1] 66 | 67 | if abs(dx) > abs(dy): 68 | if dx > 0: 69 | return "right" 70 | else: 71 | return "left" 72 | else: 73 | if dy > 0: 74 | return "forward" 75 | else: 76 | return "forward" # the action space has no "backward", so both vertical cases map to forward 77 | 
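# Rough sanity check for haversine_distance (illustrative coordinates only):
# Tiananmen (39.9087 N, 116.3975 E) to the Forbidden City (39.9163 N, 116.3972 E)
# spans about 0.0076 degrees of latitude, so
#     haversine_distance(39.9087, 116.3975, 39.9163, 116.3972)
# should return roughly 850 (meters).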
78 | def get_basic_prompt(): 79 | basic_prompt = f""" 80 | You are tasked with guiding a virtual traveler through a series of street view images along a specific route. With each image provided: 81 | 82 | Describe the Image: Identify and describe any prominent landmarks, features, or unique characteristics visible in the photo. This may include notable buildings, distinctive shops, interesting street art, or any other element that stands out. 83 | 84 | Action Decision: For each image, I will also provide the navigation action decision that needs to be taken at that location (e.g., turn left, go straight, turn right, or stop). You must integrate this action decision into your description, using the landmarks as reference points. For example, you might say, "At the red cafe with the large windows on your left, turn right to head towards the park with the fountain." 85 | 86 | Remember, your descriptions should not include URL links to images or the word 'image'. Instead, they should provide a clear, concise, and complete guide using landmarks that will be paired with the images I provide. And you must integrate the action decision with the image description. This ensures that anyone using your descriptions and my images can successfully navigate and reach their destination. 87 | 88 | Here are the images and action decisions for each step of the route: 89 | 90 | """ 91 | return basic_prompt 92 | 93 | def get_prompt_eval(): 94 | basic_prompt = f""" 95 | Navigate to the described target location! 96 | Action Space: forward, left, right, stop 97 | - If you choose "forward", proceed for 50 meters. 98 | - If you choose "left" or "right", make the turn at the next intersection. 99 | - If you believe you have reached the destination, please select "stop". 100 | - Format your response as follows:\n 101 | Reason: Action: 102 | 103 | Navigation Instructions: 104 | """ 105 | return basic_prompt 106 | -------------------------------------------------------------------------------- /evaluate/uniimage/stv_address/stv_address_convert.py: -------------------------------------------------------------------------------- 1 | # This script is used to convert an address QA into a multiple-choice question for evaluation. 2 | 3 | import json 4 | import os 5 | import argparse 6 | import pandas as pd 7 | import random 8 | random.seed(0) 9 | from tqdm import trange 10 | from config import UNI_IMAGE_FOLDER, BEIJING_STV_IMAGE_FOLDER, LONDON_STV_IMAGE_FOLDER 11 | 12 | def prompt_template(choice1, choice2, choice3, choice4): 13 | s = f""" 14 | The following is a multiple-choice question about selecting the most appropriate address for a street view image. 15 | A. {choice1} 16 | B. {choice2} 17 | C. {choice3} 18 | D. {choice4} 19 | Please choose the most suitable one among A, B, C and D as the answer to this question. 20 | Please output the option directly. 
No need for explanation.\n 21 | """ 22 | 23 | return s.strip() 24 | 25 | if __name__ == '__main__': 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument('--city_name', type=str, default='Beijing', help='city name') 28 | parser.add_argument('--task_name', type=str, default='stv_address_mc', help='task name') 29 | args = parser.parse_args() 30 | 31 | city_name = args.city_name 32 | task_name = args.task_name 33 | 34 | work_dir = UNI_IMAGE_FOLDER 35 | 36 | cur_dir = os.path.join(work_dir, f"{city_name}/") 37 | 38 | 39 | all_train_data = [] 40 | 41 | for zl in ['zl15', 'zl17']: 42 | sat_address_file = cur_dir + f"stv_in_sat_address_deploy_{zl}.csv" 43 | df = pd.read_csv(sat_address_file) 44 | # remove the rows with empty address 45 | df = df.dropna(subset=["adr"]) 46 | 47 | # randomly shuffle df to avoid the same addresses appearing in the same order 48 | df = df.sample(frac=1, random_state=0).reset_index(drop=True) 49 | for i in range(len(df)): 50 | df.at[i, "img_name"] = df.iloc[i]["image_name"] 51 | 52 | if city_name == "Beijing": 53 | stv_img_dir = BEIJING_STV_IMAGE_FOLDER 54 | elif city_name == "London": 55 | stv_img_dir = LONDON_STV_IMAGE_FOLDER 56 | elif city_name == "NewYork": 57 | pass # TODO: NewYork street-view folder is not configured here yet 58 | 59 | 60 | print("Input file:", sat_address_file) 61 | print("Valid records:", len(df)) 62 | 63 | output = [] 64 | 65 | for i in range(len(df)): 66 | row = df.iloc[i] 67 | img_name = row["img_name"] 68 | adr = row["adr"] 69 | 70 | # print(combined_adr) 71 | 72 | assert os.path.exists(os.path.join(stv_img_dir, img_name)), f"Image {os.path.join(stv_img_dir, img_name)} not found" 73 | 74 | # Randomly select 3 other addresses 75 | valid_choices = df[(df["img_name"] != img_name) & (df["adr"] != adr)]["adr"].unique() 76 | if len(valid_choices) < 3: 77 | raise ValueError(f"Not enough valid choices to sample for image {img_name}") 78 | 79 | # other_choices = df[(df["img_name"] != img_name) & (df["adr"] != adr)].sample(3)["adr"].tolist() 80 | other_choices = random.sample(list(valid_choices), 3) 81 | choices = [adr] + other_choices 82 | 83 | assert len(list(set(choices))) == 4 84 | 85 | random.seed(i); random.shuffle(choices) # reseed per question for a reproducible order 86 | 87 | # print(choices) 88 | 89 | reference = chr(ord('A') + choices.index(adr)) 90 | 91 | # print(reference) 92 | 93 | prompt = prompt_template(choice1=choices[0], choice2=choices[1], choice3=choices[2], choice4=choices[3]) 94 | 95 | # print(prompt) 96 | 97 | output.append({ 98 | "prompt": prompt, 99 | "choices": choices, 100 | "reference": reference, 101 | "image": os.path.join(stv_img_dir, img_name) 102 | }) 103 | 104 | # print(output[-1]) 105 | 106 | # exit() 107 | 108 | # os.makedirs(f"./{task_name}/{city_name}", exist_ok=True) 109 | output_dir = os.path.join(work_dir, task_name, city_name) 110 | os.makedirs(output_dir, exist_ok=True) 111 | 112 | test = random.sample(output, min(200, len(output))) 113 | train = [d for d in output if d not in test] 114 | 115 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_test.json"), "w") as f: 116 | json.dump(test, f, indent=4, ensure_ascii=False) 117 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_test.json')}") 118 | print("Test size:", len(test)) 119 | 120 | with open(os.path.join(output_dir, f"{city_name}_{task_name}_{zl}_train.json"), "w") as f: 121 | json.dump(train, f, indent=4, ensure_ascii=False) 122 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_{zl}_train.json')}") 123 | print("Train size:", len(train)) 124 | 125 | all_train_data.extend(train) 126 | 127 
| with open(os.path.join(output_dir, f"{city_name}_{task_name}_train.json"), "w") as f: 128 | json.dump(all_train_data, f, indent=4, ensure_ascii=False) 129 | 130 | print(f"Saved to {os.path.join(output_dir, f'{city_name}_{task_name}_train.json')}") 131 | print("Total train size:", len(all_train_data)) 132 | 133 | -------------------------------------------------------------------------------- /simulate/advance/CoT/sat_cross_stv_cot/gpt_polish.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | import httpx 3 | import os 4 | import argparse 5 | import pandas as pd 6 | import json 7 | import base64 8 | 9 | from tqdm import tqdm 10 | from concurrent.futures import ThreadPoolExecutor 11 | 12 | # API Key and Proxy settings 13 | PROXY = "http://127.0.0.1:10190" 14 | API_KEY_MAPPING = { 15 | "siliconflow": "SiliconFlow_API_KEY", 16 | "DeepInfra": "DeepInfra_API_KEY", 17 | "OpenAI": "OpenAI_API_KEY" 18 | } 19 | API_URL_MAPPING = { 20 | "siliconflow": "https://api.siliconflow.cn/v1", 21 | "DeepInfra": "https://api.deepinfra.com/v1/openai", 22 | "OpenAI": "https://api.openai.com/v1" 23 | } 24 | API_TYPE = "OpenAI" 25 | API_KEY = os.environ[API_KEY_MAPPING[API_TYPE]] 26 | API_URL = API_URL_MAPPING[API_TYPE] 27 | 28 | def encode_image(image_path): 29 | with open(image_path, "rb") as image_file: 30 | return base64.b64encode(image_file.read()).decode("utf-8") 31 | 32 | def polish_text(client, model_name, og_text): 33 | prompt = f''' 34 | Please polish the following paragraph to make it more fluent and natural. 35 | Please shorten the text to 2048 tokens or less, keeping the most important information like the reference answer. 36 | Remove redundant or less helpful information. Only keep the most important parts that can help with the task. 37 | You can make necessary changes to the text, like removing the square brackets, adding punctuation, or rephrasing the text. 38 | Don't change the meaning of the text. 39 | Only output the polished text, without any additional information or appended text. 
40 | Here is the original text: 41 | {og_text} 42 | ''' 43 | 44 | dialogs = [{ 45 | "role": "user", 46 | "content": [{"type": "text", "text": prompt}] 47 | }] 48 | 49 | try: 50 | completion = client.chat.completions.create( 51 | model=model_name, 52 | messages=dialogs, 53 | max_tokens=2048, 54 | temperature=0.1, 55 | ) 56 | return completion.choices[0].message.content 57 | except Exception as e: 58 | print(e) 59 | return "" 60 | 61 | if __name__ == "__main__": 62 | parser = argparse.ArgumentParser() 63 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 64 | parser.add_argument('--work_dir', type=str, default='../../data/') 65 | parser.add_argument('--task', type=str, default='sat-cross-stv-cot', choices=['sat-cross-stv-cot', 'sat-address-cot']) 66 | args = parser.parse_args() 67 | 68 | city = args.city 69 | work_dir = args.work_dir 70 | task = args.task 71 | CoT_dir = work_dir + f'dev-{city}/CoT/{task}/' 72 | 73 | model_name = "gpt-4o-2024-08-06" 74 | client = OpenAI(base_url=API_URL, api_key=API_KEY, http_client=httpx.Client(proxies=PROXY)) 75 | 76 | for dataset in ["SAT_STV_location_CoT", "SAT_STV_mapping_CoT"]: 77 | og_path = f"{dataset}_{city}.json" 78 | og_path = os.path.join(CoT_dir, og_path) 79 | 80 | with open(og_path, 'r') as f: 81 | og_data = json.load(f) 82 | 83 | output = [] 84 | with ThreadPoolExecutor(max_workers=128) as executor: 85 | futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 86 | 87 | for future in tqdm(futures): 88 | item = futures[future] 89 | polished_CoT = future.result() 90 | output.append({ 91 | "image": item["image"], 92 | "prompt": item["prompt"], 93 | "polished_CoT": polished_CoT, 94 | "og_CoT": item["CoT"], 95 | "reference": item["reference"] 96 | }) 97 | 98 | output_path = os.path.join(CoT_dir, f'polished_{dataset}_{city}.json') 99 | 100 | with open(output_path, 'w') as f: 101 | json.dump(output, f, indent=4, ensure_ascii=False) 102 | 103 | print(f"Polished {dataset} for {city} saved to {output_path}") 104 | print("Length of output:", len(output)) 105 | 106 | # for zl in ['zl15', 'zl17']: 107 | # og_path = f"{task}_{city}_{zl}.json" 108 | # og_path = os.path.join(CoT_dir, og_path) 109 | 110 | # with open(og_path, 'r') as f: 111 | # og_data = json.load(f) 112 | 113 | # output = [] 114 | 115 | # with ThreadPoolExecutor(max_workers=128) as executor: 116 | # futures = {executor.submit(polish_text, client, model_name, item['CoT']): item for item in og_data} 117 | 118 | # for future in tqdm(futures): 119 | # item = futures[future] 120 | # polished_CoT = future.result() 121 | # output.append({ 122 | # "img_name": item["img_name"], 123 | # "polished_CoT": polished_CoT, 124 | # "og_CoT": item["CoT"], 125 | # "description": item["description"], 126 | # "address": item["address"] 127 | # }) 128 | 129 | # output_path = os.path.join(CoT_dir, f'polished_{task}_{city}_{zl}.json') 130 | 131 | # with open(output_path, 'w') as f: 132 | # json.dump(output, f, indent=4, ensure_ascii=False) 133 | -------------------------------------------------------------------------------- /simulate/advance/cross-view/generate_poi_building_count.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pandas as pd 3 | import re 4 | import os 5 | import glob 6 | import argparse 7 | import random 8 | import tqdm 9 | from tqdm import tqdm, trange 10 | 11 | def extract_keys_from_json_files(filenames): 12 | 13 | 14 | all_keys = set() 15 | 
for filename in filenames: 16 | with open(filename, 'r') as f: 17 | data = json.load(f) 18 | all_keys.update(data.keys()) 19 | return list(all_keys) 20 | 21 | 22 | 23 | 24 | 25 | def create_location_dict(filename): 26 | 27 | 28 | location_dict = {} 29 | with open(filename, 'r') as f: 30 | for line in f: 31 | match = re.match(r"(\w+)\s+are\s+at\s+locations:\s+\[(.*)\]", line) 32 | if match: 33 | location_type, coordinates = match.groups() 34 | num_locations = len(coordinates.split(',')) // 2 35 | location_dict[location_type] = num_locations 36 | return location_dict 37 | 38 | if __name__ == '__main__': 39 | parser = argparse.ArgumentParser() 40 | parser.add_argument('--city', type=str, default='Beijing', choices=['London', 'NewYork', 'Beijing']) 41 | parser.add_argument('--work_dir', type=str, default='../../data/') 42 | args = parser.parse_args() 43 | city_name = args.city 44 | work_dir = args.work_dir 45 | working_dir = os.path.join(work_dir, f"dev-{city_name}") 46 | # Convert the POI txt files to JSON files 47 | for zl in ['zl15','zl17']: 48 | target_dir = os.path.join(working_dir, f"poi_json_{city_name}") 49 | os.makedirs(target_dir, exist_ok=True) 50 | sat_path = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 51 | for i in range(len(sat_path)): 52 | sat_name = sat_path.at[i,'img_name'].split('.')[0] 53 | filename = os.path.join(working_dir, f"short_clipped_results_{zl}/pois_{sat_name}.txt") 54 | if not os.path.exists(filename): 55 | continue 56 | result_dict = create_location_dict(filename) 57 | with open(os.path.join(target_dir, f"pois_{sat_name}_update.json"), 'w') as f: 58 | json.dump(result_dict, f, indent=4) 59 | 60 | 61 | key_groups = { 62 | 'group_1': ['kindergartens', 'schools', 'colleges', 'research_institutes', 'universitys'], 63 | 'group_2': ['conveniences', 'malls', 'supermarkets'], 64 | 'group_3': ['restaurants', 'bakerys','foods', 'fast_foods', 'beveragess', 'food_courts', 'bars', 'cafes', 'coffees', 'vending_machines', 'nightclubs'], 65 | 'group_4': ['apartments', 'hostels', 'hotels'], 66 | 'group_5': ['attractions'] 67 | } 68 | 69 | # Count the number of POIs in each satellite image 70 | for zl in ['zl15','zl17']: 71 | sat_df = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 72 | img_name_list = list(sat_df['img_name']) 73 | json_file_list = [os.path.join(working_dir, f"poi_json_{city_name}/pois_{x.split('.')[0]}_update.json") for x in img_name_list] 74 | result_data = [] 75 | 76 | for json_file in json_file_list: 77 | file_path = json_file 78 | img_name = file_path.split('/')[-1].split('.')[0] 79 | if not os.path.exists(file_path): 80 | continue 81 | with open(file_path, 'r', encoding='utf-8') as f: 82 | json_data = json.load(f) 83 | 84 | group_sums = {group: 0 for group in key_groups} 85 | 86 | for group, keys in key_groups.items(): 87 | for key in keys: 88 | group_sums[group] += json_data.get(key, 0) 89 | 90 | result_data.append([img_name] + list(group_sums.values())) 91 | 92 | columns = ['img_name'] + list(key_groups.keys()) 93 | df = pd.DataFrame(result_data, columns=columns) 94 | df.to_csv(os.path.join(working_dir, f"POI_key_group_sums_{zl}_{city_name}.csv"), index=False) 95 | print(df) 96 | 97 | ################----------------------------------------------------------------------------------------------------- 98 | 99 | # count the number of buildings in each satellite image 100 | for zl in ['zl15','zl17']: 101 | 102 | sat_df = pd.read_csv(os.path.join(working_dir, f"SAT_{city_name}_{zl}.csv")) 103 | img_name_list = 
list(sat_df['img_name']) 104 | sat_name_list = [] 105 | sat_building_num = [] 106 | for i in img_name_list: 107 | img_name = i.split('.')[0] 108 | if not os.path.exists(os.path.join(working_dir, f"clipped_results_{zl}/clipped_buildings_{img_name}.geojson")): 109 | continue 110 | with open(os.path.join(working_dir, f"clipped_results_{zl}/clipped_buildings_{img_name}.geojson"), 'r', encoding='utf-8') as f: 111 | json_data = json.load(f) 112 | num_features = len(json_data['features']) 113 | sat_name_list.append(img_name) 114 | sat_building_num.append(num_features) 115 | 116 | pd_dict = pd.DataFrame({'img_name':sat_name_list,'building_num':sat_building_num}) 117 | pd_dict.to_csv(os.path.join(working_dir, f"building_num_sat_{zl}_{city_name}.csv"),index=False) 118 | print(pd_dict) 119 | 120 | --------------------------------------------------------------------------------
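The POI group counts and building counts written above (POI_key_group_sums_{zl}_{city}.csv and building_num_sat_{zl}_{city}.csv) presumably feed the SAT_count_pois and SAT_count_buildings evaluation tasks. One subtlety when joining them: the POI table's img_name keeps the "pois_" prefix and "_update" suffix of the JSON filenames, while the building table stores the bare satellite image name. A minimal merge sketch (file names instantiated for Beijing at zl15; illustrative only):

import pandas as pd

poi = pd.read_csv("POI_key_group_sums_zl15_Beijing.csv")   # img_name like "pois_<sat>_update"
bld = pd.read_csv("building_num_sat_zl15_Beijing.csv")     # img_name is the bare "<sat>" name
# strip the prefix/suffix so the two tables share a key, then join per image
poi["img_name"] = poi["img_name"].str.replace(r"^pois_|_update$", "", regex=True)
merged = poi.merge(bld, on="img_name", how="inner")
print(merged.head())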