├── .gitignore ├── LICENSE ├── README.md ├── dataset ├── France_RPG │ ├── RPG2DF.py │ └── exploreRPG_labels.py ├── README.md ├── __init__.py ├── labelled_dense │ ├── SS │ │ ├── __init__.py │ │ ├── extract_images_for_parcel_labels.py │ │ ├── extract_parcel_labels_raster.py │ │ └── make_image_timeseries_for_parcel_labels.py │ ├── __init__.py │ ├── extract_images_for_labels.py │ ├── extract_images_for_parcel_labels.py │ ├── extract_labels_raster.py │ ├── extract_parcel_ground_truths.py │ ├── find_parcel_dimensions.py │ ├── make_image_timeseries_for_labels.py │ ├── make_image_timeseries_for_parcel_labels.py │ ├── make_labelled_dataset.sh │ ├── make_labelled_parcel_dataset.sh │ └── split_ground_truths_by_location.ipynb └── unlabelled │ ├── __init__.py │ ├── extract_images.py │ ├── make_image_timeseries.py │ └── make_unlabelled_dataset.sh ├── diagram.png ├── download ├── README.md ├── __init__.py ├── download.sh ├── find_S2_products_for_tile.ipynb ├── find_S2_tiles_for_aoi.ipynb ├── get_downloaded_products_info.ipynb └── sentinelsat_download_tileid.py ├── requirements.txt └── utils ├── __init__.py ├── data_utils.py ├── date_utils.py ├── geospatial_data_utils.py ├── multiprocessing_utils.py └── sentinel_products_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | **/*.png 3 | __pycache__ 4 | **/__pycache__ 5 | *.pyc 6 | **/*.pyc 7 | misc 8 | **/pw.csv 9 | pw.csv 10 | **/.ipynb_checkpoints 11 | run_many.sh 12 | 13 | # exclude superseded 14 | dataset/labelled_dense/SS 15 | download/SS 16 | 17 | # exclude files not tested/documented 18 | dataset/labelled_dense/extract_images_for_parcel_labels.py 19 | dataset/labelled_dense/extract_parcel_ground_truths.py 20 | dataset/labelled_dense/find_parcel_dimensions.py 21 | dataset/labelled_dense/make_image_timeseries_for_parcel_labels.py 22 | dataset/labelled_dense/make_labelled_parcel_dataset.sh 23 | 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepSatData: Building large scale datasets of satellite images for training machine learning models 2 | ![plot](./diagram.png) 3 | DeepSatData is a toolkit for making datasets from satellite imagery suitable for training machine learning models. 4 | The process is split into two distinct parts: 5 | - identifying and downloading relevant Sentinel products for an area and time period of interest. Read more in [download](./download) 6 | - processing downloaded products into datasets. Read more in [dataset](./dataset). 7 | 8 | Further details on the methodology used can be found in our papers 9 | ["DeepSatData: Building large scale datasets of satellite images for training machine learning models"](arxiv url) and 10 | ["Context-self contrastive pretraining for crop type semantic segmentation"](https://arxiv.org/abs/2104.04310). 
11 | 12 | ## Dependencies 13 | Install dependencies using pip 14 | ``` 15 | pip install -r requirements.txt 16 | ``` 17 | 18 | or creating a conda environment 19 | ``` 20 | conda create --name --file requirements.txt 21 | ``` 22 | 23 | ## Citation 24 | If you use DeepSatData in your research consider citing the following BibTeX entries: 25 | ``` 26 | @misc{tarasiou2021deepsatdata, 27 | title={DeepSatData: Building large scale datasets of satellite images for training machine learning models}, 28 | author={Michail Tarasiou and Stefanos Zafeiriou}, 29 | year={2021}, 30 | eprint={2104.13824}, 31 | archivePrefix={arXiv}, 32 | primaryClass={cs.CV} 33 | } 34 | 35 | @misc{tarasiou2021contextself, 36 | title={Context-self contrastive pretraining for crop type semantic segmentation}, 37 | author={Michail Tarasiou and Riza Alp Guler and Stefanos Zafeiriou}, 38 | year={2021}, 39 | eprint={2104.04310}, 40 | archivePrefix={arXiv}, 41 | primaryClass={cs.CV} 42 | } 43 | ``` 44 | 45 | 46 | ## License 47 | This project is under the CC-BY-NC 4.0 license. See [LICENSE](LICENSE) for details. 48 | 49 | 51 | -------------------------------------------------------------------------------- /dataset/France_RPG/RPG2DF.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import shapefile 3 | from shapely import geometry 4 | import pandas as pd 5 | import os 6 | 7 | 8 | def main(): 9 | args = parser.parse_args() 10 | 11 | rpg_file = os.path.join(args.rpg_dir, 'PARCELLES_GRAPHIQUES') 12 | 13 | sf = shapefile.Reader(rpg_file) 14 | year = args.rpg_dir.split("-")[-1] 15 | # print(year) 16 | 17 | data = [] 18 | for i in range(len(sf)): 19 | # if i == 100: 20 | # break 21 | if i % 1e6 == 0: 22 | print('processing record %d of %d' % (i, len(sf))) 23 | s = sf.shape(i) 24 | rec = sf.record(i) 25 | parcel = geometry.Polygon(s.points) 26 | data.append([parcel, rec[2]]) 27 | 28 | data = pd.DataFrame(data, columns=['geometry', 'CODE_CULTU']) 29 | 30 | print("num parcels in data file: %d" % data.shape[0]) 31 | 32 | codecultu = data['CODE_CULTU'].drop_duplicates().tolist() 33 | codecultu = {code: i + 1 for i, code in enumerate(codecultu)} 34 | 35 | data['ground_truth'] = data['CODE_CULTU'].map(codecultu) 36 | del data['CODE_CULTU'] 37 | data['crs'] = args.epsg 38 | data['year'] = year 39 | data = data[['ground_truth', 'crs', 'year', 'geometry']] 40 | 41 | savedir = os.path.join(os.path.dirname(rpg_file), 'DF') 42 | if not os.path.exists(savedir): 43 | os.makedirs(savedir) 44 | 45 | data.to_csv(os.path.join(savedir, os.path.basename(rpg_file) + '_DF.csv'), index=False) 46 | 47 | pd.DataFrame([[k, v] for k, v in codecultu.items()], columns=['CODE_CULTU', 'ground_truth']) \ 48 | .to_csv(os.path.join(savedir, os.path.basename(rpg_file) + '_DF_codes.csv'), index=False) 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser(description='Extract polygons and ground truths from RPG data') 53 | parser.add_argument('--rpg-dir', type=str, help='Path to RPG directory') 54 | parser.add_argument('--epsg', default='2154', type=str, help='EPSG coordinate system for RPG data') 55 | 56 | main() 57 | -------------------------------------------------------------------------------- /dataset/France_RPG/exploreRPG_labels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/dataset/France_RPG/exploreRPG_labels.py 
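The canonical .csv written by RPG2DF.py stores each parcel's geometry as a WKT-style string (the serialized shapely polygon), so it can be read back with pandas and shapely. A minimal sketch, not part of the repository; the input path is hypothetical:
```python
# Minimal sketch (not part of the repository): read a canonical .csv produced by
# RPG2DF.py and parse the 'geometry' column back into shapely polygons.
import pandas as pd
from shapely import wkt

df = pd.read_csv("PARCELLES_GRAPHIQUES_DF.csv")   # hypothetical path to the RPG2DF.py output
df["geometry"] = df["geometry"].apply(wkt.loads)  # "POLYGON ((x y, ...))" strings -> shapely geometries

print(df[["ground_truth", "crs", "year"]].head())
print(df["geometry"].iloc[0].area)                # parcel area in CRS units (m^2 for EPSG:2154)
```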
-------------------------------------------------------------------------------- /dataset/README.md: -------------------------------------------------------------------------------- 1 | # Process Sentinel products 2 | The goal is to make a timeseries of Sentinel images spanning one year, given a directory of 3 | downloaded Sentinel products. The code assumes that all (unzipped) Sentinel products saved in the same directory correspond to a 4 | single tile, for example: 5 | ``` 6 | products_dir 7 | └───T28PCB 8 | │ └───S2A_MSIL1C_20180702T113321_N0206_R080_T28PCB_20180702T151612.SAFE 9 | │ └───S2A_MSIL1C_20180831T113321_N0206_R080_T28PCB_20180831T153248.SAFE 10 | │ └───... 11 | └───T28PDA 12 | │ └───S2B_MSIL1C_20180316T112109_N0206_R037_T28PDA_20180316T132558.SAFE 13 | │ └───S2B_MSIL1C_20180624T112109_N0206_R037_T28PDA_20180624T132810.SAFE 14 | │ └───... 15 | ``` 16 | 17 | Because a Sentinel tile is too large to fit into GPU memory, we split each tile into smaller, manageable pieces 18 | of size (HxW) and stack pieces corresponding to the same location at different timestamps to create a timeseries object. 19 | The final output of the process is a .pickle file with the following contents: 20 | - a numpy array of size (TxH_ixW_i) named after each Sentinel band i. We do not rescale bands to a common resolution, 21 | so each band has a different size. T is the number of available dates 22 | - a numpy array named "doy" of size T which corresponds to the "day of the year" for each available date 23 | - a numpy array named "year" of size 1 corresponding to the year of observations 24 | 25 | If ground truth data are available we also include the following: 26 | - a numpy array named "labels" of size (HxW) corresponding to ground truth labels 27 | - a numpy array named "ids" of size (HxW) corresponding to parcel identities 28 | 29 | ## Including ground truth data 30 | ### Make canonical .csv 31 | If available, ground truth data are used in the form of a canonical .csv file containing the following columns: 32 | - id: (int) object id corresponding to polygon area (optional, if not included a unique integer will be assigned) 33 | - ground_truth: (int) class corresponding to polygon area 34 | - crs: (int) geographic coordinate reference system 35 | - year: (int) the year the ground truth is valid for the given geometry 36 | - geometry: (str) shapely polygon or multipolygon 37 | 38 | For example: 39 | ``` 40 | ground_truth,crs,year,geometry 41 | 1,32628,2019,"POLYGON ((325059.9695234112 1579552.827570891, 325082.9883194482 1579557.590080416, ...))" 42 | 2,32628,2019,"POLYGON ((325108.9175379751 1579675.065315364, 325119.871309883 1579667.392383354, .))" 43 | ``` 44 | 45 | Specifically for 46 | [RPG](https://www.data.gouv.fr/en/datasets/registre-parcellaire-graphique-rpg-contours-des-parcelles-et-ilots-culturaux-et-leur-groupe-de-cultures-majoritaire/) 47 | crop type data for France, the following can be used to transform .shp files to canonical .csv: 48 | ```shell 49 | python dataset/France_RPG/RPG2DF.py --rpg-dir 50 | ``` 51 | 52 | ### Generate data 53 | We distinguish between two different use cases: 54 | 1. we overlay a grid with cell size equal to the desired sample_size on the AOI. 55 | For each grid square we make a raster of all ground truths and satellite images (timeseries) that fall into that square.
56 | The end result is a set of samples of size (sample_size X sample_size), each containing potentially multiple fields and 57 | not necessarily whole fields, as some will be cut at the image boundaries. 58 | 2. for each object in the canonical .csv we create a raster ground truth image in which the object is centered and all other pixels not 59 | falling inside the polygon region are assigned the background class. We also generate satellite image timeseries as before. 60 | This results in a single object per sample at the center of the image. 61 | 62 | #### Use case 1 63 | For use case 1 run the following bash script to generate data corresponding to spatial locations for which ground 64 | truths in the form of parcel polygons are available. 65 | ```shell 66 | sh dataset/labelled_dense/make_labelled_dataset.sh ground_truths_file=<1:ground_truths_file> products_dir=<2:products_dir> labels_dir=<3:labels_dir> windows_dir=<4:windows_dir> timeseries_dir=<5:timeseries_dir> 67 | res=<6:res> sample_size=<7:sample_size> num_processes=<8:num_processes> bands=<9:bands (optional)> 68 | ``` 69 | where: 70 | - ground_truths_file: file path for canonical .csv file as defined above 71 | - products_dir: directory path for downloaded Sentinel products 72 | - labels_dir: directory to save rasterized ground truths 73 | - windows_dir: directory to save extracted image windows 74 | - timeseries_dir: directory to save final timeseries objects 75 | - res: highest resolution of satellite image bands, 10 (m) for Sentinel-2 76 | - sample_size: number of pixels of final image windows (for the highest resolution image band) and ground truths 77 | - num_processes: number of processes to run in parallel 78 | - bands: (list) which satellite image bands to use, e.g. 'B02,B03,B04,...'. If not specified all bands are used (optional) 79 | 80 | #### Use case 2 81 | 82 | For use case 2 we first need to decide on the spatial dimensions of the samples. The following command finds the maximum N-S and E-W 83 | distance for each parcel, as well as the maximum of the two distances, and saves a cumulative histogram of these 84 | dimensions. 85 | ```shell 86 | python dataset/labelled_dense/find_parcel_dimensions.py ground_truths_file=<1:ground_truths_file> products_dir=<2:products_dir> save_dir=<3:save_dir> 87 | ``` 88 | where: 89 | - ground_truths_file: file path for canonical .csv file as defined above 90 | - products_dir: directory path for downloaded Sentinel products 91 | - save_dir: directory to save output 92 | 93 | This information will help guide the decision on the sample size: we want most parcels to fit in the sample, but not make 94 | it larger than needed, as this would waste computational resources. Of course other considerations may come into play. 
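As a rough illustration of the kind of analysis find_parcel_dimensions.py performs, the sketch below estimates per-parcel extents and a percentile-based sample size directly from the canonical .csv. It is a sketch only (not the script itself), assumes a projected CRS with metre units, and uses a hypothetical input path:
```python
# Sketch only: estimate per-parcel N-S / E-W extents (metres) from the canonical .csv
# and translate a high percentile into a sample_size in pixels. Path is hypothetical.
import numpy as np
import pandas as pd
from shapely import wkt

gt = pd.read_csv("ground_truths.csv")                             # hypothetical canonical .csv
bounds = np.stack([wkt.loads(g).bounds for g in gt["geometry"]])  # (minx, miny, maxx, maxy) per parcel
ew = bounds[:, 2] - bounds[:, 0]                                  # E-W extent
ns = bounds[:, 3] - bounds[:, 1]                                  # N-S extent
max_dim = np.maximum(ew, ns)                                      # larger of the two per parcel

res = 10                                                          # highest Sentinel-2 resolution (m)
p95 = np.percentile(max_dim, 95)
print("95%% of parcels fit within %.0f m -> sample_size >= %d pixels" % (p95, int(np.ceil(p95 / res))))
```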
95 | 96 | After deciding on the sample size we run the following command to generate use case 2 data: 97 | 98 | ```shell 99 | sh dataset/labelled_dense/make_labelled_parcel_dataset.sh ground_truths_file=<1:ground_truths_file> products_dir=<2:products_dir> labels_dir=<3:labels_dir> windows_dir=<4:windows_dir> timeseries_dir=<5:timeseries_dir> 100 | res=<6:res> sample_size=<7:sample_size> Npoly=<8:Npoly> num_processes=<9:num_processes> bands=<10:bands (optional)> 101 | ``` 102 | ```shell 103 | sh dataset/labelled_dense/make_labelled_parcel_dataset.sh ground_truths_file='/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/example_parcels_in_AOI.csv' products_dir='/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2018/cloud_0_30' labels_dir='/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18_example/LABELS' windows_dir='/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18_example/IMAGES' timeseries_dir='/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18_example/TIMESERIES' res=10 sample_size=100 Npoly=50 num_processes=8 104 | ``` 105 | 106 | ## Without ground truth data 107 | In this case we only need to provide the directory where Sentinel products have been downloaded. Optionally we can provide an 108 | anchor point which will be used when constructing the grid for splitting the AOI into smaller pieces. If provided, the 109 | anchor will be placed at a vertex of the constructed grid. 110 | 111 | ### Generate data 112 | Run the following bash script to generate data for all spatial locations covered by the 113 | downloaded Sentinel products. 114 | ```shell 115 | sh dataset/unlabelled/make_unlabelled_dataset.sh products_dir=<1:products_dir> windows_dir=<2:windows_dir> timeseries_dir=<3:timeseries_dir> res=<4:res> 116 | sample_size=<5:sample_size> num_processes=<6:num_processes> anchor=<7:anchor (optional)> bands=<8:bands (optional)> 117 | ``` 118 | where: 119 | - products_dir: (str) directory path for downloaded Sentinel products 120 | - windows_dir: (str) directory to save extracted image windows 121 | - timeseries_dir: (str) directory to save final timeseries objects 122 | - res: (int) highest resolution of satellite image bands, 10 (m) for Sentinel-2 123 | - sample_size: (int) number of pixels of final image windows (for the highest resolution image band) and ground truths 124 | - num_processes: (int) number of processes to run in parallel 125 | - anchor: (list) (N,W,CRS) coordinates of an anchor point and CRS to use as a corner for extracting windows (optional) 126 | - bands: (list) which satellite image bands to use, e.g. 'B02,B03,B04,...'. 
If not specified all bands are used (optional) 127 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/dataset/__init__.py -------------------------------------------------------------------------------- /dataset/labelled_dense/SS/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/dataset/labelled_dense/SS/__init__.py -------------------------------------------------------------------------------- /dataset/labelled_dense/SS/extract_images_for_parcel_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a set of S2 tiles and a labelled_dense lable map, extract crops of images matching the location of labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import rasterio 7 | import numpy as np 8 | import os 9 | from glob import glob 10 | import pickle 11 | if __name__ == "__main__" and __package__ is None: 12 | from sys import path 13 | from os.path import dirname as dir 14 | path.insert(0, dir(dir(path[0]))) 15 | __package__ = "examples" 16 | from utils.data_utils import find_number 17 | from utils.geospatial_data_utils import GeoTransform 18 | from utils.multiprocessing_utils import run_pool 19 | from utils.sentinel_products_utils import get_S2prod_info 20 | 21 | 22 | mult = {'B01': 1/6.,'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 23 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 24 | # jp2s = ["%s.jp2" % i for i in mult.keys()] 25 | 26 | 27 | def extract_images(imdirs): 28 | 29 | jp2s = ["%s.jp2" % i for i in bands] 30 | 31 | saved_files_info = [] 32 | 33 | for ii, imdir in enumerate(imdirs): 34 | # ii, imdir = 1, imdirs[1] 35 | 36 | print("unfolding product %d of %d" % (ii, len(imdirs))) 37 | 38 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 39 | 40 | # read product 41 | data = {} 42 | for jp2 in jp2s: 43 | with rasterio.open("%s/%s_%s" % (imdir, imname, jp2)) as f: 44 | data[jp2[:-4]] = f.read(1) 45 | 46 | # if str(f.crs).split(':')[1] != CRSl: 47 | # geotransform_prod2label = GeoTransform(str(f.crs).split(':')[1], CRSl, loc2loc=CRSl != '4326') 48 | # geotransform_label2prod = GeoTransform(CRSl, str(f.crs).split(':')[1], loc2loc=CRSl != '4326') 49 | # Wp, Np = geotransform_prod2label(np.array(f.transform)[2], np.array(f.transform)[5]) 50 | # else: 51 | # Wp, Np = np.array(f.transform)[2], np.array(f.transform)[5] 52 | geotransform_label2prod = GeoTransform(CRSl, str(f.crs).split(':')[1], loc2loc=CRSl != '4326') 53 | Wp, Np = np.array(f.transform)[2], np.array(f.transform)[5] 54 | 55 | prod_savedir = os.path.join(savedir, imdir.split("/")[-4].split(".")[0]) 56 | if not os.path.exists(prod_savedir): 57 | os.makedirs(prod_savedir) 58 | 59 | # saved_gt_info[saved_gt_info['Ntl']==saved_gt_info['Ntl'].max()] 60 | for i in range(saved_gt_info.shape[0]): 61 | # i = 3600 62 | # i = 4500 63 | # i = 4000 64 | 65 | Nl = saved_gt_info.iloc[i]['Ntl'] 66 | Wl = saved_gt_info.iloc[i]['Wtl'] 67 | Wlp, Nlp = geotransform_label2prod(Wl, Nl) 68 | 69 | # ip = int((Np - Nl) / res) # + 2 70 | # jp = int((Wl - Wp) / res) # + 2 71 | ip = int((Np - Nlp) / res) # + 2 72 | 
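# ip (above) and jp (below) are the row/column offsets, in res-metre pixels, of the label window's top-left corner within the Sentinel product raster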
jp = int((Wlp - Wp) / res) # + 2 73 | 74 | # # sample outside Sentinel product 75 | # if (ip < 0) or (jp < 0): 76 | # saved_files_info.append( 77 | # [None, Nl, Wl, Np, Wp, ip, jp, sample_size, sample_size, date, imdir, 78 | # "sample outside Sentinel product"]) 79 | # continue 80 | 81 | date = imdir.split("/")[-4].split(".")[0].split("_")[2][:8] 82 | 83 | sample = {} 84 | for jp2 in jp2s: 85 | xpmin = int(np.round(mult[jp2[:-4]] * ip)) 86 | ypmin = int(np.round(mult[jp2[:-4]] * jp)) 87 | sample[jp2[:-4]] = data[jp2[:-4]][xpmin: xpmin + int(mult[jp2[:-4]] * sample_size), 88 | ypmin: ypmin + int(mult[jp2[:-4]] * sample_size)] 89 | 90 | # this parcel falls in black region for this product 91 | if sample[jp2[:-4]].sum() == 0: 92 | saved_files_info.append( 93 | ["", Nlp, Wlp, Nl, Wl, Np, Wp, ip, jp, sample_size, sample_size, date, imdir, "no image"]) 94 | continue 95 | 96 | # import matplotlib.pyplot as plt 97 | # 98 | # 99 | # with open(saved_gt_info.iloc[i]['filepath'], 'rb') as handle: 100 | # labels = pickle.load(handle) # , protocol=pickle.HIGHEST_PROTOCOL)plt.figure() 101 | # 102 | # print(ip, jp) 103 | # 104 | # 105 | # plt.figure() 106 | # plt.imshow(sample['B03']) 107 | # plt.imshow(labels['ratios'], alpha=0.7) 108 | # 109 | # # plt.figure() 110 | # # plt.imshow(labels['ratios']) 111 | # ij = np.array([[3534, 10068], [3582, 10746], [9828, 3456]]) 112 | # l = np.array([[63, 65], [43, 58], [47, 66]]) 113 | # im = np.array([[63, 70], [45, 65], [55, 68.5]]) 114 | # im - l 115 | 116 | sample_save_path = "%s/N%d_W%d_D%s_CRS%s.pickle" % (prod_savedir, int(Nl), int(Wl), date, CRSl) 117 | with open(sample_save_path, 'wb') as handle: 118 | pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 119 | 120 | saved_files_info.append( 121 | [sample_save_path, Nlp, Wlp, Nl, Wl, Np, Wp, ip, jp, sample_size, sample_size, date, imdir, "ok"]) 122 | 123 | df = pd.DataFrame(data=saved_files_info, 124 | columns=['sample_path', 'Nlp', 'Wlp', 'Nl', 'Wl', 'Np', 'Wp', 'ip', 'jp', 125 | 'height', 'width', 'Date', 'S2_prod_imdir', "comment"]) 126 | return df 127 | 128 | 129 | def main(): 130 | # ground truths 131 | gtfiles = os.listdir(ground_truths_dir) 132 | # years = [find_number(s, "Y") for s in gtfiles] 133 | # files = {year: {} for year in set(years)} 134 | # for i, file in enumerate(gtfiles): 135 | # if not file.startswith('INVALID'): 136 | # files[years[i]][file.split("_")[0]] = file 137 | # print("found ground truths for years %s" % ", ".join(list(files.keys()))) 138 | # global labels 139 | # global Nl 140 | # global Wl 141 | global CRSl 142 | global saved_gt_info 143 | 144 | # global num_rows 145 | # global num_cols 146 | 147 | # sentinel products 148 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 149 | prod_df = get_S2prod_info(imdirs) 150 | prod_df['Year'] = prod_df['Time'].apply(lambda s: s[:4]) 151 | 152 | out = [] 153 | for gtfile in gtfiles: 154 | # gtfile = gtfiles[0] 155 | 156 | # # ground truths 157 | # labels = np.loadtxt(os.path.join(ground_truths_dir, files[year]['LABELS']), dtype=np.float32) 158 | # Nl = int(find_number(files[year]['LABELS'], "N")) 159 | # Wl = int(find_number(files[year]['LABELS'], "W")) 160 | # 161 | # num_rows, num_cols = [d / sample_size for d in labels.shape] 162 | # assert (np.ceil(num_rows) == num_rows) and (np.ceil(num_cols) == num_cols), \ 163 | # "sample size should be fitting exactly in labels, this suggests an error in extract_labels_raster script" 164 | saved_gt_info = pd.read_csv(os.path.join(ground_truths_dir, gtfile, 
'saved_data_info.csv')) 165 | 166 | year = find_number(gtfile, "Y") 167 | CRSl = find_number(gtfile, "CRS") 168 | 169 | # sentinel products 170 | products = prod_df[prod_df['Year'] == year] 171 | imdirs = products['path'].tolist() 172 | 173 | df_year = run_pool(imdirs, extract_images, num_processes) 174 | # df = extract_images([imdirs[0]]) 175 | out.append(pd.concat(df_year)) 176 | 177 | df = pd.concat(out).reset_index(drop=True) 178 | df['crs'] = CRSl 179 | df.to_csv(os.path.join(savedir, "extracted_windows_data_info.csv"), index=False) 180 | 181 | 182 | if __name__ == "__main__": 183 | 184 | # parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 185 | # parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 186 | # parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 187 | # parser.add_argument('--savedir', help='save directory to extract sentinel products windows') 188 | # parser.add_argument('--bands', default=None, help='which satellite image bands to use') 189 | # parser.add_argument('--res', default=10, help='pixel size in meters') 190 | # parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 191 | # parser.add_argument('--num_processes', default=4, help='number of parallel processes') 192 | # # --------------------------------------------------------------------------------------------- 193 | # 194 | # args = parser.parse_args() 195 | # 196 | # ground_truths_dir = args.ground_truths_dir 197 | # 198 | # products_dir = args.products_dir 199 | # 200 | # savedir = args.savedir 201 | # print("savedir: ", savedir) 202 | # if not os.path.exists(savedir): 203 | # os.makedirs(savedir) 204 | # 205 | # bands = args.bands 206 | # if bands == 'None': 207 | # bands = list(mult.keys()) 208 | # else: 209 | # bands = bands.split(',') 210 | # 211 | # res = int(args.res) 212 | # 213 | # sample_size = int(args.sample_size) 214 | # 215 | # num_processes = int(args.num_processes) 216 | 217 | ground_truths_dir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/LABELS4' 218 | products_dir = '/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2018/cloud_0_30' 219 | savedir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/IMAGES' 220 | bands = 'None' 221 | if bands == 'None': 222 | bands = list(mult.keys()) 223 | else: 224 | bands = bands.split(',') 225 | res = 10 226 | sample_size = 100 227 | num_processes = 4 228 | 229 | 230 | # main() 231 | -------------------------------------------------------------------------------- /dataset/labelled_dense/SS/extract_parcel_labels_raster.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | from shapely import geometry 5 | import os 6 | from glob import glob 7 | from multiprocessing import Pool 8 | if __name__ == "__main__" and __package__ is None: 9 | from sys import path 10 | from os.path import dirname as dir 11 | path.insert(0, dir(dir(path[0]))) 12 | __package__ = "examples" 13 | from utils.geospatial_data_utils import GeoTransform, get_points_from_str_poly 14 | from utils.multiprocessing_utils import split_df 15 | from utils.sentinel_products_utils import get_S2prod_info 16 | import matplotlib.pyplot as plt 17 | import pickle 18 | from copy import deepcopy 19 | 20 | 21 | def is_valid(parcel_poly, pxmin, pymax): 22 | """ 23 | checks if 
parcel_poly polygon has valid shape 24 | """ 25 | isvalid = True 26 | i = 0 27 | j = 0 28 | pix_points = [[pxmin + loc[0] * res, pymax - loc[1] * res] for loc in 29 | [[j, i], [j + 1, i], [j + 1, i + 1], [j, i + 1], [j, i]]] 30 | try: 31 | parcel_poly.intersection(geometry.Polygon(pix_points)).area 32 | except: 33 | isvalid = False 34 | return isvalid 35 | 36 | 37 | def plot_poly(points, c=None, newfig=False): 38 | if type(points) in [list, tuple]: 39 | points = np.array(points) 40 | if c is None: 41 | c = "r" 42 | if newfig: 43 | plt.figure() 44 | for i in range(points.shape[0] - 1): 45 | plt.plot(points[i:i + 2, 0], points[i:i + 2, 1], c=c) 46 | 47 | 48 | def str_line_eq(points, h=1e-1): 49 | assert points.shape == (2, 2), 'Two points must be used to derive straight line equation' 50 | x1, y1 = points[0] 51 | x2, y2 = points[1] 52 | denom = x2 - x1 53 | if denom == 0: 54 | denom = h 55 | a = (y2 - y1) / denom # (x2 - x1) 56 | b = (y1 * x2 - x1 * y2) / denom # (x2 - x1) 57 | return a, b 58 | 59 | 60 | def extract_parcel_labels_raster(inputs): 61 | 62 | # rank = 0 63 | rank, geodata, W, N, Wp, Np, year, crs = inputs 64 | # rank, geodata, W, N, Wp, Np, year, crs = inputs[0] 65 | 66 | # # arrays to save 67 | # AOI_labels = np.zeros((int(np.round(dy / res)), int(np.round(dx / res))), dtype=np.float32) # + max_label + 1 68 | # AOI_ids = np.zeros((int(np.round(dy / res)), int(np.round(dx / res))), dtype=np.float32) 69 | # AOI_masks = AOI_ids.copy() 70 | # # additional/helper arrays 71 | # AOI_ratios = AOI_ids.copy() 72 | year_savedir = os.path.join(savedir, 'Y%s_N%s_W%s_R%d_CRS%s' % (year, N, W, res, crs)) 73 | if not os.path.exists(year_savedir): 74 | os.makedirs(year_savedir) 75 | 76 | saved_data_info = [] 77 | # invalid_shapes = [] 78 | for ii in range(geodata.shape[0]): 79 | # ii = 3600 # 4500 80 | print("process %d, parcel %d of %d" % (rank, ii+1, geodata.shape[0])) 81 | parcel_poly = geodata['geometry'][ii] 82 | label = geodata['ground_truth'][ii] 83 | id = geodata['id'][ii] 84 | 85 | points = get_points_from_str_poly(parcel_poly) 86 | anchor = np.array(geometry.Polygon(points).centroid) 87 | # anchor = points.mean(axis=0) 88 | N0 = anchor[1] + sample_size * res / 2. 89 | W0 = anchor[0] - sample_size * res / 2. 90 | 91 | # correct for non integer offset wrt product Nmax, Wmax (top-left) coordinates 92 | dN = (Np - N0) % 60 93 | dW = (W0 - Wp) % 60 94 | N0 += dN 95 | W0 -= dW 96 | # anchor[1] = N0 - sample_size * res / 2. 97 | # anchor[0] = W0 + sample_size * res / 2. 98 | anchor = np.array([W0 + sample_size * res / 2., N0 - sample_size * res / 2.]) 99 | # anchor = points.mean(axis=0) #- sample_size * res / 2 100 | 101 | # pr = points - anchor 102 | pr = (points - anchor + sample_size * res / 2) # !!! 
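# pr: parcel vertices shifted to window-local coordinates (metres), origin at the window's south-west corner (x increases eastward, y northward)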
103 | parcel_poly = geometry.Polygon(pr) 104 | 105 | pxmin, pymin = pr.min(axis=0) 106 | pxmax, pymax = pr.max(axis=0) 107 | 108 | # DONT DO VERY SMALL ONES 109 | # if ((pxmax - pxmin) < 20) or ((pymax - pymin) < 20): 110 | 111 | if not is_valid(parcel_poly, pxmin, pymax): 112 | try: 113 | int_area = sum( 114 | [geometry.Polygon(np.array(pol.coords[:])).area for pol in parcel_poly.buffer(0).interiors]) 115 | ext_area = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])).area 116 | if int_area / ext_area < 0.05: # threshold for discarding a parcel polygon 117 | print("included, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 118 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 119 | parcel_poly = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])) 120 | pr = np.stack([np.array(i) for i in parcel_poly.exterior.coords.xy]).T 121 | pxmin, pymin = pr.min(axis=0) 122 | pxmax, pymax = pr.max(axis=0) 123 | else: 124 | print("excluded, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 125 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 126 | values = geodata.iloc[ii].to_list() 127 | for v in [N0, W0, None]: 128 | values.append(v) 129 | saved_data_info.append(values) 130 | continue 131 | except: 132 | continue 133 | 134 | # labels = np.zeros((sample_size, sample_size), dtype=np.float32) 135 | # ids = labels.copy() 136 | ratios = np.zeros((sample_size, sample_size), dtype=np.float32) 137 | alpha = ratios.copy() 138 | # global_alpha = ratios.copy() 139 | global_beta = ratios.copy() 140 | # local_alpha = ratios.copy() 141 | local_beta = ratios.copy() 142 | 143 | row0 = int(np.floor((1 - pymax / (sample_size * res)) * sample_size)) 144 | row1 = int(np.ceil((1 - pymin / (sample_size * res)) * sample_size)) 145 | col0 = int(np.floor(pxmin / (sample_size * res) * sample_size)) 146 | col1 = int(np.ceil(pxmax / (sample_size * res) * sample_size)) # + 1 147 | # row0 = int((1 - pr[:, 1].max() / dy) * AOI_labels.shape[0]) 148 | # row1 = int((1 - pr[:, 1].min() / dy) * AOI_labels.shape[0]) 149 | # col0 = int(pr[:, 0].min() / dx * AOI_labels.shape[1]) 150 | # col1 = int(pr[:, 0].max() / dx * AOI_labels.shape[1]) + 1 151 | 152 | # H, W = sample_size, sample_size 153 | Height, Width = row1 - row0, col1 - col0 154 | 155 | # if (Height < 5) or (Width) 156 | # bl = False 157 | 158 | for i in range(Height): 159 | # if bl: 160 | # break 161 | for j in range(Width): 162 | # i, j = 0, 3 163 | if (row0 + i) * (col0 + j) < 0: 164 | continue 165 | 166 | try: 167 | 168 | pix_points = [[pxmin + loc[0] * res, pymax - loc[1] * res] for loc in 169 | [[j, i], [j + 1, i], [j + 1, i + 1], [j, i + 1], [j, i]]] 170 | 171 | pix_poly = geometry.Polygon(pix_points) 172 | 173 | value = parcel_poly.intersection(pix_poly).area / res ** 2 174 | if (0 < value) and (value < 1): # parcel cuts through pixel 175 | # print(i, j) 176 | # bl = True 177 | global_points = np.array(parcel_poly.boundary.intersection(pix_poly.boundary)) 178 | if global_points.shape[0] > 2: # !!! 
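# str_line_eq expects exactly two points, so keep only the first two parcel-boundary/pixel-boundary intersection points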
179 | global_points = global_points[:2] 180 | global_params = str_line_eq(global_points) 181 | alpha[row0 + i + 1, col0 + j + 1] = global_params[0] 182 | # global_alpha[row0 + i + 1, col0 + j + 1] = global_params[0] 183 | global_beta[row0 + i + 1, col0 + j + 1] = global_params[1] / (sample_size * res) 184 | local_points = (global_points - np.array([pxmin + j * res, pymax - i * res])) / res 185 | local_params = str_line_eq(local_points) 186 | # local_alpha[row0 + i + 1, col0 + j + 1] = local_params[0] 187 | local_beta[row0 + i + 1, col0 + j + 1] = local_params[1] 188 | 189 | # break 190 | 191 | if value == 0: # no intersection 192 | continue 193 | 194 | # labels[row0 + i + 1, col0 + j + 1] = label 195 | ratios[row0 + i + 1, col0 + j + 1] = value 196 | # ratios[col0 + i + 1, row0 + j + 1] = value 197 | 198 | # ids[row0 + i + 1, col0 + j + 1] = id 199 | 200 | except: 201 | continue 202 | # replace global, local alpha with alpha 203 | sample = {'N': N0, 'W': W0, 'boundary': pr / res, 'label': label, 'id': id, 'ratios': ratios, 204 | 'alpha': alpha, 'global_beta': global_beta, 'local_beta': local_beta} 205 | impath = os.path.join(year_savedir, 'N%d_E%d_ground_truths.pickle' % (N0, W0)) 206 | with open(impath, 'wb') as handle: 207 | pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 208 | 209 | values = geodata.iloc[ii].to_list() 210 | for v in [N0, W0, impath]: 211 | values.append(v) 212 | saved_data_info.append(values) 213 | 214 | return saved_data_info 215 | 216 | 217 | # lines = [] 218 | # boundary = parcel_poly.boundary 219 | # if boundary.type == 'MultiLineString': 220 | # for line in boundary: 221 | # lines.append(line) 222 | # else: 223 | # lines.append(boundary) 224 | # 225 | # ######################################################################## 226 | # 227 | # 228 | # 229 | # points = pix_poly.boundary.intersection(parcel_poly.boundary) # multipoint 230 | # points = np.array(points) 231 | # 232 | # 233 | # print(points) 234 | # plt.figure() 235 | # plt.imshow(ratios) 236 | # plt.scatter((anchor[0]-W0) / res, (N0 - anchor[1])/res) 237 | 238 | 239 | # plt.figure() 240 | # plt.hist(global_alpha[global_alpha != 0], 20) 241 | # 242 | # plt.figure() 243 | # plt.hist(alpha[alpha != 0], 20) 244 | # 245 | # plt.figure() 246 | # plt.hist(np.tanh(global_beta[global_beta != 0])) 247 | # 248 | # plt.figure() 249 | # plt.hist(np.tanh(local_beta[local_beta != 0])) 250 | # 251 | # plt.figure() 252 | # plt.hist(np.tanh(global_alpha[global_alpha != 0]), 20) 253 | # 254 | # plt.figure() 255 | # plt.hist(np.tanh(global_beta[global_beta > 0] / 1000.)) 256 | # 257 | # plt.figure() 258 | # plt.imshow(alpha) # , ::-1]) 259 | # plt.title('alpha') 260 | # plt.colorbar() 261 | # pr1 = pr / res # - np.array([2.5, 8]) 262 | # pr1[:, 1] = 100 - pr1[:, 1] 263 | # pr1 = pr1 264 | # plot_poly(pr1, newfig=False) 265 | # 266 | # plt.figure() 267 | # plt.imshow(global_beta) # , ::-1]) 268 | # plt.title('global_beta') 269 | # plt.colorbar() 270 | # plot_poly(pr1, newfig=False) 271 | # 272 | # 273 | # def dot(x1, x2): 274 | # return x1.dot(x2) 275 | # 276 | # 277 | # def norm_dot(x1, x2): 278 | # x1 = x1 / np.linalg.norm(x1) 279 | # x2 = x2 / np.linalg.norm(x2) 280 | # return x1.dot(x2) 281 | # 282 | # 283 | # l1 = np.array([-100, 300]) 284 | # l2 = np.array([100, -300]) 285 | # l3 = np.array([20, -5]) 286 | # 287 | # dot(l1, l2) 288 | # dot(l2, l3) 289 | # 290 | # norm_dot(l1, l2) 291 | # norm_dot(l2, l3) 292 | 293 | 294 | 295 | 296 | # plot_poly(pix_points, newfig=False) 297 | 298 | # x = np.linspace(0, 
100, 100) 299 | # y = -0.407134 * x + (100 - 103.457/10) # 103.457 300 | # 301 | # plt.plot(x, y) 302 | 303 | # # plt.figure() 304 | # plt.plot(*geometry.Polygon(pr1).exterior.xy) 305 | # # for i in range(pr1.shape[0] - 1): 306 | # # plt.plot(pr1[i:i + 2, 0], pr1[i:i + 2, 1], c='r') 307 | # plt.scatter(1, 1) 308 | 309 | # return AOI_labels, AOI_ids, AOI_masks, AOI_ratios, pd.DataFrame(invalid_shapes) 310 | 311 | 312 | def main(): 313 | # ground truth data 314 | gt_df = pd.read_csv(ground_truths_file) 315 | if 'id' not in gt_df: 316 | print('Column "id" not included. Assigning values from 1 to file size') 317 | gt_df['id'] = range(1, gt_df.shape[0]+1) 318 | # gt_df['id'] = range(1, gt_df.shape[0]+1) 319 | assert (gt_df['crs'] == gt_df['crs'].iloc[0]).all(), \ 320 | "Polygons corresponding to multiple CRS were found in %s" % ground_truths_file 321 | crs = gt_df['crs'].iloc[0] 322 | yearly_grouped_gt = gt_df.groupby('year') 323 | years = list(yearly_grouped_gt.groups.keys()) 324 | print("found ground truth data for years %s" % ", ".join([str(i) for i in years])) 325 | if 0 in gt_df['ground_truth'].drop_duplicates(): 326 | gt_df['ground_truth'] += 1 327 | 328 | # sentinel products 329 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 330 | prod_df = get_S2prod_info(imdirs) 331 | assert (prod_df['West']==prod_df['West'].iloc[0]).all() and (prod_df['North']==prod_df['North'].iloc[0]).all(),\ 332 | "Sentinel products corresponding to multiple tiles were found in %s" % products_dir 333 | geotr = GeoTransform(intr=prod_df['crs'].iloc[0].split(':')[1], outtr=gt_df['crs'].iloc[0], loc2loc=gt_df['crs'].iloc[0] != '4326') 334 | prod_WN = prod_df[['West', 'North']].iloc[0].tolist() 335 | prod_WN = geotr(prod_WN[0], prod_WN[1]) # in ground truth data coordinate system 336 | d = (10 * prod_df[['height', 'width']].iloc[0].values).tolist() 337 | 338 | # find all ground truth data that fall inside sentinel product 339 | prod_poly = geometry.Polygon([[prod_WN[0] + loc[0] * d[0], prod_WN[1] - loc[1] * d[1]] for loc in 340 | [[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]) 341 | print(prod_poly) 342 | def f(x): 343 | try: 344 | x = get_points_from_str_poly(x) 345 | W = x[:, 0].min() 346 | E = x[:, 0].max() 347 | S = x[:, 1].min() 348 | N = x[:, 1].max() 349 | x = geometry.Polygon(x) 350 | inratio = prod_poly.intersection(x).area / x.area 351 | return np.array([N, S, W, E, inratio]) 352 | except: 353 | return np.array([0, 0, 0, 0, 0]) 354 | 355 | gt_df[['N', 'S', 'W', 'E', 'inratio']] = np.stack(gt_df['geometry'].apply(f).values) 356 | gt_df = gt_df[gt_df['inratio'] == 1.0] 357 | print("found %d polygons inside sentinel tile" % gt_df.shape[0]) 358 | 359 | N = int(np.ceil(gt_df['N'].max())) # N-maxy 360 | # S = int(np.floor(gt_df['S'].min())) # S-miny 361 | # E = int(np.ceil(gt_df['E'].max())) # E-maxx 362 | W = int(np.floor(gt_df['W'].min())) # W-minx 363 | 364 | # # increase AOI dimensions to match integer multiple of sample size 365 | # if np.ceil((maxy - miny) / (sample_size * res)) != (maxy - miny) / (sample_size * res): 366 | # dy = (np.ceil((maxy - miny) / (sample_size * res)) - (maxy - miny) / (sample_size * res)) * (sample_size * res) 367 | # miny = miny - dy 368 | # if np.ceil((maxx - minx) / (sample_size * res)) != (maxx - minx) / (sample_size * res): 369 | # dx = (np.ceil((maxx - minx) / (sample_size * res)) - (maxx - minx) / (sample_size * res)) * (sample_size * res) 370 | # maxx = maxx + dx 371 | # dx = maxx - minx 372 | # dy = maxy - miny 373 | # anchor = minx, miny # WS 374 | 375 | pool = 
Pool(num_processes) 376 | 377 | for year in years: 378 | # year = years[0] 379 | 380 | geodata = gt_df[gt_df['year'] == year].reset_index(drop=True) 381 | 382 | inputs = [[i, df_, W, N, prod_WN[0], prod_WN[1], year, crs] for i, df_ in enumerate(split_df(geodata, num_processes))] 383 | 384 | outputs = pool.map(extract_parcel_labels_raster, inputs) 385 | 386 | saved_data_info = pd.concat(pd.DataFrame(out) for out in outputs) 387 | save_name = os.path.join(savedir, 'Y%s_N%s_W%s_R%d_CRS%s' % (year, N, W, res, crs), 'saved_data_info.csv') 388 | saved_data_info.columns = ['id', 'ground_truth', 'crs', 'year', 'geometry', 'Np', 'Sp', 'Wp', 'Ep', 389 | 'inratio', 'Dy', 'Dx', 'D', 'Ntl', 'Wtl', 'filepath'] 390 | saved_data_info.to_csv(save_name, index=False) 391 | 392 | # d = pd.read_csv(save_name) 393 | # d['filepath'] = d['filepath'].apply(lambda s: os.path.join('/'.join(s.split('/')[:-1]), 394 | # 'Y2018_N6650384_W799943_R10_CRS2154', 395 | # s.split('/')[-1])) 396 | # AOI_labels = np.stack([out_[0] for out_ in outputs]) 397 | # AOI_ids = np.stack([out_[1] for out_ in outputs]) 398 | # AOI_masks = np.stack([out_[2] for out_ in outputs]) 399 | # AOI_ratios = np.stack([out_[3] for out_ in outputs]) 400 | # invalid_shapes = pd.concat([out_[4] for out_ in outputs]) 401 | # 402 | # labels = AOI_labels.max(axis=0) 403 | # masks = AOI_masks.max(axis=0) 404 | # ids = AOI_ids.sum(axis=0) 405 | # 406 | # locs = np.stack(np.where((AOI_labels > 0).sum(axis=0) > 1)).T 407 | # 408 | # for i, loc in enumerate(locs): 409 | # 410 | # if i % 1000 == 0: 411 | # print("correcting inter process overlaps, step %d of %d" % (i, locs.shape[0])) 412 | # 413 | # if any(AOI_ratios[:, loc[0], loc[1]] == 1.0): 414 | # masks[loc[0], loc[1]] = 2 415 | # else: 416 | # masks[loc[0], loc[1]] = 1 417 | # 418 | # idx = np.argmax(AOI_ratios[:, loc[0], loc[1]]) 419 | # labels[loc[0], loc[1]] = AOI_labels[idx, loc[0], loc[1]] 420 | # ids[loc[0], loc[1]] = AOI_ids[idx, loc[0], loc[1]] 421 | # 422 | # np.savetxt("%s/LABELS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 423 | # (savedir, str(year), str(maxy), str(minx), res, str(crs)), labels) 424 | # np.savetxt("%s/IDS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 425 | # (savedir, str(year), str(maxy), str(int(maxx)), res, str(crs)), ids) 426 | # np.savetxt("%s/MASKS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 427 | # (savedir, str(year), str(maxy), str(int(maxx)), res, str(crs)), masks) 428 | # if invalid_shapes.shape[0] != 0: 429 | # invalid_shapes.to_csv( 430 | # "%s/INVALID_Y%s_N%s_W%s_R%d_CRS%s.csv" % 431 | # (savedir, str(year), str(maxy), str(int(maxx)), res, str(crs)), index=False) 432 | 433 | 434 | if __name__ == "__main__": 435 | 436 | # parser = argparse.ArgumentParser(description='Make raster from shapely polygons') 437 | # parser.add_argument('--ground_truths_file', help='filename containing ground truth parcel polygons') 438 | # parser.add_argument('--products_dir', help='directory containing sentinel products') 439 | # parser.add_argument('--savedir', help='save directory to extract ground truths in raster mode') 440 | # parser.add_argument('--res', default=10, help='pixel size in meters') 441 | # parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 442 | # parser.add_argument('--num_processes', default=4, help='number of parallel processes') 443 | # 444 | # args = parser.parse_args() 445 | # 446 | # ground_truths_file = args.ground_truths_file 447 | # 448 | # products_dir = args.products_dir 449 | # 450 | # savedir = args.savedir 451 | # print("savedir: ", savedir) 452 | # if 
not os.path.exists(savedir): 453 | # os.makedirs(savedir) 454 | # 455 | # res = int(args.res) 456 | # 457 | # sample_size = int(args.sample_size) 458 | # 459 | # num_processes = int(args.num_processes) 460 | # 461 | # 462 | # main() 463 | 464 | ground_truths_file = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/gt_df_parcels_in_AOI.csv' 465 | products_dir = '/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2018/cloud_0_30' 466 | savedir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/LABELS4' 467 | res = 10 468 | sample_size = 100 # 64 469 | num_processes = 4 470 | 471 | # if not os.path.exists(savedir): 472 | # os.makedirs(savedir) 473 | # 474 | # main() 475 | -------------------------------------------------------------------------------- /dataset/labelled_dense/SS/make_image_timeseries_for_parcel_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | For a set of extracted image crops and a labelled_dense label map, make a timeseries of all positions matched with labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import numpy as np 7 | import os 8 | import shutil 9 | import pickle 10 | if __name__ == "__main__" and __package__ is None: 11 | from sys import path 12 | from os.path import dirname as dir 13 | path.insert(0, dir(dir(path[0]))) 14 | __package__ = "examples" 15 | from utils.data_utils import find_number 16 | from utils.date_utils import get_doy 17 | from utils.multiprocessing_utils import run_pool 18 | 19 | 20 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 21 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 22 | 23 | 24 | def match_labels_images(yearlocs): 25 | 26 | refband = bands[1] 27 | 28 | saved_files_info = [] 29 | for jj, yearloc in enumerate(yearlocs): 30 | 31 | if jj % 1000 == 0: 32 | print("%d of %d" % (jj, len(yearlocs))) 33 | # yearloc = yearlocs[20000] 34 | try: 35 | 36 | idx = yearloc_groups[yearloc] 37 | data = iminfo.iloc[idx, :].sort_values(by='DOY').copy() 38 | data = data.drop_duplicates(subset=['DOY'], keep='first') # some products downloaded twice 39 | 40 | Y = data['Year'].iloc[0] 41 | N = data['Nl'].iloc[0] 42 | W = data['Wl'].iloc[0] 43 | # il = data['il'].iloc[0] 44 | # jl = data['jl'].iloc[0] 45 | 46 | assert all(data['Year'] == Y) 47 | assert all(data['Nl'] == N) 48 | assert all(data['Wl'] == W) 49 | # assert all(data['il'] == il) 50 | # assert all(data['jl'] == jl) 51 | 52 | # timeseries_sample = {'B01': [], 'B02': [], 'B03': [], 'B04': [], 'B05': [], 'B06': [], 'B07': [], 53 | # 'B08': [], 'B8A': [], 'B09': [], 'B10': [], 'B11': [], 'B12': [], 'doy': []} 54 | timeseries_sample = {band: [] for band in bands} 55 | timeseries_sample['doy'] = [] 56 | for sample_info in data[['sample_path', 'DOY']].values: 57 | # sample_info = data[['sample_path', 'DOY']].values[0] 58 | impath, doy = sample_info 59 | 60 | with open(impath, 'rb') as handle: 61 | sample = pickle.load(handle, encoding='latin1') 62 | 63 | # image falls in black region for this product (should have been excluded in extract_images_for_parcel_labels.py) 64 | if sample[refband].sum() == 0: 65 | # print('zero sum') 66 | continue 67 | 68 | # image does not match required size (should have been excluded in extract_images_for_parcel_labels.py) 69 | height, width = sample[refband].shape 70 | if (height != sample_size) or (width != sample_size): 71 | # print('unequal size') 72 | 
continue 73 | 74 | # for key in ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12']: 75 | for key in bands: 76 | timeseries_sample[key].append(sample[key]) 77 | timeseries_sample['doy'].append(np.array(doy)) 78 | 79 | # for key in ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12', 'doy']: 80 | for key in bands: 81 | timeseries_sample[key] = np.stack(timeseries_sample[key]) 82 | timeseries_sample['doy'] = np.stack(timeseries_sample['doy']) 83 | timeseries_sample['year'] = np.array(Y).astype(np.int32) 84 | 85 | timesteps = timeseries_sample[refband].shape[0] 86 | 87 | gt = saved_gt_info[(saved_gt_info['Ntl'] == yearloc[0]) & (saved_gt_info['Wtl'] == yearloc[1])] 88 | with open(gt['filepath'].values[0], 'rb') as handle: 89 | labels = pickle.load(handle, encoding='latin1') 90 | for ltype in labels.keys(): 91 | timeseries_sample[ltype.lower()] = labels[ltype] 92 | 93 | savename = os.path.join(year_savedir, "%d_%d_%s.pickle" % (int(N), int(W), Y)) 94 | with open(savename, 'wb') as handle: 95 | pickle.dump(timeseries_sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 96 | 97 | saved_files_info.append([savename, Y, N, W, sample_size, sample_size, timesteps, "completed"]) 98 | 99 | except: 100 | 101 | saved_files_info.append(["", Y, N, W, sample_size, sample_size, 0, "failed"]) 102 | 103 | saved_files_info = pd.DataFrame(data=saved_files_info, columns=['sample_path', 'Year', 'N', 'W', 'dy', 'dx', 'dt', 104 | 'status']) 105 | return saved_files_info 106 | 107 | 108 | def main(): 109 | 110 | global yearloc_groups 111 | global iminfo 112 | global labels 113 | global year_savedir 114 | global saved_gt_info 115 | 116 | iminfo = pd.read_csv(os.path.join(windows_dir, "extracted_windows_data_info.csv")) 117 | crs = iminfo['crs'].iloc[0] 118 | 119 | # remove non extracted locations 120 | iminfo = iminfo[~pd.isnull(iminfo['sample_path'])].reset_index(drop=True) 121 | iminfo['DOY'] = iminfo['Date'].apply(lambda s: get_doy(str(s))) 122 | iminfo['Year'] = iminfo['Date'].apply(lambda s: str(s)[:4]) 123 | 124 | # ground truths 125 | # gtinfo = pd.read_csv(os.path.join(windows_dir, "extracted_windows_data_info.csv")) 126 | # 127 | # gtfiles = os.listdir(ground_truths_dir) 128 | # years = [find_number(s, "Y") for s in gtfiles] 129 | # files = {year: {} for year in set(years)} 130 | # for i, file in enumerate(gtfiles): 131 | # if not file.startswith('INVALID'): 132 | # files[years[i]][file.split("_")[0]] = file 133 | # print("found ground truths in raster for years %s" % ", ".join(list(files.keys()))) 134 | gtfiles = os.listdir(ground_truths_dir) 135 | 136 | saved_files_info = [] 137 | 138 | for gtfile in gtfiles: 139 | # gtfile = gtfiles[0] 140 | 141 | saved_gt_info = pd.read_csv(os.path.join(ground_truths_dir, gtfile, 'saved_data_info.csv')) 142 | 143 | year = find_number(gtfile, "Y") 144 | CRSl = find_number(gtfile, "CRS") 145 | 146 | year_savedir = os.path.join(savedir, year) 147 | if not os.path.isdir(year_savedir): 148 | os.makedirs(year_savedir) 149 | 150 | yearloc_groups = iminfo[iminfo['Year'] == year].groupby(['Nl', 'Wl'], as_index=False).groups 151 | yearlocs = list(yearloc_groups.keys()) 152 | 153 | df = run_pool(yearlocs, match_labels_images, num_processes) 154 | df = pd.concat(df) 155 | 156 | saved_files_info.append(df) 157 | 158 | 159 | df = pd.concat(saved_files_info).reset_index(drop=True) 160 | df['crs'] = crs 161 | df.to_csv(os.path.join(savedir, "saved_timeseries_data_info.csv"), index=False) 162 | 163 | # delete 
windows dir 164 | # shutil.rmtree(windows_dir) 165 | 166 | 167 | if __name__ == "__main__": 168 | 169 | # parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 170 | # parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 171 | # parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 172 | # parser.add_argument('--windows_dir', help='directory containing extracted windows from sentinel products') 173 | # parser.add_argument('--savedir', help='save directory for image timeseries with labels') 174 | # parser.add_argument('--bands', default=None, help='which satellite image bands to use') 175 | # parser.add_argument('--res', default=10, help='pixel size in meters') 176 | # parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 177 | # parser.add_argument('--num_processes', default=4, help='number of parallel processes') 178 | # # --------------------------------------------------------------------------------------------- 179 | # 180 | # args = parser.parse_args() 181 | # 182 | # ground_truths_dir = args.ground_truths_dir 183 | # 184 | # products_dir = args.products_dir 185 | # 186 | # windows_dir = args.windows_dir 187 | # 188 | # savedir = args.savedir 189 | # if not os.path.exists(savedir): 190 | # os.makedirs(savedir) 191 | # 192 | # res = int(args.res) 193 | # 194 | # sample_size = int(args.sample_size) 195 | # 196 | # num_processes = int(args.num_processes) 197 | # 198 | # bands = args.bands 199 | # 200 | # if bands == 'None': 201 | # bands = list(mult.keys()) 202 | # else: 203 | # bands = bands.split(',') 204 | # 205 | # main() 206 | 207 | 208 | ground_truths_dir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/LABELS4' 209 | products_dir = '/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2018/cloud_0_30' 210 | windows_dir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/IMAGES' 211 | savedir = '/media/michaeltrs/sdb/HD2/Data/Satellite_Imagery/RPG/T31FM_18/TIMESERIES2' 212 | if not os.path.exists(savedir): 213 | os.makedirs(savedir) 214 | 215 | res = 10 216 | sample_size = 100 217 | num_processes = 4 218 | bands = 'None' 219 | 220 | 221 | if bands == 'None': 222 | bands = list(mult.keys()) 223 | else: 224 | bands = bands.split(',') 225 | 226 | # main() 227 | -------------------------------------------------------------------------------- /dataset/labelled_dense/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/dataset/labelled_dense/__init__.py -------------------------------------------------------------------------------- /dataset/labelled_dense/extract_images_for_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a set of S2 tiles and a labelled_dense lable map, extract crops of images matching the location of labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import rasterio 7 | import numpy as np 8 | import os 9 | from glob import glob 10 | import pickle 11 | if __name__ == "__main__" and __package__ is None: 12 | from sys import path 13 | from os.path import dirname as dir 14 | path.insert(0, dir(dir(path[0]))) 15 | __package__ = "examples" 16 | from utils.data_utils import find_number 17 | from utils.geospatial_data_utils import GeoTransform 18 | from 
utils.multiprocessing_utils import run_pool 19 | from utils.sentinel_products_utils import get_S2prod_info 20 | 21 | 22 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 23 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 24 | 25 | 26 | def extract_images(imdirs): 27 | 28 | jp2s = ["%s.jp2" % i for i in bands] 29 | 30 | saved_files_info = [] 31 | 32 | for ii, imdir in enumerate(imdirs): 33 | # ii, imdir = 0, imdirs[0] 34 | 35 | print("unfolding product %d of %d" % (ii, len(imdirs))) 36 | 37 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 38 | 39 | # read product 40 | data = {} 41 | for jp2 in jp2s: 42 | with rasterio.open("%s/%s_%s" % (imdir, imname, jp2)) as f: 43 | data[jp2[:-4]] = f.read(1) 44 | 45 | geotransform_label2prod = GeoTransform(CRSl, str(f.crs).split(':')[1], loc2loc=CRSl != '4326') 46 | Wp, Np = np.array(f.transform)[2], np.array(f.transform)[5] 47 | 48 | prod_savedir = os.path.join(savedir, imdir.split("/")[-4].split(".")[0]) 49 | if not os.path.exists(prod_savedir): 50 | os.makedirs(prod_savedir) 51 | 52 | for i in range(int(num_rows)): 53 | 54 | for j in range(int(num_cols)): 55 | # i, j = 2, 0 56 | 57 | if i * num_cols + j == 1000: 58 | print("row %d of %d, column %d of %d" % (i, num_rows, j, num_cols)) 59 | 60 | # Nij = Nl - i * res * sample_size # N for extracted label window 61 | # Wij = Wl + j * res * sample_size # W for extracted label window 62 | # ip = (Np - Nij) / (res * sample_size) # product row 63 | # jp = (Wij - Wp) / (res * sample_size) # product column 64 | # Nl, Wl = geotransform_label2prod(Wl, Nl) 65 | Nij = Nl - i * 10 * sample_size # N for extracted label window 66 | Wij = Wl + j * 10 * sample_size # W for extracted label window 67 | Wij, Nij = geotransform_label2prod(Wij, Nij) 68 | ip = (Np - Nij) / (10 * sample_size) # product row 69 | jp = (Wij - Wp) / (10 * sample_size) # product column 70 | 71 | # exception: image id falls outside sentinel product 72 | if (ip < 0) or (jp < 0): 73 | saved_files_info.append( 74 | [None, Nij, Wij, Nl, Wl, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, 75 | "sample outside Sentinel product"]) 76 | continue 77 | 78 | date = imdir.split("/")[-4].split(".")[0].split("_")[2][:8] 79 | 80 | # exception: no labels for this location 81 | if labels[i * label_mult * sample_size: (i + 1) * label_mult * sample_size, j * label_mult * sample_size: (j + 1) * label_mult * sample_size].sum() == 0: 82 | saved_files_info.append( 83 | [None, Nij, Wij, Nl, Wl, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, "no labels"]) 84 | continue 85 | 86 | # load image data 87 | sample = {} 88 | for jp2 in jp2s: 89 | xpmin = int(np.round(mult[jp2[:-4]] * ip * sample_size)) 90 | ypmin = int(np.round(mult[jp2[:-4]] * jp * sample_size)) 91 | sample[jp2[:-4]] = data[jp2[:-4]][xpmin: xpmin + int(mult[jp2[:-4]] * sample_size), 92 | ypmin: ypmin + int(mult[jp2[:-4]] * sample_size)] 93 | 94 | # assert all images are square, intended to catch images at the edge of a product 95 | if any([sample[k].shape[0] != sample[k].shape[1] for k in sample.keys()]): 96 | continue 97 | 98 | # exception: image is all zero for this location 99 | if sample[jp2[:-4]].sum() == 0: 100 | saved_files_info.append( 101 | [None, Nij, Wij, Nl, Wl, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, "no image"]) 102 | continue 103 | 104 | # none of the above exceptions apply, save image data 105 | sample_save_path = 
"%s/N%d_W%d_D%s.pickle" % (prod_savedir, int(Nij), int(Wij), date) 106 | with open(sample_save_path, 'wb') as handle: 107 | pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 108 | 109 | saved_files_info.append( 110 | [sample_save_path, Nij, Wij, Nl, Wl, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, "ok"]) 111 | 112 | df = pd.DataFrame(data=saved_files_info, 113 | columns=['sample_path', 'Nij', 'Wij', 'Nl', 'Wl', 'Np', 'Wp', 'il', 'jl', 'ip', 'jp', 114 | 'height', 'width', 'Date', 'S2_prod_imdir', "comment"]) 115 | return df 116 | 117 | 118 | def main(): 119 | global labels 120 | global Nl 121 | global Wl 122 | global CRSl 123 | global num_rows 124 | global num_cols 125 | global label_mult 126 | 127 | # read all extracted ground truths files 128 | gtfiles = os.listdir(ground_truths_dir) 129 | years = [find_number(s, "Y") for s in gtfiles] 130 | files = {year: {} for year in set(years)} 131 | for i, file in enumerate(gtfiles): 132 | if not file.startswith('INVALID'): 133 | files[years[i]][file.split("_")[0]] = file 134 | print("found ground truths in raster for years %s" % ", ".join(list(files.keys()))) 135 | 136 | label_mult = int(10. / res) 137 | 138 | # get information on saved sentinel products 139 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 140 | prod_df = get_S2prod_info(imdirs) 141 | prod_df['Year'] = prod_df['Time'].apply(lambda s: s[:4]) 142 | 143 | out = [] 144 | for year in set(years): 145 | # year = years[0] 146 | # ground truths 147 | labels = np.loadtxt(os.path.join(ground_truths_dir, files[year]['LABELS']), dtype=np.float32) 148 | Nl = int(find_number(files[year]['LABELS'], "N")) 149 | Wl = int(find_number(files[year]['LABELS'], "W")) 150 | 151 | num_rows, num_cols = [d / (10 / res * sample_size) for d in labels.shape] 152 | assert (np.ceil(num_rows) == num_rows) and (np.ceil(num_cols) == num_cols), \ 153 | "sample size should be fitting exactly in labels, this suggests an error in extract_labels_raster script" 154 | CRSl = find_number(files[year]['LABELS'], "CRS") 155 | 156 | # sentinel products 157 | products = prod_df[prod_df['Year'] == year] 158 | imdirs = products['path'].tolist() 159 | 160 | df_year = run_pool(imdirs, extract_images, num_processes) 161 | # df = extract_images([imdirs[0]]) 162 | out.append(pd.concat(df_year)) 163 | 164 | 165 | df = pd.concat(out).reset_index(drop=True) 166 | df['crs'] = CRSl 167 | df.to_csv(os.path.join(savedir, "extracted_windows_data_info.csv"), index=False) 168 | 169 | 170 | 171 | if __name__ == "__main__": 172 | 173 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 174 | parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 175 | parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 176 | parser.add_argument('--savedir', help='save directory to extract sentinel products windows') 177 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 178 | parser.add_argument('--res', default=10, help='pixel size in meters') 179 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 180 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 181 | # --------------------------------------------------------------------------------------------- 182 | 183 | args = parser.parse_args() 184 | 185 | ground_truths_dir = args.ground_truths_dir 186 | 187 | products_dir = args.products_dir 188 
| 189 | savedir = args.savedir 190 | print("savedir: ", savedir) 191 | if not os.path.exists(savedir): 192 | os.makedirs(savedir) 193 | 194 | bands = args.bands 195 | if bands == 'None': 196 | bands = list(mult.keys()) 197 | else: 198 | bands = bands.split(',') 199 | 200 | res = float(args.res) 201 | assert np.ceil(10. / res) == 10. / res, "Label pixel size should divide min satellite pixel size (10m), but %.1f was selected" % res 202 | 203 | sample_size = int(args.sample_size) 204 | 205 | num_processes = int(args.num_processes) 206 | 207 | main() 208 | 209 | -------------------------------------------------------------------------------- /dataset/labelled_dense/extract_images_for_parcel_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a set of S2 tiles and a labelled_dense lable map, extract crops of images matching the location of labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import rasterio 7 | import numpy as np 8 | import os 9 | from glob import glob 10 | import pickle 11 | if __name__ == "__main__" and __package__ is None: 12 | from sys import path 13 | from os.path import dirname as dir 14 | path.insert(0, dir(dir(path[0]))) 15 | __package__ = "examples" 16 | from utils.data_utils import find_number 17 | from utils.geospatial_data_utils import GeoTransform 18 | from utils.multiprocessing_utils import run_pool 19 | from utils.sentinel_products_utils import get_S2prod_info 20 | 21 | 22 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 23 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 24 | 25 | 26 | def extract_images(imdirs): 27 | 28 | jp2s = ["%s.jp2" % i for i in bands] 29 | 30 | saved_files_info = [] 31 | 32 | for ii, imdir in enumerate(imdirs): 33 | 34 | print("unfolding product %d of %d" % (ii, len(imdirs))) 35 | 36 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 37 | 38 | # read product 39 | data = {} 40 | for jp2 in jp2s: 41 | with rasterio.open("%s/%s_%s" % (imdir, imname, jp2)) as f: 42 | data[jp2[:-4]] = f.read(1) 43 | 44 | geotransform_label2prod = GeoTransform(CRSl, str(f.crs).split(':')[1], loc2loc=CRSl != '4326') 45 | Wp, Np = np.array(f.transform)[2], np.array(f.transform)[5] 46 | 47 | prod_savedir = os.path.join(savedir, imdir.split("/")[-4].split(".")[0]) 48 | if not os.path.exists(prod_savedir): 49 | os.makedirs(prod_savedir) 50 | 51 | for i in range(saved_gt_info.shape[0]): 52 | 53 | Nl = saved_gt_info.iloc[i]['Ntl'] 54 | Wl = saved_gt_info.iloc[i]['Wtl'] 55 | Wlp, Nlp = geotransform_label2prod(Wl, Nl) 56 | 57 | ip = int(np.round((Np - Nlp) / 10.)) 58 | jp = int(np.round((Wlp - Wp) / 10.)) 59 | 60 | date = imdir.split("/")[-4].split(".")[0].split("_")[2][:8] 61 | 62 | sample = {} 63 | for jp2 in jp2s: 64 | xpmin = int(np.round(mult[jp2[:-4]] * ip)) 65 | ypmin = int(np.round(mult[jp2[:-4]] * jp)) 66 | sample[jp2[:-4]] = data[jp2[:-4]][xpmin: xpmin + int(mult[jp2[:-4]] * sample_size), 67 | ypmin: ypmin + int(mult[jp2[:-4]] * sample_size)] 68 | 69 | # this parcel falls in black region for this product 70 | if sample[jp2[:-4]].sum() == 0: 71 | saved_files_info.append( 72 | ["", Nlp, Wlp, Nl, Wl, Np, Wp, ip, jp, sample_size, sample_size, date, imdir, "no image"]) 73 | continue 74 | 75 | sample_save_path = "%s/N%d_W%d_D%s_CRS%s.pickle" % (prod_savedir, int(Nl), int(Wl), date, CRSl) 76 | with open(sample_save_path, 'wb') as handle: 77 | pickle.dump(sample, handle, 
protocol=pickle.HIGHEST_PROTOCOL) 78 | 79 | saved_files_info.append( 80 | [sample_save_path, Nlp, Wlp, Nl, Wl, Np, Wp, ip, jp, sample_size, sample_size, date, imdir, "ok"]) 81 | 82 | df = pd.DataFrame(data=saved_files_info, 83 | columns=['sample_path', 'Nlp', 'Wlp', 'Nl', 'Wl', 'Np', 'Wp', 'ip', 'jp', 84 | 'height', 'width', 'Date', 'S2_prod_imdir', "comment"]) 85 | return df 86 | 87 | 88 | def main(): 89 | # ground truths 90 | gtdirs = [f for f in os.listdir(ground_truths_dir) if os.path.isdir(os.path.join(ground_truths_dir, f))] 91 | 92 | global CRSl 93 | global saved_gt_info 94 | 95 | # sentinel products 96 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 97 | prod_df = get_S2prod_info(imdirs) 98 | prod_df['Year'] = prod_df['Time'].apply(lambda s: s[:4]) 99 | 100 | out = [] 101 | for gtdir in gtdirs: 102 | 103 | # ground truths 104 | saved_gt_info = pd.read_csv(os.path.join(ground_truths_dir, gtdir, 'saved_data_info.csv')) 105 | 106 | year = find_number(gtdir, "Y") 107 | CRSl = find_number(gtdir, "CRS") 108 | 109 | # sentinel products 110 | products = prod_df[prod_df['Year'] == year] 111 | imdirs = products['path'].tolist() 112 | 113 | df_year = run_pool(imdirs, extract_images, num_processes) 114 | 115 | out.append(pd.concat(df_year)) 116 | 117 | df = pd.concat(out).reset_index(drop=True) 118 | df['crs'] = CRSl 119 | df.to_csv(os.path.join(savedir, "extracted_windows_data_info.csv"), index=False) 120 | 121 | 122 | if __name__ == "__main__": 123 | 124 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 125 | parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 126 | parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 127 | parser.add_argument('--savedir', help='save directory to extract sentinel products windows') 128 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 129 | parser.add_argument('--res', default=10, help='pixel size in meters') 130 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 131 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 132 | # --------------------------------------------------------------------------------------------- 133 | 134 | args = parser.parse_args() 135 | 136 | ground_truths_dir = args.ground_truths_dir 137 | 138 | products_dir = args.products_dir 139 | 140 | savedir = args.savedir 141 | print("savedir: ", savedir) 142 | if not os.path.exists(savedir): 143 | os.makedirs(savedir) 144 | 145 | bands = args.bands 146 | if bands == 'None': 147 | bands = list(mult.keys()) 148 | else: 149 | bands = bands.split(',') 150 | 151 | # res = int(args.res) 152 | res = float(args.res) 153 | 154 | sample_size = int(args.sample_size) 155 | 156 | num_processes = int(args.num_processes) 157 | 158 | main() 159 | -------------------------------------------------------------------------------- /dataset/labelled_dense/extract_labels_raster.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | from shapely import geometry 5 | import os 6 | from glob import glob 7 | from multiprocessing import Pool 8 | if __name__ == "__main__" and __package__ is None: 9 | from sys import path 10 | from os.path import dirname as dir 11 | path.insert(0, dir(dir(path[0]))) 12 | __package__ = "examples" 13 | from utils.geospatial_data_utils 
import GeoTransform, get_points_from_str_poly, closest_point_to_poly 14 | from utils.multiprocessing_utils import split_df 15 | from utils.sentinel_products_utils import get_S2prod_info 16 | 17 | 18 | def is_valid(parcel_poly, pxmin, pymax): 19 | """ 20 | checks if parcel_poly polygon has valid shape 21 | """ 22 | isvalid = True 23 | i = 0 24 | j = 0 25 | pix_points = [[pxmin + loc[0] * res, pymax - loc[1] * res] for loc in 26 | [[j, i], [j + 1, i], [j + 1, i + 1], [j, i + 1], [j, i]]] 27 | try: 28 | parcel_poly.intersection(geometry.Polygon(pix_points)).area 29 | except: 30 | isvalid = False 31 | return isvalid 32 | 33 | 34 | def extract_labels_raster(inputs): 35 | # inputs = inputs[0] 36 | rank, geodata, anchor, dx, dy = inputs 37 | 38 | # arrays to save 39 | AOI_labels = np.zeros((int(np.round(dy / res)), int(np.round(dx / res))), dtype=np.float32) # + max_label + 1 40 | AOI_ids = np.zeros((int(np.round(dy / res)), int(np.round(dx / res))), dtype=np.float32) 41 | AOI_masks = AOI_ids.copy() 42 | # additional/helper arrays 43 | AOI_ratios = AOI_ids.copy() 44 | AOI_distances = AOI_ids.copy() 45 | # AOI_alphas = AOI_ids.copy() 46 | 47 | invalid_shapes = [] 48 | for ii in range(geodata.shape[0]): 49 | # ii = 0 50 | print("process %d, parcel %d of %d" % (rank, ii+1, geodata.shape[0])) 51 | parcel_poly = geodata['geometry'][ii] 52 | label = geodata['ground_truth'][ii] 53 | id = geodata['id'][ii] 54 | 55 | points = get_points_from_str_poly(parcel_poly) 56 | pr = (points - anchor) 57 | parcel_poly = geometry.Polygon(pr) 58 | 59 | pxmin, pymin = pr.min(axis=0) 60 | pxmax, pymax = pr.max(axis=0) 61 | 62 | if not is_valid(parcel_poly, pxmin, pymax): 63 | try: 64 | int_area = sum( 65 | [geometry.Polygon(np.array(pol.coords[:])).area for pol in parcel_poly.buffer(0).interiors]) 66 | ext_area = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])).area 67 | if int_area / ext_area < 0.05: # threshold for discarding a parcel polygon 68 | print("included, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 69 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 70 | parcel_poly = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])) 71 | pr = np.stack([np.array(i) for i in parcel_poly.exterior.coords.xy]).T 72 | pxmin, pymin = pr.min(axis=0) 73 | pxmax, pymax = pr.max(axis=0) 74 | else: 75 | print("excluded, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 76 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 77 | invalid_shapes.append(geodata.iloc[ii]) 78 | continue 79 | except: 80 | continue 81 | 82 | row0 = int((1 - pr[:, 1].max() / dy) * AOI_labels.shape[0]) 83 | row1 = int((1 - pr[:, 1].min() / dy) * AOI_labels.shape[0]) 84 | col0 = int(pr[:, 0].min() / dx * AOI_labels.shape[1]) 85 | col1 = int(pr[:, 0].max() / dx * AOI_labels.shape[1]) + 1 86 | 87 | H, W = row1 - row0, col1 - col0 88 | 89 | for i in range(H): 90 | 91 | for j in range(W): 92 | # i = j = 15 93 | try: 94 | 95 | pix_points = [[pxmin + loc[0] * res, pymax - loc[1] * res] for loc in 96 | [[j, i], [j + 1, i], [j + 1, i + 1], [j, i + 1], [j, i]]] 97 | 98 | pix_poly = geometry.Polygon(pix_points) 99 | 100 | value = parcel_poly.intersection(pix_poly).area / res ** 2 101 | 102 | if value == 0: # no intersection 103 | continue 104 | 105 | elif AOI_ratios[row0 + i, col0 + j] == 1.0: # interior of at least another poly 106 | 107 | if AOI_labels[row0 + i, col0 + j] != label: # mask only if label conflict 108 | 
AOI_masks[row0 + i, col0 + j] = 2 109 | continue 110 | 111 | elif AOI_ratios[row0 + i, col0 + j] > 0: # at least partly assigned to another poly 112 | if AOI_labels[row0 + i, col0 + j] != label: # mask only if label conflict 113 | AOI_masks[row0 + i, col0 + j] = 1 114 | 115 | if value > AOI_ratios[row0 + i, col0 + j]: # this poly covers a larger area, assign here 116 | AOI_labels[row0 + i, col0 + j] = label 117 | AOI_ratios[row0 + i, col0 + j] = value 118 | AOI_ids[row0 + i, col0 + j] = id 119 | pix_center = np.array(pix_points)[:-1].mean(axis=0) 120 | AOI_distances[row0 + i, col0 + j] = closest_point_to_poly( 121 | np.array(parcel_poly.exterior.coords.xy).T, pix_center, return_dist=True) 122 | 123 | except: 124 | continue 125 | 126 | return AOI_labels, AOI_ids, AOI_masks, AOI_ratios, AOI_distances, pd.DataFrame(invalid_shapes) 127 | 128 | 129 | def main(): 130 | # read ground truth data 131 | gt_df = pd.read_csv(ground_truths_file) 132 | 133 | # ensure all polygons use the same crs 134 | assert (gt_df['crs'] == gt_df['crs'].iloc[0]).all(), \ 135 | "Polygons corresponding to multiple CRS were found in %s" % ground_truths_file 136 | crs = gt_df['crs'].iloc[0] 137 | 138 | # find unique years 139 | years = gt_df['year'].drop_duplicates().to_list() 140 | print("found ground truth data for years %s" % ", ".join([str(i) for i in years])) 141 | 142 | # 0 class will indicate background, if 0 class already exists in labels add one 143 | if 0 in gt_df['ground_truth'].drop_duplicates().tolist(): 144 | gt_df['ground_truth'] += 1 145 | 146 | # sentinel products 147 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 148 | prod_df = get_S2prod_info(imdirs) 149 | assert (prod_df['West']==prod_df['West'].iloc[0]).all() and (prod_df['North']==prod_df['North'].iloc[0]).all(),\ 150 | "Sentinel products corresponding to multiple tiles were found in %s" % products_dir 151 | geotr = GeoTransform(intr=prod_df['crs'].iloc[0].split(':')[1], outtr=gt_df['crs'].iloc[0], loc2loc=True) 152 | prod_WN = prod_df[['West', 'North']].iloc[0].tolist() 153 | prod_WN = geotr(prod_WN[0], prod_WN[1]) # in ground truth data coordinate system 154 | d = (10 * prod_df[['height', 'width']].iloc[0].values).tolist() 155 | 156 | # find all ground truth data that fall inside sentinel product 157 | prod_poly = geometry.Polygon([[prod_WN[0] + loc[0] * d[0], prod_WN[1] - loc[1] * d[1]] for loc in 158 | [[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]) 159 | 160 | def f(x): 161 | try: 162 | x = get_points_from_str_poly(x) 163 | W = x[:, 0].min() 164 | E = x[:, 0].max() 165 | S = x[:, 1].min() 166 | N = x[:, 1].max() 167 | x = geometry.Polygon(x) 168 | inratio = prod_poly.intersection(x).area / x.area 169 | return np.array([N, S, W, E, inratio]) 170 | except: 171 | return np.array([0, 0, 0, 0, 0]) 172 | 173 | gt_df[['N', 'S', 'W', 'E', 'inratio']] = np.stack(gt_df['geometry'].apply(f).values) 174 | gt_df = gt_df[gt_df['inratio'] == 1.0] 175 | print("found %d polygons inside sentinel tile" % gt_df.shape[0]) 176 | 177 | # increasing AOI size will allow extracting the parcels at the boundary of the true AOI placed at the center of the 178 | # image. This shouldnt make a difference when splitting the AOI by grid a slabels will be zero in these locations 179 | maxy = int(np.ceil(gt_df['N'].max())) + res * sample_size # N 180 | miny = int(np.floor(gt_df['S'].min())) - res * sample_size # S! 181 | maxx = int(np.ceil(gt_df['E'].max())) + res * sample_size # E! 
182 | minx = int(np.floor(gt_df['W'].min())) - res * sample_size # W 183 | 184 | # increase AOI dimensions to match integer multiple of sample size 185 | if np.ceil((maxy - miny) / (sample_size * 10)) != (maxy - miny) / (sample_size * 10): 186 | dy = (np.ceil((maxy - miny) / (sample_size * 10)) - (maxy - miny) / (sample_size * 10)) * (sample_size * 10) 187 | miny = miny - dy 188 | if np.ceil((maxx - minx) / (sample_size * 10)) != (maxx - minx) / (sample_size * 10): 189 | dx = (np.ceil((maxx - minx) / (sample_size * 10)) - (maxx - minx) / (sample_size * 10)) * (sample_size * 10) 190 | maxx = maxx + dx 191 | dx = maxx - minx 192 | dy = maxy - miny 193 | anchor = minx, miny # WS 194 | 195 | pool = Pool(num_processes) 196 | 197 | for year in years: 198 | # year = years[0] 199 | 200 | geodata = gt_df[gt_df['year'] == year].reset_index(drop=True) 201 | 202 | inputs = [[i, df_, anchor, dx, dy] for i, df_ in enumerate(split_df(geodata, num_processes))] 203 | 204 | outputs = pool.map(extract_labels_raster, inputs) 205 | AOI_labels = np.stack([out_[0] for out_ in outputs]) 206 | AOI_ids = np.stack([out_[1] for out_ in outputs]) 207 | AOI_masks = np.stack([out_[2] for out_ in outputs]) 208 | AOI_ratios = np.stack([out_[3] for out_ in outputs]) 209 | AOI_distances = np.stack([out_[4] for out_ in outputs]) 210 | invalid_shapes = pd.concat([out_[5] for out_ in outputs]) 211 | 212 | labels = AOI_labels.max(axis=0) 213 | masks = AOI_masks.max(axis=0) 214 | ids = AOI_ids.sum(axis=0) 215 | ratios = AOI_ratios.max(axis=0) 216 | distances = AOI_distances.max(axis=0) 217 | 218 | locs = np.stack(np.where((AOI_labels > 0).sum(axis=0) > 1)).T 219 | 220 | for i, loc in enumerate(locs): 221 | 222 | if i % 1000 == 0: 223 | print("correcting inter process overlaps, step %d of %d" % (i, locs.shape[0])) 224 | 225 | if any(AOI_ratios[:, loc[0], loc[1]] == 1.0): 226 | masks[loc[0], loc[1]] = 2 227 | else: 228 | masks[loc[0], loc[1]] = 1 229 | 230 | idx = np.argmax(AOI_ratios[:, loc[0], loc[1]]) 231 | labels[loc[0], loc[1]] = AOI_labels[idx, loc[0], loc[1]] 232 | ids[loc[0], loc[1]] = AOI_ids[idx, loc[0], loc[1]] 233 | ratios[loc[0], loc[1]] = AOI_ratios[idx, loc[0], loc[1]] 234 | distances[loc[0], loc[1]] = AOI_distances[idx, loc[0], loc[1]] 235 | 236 | np.savetxt("%s/LABELS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 237 | (savedir, str(year), str(int(maxy)), str(int(minx)), res, str(crs)), labels) 238 | np.savetxt("%s/IDS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 239 | (savedir, str(year), str(int(maxy)), str(int(minx)), res, str(crs)), ids) 240 | np.savetxt("%s/MASKS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 241 | (savedir, str(year), str(int(maxy)), str(int(minx)), res, str(crs)), masks) 242 | np.savetxt("%s/RATIOS_Y%s_N%s_W%s_R%d_CRS%s.csv" % 243 | (savedir, str(year), str(int(maxy)), str(int(minx)), res, str(crs)), ratios) 244 | np.savetxt("%s/DISTANCES_Y%s_N%s_W%s_R%d_CRS%s.csv" % 245 | (savedir, str(year), str(int(maxy)), str(int(minx)), res, str(crs)), distances) 246 | 247 | if invalid_shapes.shape[0] != 0: 248 | invalid_shapes.to_csv( 249 | "%s/INVALID_Y%s_N%s_W%s_R%d_CRS%s.csv" % 250 | (savedir, str(year), str(maxy), str(int(maxx)), res, str(crs)), index=False) 251 | 252 | 253 | 254 | if __name__ == "__main__": 255 | 256 | parser = argparse.ArgumentParser(description='Make raster from shapely polygons') 257 | parser.add_argument('--ground_truths_file', help='filename containing ground truth parcel polygons') 258 | parser.add_argument('--products_dir', help='directory containing sentinel products') 259 | parser.add_argument('--savedir', help='save 
directory to extract ground truths in raster mode') 260 | parser.add_argument('--res', default=10, help='pixel size in meters') 261 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset image samples') 262 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 263 | 264 | args = parser.parse_args() 265 | 266 | ground_truths_file = args.ground_truths_file 267 | 268 | products_dir = args.products_dir 269 | 270 | savedir = args.savedir 271 | print("savedir: ", savedir) 272 | if not os.path.exists(savedir): 273 | os.makedirs(savedir) 274 | 275 | res = float(args.res) 276 | assert np.ceil(10. / res) == 10. / res, "Label pixel size should divide min satellite pixel size (10m), but %.1f was selected" % res 277 | 278 | sample_size = int(args.sample_size) 279 | 280 | num_processes = int(args.num_processes) 281 | 282 | main() 283 | -------------------------------------------------------------------------------- /dataset/labelled_dense/extract_parcel_ground_truths.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | from shapely import geometry 5 | import os 6 | from glob import glob 7 | from multiprocessing import Pool 8 | if __name__ == "__main__" and __package__ is None: 9 | from sys import path 10 | from os.path import dirname as dir 11 | path.insert(0, dir(dir(path[0]))) 12 | __package__ = "examples" 13 | from utils.geospatial_data_utils import GeoTransform, get_points_from_str_poly, simplify_poly_points, is_valid, str_line_eq 14 | from utils.multiprocessing_utils import split_df 15 | from utils.sentinel_products_utils import get_S2prod_info 16 | import pickle 17 | 18 | 19 | def extract_parcel_labels_raster(inputs): 20 | 21 | # inputs = inputs[0] 22 | rank, geodata, W, N, Wp, Np, year, crs = inputs 23 | 24 | # arrays to save 25 | year_savedir = os.path.join(savedir, 'Y%s_N%s_W%s_R%d_CRS%s' % (year, N, W, res, crs)) 26 | if not os.path.exists(year_savedir): 27 | os.makedirs(year_savedir) 28 | 29 | saved_data_info = [] 30 | for ii in range(geodata.shape[0]): 31 | print("process %d, parcel %d of %d" % (rank, ii+1, geodata.shape[0])) 32 | parcel_poly = geodata['geometry'][ii] 33 | label = geodata['ground_truth'][ii] 34 | id = geodata['id'][ii] 35 | 36 | points = get_points_from_str_poly(parcel_poly) 37 | anchor = np.array(geometry.Polygon(points).centroid) # anchor is centroid of parcel 38 | # anchor = points.mean(axis=0) 39 | N0 = anchor[1] + sample_size * 10. / 2. # Nmax of image 40 | W0 = anchor[0] - sample_size * 10. / 2. # Wmin of image 41 | 42 | # correct for non integer offset wrt product Nmax, Wmax (top-left) coordinates 43 | dN = (Np - N0) % 60 44 | dW = (W0 - Wp) % 60 45 | N0 += dN 46 | W0 -= dW 47 | anchor = np.array([W0 + sample_size * 10. / 2., N0 - sample_size * 10. / 2.]) # recalculate centroid 48 | 49 | pr = (points - anchor + sample_size * 10. 
/ 2) # local polygon coordinates 50 | parcel_poly = geometry.Polygon(pr) 51 | 52 | ### Define criterion for removing very slender fields 53 | slenderness = parcel_poly.area / parcel_poly.length # 1.0 54 | if slenderness < 5: 55 | continue 56 | 57 | # min, max coordinates 58 | pxmin, pymin = pr.min(axis=0) 59 | pxmax, pymax = pr.max(axis=0) 60 | 61 | # DONT DO VERY SMALL ONES 62 | if ((pxmax - pxmin) < 50) or ((pymax - pymin) < 50): 63 | continue 64 | 65 | if not is_valid(parcel_poly, pxmin, pymax): 66 | try: 67 | int_area = sum( 68 | [geometry.Polygon(np.array(pol.coords[:])).area for pol in parcel_poly.buffer(0).interiors]) 69 | ext_area = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])).area 70 | if int_area / ext_area < 0.05: # threshold for discarding a parcel polygon 71 | print("included, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 72 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 73 | parcel_poly = geometry.Polygon(np.array(parcel_poly.buffer(0).exterior.coords[:])) 74 | pr = np.stack([np.array(i) for i in parcel_poly.exterior.coords.xy]).T 75 | pxmin, pymin = pr.min(axis=0) 76 | pxmax, pymax = pr.max(axis=0) 77 | else: 78 | print("excluded, number of interior areas %d, intarea: %.0f, extarea: %.0f, ratio: %.4f" % 79 | (len(parcel_poly.buffer(0).interiors), int_area, ext_area, int_area / ext_area)) 80 | values = geodata.iloc[ii].to_list() 81 | for v in [N0, W0, None]: 82 | values.append(v) 83 | saved_data_info.append(values) 84 | continue 85 | except: 86 | continue 87 | 88 | # define zero placeholder matrices 89 | ratios = np.zeros((label_mult * sample_size, label_mult * sample_size), dtype=np.float32) 90 | alpha = ratios.copy() 91 | global_beta = ratios.copy() 92 | local_beta = ratios.copy() 93 | 94 | # include 2 pixel threshold (this wont matter as external pixels will not update their values) 95 | row0 = int(np.floor((pymin / (sample_size * 10)) * label_mult * sample_size)) - 2 # min row containing parcel 96 | row1 = int(np.ceil((pymax / (sample_size * 10)) * label_mult * sample_size)) + 2 # max row containing parcel 97 | col0 = int(np.floor(pxmin / (sample_size * 10) * label_mult * sample_size)) - 2 # min col containing parcel 98 | col1 = int(np.ceil(pxmax / (sample_size * 10) * label_mult * sample_size)) + 2 # max col containing parcel 99 | 100 | Height, Width = row1 - row0, col1 - col0 101 | 102 | for i in range(Height): 103 | 104 | for j in range(Width): 105 | 106 | if (row0 + i) * (col0 + j) < 0: 107 | continue 108 | 109 | try: 110 | 111 | pix_points = [[res * (col0 + j + loc[1]), res * (row0 + i + loc[0])] for loc in 112 | [[-0.5, -0.5], [-0.5, 0.5], [0.5, 0.5], [0.5, -0.5], [-0.5, -0.5]]] 113 | 114 | pix_poly = geometry.Polygon(pix_points) 115 | 116 | value = parcel_poly.intersection(pix_poly).area / res ** 2 117 | if (0 < value) and (value < 1): # parcel cuts through pixel 118 | global_points = np.array(parcel_poly.boundary.intersection(pix_poly.boundary)) 119 | if global_points.shape[0] > 2: # !!! 
120 | global_points = global_points[:2] 121 | global_params = str_line_eq(global_points) 122 | alpha[label_mult * sample_size - (row0 + i + 1), col0 + j + 1] = global_params[0] 123 | global_beta[label_mult * sample_size - (row0 + i + 1), col0 + j + 1] = global_params[1] / (sample_size * res) 124 | local_points = (global_points - np.array([res * (col0 + j + 0.5), res * (row0 + i + 0.5)])) / res 125 | local_params = str_line_eq(local_points) 126 | local_beta[label_mult * sample_size - (row0 + i + 1), col0 + j + 1] = local_params[1] 127 | 128 | 129 | if value == 0: # no intersection 130 | continue 131 | 132 | ratios[label_mult * sample_size - (row0 + i + 0), col0 + j + 0] = value 133 | 134 | except: 135 | continue 136 | 137 | idxN = int(np.round((N_ - N0) / res - 1., 0)) 138 | idxW = int(np.round((W0 - W_) / res - 1., 0)) 139 | 140 | # add AOI raster ground truths 141 | labels2d = raster['LABELS'][idxN: idxN + label_mult * sample_size, idxW: idxW + label_mult * sample_size] 142 | ids2d = raster['IDS'][idxN: idxN + label_mult * sample_size, idxW: idxW + label_mult * sample_size] 143 | masks2d = raster['MASKS'][idxN: idxN + label_mult * sample_size, idxW: idxW + label_mult * sample_size] 144 | distances2d = raster['DISTANCES'][idxN: idxN + label_mult * sample_size, idxW: idxW + label_mult * sample_size] 145 | ratios2d = raster['RATIOS'][idxN: idxN + label_mult * sample_size, idxW: idxW + label_mult * sample_size] 146 | 147 | # add simpilied polygons 148 | simplified = simplify_poly_points(pr, Npoly) 149 | 150 | sample = {'N': N0, 'W': W0, 151 | 'poly_var': pr / res, 'poly_fixed': simplified / res, 152 | 'label': label, 'id': id, 153 | 'labels2d': labels2d, 'ids2d': ids2d, 'masks2d': masks2d, 'distances2d': distances2d, 154 | 'ratios': ratios, 'alpha': alpha, 'global_beta': global_beta, 'local_beta': local_beta} 155 | 156 | impath = os.path.join(year_savedir, 'N%d_E%d_ground_truths.pickle' % (N0, W0)) 157 | with open(impath, 'wb') as handle: 158 | pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 159 | 160 | values = geodata.iloc[ii].to_list() 161 | for v in [N0, W0, impath]: 162 | values.append(v) 163 | saved_data_info.append(values) 164 | 165 | return saved_data_info 166 | 167 | 168 | def main(): 169 | global N_ 170 | global W_ 171 | global R_ 172 | global CRS_ 173 | global raster 174 | global label_mult 175 | 176 | label_mult = int(10. / res) 177 | 178 | # ground truth data 179 | gt_df = pd.read_csv(ground_truths_file) 180 | if 'id' not in gt_df: 181 | print('Column "id" not included. 
Assigning values from 1 to file size') 182 | gt_df['id'] = range(1, gt_df.shape[0]+1) 183 | # gt_df['id'] = range(1, gt_df.shape[0]+1) 184 | assert (gt_df['crs'] == gt_df['crs'].iloc[0]).all(), \ 185 | "Polygons corresponding to multiple CRS were found in %s" % ground_truths_file 186 | crs = gt_df['crs'].iloc[0] 187 | yearly_grouped_gt = gt_df.groupby('year') 188 | years = list(yearly_grouped_gt.groups.keys()) 189 | print("found ground truth data for years %s" % ", ".join([str(i) for i in years])) 190 | if 0 in gt_df['ground_truth'].drop_duplicates(): 191 | gt_df['ground_truth'] += 1 192 | 193 | # AOI rasterized ground truths 194 | raster_files = [fname for fname in os.listdir(raster_labels_dir) if fname.endswith('csv')] 195 | raster = {} 196 | meta = [] 197 | for raster_file in raster_files: 198 | # raster_file = raster_files[0] 199 | ftype_ = raster_file.split("_")[0] 200 | year_ = raster_file.split("_")[1][1:] 201 | N_ = raster_file.split("_")[2][1:] 202 | W_ = raster_file.split("_")[3][1:] 203 | R_ = raster_file.split("_")[4][1:] 204 | CRS_ = raster_file.split("_")[5][3:].split('.')[0] 205 | raster[ftype_] = np.loadtxt(os.path.join(raster_labels_dir, raster_file)) 206 | meta.append([year_, N_, W_, R_, CRS_]) 207 | meta = np.array(meta) 208 | assert all([(meta[i] == meta[0]).all() for i in range(len(meta))]), \ 209 | 'Not all AOI raster ground truth files correspond to the same location, time, resolution or crs' 210 | N_ = int(N_) 211 | W_ = int(W_) 212 | R_ = int(R_) 213 | CRS_ = int(CRS_) 214 | 215 | # sentinel products 216 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 217 | prod_df = get_S2prod_info(imdirs) 218 | assert (prod_df['West']==prod_df['West'].iloc[0]).all() and (prod_df['North']==prod_df['North'].iloc[0]).all(),\ 219 | "Sentinel products corresponding to multiple tiles were found in %s" % products_dir 220 | geotr = GeoTransform(intr=prod_df['crs'].iloc[0].split(':')[1], outtr=gt_df['crs'].iloc[0], loc2loc=gt_df['crs'].iloc[0] != '4326') 221 | prod_WN = prod_df[['West', 'North']].iloc[0].tolist() 222 | prod_WN = geotr(prod_WN[0], prod_WN[1]) # in ground truth data coordinate system 223 | d = (10 * prod_df[['height', 'width']].iloc[0].values).tolist() 224 | 225 | # find all ground truth data that fall inside sentinel product 226 | prod_poly = geometry.Polygon([[prod_WN[0] + loc[0] * d[0], prod_WN[1] - loc[1] * d[1]] for loc in 227 | [[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]) 228 | print(prod_poly) 229 | def f(x): 230 | try: 231 | x = get_points_from_str_poly(x) 232 | W = x[:, 0].min() 233 | E = x[:, 0].max() 234 | S = x[:, 1].min() 235 | N = x[:, 1].max() 236 | x = geometry.Polygon(x) 237 | inratio = prod_poly.intersection(x).area / x.area 238 | return np.array([N, S, W, E, inratio]) 239 | except: 240 | return np.array([0, 0, 0, 0, 0]) 241 | 242 | gt_df[['N', 'S', 'W', 'E', 'inratio']] = np.stack(gt_df['geometry'].apply(f).values) 243 | gt_df = gt_df[gt_df['inratio'] == 1.0] 244 | print("found %d polygons inside sentinel tile" % gt_df.shape[0]) 245 | 246 | N = int(np.ceil(gt_df['N'].max())) 247 | W = int(np.floor(gt_df['W'].min())) 248 | 249 | pool = Pool(num_processes) 250 | 251 | for year in years: 252 | # year = years[0] 253 | 254 | geodata = gt_df[gt_df['year'] == year].reset_index(drop=True) 255 | 256 | inputs = [[i, df_, W, N, prod_WN[0], prod_WN[1], year, crs] for i, df_ in enumerate(split_df(geodata, num_processes))] 257 | 258 | outputs = pool.map(extract_parcel_labels_raster, inputs) 259 | 260 | saved_data_info = pd.concat(pd.DataFrame(out) for out 
in outputs) 261 | save_name = os.path.join(savedir, 'Y%s_N%s_W%s_R%d_CRS%s' % (year, N, W, res, crs), 'saved_data_info.csv') 262 | saved_data_info.columns = ['id', 'ground_truth', 'crs', 'year', 'geometry', 'Np', 'Sp', 'Wp', 'Ep', 263 | 'inratio', 'Dy', 'Dx', 'D', 'Ntl', 'Wtl', 'filepath'] 264 | saved_data_info.to_csv(save_name, index=False) 265 | 266 | 267 | if __name__ == "__main__": 268 | 269 | parser = argparse.ArgumentParser(description='Make raster from shapely polygons') 270 | parser.add_argument('--ground_truths_file', help='filename containing ground truth parcel polygons') 271 | parser.add_argument('--raster_labels_dir', help='directory containing extracted raster ground truths') 272 | parser.add_argument('--products_dir', help='directory containing sentinel products') 273 | parser.add_argument('--savedir', help='save directory to extract ground truths in raster mode') 274 | parser.add_argument('--res', default=10, help='pixel size in meters') 275 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 276 | parser.add_argument('--Npoly', default=50, help='number of vertices for polygons') 277 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 278 | 279 | args = parser.parse_args() 280 | 281 | ground_truths_file = args.ground_truths_file 282 | 283 | raster_labels_dir = args.raster_labels_dir 284 | 285 | products_dir = args.products_dir 286 | 287 | savedir = args.savedir 288 | print("savedir: ", savedir) 289 | if not os.path.exists(savedir): 290 | os.makedirs(savedir) 291 | 292 | # res = int(args.res) 293 | res = float(args.res) 294 | assert np.ceil(10. / res) == 10. / res, "Label pixel size should divide min satellite pixel size (10m), but %.1f was selected" % res 295 | 296 | sample_size = int(args.sample_size) 297 | 298 | Npoly = int(args.Npoly) 299 | 300 | num_processes = int(args.num_processes) 301 | 302 | main() 303 | -------------------------------------------------------------------------------- /dataset/labelled_dense/find_parcel_dimensions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | from shapely import geometry 6 | import os 7 | from glob import glob 8 | from multiprocessing import Pool 9 | if __name__ == "__main__" and __package__ is None: 10 | from sys import path 11 | from os.path import dirname as dir 12 | path.insert(0, dir(dir(path[0]))) 13 | __package__ = "examples" 14 | from utils.geospatial_data_utils import GeoTransform, get_points_from_str_poly 15 | from utils.multiprocessing_utils import split_df 16 | from utils.sentinel_products_utils import get_S2prod_info 17 | 18 | 19 | def get_nbins(x): 20 | N = x.shape[0] 21 | if N < 1e2: 22 | return 10 23 | if N < 1e3: 24 | return 25 25 | # if N < 1e4: 26 | else: 27 | return 100 28 | 29 | 30 | def main(): 31 | # ground truth data 32 | gt_df = pd.read_csv(ground_truths_file) 33 | # gt_df['id'] = range(1, gt_df.shape[0]+1) 34 | # gt_df['crs'] = 2154 35 | if 'id' not in gt_df: 36 | gt_df['id'] = range(1, gt_df.shape[0]+1) 37 | assert (gt_df['crs'] == gt_df['crs'].iloc[0]).all(), \ 38 | "Polygons corresponding to multiple CRS were found in %s" % ground_truths_file 39 | crs = gt_df['crs'].iloc[0] 40 | yearly_grouped_gt = gt_df.groupby('year') 41 | years = list(yearly_grouped_gt.groups.keys()) 42 | print("found ground truth data for years %s" % ", ".join([str(i) for i in years])) 43 | # if 0 in 
gt_df['ground_truth'].drop_duplicates(): 44 | # gt_df['ground_truth'] += 1 45 | 46 | # sentinel products 47 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 48 | prod_df = get_S2prod_info(imdirs) 49 | assert (prod_df['West']==prod_df['West'].iloc[0]).all() and (prod_df['North']==prod_df['North'].iloc[0]).all(),\ 50 | "Sentinel products corresponding to multiple tiles were found in %s" % products_dir 51 | geotr = GeoTransform(intr=prod_df['crs'].iloc[0].split(':')[1], outtr=gt_df['crs'].iloc[0], loc2loc=True) 52 | prod_WN = prod_df[['West', 'North']].iloc[0].tolist() 53 | prod_WN = geotr(prod_WN[0], prod_WN[1]) # in ground truth data coordinate system 54 | d = (10 * prod_df[['height', 'width']].iloc[0].values).tolist() 55 | 56 | # find all ground truth data that fall inside sentinel product 57 | prod_poly = geometry.Polygon([[prod_WN[0] + loc[0] * d[0], prod_WN[1] - loc[1] * d[1]] for loc in 58 | [[0, 0], [1, 0], [1, 1], [0, 1], [0, 0]]]) 59 | print(prod_poly) 60 | def f(x): 61 | try: 62 | x = get_points_from_str_poly(x) 63 | W = x[:, 0].min() 64 | E = x[:, 0].max() 65 | S = x[:, 1].min() 66 | N = x[:, 1].max() 67 | num_vertices = x.shape[0] 68 | x = geometry.Polygon(x) 69 | inratio = prod_poly.intersection(x).area / x.area 70 | return np.array([N, S, W, E, inratio, num_vertices]) 71 | except: 72 | return np.array([0, 0, 0, 0, 0, 0]) 73 | 74 | gt_df[['N', 'S', 'W', 'E', 'inratio', 'num_vertices']] = np.stack(gt_df['geometry'].apply(f).values) 75 | gt_df = gt_df[gt_df['inratio'] == 1.0] 76 | print("found %d polygons inside sentinel tile" % gt_df.shape[0]) 77 | 78 | gt_df['Dy'] = np.abs(gt_df['N'] - gt_df['S']) 79 | gt_df['Dx'] = np.abs(gt_df['E'] - gt_df['W']) 80 | gt_df['D'] = gt_df[['Dx', 'Dy']].max(axis=1) 81 | # gt_df['D'].max() 82 | # gt_df[gt_df['D'] < 480].shape[0] / gt_df.shape[0] 83 | # gt_df[gt_df['D'] > 700].shape[0] / gt_df.shape[0] 84 | gt_df.to_csv(os.path.join(save_dir, 'gt_df_parcels_in_AOI.csv'), index=False) 85 | 86 | # if cutoff is None: 87 | # x = np.random.normal(mu, sigma, size=100) 88 | print('maxD | %obj >maxD') 89 | print('-------------------') 90 | for maxd in [240, 320, 480, 640, 1000, 1280, 1600]: 91 | r = gt_df[gt_df['D'] < maxd].shape[0] / gt_df.shape[0] 92 | print('%s|%s' % (str(maxd).ljust(7), ('%.4f' % r).rjust(9))) 93 | plt.ioff() 94 | fig, ax = plt.subplots(figsize=(8, 4)) 95 | n_bins = get_nbins(gt_df['D']) 96 | # plot the cumulative histogram 97 | n, bins, patches = ax.hist(gt_df['D'].values, n_bins, density=True, histtype='step', 98 | cumulative=True, label='cummulative sum') 99 | ax.hist(gt_df['D'].values, bins=bins, density=True, histtype='step', cumulative=-1, 100 | label='reversed cummulative sum') 101 | ax.grid(True) 102 | ax.legend(loc='right') 103 | ax.set_title('Cumulative step histograms') 104 | ax.set_xlabel('Object largest x-y dimension') 105 | ax.set_ylabel('Likelihood of occurrence') 106 | # plt.show() 107 | plt.savefig(os.path.join(save_dir, 'parcel_dimensions_cumsum.png')) 108 | 109 | plt.figure() 110 | plt.hist(gt_df['num_vertices'], 100, density=True) 111 | plt.grid() 112 | plt.xlabel('Number of AF Vertices') 113 | plt.ylabel('density') 114 | plt.savefig(os.path.join(save_dir, 'number_of_vertices_hist.png')) 115 | 116 | # else: 117 | # gt_df = gt_df[gt_df['D'] < cutoff] 118 | # print('Number of samples is %d for max object dimension <%dm' % (gt_df.shape[0], cutoff)) 119 | # gt_df.to_csv(os.path.join(save_dir, 'gt_df_maxd_lt_%d.csv' % cutoff), index=False) 120 | 121 | 122 | 123 | if __name__ == "__main__": 124 | 125 | 
parser = argparse.ArgumentParser(description='Make raster from shapely polygons') 126 | parser.add_argument('--ground_truths_file', help='filename containing ground truth parcel polygons') 127 | parser.add_argument('--products_dir', help='directory containing sentinel products') 128 | parser.add_argument('--save_dir', help='save directory to extract ground truths in raster mode') 129 | # parser.add_argument('--cutoff', default=None, help='max allowed parcel size. If None to script will save a cumsum ' 130 | # 'histogram to help decide the max alloed size') 131 | # parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 132 | # parser.add_argument('--num_processes', default=4, help='number of parallel processes') 133 | 134 | args = parser.parse_args() 135 | 136 | ground_truths_file = args.ground_truths_file 137 | 138 | products_dir = args.products_dir 139 | 140 | save_dir = args.save_dir 141 | print("save_dir: ", save_dir) 142 | if not os.path.exists(save_dir): 143 | os.makedirs(save_dir) 144 | 145 | # cutoff = int(args.cutoff) 146 | 147 | main() 148 | -------------------------------------------------------------------------------- /dataset/labelled_dense/make_image_timeseries_for_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | For a set of extracted image crops and a labelled_dense label map, make a timeseries of all positions matched with labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import numpy as np 7 | import os 8 | import shutil 9 | import pickle 10 | if __name__ == "__main__" and __package__ is None: 11 | from sys import path 12 | from os.path import dirname as dir 13 | path.insert(0, dir(dir(path[0]))) 14 | __package__ = "examples" 15 | from utils.data_utils import find_number 16 | from utils.date_utils import get_doy 17 | from utils.multiprocessing_utils import run_pool 18 | 19 | 20 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 21 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 22 | 23 | 24 | def match_labels_images(yearlocs): 25 | 26 | refband = bands[0] 27 | 28 | saved_files_info = [] 29 | for yearloc in yearlocs: 30 | 31 | try: 32 | 33 | idx = yearloc_groups[yearloc] 34 | data = iminfo.iloc[idx, :].sort_values(by='DOY').copy() 35 | data = data.drop_duplicates(subset=['DOY'], keep='first') # some products downloaded twice 36 | 37 | Y = data['Year'].iloc[0] 38 | N = data['Nij'].iloc[0] 39 | W = data['Wij'].iloc[0] 40 | il = data['il'].iloc[0] 41 | jl = data['jl'].iloc[0] 42 | 43 | assert all(data['Year'] == Y) 44 | assert all(data['Nij'] == N) 45 | assert all(data['Wij'] == W) 46 | assert all(data['il'] == il) 47 | assert all(data['jl'] == jl) 48 | 49 | timeseries_sample = {band: [] for band in bands} 50 | timeseries_sample['doy'] = [] 51 | for sample_info in data[['sample_path', 'DOY']].values: 52 | 53 | impath, doy = sample_info 54 | 55 | with open(impath, 'rb') as handle: 56 | sample = pickle.load(handle, encoding='latin1') 57 | 58 | for key in bands: 59 | timeseries_sample[key].append(sample[key]) 60 | timeseries_sample['doy'].append(np.array(doy)) 61 | 62 | for key in bands: 63 | timeseries_sample[key] = np.stack(timeseries_sample[key]) 64 | timeseries_sample['doy'] = np.stack(timeseries_sample['doy']) 65 | timeseries_sample['year'] = np.array(Y).astype(np.int32) 66 | 67 | timesteps = timeseries_sample[refband].shape[0] 68 | 69 | for ltype in labels.keys(): 70 | 
timeseries_sample[ltype.lower()] = \ 71 | labels[ltype][il * label_mult * sample_size: (il + 1) * label_mult * sample_size, jl * label_mult * sample_size: (jl + 1) * label_mult * sample_size] 72 | 73 | savename = os.path.join(year_savedir, "%d_%d_%s.pickle" % (int(N), int(W), Y)) 74 | with open(savename, 'wb') as handle: 75 | pickle.dump(timeseries_sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 76 | 77 | saved_files_info.append([savename, Y, N, W, sample_size, sample_size, timesteps, il, jl, "completed"]) 78 | 79 | except: 80 | 81 | saved_files_info.append(["", Y, N, W, sample_size, sample_size, 0, il, jl, "failed"]) 82 | 83 | saved_files_info = pd.DataFrame(data=saved_files_info, columns=['sample_path', 'Year', 'N', 'W', 'dy', 'dx', 'dt', 84 | 'label_win_i', 'label_win_j', 'status']) 85 | return saved_files_info 86 | 87 | 88 | def main(): 89 | 90 | global yearloc_groups 91 | global iminfo 92 | global labels 93 | global year_savedir 94 | global label_mult 95 | 96 | # ratio of image to label pixel size 97 | label_mult = int(10 / res) 98 | 99 | # read info on extracted image windows 100 | iminfo = pd.read_csv(os.path.join(windows_dir, "extracted_windows_data_info.csv")) 101 | crs = iminfo['crs'].iloc[0] 102 | 103 | # remove non extracted locations 104 | iminfo = iminfo[~pd.isnull(iminfo['sample_path'])].reset_index(drop=True) 105 | iminfo['DOY'] = iminfo['Date'].apply(lambda s: get_doy(str(s))) 106 | iminfo['Year'] = iminfo['Date'].apply(lambda s: str(s)[:4]) 107 | 108 | # ground truths 109 | gtfiles = os.listdir(ground_truths_dir) 110 | years = [find_number(s, "Y") for s in gtfiles] 111 | files = {year: {} for year in set(years)} 112 | for i, file in enumerate(gtfiles): 113 | if not file.startswith('INVALID'): 114 | files[years[i]][file.split("_")[0]] = file 115 | print("found ground truths in raster for years %s" % ", ".join(list(files.keys()))) 116 | 117 | saved_files_info = [] 118 | 119 | for year in set(years): 120 | 121 | year_savedir = os.path.join(savedir, year) 122 | if not os.path.isdir(year_savedir): 123 | os.makedirs(year_savedir) 124 | 125 | labels = {} 126 | for ltype in files[year]: 127 | 128 | labels[ltype] = np.loadtxt(os.path.join(ground_truths_dir, files[year][ltype]), dtype=np.float32) 129 | 130 | yearloc_groups = iminfo[iminfo['Year'] == year].groupby(['Nij', 'Wij'], as_index=False).groups 131 | yearlocs = list(yearloc_groups.keys()) 132 | 133 | df = run_pool(yearlocs, match_labels_images, num_processes) 134 | df = pd.concat(df) 135 | 136 | saved_files_info.append(df) 137 | 138 | df = pd.concat(saved_files_info).reset_index(drop=True) 139 | df['crs'] = crs 140 | df.to_csv(os.path.join(savedir, "saved_timeseries_data_info.csv"), index=False) 141 | 142 | # delete previously saved image windows 143 | shutil.rmtree(windows_dir) 144 | 145 | 146 | if __name__ == "__main__": 147 | 148 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 149 | parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 150 | parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 151 | parser.add_argument('--windows_dir', help='directory containing extracted windows from sentinel products') 152 | parser.add_argument('--savedir', help='save directory for image timeseries with labels') 153 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 154 | parser.add_argument('--res', default=10, help='pixel size in meters') 155 | 
parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 156 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 157 | # --------------------------------------------------------------------------------------------- 158 | 159 | args = parser.parse_args() 160 | 161 | ground_truths_dir = args.ground_truths_dir 162 | 163 | products_dir = args.products_dir 164 | 165 | windows_dir = args.windows_dir 166 | 167 | savedir = args.savedir 168 | if not os.path.exists(savedir): 169 | os.makedirs(savedir) 170 | 171 | res = float(args.res) 172 | assert np.ceil(10. / res) == 10. / res, "Label pixel size should divide min satellite pixel size (10m), but %.1f was selected" % res 173 | 174 | sample_size = int(args.sample_size) 175 | 176 | num_processes = int(args.num_processes) 177 | 178 | bands = args.bands 179 | if bands == 'None': 180 | bands = list(mult.keys()) 181 | else: 182 | bands = bands.split(',') 183 | 184 | main() 185 | 186 | -------------------------------------------------------------------------------- /dataset/labelled_dense/make_image_timeseries_for_parcel_labels.py: -------------------------------------------------------------------------------- 1 | """ 2 | For a set of extracted image crops and a labelled_dense label map, make a timeseries of all positions matched with labels 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import numpy as np 7 | import os 8 | import shutil 9 | import pickle 10 | if __name__ == "__main__" and __package__ is None: 11 | from sys import path 12 | from os.path import dirname as dir 13 | path.insert(0, dir(dir(path[0]))) 14 | __package__ = "examples" 15 | from utils.data_utils import find_number 16 | from utils.date_utils import get_doy 17 | from utils.multiprocessing_utils import run_pool 18 | 19 | 20 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 21 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 22 | 23 | 24 | def match_labels_images(yearlocs): 25 | 26 | refband = bands[1] 27 | 28 | saved_files_info = [] 29 | for jj, yearloc in enumerate(yearlocs): 30 | 31 | if jj % 1000 == 0: 32 | print("%d of %d" % (jj, len(yearlocs))) 33 | try: 34 | 35 | idx = yearloc_groups[yearloc] 36 | data = iminfo.iloc[idx, :].sort_values(by='DOY').copy() 37 | data = data.drop_duplicates(subset=['DOY'], keep='first') # some products downloaded twice 38 | 39 | Y = data['Year'].iloc[0] 40 | N = data['Nl'].iloc[0] 41 | W = data['Wl'].iloc[0] 42 | 43 | assert all(data['Year'] == Y) 44 | assert all(data['Nl'] == N) 45 | assert all(data['Wl'] == W) 46 | 47 | timeseries_sample = {band: [] for band in bands} 48 | timeseries_sample['doy'] = [] 49 | for sample_info in data[['sample_path', 'DOY']].values: 50 | impath, doy = sample_info 51 | 52 | with open(impath, 'rb') as handle: 53 | sample = pickle.load(handle, encoding='latin1') 54 | 55 | # image falls in black region for this product (should have been excluded in extract_images_for_parcel_labels.py) 56 | if sample[refband].sum() == 0: 57 | # print('zero sum') 58 | continue 59 | 60 | # image does not match required size (should have been excluded in extract_images_for_parcel_labels.py) 61 | height, width = sample[refband].shape 62 | if (height != sample_size) or (width != sample_size): 63 | # print('unequal size') 64 | continue 65 | 66 | for key in bands: 67 | timeseries_sample[key].append(sample[key]) 68 | timeseries_sample['doy'].append(np.array(doy)) 69 | 70 | for key in 
bands: 71 | timeseries_sample[key] = np.stack(timeseries_sample[key]) 72 | timeseries_sample['doy'] = np.stack(timeseries_sample['doy']) 73 | timeseries_sample['year'] = np.array(Y).astype(np.int32) 74 | 75 | timesteps = timeseries_sample[refband].shape[0] 76 | 77 | gt = saved_gt_info[(saved_gt_info['Ntl'] == yearloc[0]) & (saved_gt_info['Wtl'] == yearloc[1])] 78 | with open(gt['filepath'].values[0], 'rb') as handle: 79 | labels = pickle.load(handle, encoding='latin1') 80 | for ltype in labels.keys(): 81 | timeseries_sample[ltype.lower()] = labels[ltype] 82 | 83 | savename = os.path.join(year_savedir, "%d_%d_%s.pickle" % (int(N), int(W), Y)) 84 | with open(savename, 'wb') as handle: 85 | pickle.dump(timeseries_sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 86 | 87 | saved_files_info.append([savename, Y, N, W, sample_size, sample_size, timesteps, "completed"]) 88 | 89 | except: 90 | 91 | saved_files_info.append(["", Y, N, W, sample_size, sample_size, 0, "failed"]) 92 | 93 | saved_files_info = pd.DataFrame(data=saved_files_info, columns=['sample_path', 'Year', 'N', 'W', 'dy', 'dx', 'dt', 94 | 'status']) 95 | return saved_files_info 96 | 97 | 98 | def main(): 99 | 100 | global yearloc_groups 101 | global iminfo 102 | global labels 103 | global year_savedir 104 | global saved_gt_info 105 | 106 | iminfo = pd.read_csv(os.path.join(windows_dir, "extracted_windows_data_info.csv")) 107 | crs = iminfo['crs'].iloc[0] 108 | 109 | # remove non extracted locations 110 | iminfo = iminfo[~pd.isnull(iminfo['sample_path'])].reset_index(drop=True) 111 | iminfo['DOY'] = iminfo['Date'].apply(lambda s: get_doy(str(s))) 112 | iminfo['Year'] = iminfo['Date'].apply(lambda s: str(s)[:4]) 113 | 114 | # ground truths 115 | gtfiles = [f for f in os.listdir(ground_truths_dir) if os.path.isdir(os.path.join(ground_truths_dir, f))] 116 | 117 | saved_files_info = [] 118 | 119 | for gtfile in gtfiles: 120 | # gtfile = gtfiles[0] 121 | 122 | saved_gt_info = pd.read_csv(os.path.join(ground_truths_dir, gtfile, 'saved_data_info.csv')) 123 | 124 | year = find_number(gtfile, "Y") 125 | # CRSl = find_number(gtfile, "CRS") 126 | 127 | year_savedir = os.path.join(savedir, year) 128 | if not os.path.isdir(year_savedir): 129 | os.makedirs(year_savedir) 130 | 131 | yearloc_groups = iminfo[iminfo['Year'] == year].groupby(['Nl', 'Wl'], as_index=False).groups 132 | yearlocs = list(yearloc_groups.keys()) 133 | 134 | df = run_pool(yearlocs, match_labels_images, num_processes) 135 | df = pd.concat(df) 136 | 137 | saved_files_info.append(df) 138 | 139 | 140 | df = pd.concat(saved_files_info).reset_index(drop=True) 141 | df['crs'] = crs 142 | df.to_csv(os.path.join(savedir, "saved_timeseries_data_info.csv"), index=False) 143 | 144 | # delete windows dir 145 | shutil.rmtree(windows_dir) 146 | 147 | 148 | if __name__ == "__main__": 149 | 150 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 151 | parser.add_argument('--ground_truths_dir', help='directory containing ground truth parcels raster') 152 | parser.add_argument('--products_dir', help='directory containing downloaded sentinel products') 153 | parser.add_argument('--windows_dir', help='directory containing extracted windows from sentinel products') 154 | parser.add_argument('--savedir', help='save directory for image timeseries with labels') 155 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 156 | parser.add_argument('--res', default=10, help='pixel size in meters') 157 | parser.add_argument('--sample_size', 
default=24, help='spatial resolution of dataset samples') 158 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 159 | # --------------------------------------------------------------------------------------------- 160 | 161 | args = parser.parse_args() 162 | 163 | ground_truths_dir = args.ground_truths_dir 164 | 165 | products_dir = args.products_dir 166 | 167 | windows_dir = args.windows_dir 168 | 169 | savedir = args.savedir 170 | if not os.path.exists(savedir): 171 | os.makedirs(savedir) 172 | 173 | # res = int(args.res) 174 | res = float(args.res) 175 | 176 | sample_size = int(args.sample_size) 177 | 178 | num_processes = int(args.num_processes) 179 | 180 | bands = args.bands 181 | 182 | if bands == 'None': 183 | bands = list(mult.keys()) 184 | else: 185 | bands = bands.split(',') 186 | 187 | main() 188 | -------------------------------------------------------------------------------- /dataset/labelled_dense/make_labelled_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | bands='None' 4 | for ARGUMENT in "$@" 5 | do 6 | 7 | KEY=$(echo $ARGUMENT | cut -f1 -d=) 8 | VALUE=$(echo $ARGUMENT | cut -f2 -d=) 9 | 10 | case "$KEY" in 11 | 12 | ground_truths_file) ground_truths_file=${VALUE} ;; 13 | products_dir) products_dir=${VALUE} ;; 14 | labels_dir) labels_dir=${VALUE} ;; 15 | windows_dir) windows_dir=${VALUE} ;; 16 | timeseries_dir) timeseries_dir=${VALUE} ;; 17 | res) res=${VALUE} ;; 18 | sample_size) sample_size=${VALUE} ;; 19 | num_processes) num_processes=${VALUE} ;; 20 | bands) bands=${VALUE} ;; 21 | *) 22 | esac 23 | 24 | done 25 | 26 | # 1:ground_truths_file, 2:products_dir, 3:labels_dir, 4:windows_dir, 5:timeseries_dir, 6:res, 7:sample_size, 8:num_processes 27 | python dataset/labelled_dense/extract_labels_raster.py --ground_truths_file $ground_truths_file \ 28 | --products_dir $products_dir \ 29 | --savedir $labels_dir \ 30 | --res $res \ 31 | --sample_size $sample_size \ 32 | --num_processes $num_processes 33 | 34 | python dataset/labelled_dense/extract_images_for_labels.py --ground_truths_dir $labels_dir \ 35 | --products_dir $products_dir \ 36 | --savedir $windows_dir \ 37 | --bands $bands \ 38 | --res $res \ 39 | --sample_size $sample_size \ 40 | --num_processes $num_processes 41 | 42 | python dataset/labelled_dense/make_image_timeseries_for_labels.py --ground_truths_dir $labels_dir \ 43 | --products_dir $products_dir \ 44 | --windows_dir $windows_dir \ 45 | --savedir $timeseries_dir \ 46 | --bands $bands \ 47 | --res $res \ 48 | --sample_size $sample_size \ 49 | --num_processes $num_processes 50 | -------------------------------------------------------------------------------- /dataset/labelled_dense/make_labelled_parcel_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | bands='None' 4 | for ARGUMENT in "$@" 5 | do 6 | 7 | KEY=$(echo $ARGUMENT | cut -f1 -d=) 8 | VALUE=$(echo $ARGUMENT | cut -f2 -d=) 9 | 10 | case "$KEY" in 11 | 12 | ground_truths_file) ground_truths_file=${VALUE} ;; 13 | products_dir) products_dir=${VALUE} ;; 14 | labels_dir) labels_dir=${VALUE} ;; 15 | windows_dir) windows_dir=${VALUE} ;; 16 | timeseries_dir) timeseries_dir=${VALUE} ;; 17 | res) res=${VALUE} ;; 18 | sample_size) sample_size=${VALUE} ;; 19 | Npoly) Npoly=${VALUE} ;; 20 | num_processes) num_processes=${VALUE} ;; 21 | bands) bands=${VALUE} ;; 22 | *) 23 | esac 24 | 25 | done 26 | 27 | # 1:ground_truths_file, 
2:products_dir, 3:labels_dir, 4:windows_dir, 5:timeseries_dir, 6:res, 7:sample_size, 8:num_processes 28 | python dataset/labelled_dense/extract_labels_raster.py --ground_truths_file $ground_truths_file \ 29 | --products_dir $products_dir \ 30 | --savedir $labels_dir \ 31 | --res $res \ 32 | --sample_size $sample_size \ 33 | --num_processes $num_processes 34 | 35 | python dataset/labelled_dense/extract_parcel_ground_truths.py --ground_truths_file $ground_truths_file \ 36 | --raster_labels_dir $labels_dir \ 37 | --products_dir $products_dir \ 38 | --savedir $labels_dir \ 39 | --res $res \ 40 | --sample_size $sample_size \ 41 | --Npoly $Npoly \ 42 | --num_processes $num_processes 43 | 44 | python dataset/labelled_dense/extract_images_for_parcel_labels.py --ground_truths_dir $labels_dir \ 45 | --products_dir $products_dir \ 46 | --savedir $windows_dir \ 47 | --bands $bands \ 48 | --res $res \ 49 | --sample_size $sample_size \ 50 | --num_processes $num_processes 51 | 52 | python dataset/labelled_dense/make_image_timeseries_for_parcel_labels.py --ground_truths_dir $labels_dir \ 53 | --products_dir $products_dir \ 54 | --windows_dir $windows_dir \ 55 | --savedir $timeseries_dir \ 56 | --bands $bands \ 57 | --res $res \ 58 | --sample_size $sample_size \ 59 | --num_processes $num_processes 60 | -------------------------------------------------------------------------------- /dataset/labelled_dense/split_ground_truths_by_location.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "helpful-notion", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt\n", 13 | "import os\n", 14 | "from collections import OrderedDict\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "%matplotlib inline\n", 17 | "if __name__ == \"__main__\" and __package__ is None:\n", 18 | " from sys import path\n", 19 | " from os.path import dirname as dir\n", 20 | "\n", 21 | " path.append(dir(path[0]))\n", 22 | " __package__ = \"examples\"\n", 23 | "from utils.date_utils import get_doy" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "blank-eugene", 29 | "metadata": {}, 30 | "source": [ 31 | "### User input" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 12, 37 | "id": "organic-contribution", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "savedir = '/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/ARISE/Tanzania/S2-products'\n", 42 | "year = '2022'\n", 43 | "date_range = ('0101', '0428') # (mindate: 'mmdd', maxdate: 'mmdd')\n", 44 | "cloudcoverpercentage = (0, 70) # (min %, max %)\n", 45 | "minprodsize = 400 # Mb\n", 46 | "numproducts = 60\n", 47 | "tile = '37MDN'\n", 48 | "platformname = 'Sentinel-2'\n", 49 | "processinglevel = 'Level-1C'" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "creative-destiny", 55 | "metadata": {}, 56 | "source": [ 57 | "### Read user credentials" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 13, 63 | "id": "following-stanford", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "cred = pd.read_csv(\"pw.csv\", header=None)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "altered-antique", 73 | "metadata": {}, 74 | "source": [ 75 | "### Query for Sentinel products" 76 | ] 77 | }, 78 | { 79 | 
"cell_type": "code", 80 | "execution_count": 14, 81 | "id": "becoming-fifty", 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "querying...\n", 89 | "found 10 products\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "api = SentinelAPI(cred[0][0], cred[0][1], 'https://scihub.copernicus.eu/dhus')\n", 95 | "print(\"querying...\")\n", 96 | "products = api.query(tileid=tile,\n", 97 | " platformname=platformname,\n", 98 | " cloudcoverpercentage=cloudcoverpercentage,\n", 99 | " date=(\"%s%s\" % (year, date_range[0]), \"%s%s\" % (year, date_range[1])),\n", 100 | " processinglevel=processinglevel)\n", 101 | "df = api.to_dataframe(products)\n", 102 | "print(\"found %d products\" % len(products))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "pressing-commerce", 108 | "metadata": {}, 109 | "source": [ 110 | "### Remove very small size products" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 15, 116 | "id": "nasty-adjustment", 117 | "metadata": { 118 | "pycharm": { 119 | "name": "#%%\n" 120 | } 121 | }, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "keeping 10 products with larger than 400Mb\n", 128 | "number of products found (10) is less than initially selected (60)\n", 129 | "changing number of selected products to 10\n", 130 | "you may want to change selection criteria in 'User input' cell to find more products\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "sizes = np.array([float(s.split(\" \")[0]) for s in df['size'].values])\n", 136 | "products2keep = OrderedDict()\n", 137 | "for i, prodkey in enumerate(list(products.keys())):\n", 138 | " if sizes[i] >= minprodsize:\n", 139 | " # print(sizes[i])\n", 140 | " products2keep[prodkey] = products[prodkey]\n", 141 | "df2keep = api.to_dataframe(products2keep).reset_index()\n", 142 | "print(\"keeping %d products with larger than %dMb\" % (len(products2keep), minprodsize))\n", 143 | "\n", 144 | "if len(products2keep) < numproducts:\n", 145 | " print(\"number of products found (%d) is less than initially selected (%d)\" % (len(products2keep), numproducts))\n", 146 | " print(\"changing number of selected products to %d\" % (len(products2keep)))\n", 147 | " print(\"you may want to change selection criteria in 'User input' cell to find more products\")\n", 148 | " numproducts = len(products2keep)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "atomic-joseph", 154 | "metadata": { 155 | "pycharm": { 156 | "name": "#%% md\n" 157 | } 158 | }, 159 | "source": [ 160 | "### Spread products evenly in time and visualize" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 16, 166 | "id": "turkish-fetish", 167 | "metadata": { 168 | "pycharm": { 169 | "name": "#%%\n" 170 | } 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "ccfactor = 0.0 # cloud cover factor when selecting products\n", 175 | "def distance(doys, target_doy, ccfactor=0):\n", 176 | " \"\"\"\n", 177 | " distance function for selecting products depending on \n", 178 | " proximity to desired date and cloud cover\n", 179 | " \"\"\"\n", 180 | " dist = np.abs(doys['doy'] - target_doy) + ccfactor * doys['cloudcoverpercentage']\n", 181 | " return dist" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 17, 187 | "id": "delayed-benefit", 188 | "metadata": { 189 | "scrolled": true 190 | }, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | 
"output_type": "stream", 195 | "text": [ 196 | " doy size cloudcoverpercentage\n", 197 | "0 5 820.48 MB 55.1488\n", 198 | "1 40 794.63 MB 9.41109\n", 199 | "2 60 803.36 MB 33.9301\n", 200 | "3 65 768.85 MB 47.6807\n", 201 | "4 70 795.89 MB 36.565\n", 202 | "5 75 784.40 MB 66.0153\n", 203 | "6 85 812.33 MB 27.2806\n", 204 | "7 95 819.05 MB 27.7287\n", 205 | "8 100 806.83 MB 38.3053\n", 206 | "9 115 782.31 MB 65.9704\n" 207 | ] 208 | }, 209 | { 210 | "data": { 211 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcAAAAEGCAYAAADylEXaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAXUklEQVR4nO3dfXRU9Z3H8c83TyRoiEGmIAEbLRAICSoIpwJSn9d2taiIx7Urxarbokj3KHXrATm1Xf+olt09rQ+cItKiUlkEV9d2rcX6RF3Q4BMxgsYttFJjggSJECCT/PaPewPDZCaZBJJM/L1f58zJzL3f+5vv/BLy4d6Z3GvOOQEA4JuM3m4AAIDeQAACALxEAAIAvEQAAgC8RAACALyU1ZniQYMGueLiYknS1q3BspKSY91Sao7l88ePlWjsVGq62mdXx+7KcwHoeZs2bdrpnIv0dh84UqcCsLi4WBUVFZKkc84Jlr344jHuKEXH8vnjx0o0dio1qYzd1edPRW9/TwAkZmbbe7sHtMUhUACAlwhAAICXCEAAgJc69R4gAKB3bNq06UtZWVkPSSoTOy+paJFUGY1Gb5gwYUJtogICEAD6gKysrIeGDBkyJhKJ1GdkZHAS5w60tLRYXV1daU1NzUOSvpmohv9FAEDfUBaJRPYQfqnJyMhwkUjkMwV7zIlrerAfAEDXZRB+nRPOV9KcIwABAF4iAAEAx9SMGTOKly9fXtjZ7bZu3ZqzZMmSgT31fAQgACAtfPDBB/1WrVrV6QDsKgIQANChPXv2ZJxzzjkjSkpKSkeOHDl26dKlha+88kr/iRMnlowdO3bM1KlTR27fvj07frtkNZWVlf0mT548qqSkpLS0tHTMu+++22/BggVFFRUVx48ePbr0rrvu+lI0GtV3v/vdYWVlZWNGjRpVeu+99w6SpJaWFs2aNevk4uLissmTJ4/auXNnl/6igT+DAIA+aNIkHdPT3r/2mra2t37t2rUDhgwZ0vTiiy9WS9Knn36aecEFF4z87W9/Wz106NDo0qVLC+fPn1+0evXqba3bHDhwwObNm3dyopprrrnmlPnz59fMmjVr9759+6y5udnuvvvuHYsXLx78wgsvVEvSz372s0EFBQXNlZWV7zU2NtrEiRNHX3rppXs2btzYv7q6ul91dXXlRx99lF1eXj529uzZn3b2NROAAIAOjR8/vnHBggXD58yZUzR9+vTPTjzxxOgHH3yQd955542Sgr2ySCTSFLvNO++80y9RTX19fcYnn3ySM2vWrN2S1L9/fyepzSdc161bN2DLli39n3766UJJamhoyKyqqsp96aWX8q+66qpdWVlZKi4ubjrrrLMauvKaCEAA6IM62mM71saNG3fgjTfeqFqzZk3BnXfeWTRt2rQ9I0aMaHzrrbe2JNvGOWeJaurr61N6+805Z4sXL/7LjBkz9sQuf+aZZwq69iqOxHuAAIAObdu2LTs/P7/lpptu2nXrrbfWVFRUHLdr166sdevWHScFhzsrKipyY7cZN27c/kQ1hYWFLUOGDDn4yCOPnCBJjY2N1tDQkFFQUND8+eefZ7Zuf+GFF3724IMPRg4cOGBSsEe5Z8+ejK997WsNTzzxxMBoNKrt27dnb9iwIb8rr4k9QABAhzZt2pR3xx13DMvIyFBWVpZ74IEHtmdlZbl58+ad3NDQkNnc3Gxz5sz55Mwzz9zfuk1ubq57/PHHP0xU8+ijj/75xhtv/PJPfvKTodnZ2W716tUfTpo0qTEzM9OVlJSUXnPNNTsXLlxYu23btn7l5eVjnHM2cODApt/97ncfXnvttbuff/75ASNGjCgbOnTogTPOOOPzrrwmAhAA0KEZM2bsmTFjRlX88oqKijaHYtesWbOt9f7kyZMbE9WUl5cf2LBhw/vxy+OX3XfffTsk7YivW7FixV9S7z4xDoECALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALrFyy+/3H/27NnDpeCP3SdPnjxq9OjRpUuXLu30pYu6A38HCADoFtOmTds3bdq0fZL06quv9pekLVu2tPlbwmSi0aiysrovptgDBACkZOvWrTkjR44c2/p40aJFg2+99dahkyZNKpkzZ05ReXn5mOLi4rJnn332eEl65pln8s8999wRO3bsyLruuutO2bx5c//Ro0eXvvvuu/2eeuqp/DFjxpSOGjWqdObMmcWNjY0mSUVFReVz5swpKi0tHfPwww8XFhUVld98881Fo0ePLi0rKxuzfv36/lOnTh05fPjwsnvuuSdyNK+HPUAA6IsmTTqml0PSa68d1cm1o9Gobd68+b1Vq1YV/PjHPx568cUXHzqjS1FRUfSBBx7Y3nqpo3379tn5559f8txzz20dN27cgcsvv7z43nvvjSxatKhWkk488cRoVVXVe5J01113DTv55JMPbtmyper6668f/p3vfKd448aNWxobGzPKy8vH3n777XVd7Zk9QADAUZs5c2a9JE2ePHnvRx99lNNe7dtvv507bNiwA+PGjTsgSbNnz/50/fr1h05oPWvWrPrY+quuumq3JJWXl+8bP3783sLCwpahQ4dGc3JyWnbu3JmpLmIPEAD6oqPcY+uKrKws19LScujx/v37D+1E5ebmurBGzc3NdjTPk5+f3xL7uHXsjIwM5eTkHLpuYEZGhpqamrr8XOwBAgBSMmzYsOiuXbuyampqMhsbG+33v/99l67Ld9ppp+3fsWNHTmVlZT9JWrFixYlnn312ly5qezTYAwQApKRfv37utttu+3jixIljBg8e3DRixIj9HW/VVv/+/d2SJUu2zZw58yvNzc067bTT9s2fP7/L7+V1FQEIAEjZwoULaxcuXFibbP1JJ50U3bFjx2ZJuuSSSxouueSShvj7kjR9+vSG6dOnt/mTiNZtEz2eN2/ep5I+TVbbWRwCBQB4iQAEAHiJAASAvqGlpaXlqD5d6ZtwvlqSrScAAaBvqKyrqysgBFPT0tJidXV1BZIqk9XwIRgA6AOi0egNNTU1D9XU1JSJnZdUtEiqjEajNyQrIAABoA+YMGFCraRv9nYf
XyT8LwIA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOAlAhAA4CUCEADgJQIQAOClzgVgNCq9/rpUV6emJqmhQaqra6e+ru5QfYc6Uyupad9B7avZo53vpVbf3vjxr6WpSfq8/qDqnztcn6jmiNefZPyk8xRT3+HYHfSf8Lk6OZ8A4B3nXMq3CWbOFRS4ppw8N9hqXGamc3l5zq1c6dpauTJYWVDQTlEXap1z6+eudGfrJTdFr7i9ynPr57Zf3974K1c6l5HhDr2WuXOdG2w17my95HYreK3r565sUxP7eP3cxOPHj33oaWP6iZ/L+LFXrkxtfmKfa1b2SteUk/p8AuhekipcJ37XcuuZmwXfm9ScaeZ+Ht6/SferUmPVrGzl5Ehr10qFheHK+nrpiiukgwcPb9ymqAu1knZvq1fut67QbVosSbpft2i/crT/sbU6obhtfXvj16uwzaoC1atY22Vq0f26RZK0Xzm6SM+pWdlthi9QvdbqCuXqyPF3L1+ry68rbPO0Ty6v1wnXHfmksXMZ70s5wfjWzvzEvsRMNek5XdSmn2TzCaD7TZkyZZNz7sze7gNH6vJ7gPfoBxqpaklSVpZUUxOzsqZGyo77Zd6mqAu1kuqrahRVtu7XLYcCKqos1Vclrm9v/ESrTlKNfqrbD43dOn7ra413koJ+4sevr6pJ+LT1VW2fNHYu4xVl1Kg5o/35iX0dI1WtpgT9JJtPAPBWZ3YXJ0jOhbe9ynODVOuk4ChbbW3M/n5tbbAwpr5tURdqnXN1VbVur46s36s8V1eVuL698ROtGqTE47e+1vhbonqXF/ST6Gnrqto+aXvjD8+tdS0dzE/s60jWT7L5BND9xCHQtLx1fg8wM1PRnDx9L3uZDg6IKC9PWrZMikRiaiKRYGFGhpSZqcRFXaiVNGhMRG/OXaZmZSiqTO1Tnt6cu0yDxiSub2/81lV5edKAAcHXq+dG9L3sw+NHc4Lx9+ZFDtXMnXt4m715QT/x4w8aE2kz9rJlQf+x/cTPZezYeXnSTx+OyDqYn9jXcXBA0L+z1OYTAHzVufcAjz/eVZSUSM8+qzpFtG2bVFzczu/WKVOk/fulZ5/t+BdwZ2olNU2aooN79qvxyWeTh1+K49fV6YjXUlcn5V4wRbnar+x1QX2imiNef5Lx29Ql6Cd+LhNuk8L8HLHdZZ2bTwDdx8x4DzANZXWq2kzKzw/2npTC79Xs7OCWyi/gztRKyu6frez+2ToulfDrYPxI5MjFkYikwmxJh+sT1RwxVJLx29QlqI+fy4TbpDA/R2zXyfkEAN/wh/AAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgAAAL5lzLvViswZJW7uvnW4zSNLO3m6ik/pizxJ99zT67lld7fvLzrnIsW4GRyerk/VbnXNndksn3cjMKvpa332xZ4m+exp996y+2jcS4xAoAMBLBCAAwEudDcBfdksX3a8v9t0Xe5bou6fRd8/qq30jgU59CAYAgC8KDoECALxEAAIAvJRSAJrZxWa21cyqzeyH3d3U0TCzbWa22czeMrOKcNlAM/uDmX0Qfi1Mgz4fNrNaM6uMWZawTwv8PJz/d8xsfJr1/SMz2xHO+Vtm9o2YdXeEfW81s7/rpZ6Hm9kLZlZlZu+a2ffD5Wk93+30ne7znWtmr5nZ22Hfd4XLTzGzjWF/q8wsJ1zeL3xcHa4vTrO+f2Vmf46Z79PD5Wnxc4Kj4Jxr9yYpU9KHkk6VlCPpbUmlHW3XWzdJ2yQNilt2j6Qfhvd/KOmnadDnNEnjJVV21Kekb0j6H0km6auSNqZZ3z+SND9BbWn489JP0inhz1FmL/R8kqTx4f18Se+HvaX1fLfTd7rPt0k6PryfLWljOI//KenqcPkSSXPC+zdJWhLev1rSql6a72R9/0rSlQnq0+LnhFvXb6nsAU6SVO2c+z/n3EFJj0uansJ26WS6pF+H938t6bLeayXgnHtZ0q64xcn6nC5phQtskHSCmZ3UI43GSdJ3MtMlPe6cO+Cc+7OkagU/Tz3KOfexc+6N8H6DpPckFSnN57udvpNJl/l2zrnPw4fZ4c1JOk/SE+Hy+Plu/T48Iel8M7Oe6fawdvpOJi1+TtB1qQRgkaS/xjz+SO3/I+xtTtJzZrbJzP4pXDbYOfdxeL9G0uDeaa1DyfrsC9+DueFhoIdjDjGnXd/h4bUzFPzvvs/Md1zfUprPt5llmtlbkmol/UHB3uhu51w0QW+H+g7XfybpxB5tOBTft3Oudb7vDuf7382sX7gsbeYbXfNF/BDMVOfceElfl3SzmU2LXemcc2r/f3Vpoa/0GXpQ0lcknS7pY0mLe7WbJMzseElrJP2zc25P7Lp0nu8Efaf9fDvnmp1zp0sapmAvdHTvdpSa+L7NrEzSHQr6nyhpoKR/6b0OcSylEoA7JA2PeTwsXJaWnHM7wq+1kp5U8I/vk9ZDE+HX2t7rsF3J+kzr74Fz7pPwF0eLpKU6fNgtbfo2s2wFIfKYc25tuDjt5zt
R331hvls553ZLekHSWQoOEbaefzi2t0N9h+sLJH3as50eKabvi8ND0c45d0DScqXxfKNzUgnA1yWNDD/BlaPgTeqnu7etrjGz48wsv/W+pIskVSro99th2bclPdU7HXYoWZ9PS5oVfursq5I+izl01+vi3ve4XMGcS0HfV4ef8jtF0khJr/VCfyZpmaT3nHP/FrMqrec7Wd99YL4jZnZCeD9P0oUK3r98QdKVYVn8fLd+H66U9Mdwj7xHJel7S8x/kkzB+5ax893rPyc4Cql8UkbBp53eV3Acf0Fvf3KnnT5PVfApuLclvdvaq4L3E56X9IGkdZIGpkGvv1Fw+KpJwXsH1yfrU8GnzO4P53+zpDPTrO9Hwr7eUfBL4aSY+gVh31slfb2Xep6q4PDmO5LeCm/fSPf5bqfvdJ/vcZLeDPurlLQoXH6qgkCulrRaUr9weW74uDpcf2qa9f3HcL4rJT2qw58UTYufE25dv3EqNACAl76IH4IBAKBDBCAAwEsEIADASwQgAMBLBCAAwEsEIHpEeAWD+d04fiS8ksCbZnZ2dz0PgC+OrI5LgD7hfEmbnXM39MSTmVmWO3xeSwB9EHuA6DZmtsDM3jez9ZJKYpbfaGavh9ddW2Nm/c0sP7zmWnZYMyD2ccy2xWb2x/DExM+b2cnh9dnukTQ9vF5bXkz9eWb2XzGPLzSzJ8P7F5nZ/5rZG2a2OjznpsxsUdhfpZn9svXKBGb2opn9hwXXmfx+d80bgJ5BAKJbmNkEBafNO13B2Usmxqxe65yb6Jw7TcEpsq53weV+XpT092HN1WFdU9zQv5D0a+fcOEmPSfq5c+4tSYsUXEfudOdcY0z9C5JGm1kkfHydpIfNbJCkhZIucMHJ0ysk3RrW3Bf2VyYpT9IlMePlOOfOdM6l3QmoAXQOAYjucrakJ51z+1xwBYPY88eWmdkrZrZZ0rckjQ2XP6QgoBR+XZ5g3LMkrQzvP6LgdGFJueBUR49I+sfwPI9nKbiI6VcVXED2T+Hlb74t6cvhZueG7yduVnANu7ExQ65q7/kA9B28B4je8CtJlznn3jaz2ZLOkSTn3J/CQ5znKLiSeWWyATppuaT/lrRf0mrnXDQ8rPkH59w/xBaaWa6kBxSc1/GvZvYjBeeqbLX3GPUEoJexB4ju8rKky8wsL7xCx6Ux6/IlfRy+v/etuO1WKNjDS7T3J0mvKjg8qnDbVzpqxDn3N0l/U3DIs3XcDZKmmNkI6dCVREbpcNjtDN8TvDJ+PABfDOwBols4594ws1UKrsxRq+CyWq3uVHBl87rwa37Musck/auCq04kcouk5Wb2g3D765LUxXtMUsQ5917YX1249/kbO3yF74XOuffNbKmCM//XxPUN4AuEq0EgrZjZlZKmO+euPcbj3ifpTefcsmM5LoC+iz1ApA0z+4Wkryv41OixHHeTgvfubjuW4wLo29gDBAB4iQ/BAAC8RAACALxEAAIAvEQAAgC8RAACALz0/1Vt0kiPfK/SAAAAAElFTkSuQmCC\n", 212 | "text/plain": [ 213 | "
" 214 | ] 215 | }, 216 | "metadata": { 217 | "needs_background": "light" 218 | }, 219 | "output_type": "display_data" 220 | } 221 | ], 222 | "source": [ 223 | "start_doy = get_doy(\"%s%s\" % (year, date_range[0]))\n", 224 | "end_doy = get_doy(\"%s%s\" % (year, date_range[1]))\n", 225 | "uniform_doy_list = np.linspace(start_doy, end_doy, numproducts).tolist()\n", 226 | "\n", 227 | "doys = df2keep.copy() # [['datatakesensingstart', 'cloudcoverpercentage', 'size']]\n", 228 | "doys['doy'] = pd.DataFrame(\n", 229 | " doys['datatakesensingstart'].apply(lambda s: get_doy(str(s).split(' ')[0].replace('-', ''))))\n", 230 | "\n", 231 | "idx_list = []\n", 232 | "for doy_ in uniform_doy_list:\n", 233 | " # print(doy_)\n", 234 | " doys['distance'] = distance(doys, doy_, ccfactor)\n", 235 | " idx = doys['distance'].argmin()\n", 236 | " idx_list.append(pd.DataFrame(doys.iloc[idx, :]).T)\n", 237 | " doys = doys.drop(index=idx).reset_index(drop=True)\n", 238 | "prod2keep = pd.concat(idx_list).reset_index(drop=True) # df2keep.iloc[idx_list].reset_index(drop=True)\n", 239 | "prod2keep['doy'] = pd.DataFrame(\n", 240 | " prod2keep['datatakesensingstart'].apply(lambda s: get_doy(str(s).split(' ')[0].replace('-', ''))))\n", 241 | "\n", 242 | "# visualize\n", 243 | "plt.scatter(prod2keep['doy'].values, np.zeros(prod2keep.shape[0]), s=20, c='b')\n", 244 | "plt.scatter(uniform_doy_list, np.zeros(len(uniform_doy_list)), s=20, c='r')\n", 245 | "plt.vlines(prod2keep['doy'].values, 0, 1, color='b', label='selected')\n", 246 | "plt.vlines(uniform_doy_list, 0, -1, color='r', label='uniform')\n", 247 | "plt.hlines(0, 1, 365, color='k', alpha=0.3)\n", 248 | "plt.ylim(-1, 1)\n", 249 | "plt.xlim(0, 365)\n", 250 | "plt.yticks([], [])\n", 251 | "plt.xlabel('day of year')\n", 252 | "plt.legend(bbox_to_anchor=(1.3, 1))\n", 253 | "\n", 254 | "# examine\n", 255 | "print(prod2keep[['doy', 'size', 'cloudcoverpercentage']]) # .columns)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "id": "auburn-nomination", 261 | "metadata": { 262 | "pycharm": { 263 | "name": "#%% md\n" 264 | } 265 | }, 266 | "source": [ 267 | "### Save selected products to disk" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": 18, 273 | "id": "constitutional-newman", 274 | "metadata": { 275 | "pycharm": { 276 | "name": "#%%\n" 277 | } 278 | }, 279 | "outputs": [ 280 | { 281 | "name": "stdout", 282 | "output_type": "stream", 283 | "text": [ 284 | "saving products info to /media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/ARISE/Tanzania/S2-products/Sentinel-2_Level-1C_tile37MDN_minsize400Mb_10dates_year2022_from0101to0428_mincc0maxcc70.csv\n" 285 | ] 286 | } 287 | ], 288 | "source": [ 289 | "savename = '%s/%s_%s_tile%s_minsize%dMb_%ddates_year%s_from%sto%s_mincc%dmaxcc%d.csv' % \\\n", 290 | " (savedir, platformname, processinglevel, tile, minprodsize, numproducts, year, \n", 291 | " date_range[0], date_range[1], cloudcoverpercentage[0], cloudcoverpercentage[1])\n", 292 | "\n", 293 | "if not os.path.exists(os.path.dirname(savename)):\n", 294 | " print(\"making new directory %s\" % os.path.dirname(savename))\n", 295 | " os.makedirs(os.path.dirname(savename))\n", 296 | "\n", 297 | "print(\"saving products info to %s\" % savename)\n", 298 | "prod2keep.to_csv(savename, index=False)" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": null, 304 | "id": "amino-blade", 305 | "metadata": {}, 306 | "outputs": [], 307 | "source": [] 308 | } 309 | ], 310 | "metadata": { 311 | 
"kernelspec": { 312 | "display_name": "Python 3", 313 | "language": "python", 314 | "name": "python3" 315 | }, 316 | "language_info": { 317 | "codemirror_mode": { 318 | "name": "ipython", 319 | "version": 3 320 | }, 321 | "file_extension": ".py", 322 | "mimetype": "text/x-python", 323 | "name": "python", 324 | "nbconvert_exporter": "python", 325 | "pygments_lexer": "ipython3", 326 | "version": "3.8.2" 327 | } 328 | }, 329 | "nbformat": 4, 330 | "nbformat_minor": 5 331 | } 332 | -------------------------------------------------------------------------------- /dataset/unlabelled/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/dataset/unlabelled/__init__.py -------------------------------------------------------------------------------- /dataset/unlabelled/extract_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a directory of Sentinel tiles extract crops of images 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import rasterio 7 | import numpy as np 8 | import os 9 | from glob import glob 10 | import pickle 11 | if __name__ == "__main__" and __package__ is None: 12 | from sys import path 13 | from os.path import dirname as dir 14 | path.insert(0, dir(dir(path[0]))) 15 | __package__ = "examples" 16 | from utils.geospatial_data_utils import GeoTransform 17 | from utils.multiprocessing_utils import run_pool 18 | from utils.sentinel_products_utils import get_S2prod_info 19 | 20 | 21 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 22 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 23 | 24 | 25 | def extract_images(imdirs): 26 | 27 | jp2s = ["%s.jp2" % i for i in bands] 28 | # print('jp2s: ', jp2s) 29 | 30 | refband = None 31 | for band in bands: 32 | if mult[band] == 1.0: 33 | refband = band 34 | break 35 | assert refband is not None, "in curerent implementation at least one 10m band should be included" 36 | 37 | saved_files_info = [] 38 | 39 | for ii, imdir in enumerate(imdirs): 40 | 41 | print("processing product %d of %d in current process" % (ii+1, len(imdirs))) 42 | 43 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 44 | date = imdir.split("/")[-4].split(".")[0].split("_")[2][:8] 45 | 46 | # read product 47 | data = {} 48 | for jp2 in jp2s: 49 | with rasterio.open("%s/%s_%s" % (imdir, imname, jp2)) as f: 50 | data[jp2[:-4]] = f.read(1) 51 | 52 | if anchor is not None: 53 | Nanchor, Wanchor, CRSanchor = anchor 54 | 55 | geotransform_prod2anchor = GeoTransform(CRSanchor, str(f.crs).split(':')[1], loc2loc=True) 56 | Wp, Np = geotransform_prod2anchor(np.array(f.transform)[2], np.array(f.transform)[5]) 57 | 58 | dN = divmod((Np - Nanchor) / (sample_size * res), 1)[1] * sample_size * res 59 | dW = divmod((Wanchor - Wp) / (sample_size * res), 1)[1] * sample_size * res 60 | 61 | else: 62 | Wp, Np = np.array(f.transform)[2], np.array(f.transform)[5] 63 | dN = dW = 0 64 | 65 | num_rows = (data[refband].shape[0] * 10 - dN) / (sample_size * res) 66 | num_cols = (data[refband].shape[0] * 10 - dW) / (sample_size * res) 67 | 68 | prod_savedir = os.path.join(savedir, imdir.split("/")[-4].split(".")[0]) 69 | if not os.path.exists(prod_savedir): 70 | os.makedirs(prod_savedir) 71 | 72 | for i in range(int(num_rows)): 73 | 74 | for j in range(int(num_cols)): 75 | 76 | Nij 
= Np - dN - i * res * sample_size # N for extracted label window 77 | Wij = Wp + dW + j * res * sample_size # W for extracted label window 78 | 79 | ip = (Np - Nij) / (res * sample_size) # product row 80 | jp = (Wij - Wp) / (res * sample_size) # product column 81 | 82 | sample = {} 83 | for jp2 in jp2s: 84 | xpmin = int(np.round(mult[jp2[:-4]] * ip * sample_size)) 85 | ypmin = int(np.round(mult[jp2[:-4]] * jp * sample_size)) 86 | sample[jp2[:-4]] = data[jp2[:-4]][xpmin: xpmin + int(mult[jp2[:-4]] * sample_size), 87 | ypmin: ypmin + int(mult[jp2[:-4]] * sample_size)] 88 | 89 | if sample[jp2[:-4]].sum() == 0: 90 | saved_files_info.append( 91 | [None, Nij, Wij, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, "no image"]) 92 | continue 93 | 94 | sample_save_path = "%s/N%d_W%d_D%s.pickle" % (prod_savedir, int(Nij), int(Wij), date) 95 | with open(sample_save_path, 'wb') as handle: 96 | pickle.dump(sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 97 | 98 | saved_files_info.append( 99 | [sample_save_path, Nij, Wij, Np, Wp, i, j, ip, jp, sample_size, sample_size, date, imdir, "ok"]) 100 | 101 | df = pd.DataFrame(data=saved_files_info, 102 | columns=['sample_path', 'Nij', 'Wij', 'Np', 'Wp', 'il', 'jl', 'ip', 'jp', 103 | 'height', 'width', 'Date', 'S2_prod_imdir', "comment"]) 104 | # print('process finished') 105 | return df 106 | 107 | 108 | def main(): 109 | 110 | # sentinel products 111 | imdirs = glob("%s/*.SAFE/GRANULE/**/IMG_DATA" % products_dir) 112 | prod_df = get_S2prod_info(imdirs) 113 | prod_df['Year'] = prod_df['Time'].apply(lambda s: s[:4]) 114 | years = prod_df['Year'].drop_duplicates().tolist() 115 | 116 | out = [] 117 | for year in years: 118 | 119 | # sentinel products 120 | products = prod_df[prod_df['Year'] == year] 121 | imdirs = products['path'].tolist() 122 | 123 | df_year = run_pool(imdirs, extract_images, num_processes) 124 | # print('process finished 2') 125 | 126 | out.append(pd.concat(df_year)) 127 | # print('process finished 3') 128 | 129 | # print('pool finished') 130 | df = pd.concat(out).reset_index(drop=True) 131 | df.to_csv(os.path.join(savedir, "extracted_windows_data_info.csv"), index=False) 132 | 133 | 134 | if __name__ == "__main__": 135 | 136 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 137 | parser.add_argument('--products_dir', help='directory containing sentinel products') 138 | parser.add_argument('--savedir', help='save directory to extract ground truths in raster mode') 139 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 140 | parser.add_argument('--res', default=10, help='pixel size in meters') 141 | parser.add_argument('--anchor', default=None, help='anchor point for grid (N, W, crs)') 142 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 143 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 144 | # --------------------------------------------------------------------------------------------- 145 | 146 | args = parser.parse_args() 147 | 148 | products_dir = args.products_dir 149 | 150 | bands = args.bands 151 | if bands == 'None': 152 | bands = list(mult.keys()) 153 | else: 154 | bands = bands.split(',') 155 | 156 | savedir = args.savedir 157 | if not os.path.exists(savedir): 158 | os.makedirs(savedir) 159 | 160 | res = int(args.res) 161 | 162 | sample_size = int(args.sample_size) 163 | 164 | num_processes = int(args.num_processes) 165 | 166 | anchor = args.anchor 167 | if anchor == 
'None': 168 | anchor = None 169 | else: 170 | anchor = [int(i) for i in anchor.split(",")] 171 | 172 | main() 173 | -------------------------------------------------------------------------------- /dataset/unlabelled/make_image_timeseries.py: -------------------------------------------------------------------------------- 1 | """ 2 | For a set of extracted image crops, make a timeseries for all locations 3 | """ 4 | import argparse 5 | import pandas as pd 6 | import numpy as np 7 | import os 8 | import shutil 9 | import pickle 10 | from multiprocessing import Pool 11 | if __name__ == "__main__" and __package__ is None: 12 | from sys import path 13 | from os.path import dirname as dir 14 | path.insert(0, dir(dir(path[0]))) 15 | __package__ = "examples" 16 | from utils.date_utils import get_doy 17 | from utils.multiprocessing_utils import split_num_segments 18 | 19 | 20 | mult = {'B01': 1/6., 'B02': 1., 'B03': 1., 'B04': 1., 'B05': 1./2., 'B06': 1./2., 'B07': 1./2., 'B08': 1., 'B8A': 1./2, 21 | 'B09': 1./6., 'B10': 1./6., 'B11': 1./2., 'B12': 1./2.} 22 | 23 | 24 | def make_image_timeseries(inputs): 25 | rank, yearlocs, yearloc_groups, iminfo, year_savedir = inputs#[0] 26 | 27 | refband = bands[0] 28 | 29 | saved_files_info = [] 30 | for ii, yearloc in enumerate(yearlocs): 31 | # ii, yearloc = 0, yearlocs[0] 32 | if ii % 1e3 == 0: 33 | print("process %d, location %d of %d" % (rank, ii+1, len(yearlocs))) 34 | 35 | idx = yearloc_groups[yearloc] 36 | data = iminfo.iloc[idx, :].sort_values(by='DOY').copy() 37 | data = data.drop_duplicates(subset=['DOY'], keep='first') # some products downloaded twice 38 | 39 | Y = data['Year'].iloc[0] 40 | N = data['Nij'].iloc[0] 41 | W = data['Wij'].iloc[0] 42 | il = data['il'].iloc[0] 43 | jl = data['jl'].iloc[0] 44 | 45 | assert all(data['Year'] == Y) 46 | assert all(data['Nij'] == N) 47 | assert all(data['Wij'] == W) 48 | assert all(data['il'] == il) 49 | assert all(data['jl'] == jl) 50 | 51 | timeseries_sample = {band: [] for band in bands} 52 | timeseries_sample['doy'] = [] 53 | # timeseries_sample = {'B01': [], 'B02': [], 'B03': [], 'B04': [], 'B05': [], 'B06': [], 'B07': [], 54 | # 'B08': [], 'B8A': [], 'B09': [], 'B10': [], 'B11': [], 'B12': [], 'doy': []} 55 | for sample_info in data[['sample_path', 'DOY']].values: 56 | 57 | impath, doy = sample_info 58 | 59 | with open(impath, 'rb') as handle: 60 | sample = pickle.load(handle, encoding='latin1') 61 | 62 | # for key in ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12']: 63 | for key in bands: 64 | timeseries_sample[key].append(sample[key]) 65 | timeseries_sample['doy'].append(np.array(doy)) 66 | 67 | # for key in ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B10', 'B11', 'B12', 'doy']: 68 | for key in bands: 69 | timeseries_sample[key] = np.stack(timeseries_sample[key]) 70 | timeseries_sample['doy'] = np.stack(timeseries_sample['doy']) 71 | timeseries_sample['year'] = np.array(Y).astype(np.int32) 72 | 73 | timesteps = timeseries_sample[refband].shape[0] 74 | 75 | savename = os.path.join(year_savedir, "%d_%d_%s.pickle" % (int(N), int(W), Y)) 76 | with open(savename, 'wb') as handle: 77 | pickle.dump(timeseries_sample, handle, protocol=pickle.HIGHEST_PROTOCOL) 78 | 79 | saved_files_info.append([savename, Y, N, W, sample_size, sample_size, timesteps, il, jl, "completed"]) 80 | 81 | saved_files_info = pd.DataFrame(data=saved_files_info, columns=['sample_path', 'Year', 'N', 'W', 'dy', 'dx', 'dt', 82 | 'win_i', 'win_j', 'status']) 83 | return 
saved_files_info 84 | 85 | 86 | def main(): 87 | 88 | global yearloc_groups 89 | global iminfo 90 | global year_savedir 91 | 92 | iminfo = pd.read_csv(os.path.join(windows_dir, "extracted_windows_data_info.csv")) 93 | iminfo = iminfo[~pd.isnull(iminfo['sample_path'])].reset_index(drop=True) 94 | iminfo['DOY'] = iminfo['Date'].apply(lambda s: get_doy(str(s))) 95 | iminfo['Year'] = iminfo['Date'].apply(lambda s: str(s)[:4]) 96 | years = iminfo['Year'].drop_duplicates().tolist() 97 | print("found windows for years %s" % ", ".join(years)) 98 | 99 | pool = Pool(num_processes) 100 | 101 | saved_files_info = [] 102 | 103 | for year in set(years): 104 | 105 | year_savedir = os.path.join(savedir, year) 106 | if not os.path.isdir(year_savedir): 107 | os.makedirs(year_savedir) 108 | 109 | yearloc_groups = iminfo[iminfo['Year'] == year].copy().groupby(['Nij', 'Wij'], as_index=False).groups 110 | yearlocs = list(yearloc_groups.keys()) 111 | 112 | inputs = [[i, yearlocs_, yearloc_groups, iminfo, year_savedir] 113 | for i, yearlocs_ in enumerate(split_num_segments(yearlocs, num_processes))] 114 | 115 | df = pool.map(make_image_timeseries, inputs) 116 | df = pd.concat(df) 117 | 118 | saved_files_info.append(df) 119 | 120 | df = pd.concat(saved_files_info).reset_index(drop=True) 121 | df.to_csv(os.path.join(savedir, "saved_timeseries_data_info.csv"), index=False) 122 | 123 | paths = df['sample_path'].apply(lambda s: s[len(savedir)+1:]) 124 | paths.to_csv(os.path.join(savedir, "data_paths.csv"), header=None, index=False) 125 | 126 | # delete windows dir 127 | shutil.rmtree(windows_dir) 128 | 129 | 130 | 131 | if __name__ == "__main__": 132 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 133 | parser.add_argument('--windows_dir', help='directory containing sentinel products') 134 | parser.add_argument('--savedir', help='save directory to extract ground truths in raster mode') 135 | parser.add_argument('--bands', default=None, help='which satellite image bands to use') 136 | parser.add_argument('--res', default=10, help='pixel size in meters') 137 | parser.add_argument('--sample_size', default=24, help='spatial resolution of dataset samples') 138 | parser.add_argument('--num_processes', default=4, help='number of parallel processes') 139 | # --------------------------------------------------------------------------------------------- 140 | 141 | args = parser.parse_args() 142 | 143 | windows_dir = args.windows_dir 144 | 145 | savedir = args.savedir 146 | if not os.path.exists(savedir): 147 | os.makedirs(savedir) 148 | 149 | res = int(args.res) 150 | 151 | sample_size = int(args.sample_size) 152 | 153 | num_processes = int(args.num_processes) 154 | 155 | bands = args.bands 156 | if bands == 'None': 157 | bands = list(mult.keys()) 158 | else: 159 | bands = bands.split(',') 160 | 161 | main() 162 | -------------------------------------------------------------------------------- /dataset/unlabelled/make_unlabelled_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | anchor='None' 4 | bands='None' 5 | for ARGUMENT in "$@" 6 | do 7 | 8 | KEY=$(echo $ARGUMENT | cut -f1 -d=) 9 | VALUE=$(echo $ARGUMENT | cut -f2 -d=) 10 | 11 | case "$KEY" in 12 | 13 | products_dir) products_dir=${VALUE} ;; 14 | windows_dir) windows_dir=${VALUE} ;; 15 | timeseries_dir) timeseries_dir=${VALUE} ;; 16 | res) res=${VALUE} ;; 17 | sample_size) sample_size=${VALUE} ;; 18 | num_processes) num_processes=${VALUE} ;; 19 | anchor) anchor=${VALUE} 
;; 20 |     bands)              bands=${VALUE} ;; 21 |     *) 22 | esac 23 | 24 | done 25 | 26 | 27 | python dataset/unlabelled/extract_images.py --products_dir $products_dir \ 28 |                                             --bands $bands \ 29 |                                             --savedir $windows_dir \ 30 |                                             --anchor $anchor \ 31 |                                             --res $res \ 32 |                                             --sample_size $sample_size \ 33 |                                             --num_processes $num_processes 34 | 35 | python dataset/unlabelled/make_image_timeseries.py --windows_dir $windows_dir \ 36 |                                                    --savedir $timeseries_dir \ 37 |                                                    --bands $bands \ 38 |                                                    --res $res \ 39 |                                                    --sample_size $sample_size \ 40 |                                                    --num_processes $num_processes 41 | -------------------------------------------------------------------------------- /diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/diagram.png -------------------------------------------------------------------------------- /download/README.md: -------------------------------------------------------------------------------- 1 | # Data download 2 | 3 | ## General Description 4 | We propose to split the task of downloading all relevant products for an AOI during a specific time period into the 5 | following subtasks: 6 | 1) for an AOI find all overlapping Sentinel tiles using **`find_S2_tiles_for_aoi.ipynb`** 7 | 2) make a list of all products to download for each tile and period of interest and save a file with all selected 8 | products to disk using **`find_S2_products_for_tile.ipynb`**. During this step some compromises might be needed to reduce 9 | the total download time. 10 | 3) download all selected products with **`download.sh`** 11 | 12 | Downloading data can take a significant amount of time. 13 | We propose to perform steps 1 and 2 manually using the provided .ipynb files to ensure an optimal selection of products and 14 | automate the final part of downloading a list of pre-selected products. 15 | 16 | ## Authentication 17 | All scripts make use of the [sentinelsat](https://github.com/sentinelsat/sentinelsat) library for querying and 18 | downloading Sentinel products from the ESA [Copernicus Open Access Hub](https://scihub.copernicus.eu/) (COAH). 19 | You will need to [sign up](https://scihub.copernicus.eu/dhus/#/self-registration) to COAH and save the user name and 20 | password in a two-row file **pw.csv** with the following form: 21 | ``` 22 | username 23 | password 24 | ``` 25 | 26 | ### Find Sentinel tiles for AOI 27 | Notebook **`find_S2_tiles_for_aoi.ipynb`** 28 | - AOI is defined as a rectangle. Define the coordinates of the North-West (NW) and South-East (SE) corners of the AOI 29 | as well as the coordinate system used 30 | - output is all tiles that overlap with the defined rectangle and the part of the area of the rectangle covered by each tile. 31 | Note that for very large AOIs all tiles will cover a small portion of the defined rectangle 32 | 33 | ### Find a list of products 34 | Notebook **`find_S2_products_for_tile.ipynb`** 35 | - specify the following parameters: 36 |     - savedir: where to save the products list 37 |     - year: 'yyyy' 38 |     - date_range: minimum and maximum dates for the same year (mindate: 'mmdd', maxdate: 'mmdd') 39 |     - cloudcoverpercentage: minimum and maximum percentage of cloud cover (min %, max %) 40 |     - minprodsize: minimum size of product in Mb 41 |     - numproducts: number of products to select 42 |     - tile: Sentinel tile name e.g. '35PNK' from the previous step 43 |     - platformname: Sentinel mission name i.e. 'Sentinel-2' 44 |     - processinglevel: processing level of products i.e.
'Level-1C' 45 | - the script queries COAH for available products given the parameters set and selects products such that they are 46 | equally spaced in the defined time period. All selected products are saved in a .csv file. 47 | 48 | ### Download data from file 49 | Pass one or more generated .csv files containing selected products to **download.sh** separated by commas: 50 | ``` 51 | sh download/download.sh file1.csv,file2.csv,... 52 | ``` 53 | All products will be downloaded in the parent directory of the first .csv file. 54 | -------------------------------------------------------------------------------- /download/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/download/__init__.py -------------------------------------------------------------------------------- /download/download.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | x=1 3 | while [ $x -le 1000 ] 4 | do 5 | echo "Attempt $x" 6 | python download/sentinelsat_download_tileid.py --products_file $1 7 | x=$(( $x + 1 )) 8 | sleep 1800 9 | done 10 | -------------------------------------------------------------------------------- /download/find_S2_products_for_tile.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 51, 6 | "id": "helpful-notion", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import pandas as pd\n", 12 | "from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt\n", 13 | "import os\n", 14 | "from collections import OrderedDict\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "%matplotlib inline\n", 17 | "if __name__ == \"__main__\" and __package__ is None:\n", 18 | " from sys import path\n", 19 | " from os.path import dirname as dir\n", 20 | "\n", 21 | " path.append(dir(path[0]))\n", 22 | " __package__ = \"examples\"\n", 23 | "from utils.date_utils import get_doy" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "blank-eugene", 29 | "metadata": {}, 30 | "source": [ 31 | "### User input" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 52, 37 | "id": "organic-contribution", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "savedir = ''\n", 42 | "year = '2021'\n", 43 | "date_range = ('0101', '0530') # (mindate: 'mmdd', maxdate: 'mmdd')\n", 44 | "cloudcoverpercentage = (0, 70) # (min %, max %)\n", 45 | "minprodsize = 400 # Mb\n", 46 | "numproducts = 40\n", 47 | "tile = '32UPU'\n", 48 | "platformname = 'Sentinel-2'\n", 49 | "processinglevel = 'Level-1C'" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "id": "creative-destiny", 55 | "metadata": {}, 56 | "source": [ 57 | "### Read user credentials" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 53, 63 | "id": "following-stanford", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "cred = pd.read_csv(\"pw.csv\", header=None)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "id": "altered-antique", 73 | "metadata": {}, 74 | "source": [ 75 | "### Query for Sentinel products" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 54, 81 | "id": "becoming-fifty", 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | 
"querying...\n", 89 | "found 22 products\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "api = SentinelAPI(cred[0][0], cred[0][1], 'https://scihub.copernicus.eu/dhus')\n", 95 | "print(\"querying...\")\n", 96 | "products = api.query(tileid=tile,\n", 97 | " platformname=platformname,\n", 98 | " cloudcoverpercentage=cloudcoverpercentage,\n", 99 | " date=(\"%s%s\" % (year, date_range[0]), \"%s%s\" % (year, date_range[1])),\n", 100 | " processinglevel=processinglevel)\n", 101 | "df = api.to_dataframe(products)\n", 102 | "print(\"found %d products\" % len(products))" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "pressing-commerce", 108 | "metadata": {}, 109 | "source": [ 110 | "### Remove very small size products" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 55, 116 | "id": "nasty-adjustment", 117 | "metadata": { 118 | "pycharm": { 119 | "name": "#%%\n" 120 | } 121 | }, 122 | "outputs": [ 123 | { 124 | "name": "stdout", 125 | "output_type": "stream", 126 | "text": [ 127 | "keeping 22 products with larger than 400Mb\n", 128 | "number of products found (22) is less than initially selected (40)\n", 129 | "changing number of selected products to 22\n", 130 | "you may want to change selection criteria in 'User input' cell to find more products\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "sizes = np.array([float(s.split(\" \")[0]) for s in df['size'].values])\n", 136 | "products2keep = OrderedDict()\n", 137 | "for i, prodkey in enumerate(list(products.keys())):\n", 138 | " if sizes[i] >= minprodsize:\n", 139 | " # print(sizes[i])\n", 140 | " products2keep[prodkey] = products[prodkey]\n", 141 | "df2keep = api.to_dataframe(products2keep).reset_index()\n", 142 | "print(\"keeping %d products with larger than %dMb\" % (len(products2keep), minprodsize))\n", 143 | "\n", 144 | "if len(products2keep) < numproducts:\n", 145 | " print(\"number of products found (%d) is less than initially selected (%d)\" % (len(products2keep), numproducts))\n", 146 | " print(\"changing number of selected products to %d\" % (len(products2keep)))\n", 147 | " print(\"you may want to change selection criteria in 'User input' cell to find more products\")\n", 148 | " numproducts = len(products2keep)" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "atomic-joseph", 154 | "metadata": { 155 | "pycharm": { 156 | "name": "#%% md\n" 157 | } 158 | }, 159 | "source": [ 160 | "### Spread products evenly in time and visualize" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 56, 166 | "id": "turkish-fetish", 167 | "metadata": { 168 | "pycharm": { 169 | "name": "#%%\n" 170 | } 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "ccfactor = 0.0 # cloud cover factor when selecting products\n", 175 | "def distance(doys, target_doy, ccfactor=0):\n", 176 | " \"\"\"\n", 177 | " distance function for selecting products depending on \n", 178 | " proximity to desired date and cloud cover\n", 179 | " \"\"\"\n", 180 | " dist = np.abs(doys['doy'] - target_doy) + ccfactor * doys['cloudcoverpercentage']\n", 181 | " return dist" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 57, 187 | "id": "delayed-benefit", 188 | "metadata": { 189 | "scrolled": true 190 | }, 191 | "outputs": [ 192 | { 193 | "name": "stdout", 194 | "output_type": "stream", 195 | "text": [ 196 | " doy size cloudcoverpercentage\n", 197 | "0 8 509.94 MB 64.8212\n", 198 | "1 13 632.77 MB 25.6059\n", 199 | "2 21 843.46 MB 20.0165\n", 200 | "3 36 790.31 MB 
69.4045\n", 201 | "4 43 699.40 MB 10.1653\n", 202 | "5 48 605.60 MB 34.0831\n", 203 | "6 53 620.62 MB 24.2112\n", 204 | "7 56 789.05 MB 0\n", 205 | "8 58 569.29 MB 53.8954\n", 206 | "9 66 804.37 MB 15.0384\n", 207 | "10 71 804.90 MB 39.1288\n", 208 | "11 83 625.95 MB 0\n", 209 | "12 88 607.89 MB 6.254\n", 210 | "13 91 802.14 MB 23.6216\n", 211 | "14 111 808.88 MB 22.0033\n", 212 | "15 113 611.37 MB 0\n", 213 | "16 116 799.76 MB 25.1599\n", 214 | "17 118 596.73 MB 30.6784\n", 215 | "18 136 800.57 MB 68.9963\n", 216 | "19 143 567.81 MB 57.6438\n", 217 | "20 63 510.67 MB 46.9177\n", 218 | "21 61 804.79 MB 0.0178\n" 219 | ] 220 | }, 221 | { 222 | "data": { 223 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcAAAAEGCAYAAADylEXaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAY70lEQVR4nO3df3RV5Z3v8c9zcvJTQwoSgQRs7ACBQFBBWDUVarV6bccO7UIcrzNiqO3torXYUcZbF5RV2+m9a7TednWodRVHW20ZGcTeem2nWq2/GK/QYFFCJEpvYSpjTFCEADHJyfneP/Y+5CQ55+QHJDnxeb/WOivnPHs/+/meJ4FP9t4nezszEwAAvomMdgEAAIwGAhAA4CUCEADgJQIQAOAlAhAA4KXoYFaeOHGiVVRU9GlvbAy+VlZmbhvIskzS9RvI+MmvE88TEm1tbVJhYc91Bvqe+qs11fiDff/pxklVd8JQxxiO7x/gq507dx4ys9LRrgM9DSoAKyoqVFdX16f9kkuCr88+m7ltIMsySddvIOMnv048T0i07dolnX9+z3UG+p76qzXV+IN9/+nGSVV3wlDHGI7vH+Ar59yB0a4BfXEIFADgJQIQAOAlAhAA4KVBnQMEAIyOnTt3nh2NRu+TNFfsvAxEXFJ9LBb7woIFC5pTrUAAAsAYEI1G75s8efLs0tLSw5FIhIs49yMej7uWlpaqpqam+yT9Vap1+C0CAMaGuaWlpUcJv4GJRCJWWlp6RMEec+p1RrAeAMDQRQi/wQnnK23OEYAAAC8RgACA02rZsmUVDzzwwPjB9mtsbMy79957J4zUeAQgACArvPHGG/mbN28edAAOFQEIAOjX0aNHI5dccsn0ysrKqhkzZszZuHHj+BdeeKFo4cKFlXPmzJl98cUXzzhw4EBu737p1qmvr8+vqamZWVlZWVVVVTV7z549+WvXri2vq6s7c9asWVV33HHH2bFYTF/60pemzp07d/bMmTOr7rrrromSFI/HtWLFinMqKirm1tTUzDx06NCQ/qKBP4MAgDFo0SKd1svR79ihxkzLH3300XGTJ0/ufPbZZ/dJ0jvvvJPzyU9+csavfvWrfWVlZbGNGzeOX7NmTfmWLVv2J/q0t7e71atXn5Nqneuuu+7cNWvWNK1YseK9EydOuK6uLved73zn4N133z3pmWee2SdJ3/3udyeWlJR01dfXv9bW1uYWLlw46zOf+czR7du3F+3bty9/37599W+++WZudXX1nNra2ncG+54JQABAv+bPn9+2du3aaatWrSpfunTpkbPOOiv2xhtvFF566aUzpWCvrLS0tDO5z6uvvpqfap3Dhw9H3n777bwVK1a8J0lFRUUmqc8nXJ966qlxe/fuLXrsscfGS1Jra2tOQ0NDwXPPPVd8zTXXvBuNRlVRUdF50UUXtQ7lPRGAADAG9bfHdrrNmzev/eWXX27YunVryTe+8Y3yJUuWHJ0+fXrbrl279qbrY2Yu1TqHDx8e0Ok3M3N33333fyxbtuxocvvjjz9eMrR30RPnAAEA/dq/f39ucXFx/Mtf/vK7t9xyS1NdXd0Z7777bvSpp546QwoOd9bV1RUk95k3b977qdYZP358fPLkyR0PPfTQhySpra3Ntba2RkpKSrqOHTuWk+h/+eWXH/nRj35U2t7e7qRgj/Lo0aORj3/8462PPPLIhFgspgMHDuS+9NJLxUN5T+wBAgD6tXPnzsLbb799aiQSUTQatXvuuedANBq11atXn9Pa2prT1dXlVq1a9faFF174fqJPQUGBPfzww39Mtc7PfvazP33xi1/88Le//e2y3Nxc27Jlyx8XLVrUlpOTY5WVlVXXXXfdoXXr1jXv378/v7q6eraZuQkTJnT++te//uP111//3tNPPz1u+vTpc8vKytovuOCCY0N5TwQgAKBfy5YtO7ps2bKG3u11dXV9DsVu3bp1f+J5TU1NW6p1qqur21966aXXe7f3btuwYcNBSQd7r/fggw/+x8CrT41DoAAALxGAAAAvEYAAAC8RgAAALxGAAAAvEYAAAC8RgACAYfH8888X1dbWTpOCP3avqamZOWvWrKqNGzcO+tZFw4G/AwQADIslS5acWLJkyQlJevHFF4skae/evX3+ljCdWCymaHT4Yoo9QADAgDQ2NubNmDFjTuL1+vXrJ91yyy1lixYtqly1alV5dXX17IqKirm/+c1vzpSkxx9/vPgTn/jE9IMHD0ZXrlx57u7du4tmzZpVtWfPnvxf/vKXxbNnz66aOXNm1fLlyyva2tqcJJWXl1evWrWqvKqqavb9998/vry8vPorX/lK+axZs6rmzp07e9u2bUUXX3zxjGnTps298847S0/l/bAHCABj0aJFp/V2SNqx45Qurh2Lxdzu3btf27x5c8m3vvWtsiuvvPLkFV3Ky8tj99xzz4HErY5OnDjhLrvsssonn3yycd68ee2f+9znKu66667S9evXN0vSWWedFWtoaHhNku64446p55xzTsfevXsbbrzxxmmf//znK7Zv3763ra0tUl1dPee2225rGWrN7AECAE7Z8uXLD0tSTU3N8TfffDMv07qvvPJKwdSpU9vnzZvXLkm1tbXvbNu27eQFrVesWHE4ef1rrrnmPUmqrq4+MX/+/OPjx4+Pl5WVxfLy8uKHDh3K0RCxBwgAY9Ep7rENRTQatXg8fvL1+++/f3InqqCgwMJ11NXV5U5lnOLi4njy68S2I5GI8vLyTt43MBKJqLOzc8hjsQcIABiQqVOnxt59991oU1NTTltbm3viiSeGdF++88477/2DBw/m1dfX50vSgw8+eNbixYuHdFPbU8EeIABgQPLz8+3WW299a+HChbMnTZrUOX
369Pf779VXUVGR3XvvvfuXL1/+F11dXTrvvPNOrFmzZsjn8oaKAAQADNi6deua161b15xu+ZQpU2IHDx7cLUlXXXVV61VXXdXa+7kkLV26tHXp0qV9/iQi0TfV69WrV78j6Z106w4Wh0ABAF4iAAEAXiIAAWBsiMfj8VP6dKVvwvmKp1tOAALA2FDf0tJSQggOTDwedy0tLSWS6tOtw4dgAGAMiMViX2hqarqvqalprth5GYi4pPpYLPaFdCsQgAAwBixYsKBZ0l+Ndh0fJPwWAQDwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8BIBCADwEgEIAPASAQgA8NLgAjAWk37/e6mlpUdz54kOnWg6qkOvdbd3dkqtrQraevXp7JSOHe7Q4Sf7bktS0DbAcdJtr/f4nSc61NoaLE4sazvSvb3OzuDtHT4c9IkdPqpjhzt6lJCx7l41J8ZoaQmWJW8v09z0NwepxklV94kTPccf7Djp5jrjskzjAEC2MbMBPxY4Z1ZSYlZYaLZpk5mZbbtpky3Wc/YxvWDHVWjbbtpkmzaZRSJmkyNNdlyF1l7Y3WfTJrNJrskW6zl7TyXWmde9LTMLnhcWDmicxOq9t7ftpk09xj+RV2KL9ZxNjjRZbq6Zc919Ets7W00mmZUq6PMxvWCL9ZxdnxvUnLHuXjUnxs/JMVuRu8nao93buy6yyZxLPTf9zUGqcZyzlHWXqskikWD8zrwU2xrCXGdclq5mACapzgbxfy2PkXlEB5mWevHIkeB5ba2OHDHlb1ipVt0tSdqlNrVvqNXXfjxOLn6mJmmPdqlNamsLutfWar2ZJtt7alVce3RE6gja3bhxJ7erjo6TfTKN8/CccfrazdIUO9Bje+0bahXV2ZqkxmD8jja16lVNiterLf5hHdOZmqw9alX85PamaKtaVK0y7dYutem4dgXDd96qa28YJ+f6jmO1tXJm0sqVPWpu31ArpzJJ0g1dtapTx8nt3Ri/VY/obE2yxh5zo9paKcMcKM04EZWpSzkq06s96i5TvY7EK3VDvFY71CF1DGycdHP964uCPgUbavss+7c5ppKbV/atedw4afz4Af94AcBIGlwA9ugZ1bEdDcpRrn6or55sjimqKWrSBEn/qNt6dOmKRDUn3qBV+r6KdbxHe7SpKXiRmxv8RzqAcf68o0nlEel/6rYe24spqkv1jL6m759s+6G+qmMq0s36gRTWltznTv29vqe/O9knMdYxFanMNaXs0xWJKtrQ0KfmmKKaoX2SpE7lqkAdPbbXu7bE+1SGOVA/4yTmOnmc7+nvTo4/0HHSzfXhhqBPaYplx3Y0qCRVzU1NBCCA7DWY3cUFwX5I8CgstEPbGuy4CrvbJDuuQptW0GwT1dxnWbyw0M7L79snXlho1twcPAp7Lss0TuO2ZptW0Hec4yq0SqXuM1Gpa8vUZ2p+6nHihYVmDQ19ah7KONbPHIzUOOnmuqWh2VoaUo9zaFvf2k6OA4BDoFn6GNTKC6TgxFav80UxRaxTOT3OARYWmtUWBMu6Ijk9zgFen9vdJ+U5wMQJtH7GSazee3vbbtrUY/yYC/rUFmyy3FyzvDyzv4323N51bpNJZn+tnu3J5wDT1t2r5sT448YFfbqSx4lssry81HPT3xykGic311LW/dcKarg+d5PFXYptDWGuMy5LVzMAAjBLH4MLwDPOMJs/v89v9h0La+xY5Xxraehub24227EjWNa7T3Oz2dF5NdYxr++2zMyspm+fdOOk217v8Vsamm3Hju6dnx07zI6f37295mazJ54IHh0Lg229/ERzjxIy1t2r5sQYzc3BsuTtZZqb/uYg1Tip6m5o6Dn+YMdJN9cZl2UaB/AYAZidj8GdA3ROKi6WSkt7NOcW5Sq3KFdnzO5uLy0NVyvKlZTbo09pqaTxfdu7N5gbPAYwTrrt9R5/4uxSTezdpyRXKune3hVXhAv/R65UlKsLruh/nHQ1nxw/XJY7vuf20s1Nf3OQapz+6k67rQzL0s11xmWZxgGALMMfwgMAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC8RAACALxEAAIAvEQAAgC85Mxs4Cs71yqpcfjKGTYTJR0a7SIGaSzWLFH3SKPukTXUuj9sZqWnuxicmugg1280swuHpZJh5JyrG2t1j8WaJeoeadQ9ssZq3UiNQ6AAAC8RgAAALw02AH88LFUMv7FY91isWaLukUbdI2us1o0UBvUhGAAAPig4BAoA8BIBCADw0oAC0Dl3pXOu0Tm3zzn39eEu6lQ45/Y753Y753Y55+rCtgnOud86594Iv47Pgjrvd841O+fqk9pS1ukCPwjn/1Xn3Pwsq/ubzrmD4Zzvcs59OmnZ7WHdjc65/zJKNU9zzj3jnGtwzu1xzt0ctmf1fGeoO9vnu8A5t8M590pY9x1h+7nOue1hfZudc3lhe374el+4vCLL6v6Jc+5PSfN9ftieFT8nOAVmlvEhKUfSHyV9RFKepFckVfXXb7QekvZLmtir7U5JXw+ff13SP2ZBn
UskzZdU31+dkj4t6d8kOUkflbQ9y+r+pqQ1KdatCn9e8iWdG/4c5YxCzVMkzQ+fF0t6Pawtq+c7Q93ZPt9O0pnh81xJ28N5/FdJ14bt90paFT7/sqR7w+fXSto8SvOdru6fSLo6xfpZ8XPCY+iPgewBLpK0z8z+n5l1SHpY0tIB9MsmSyX9NHz+U0mfHb1SAmb2vKR3ezWnq3OppAct8JKkDznnpoxIob2kqTudpZIeNrN2M/uTpH0Kfp5GlJm9ZWYvh89bJb0mqVxZPt8Z6k4nW+bbzOxY+DI3fJikSyU9Erb3nu/E9+ERSZc559zIVNstQ93pZMXPCYZuIAFYLunPSa/fVOZ/hKPNJD3pnNvpnPtvYdskM3srfN4kadLolNavdHWOhe/BTeFhoPuTDjFnXd3h4bULFPx2P2bmu1fdUpbPt3Muxzm3S1KzpN8q2Bt9z8xiKWo7WXe4/Iiks0a04FDvus0sMd/fCef7e865/LAta+YbQ/NB/BDMxWY2X9KnJH3FObckeaGZmTL/VpcVxkqdoR9J+gtJ50t6S9Ldo1pNGs65MyVtlfQ1MzuavCyb5ztF3Vk/32bWZWbnS5qqYC901uhWNDC963bOzZV0u4L6F0qaIOm/j16FOJ0GEoAHJU1Lej01bMtKZnYw/Nos6RcK/vG9nTg0EX5tHr0KM0pXZ1Z/D8zs7fA/jrikjeo+7JY1dTvnchWEyM/N7NGwOevnO1XdY2G+E8zsPUnPSLpIwSHCxPWHk2s7WXe4vETSOyNbaU9JdV8ZHoo2M2uX9ICyeL4xOAMJwN9LmhF+gitPwUnqx4a3rKFxzp3hnCtOPJd0haR6BfXeEK52g6Rfjk6F/UpX52OSVoSfOvuopCNJh+5GXa/zHp9TMOdSUPe14af8zpU0Q9KOUajPSfpnSa+Z2f9KWpTV852u7jEw36XOuQ+FzwslXa7g/OUzkq4OV+s934nvw9WSfhfukY+oNHXvTfolySk4b5k836P+c4JTMJBPyij4tNPrCo7jrx3tT+5kqPMjCj4F94qkPYlaFZxPeFrSG5KekjQhC2r9FwWHrzoVnDu4MV2dCj5l9sNw/ndLujDL6n4orOtVBf8pTElaf21Yd6OkT41SzRcrOLz5qqRd4ePT2T7fGerO9vmeJ+kPYX31ktaH7R9REMj7JG2RlB+2F4Sv94XLP5Jldf8unO96ST9T9ydFs+LnhMfQH1wKDQDgpQ/ih2AAAOgXAQgA8BIBCADwEgEIAPASAQgA8BIBiBER3sFgzTBuvzS8k8AfnHOLh2scAB8c0f5XAcaEyyTtNrMvjMRgzrmodV/XEsAYxB4gho1zbq1z7nXn3DZJlUntX3TO/T6879pW51yRc644vOdabrjOuOTXSX0rnHO/Cy9M/LRz7pzw/mx3Sloa3q+tMGn9S51z/zvp9eXOuV+Ez69wzv1f59zLzrkt4TU35ZxbH9ZX75z7ceLOBM65Z51z33fBfSZvHq55AzAyCEAMC+fcAgWXzTtfwdVLFiYtftTMFprZeQoukXWjBbf7eVbSX4brXBuu19lr0/8k6admNk/SzyX9wMx2SVqv4D5y55tZW9L6z0ia5ZwrDV+vlHS/c26ipHWSPmnBxdPrJN0SrrMhrG+upEJJVyVtL8/MLjSzrLsANYDBIQAxXBZL+oWZnbDgDgbJ14+d65x7wTm3W9LfSJoTtt+nIKAUfn0gxXYvkrQpfP6QgsuFpWXBpY4ekvS34XUeL1JwE9OPKriB7L+Ht7+5QdKHw26fCM8n7lZwD7s5SZvcnGk8AGMH5wAxGn4i6bNm9opzrlbSJZJkZv8eHuK8RMGdzOvTbWCQHpD0fyS9L2mLmcXCw5q/NbP/mryic65A0j0Kruv4Z+fcNxVcqzLh+GmqCcAoYw8Qw+V5SZ91zhWGd+j4TNKyYklvhef3/qZXvwcV7OGl2vuTpBcVHB5V2PeF/goxs/+U9J8KDnkmtvuSpI8556ZLJ+8kMlPdYXcoPCd4de/tAfhgYA8Qw8LMXnbObVZwZ45mBbfVSviGgjubt4Rfi5OW/VzSPyi460QqX5X0gHPu78P+K9Os19vPJZWa2WthfS3h3ue/uO47fK8zs9edcxsVXPm/qVfdAD5AuBsEsopz7mpJS83s+tO83Q2S/mBm/3w6twtg7GIPEFnDOfdPkj6l4FOjp3O7OxWcu7v1dG4XwNjGHiAAwEt8CAYA4CUCEADgJQIQAOAlAhAA4CUCEADgpf8PqtOQteONilYAAAAASUVORK5CYII=\n", 224 | "text/plain": [ 225 | "
" 226 | ] 227 | }, 228 | "metadata": { 229 | "needs_background": "light" 230 | }, 231 | "output_type": "display_data" 232 | } 233 | ], 234 | "source": [ 235 | "start_doy = get_doy(\"%s%s\" % (year, date_range[0]))\n", 236 | "end_doy = get_doy(\"%s%s\" % (year, date_range[1]))\n", 237 | "uniform_doy_list = np.linspace(start_doy, end_doy, numproducts).tolist()\n", 238 | "\n", 239 | "doys = df2keep.copy() # [['datatakesensingstart', 'cloudcoverpercentage', 'size']]\n", 240 | "doys['doy'] = pd.DataFrame(\n", 241 | " doys['datatakesensingstart'].apply(lambda s: get_doy(str(s).split(' ')[0].replace('-', ''))))\n", 242 | "\n", 243 | "idx_list = []\n", 244 | "for doy_ in uniform_doy_list:\n", 245 | " # print(doy_)\n", 246 | " doys['distance'] = distance(doys, doy_, ccfactor)\n", 247 | " idx = doys['distance'].argmin()\n", 248 | " idx_list.append(pd.DataFrame(doys.iloc[idx, :]).T)\n", 249 | " doys = doys.drop(index=idx).reset_index(drop=True)\n", 250 | "prod2keep = pd.concat(idx_list).reset_index(drop=True) # df2keep.iloc[idx_list].reset_index(drop=True)\n", 251 | "prod2keep['doy'] = pd.DataFrame(\n", 252 | " prod2keep['datatakesensingstart'].apply(lambda s: get_doy(str(s).split(' ')[0].replace('-', ''))))\n", 253 | "\n", 254 | "# visualize\n", 255 | "plt.scatter(prod2keep['doy'].values, np.zeros(prod2keep.shape[0]), s=20, c='b')\n", 256 | "plt.scatter(uniform_doy_list, np.zeros(len(uniform_doy_list)), s=20, c='r')\n", 257 | "plt.vlines(prod2keep['doy'].values, 0, 1, color='b', label='selected')\n", 258 | "plt.vlines(uniform_doy_list, 0, -1, color='r', label='uniform')\n", 259 | "plt.hlines(0, 1, 365, color='k', alpha=0.3)\n", 260 | "plt.ylim(-1, 1)\n", 261 | "plt.xlim(0, 365)\n", 262 | "plt.yticks([], [])\n", 263 | "plt.xlabel('day of year')\n", 264 | "plt.legend(bbox_to_anchor=(1.3, 1))\n", 265 | "\n", 266 | "# examine\n", 267 | "print(prod2keep[['doy', 'size', 'cloudcoverpercentage']]) # .columns)" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "id": "auburn-nomination", 273 | "metadata": { 274 | "pycharm": { 275 | "name": "#%% md\n" 276 | } 277 | }, 278 | "source": [ 279 | "### Save selected products to disk" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 58, 285 | "id": "constitutional-newman", 286 | "metadata": { 287 | "pycharm": { 288 | "name": "#%%\n" 289 | } 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stdout", 294 | "output_type": "stream", 295 | "text": [ 296 | "making new directory /media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/Germany/S2_products/T32UPV/2021\n", 297 | "saving products info to /media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/Germany/S2_products/T32UPV/2021/Sentinel-2_Level-1C_tile32UPU_minsize400Mb_22dates_year2021_from0101to0530_mincc0maxcc70.csv\n" 298 | ] 299 | } 300 | ], 301 | "source": [ 302 | "savename = '%s/%s_%s_tile%s_minsize%dMb_%ddates_year%s_from%sto%s_mincc%dmaxcc%d.csv' % \\\n", 303 | " (savedir, platformname, processinglevel, tile, minprodsize, numproducts, year, \n", 304 | " date_range[0], date_range[1], cloudcoverpercentage[0], cloudcoverpercentage[1])\n", 305 | "\n", 306 | "if not os.path.exists(os.path.dirname(savename)):\n", 307 | " print(\"making new directory %s\" % os.path.dirname(savename))\n", 308 | " os.makedirs(os.path.dirname(savename))\n", 309 | "\n", 310 | "print(\"saving products info to %s\" % savename)\n", 311 | "prod2keep.to_csv(savename, index=False)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | 
"execution_count": null, 317 | "id": "amino-blade", 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [] 321 | } 322 | ], 323 | "metadata": { 324 | "kernelspec": { 325 | "display_name": "Python 3", 326 | "language": "python", 327 | "name": "python3" 328 | }, 329 | "language_info": { 330 | "codemirror_mode": { 331 | "name": "ipython", 332 | "version": 3 333 | }, 334 | "file_extension": ".py", 335 | "mimetype": "text/x-python", 336 | "name": "python", 337 | "nbconvert_exporter": "python", 338 | "pygments_lexer": "ipython3", 339 | "version": "3.8.2" 340 | } 341 | }, 342 | "nbformat": 4, 343 | "nbformat_minor": 5 344 | } -------------------------------------------------------------------------------- /download/find_S2_tiles_for_aoi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "declared-banana", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "from sentinelsat import SentinelAPI, geojson_to_wkt\n", 12 | "import shapely.wkt\n", 13 | "from shapely.geometry import Polygon\n", 14 | "if __name__ == \"__main__\" and __package__ is None:\n", 15 | " from sys import path\n", 16 | " from os.path import dirname as dir\n", 17 | " path.append(dir(path[0]))\n", 18 | " __package__ = \"examples\"\n", 19 | "from utils.geospatial_data_utils import GeoTransform, make_rect_poly" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "urban-adapter", 25 | "metadata": {}, 26 | "source": [ 27 | "### User input" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "id": "adequate-mandate", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "NW = (9.5, 26.5) # north-west coordinates of AOI box\n", 38 | "SE = (7, 28.5) # south east coordinates of AOI box\n", 39 | "CRS = '4326' # '2154' # coordinate reference system for AOI" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "id": "accredited-tutorial", 46 | "metadata": { 47 | "scrolled": true 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stderr", 52 | "output_type": "stream", 53 | "text": [ 54 | "/home/michaeltrs/Programming/miniconda3/envs/satdata/lib/python3.8/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", 55 | " return _prepare_from_string(\" \".join(pjargs))\n", 56 | "/home/michaeltrs/Programming/miniconda3/envs/satdata/lib/python3.8/site-packages/pyproj/crs/crs.py:294: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", 57 | " projstring = _prepare_from_string(\" \".join((projstring, projkwargs)))\n", 58 | "/home/michaeltrs/Programming/miniconda3/envs/satdata/lib/python3.8/site-packages/pyproj/crs/crs.py:53: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. 
When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", 59 | " return _prepare_from_string(\" \".join(pjargs))\n", 60 | "/home/michaeltrs/Programming/miniconda3/envs/satdata/lib/python3.8/site-packages/pyproj/crs/crs.py:294: FutureWarning: '+init=:' syntax is deprecated. ':' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6\n", 61 | " projstring = _prepare_from_string(\" \".join((projstring, projkwargs)))\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "transform = GeoTransform(CRS, '4326', loc2loc=False)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "rapid-rochester", 72 | "metadata": {}, 73 | "source": [ 74 | "### Make rectangular polygon for AOI extent" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 4, 80 | "id": "developed-volume", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "AOI area: 5.0\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "NW_glob = transform(NW[1], NW[0])\n", 93 | "SE_glob = transform(SE[1], SE[0])\n", 94 | "AOI = Polygon([[NW_glob[1], NW_glob[0]],\n", 95 | " [NW_glob[1], SE_glob[0]],\n", 96 | " [SE_glob[1], SE_glob[0]],\n", 97 | " [SE_glob[1], NW_glob[0]],\n", 98 | " [NW_glob[1], NW_glob[0]]])\n", 99 | "print('AOI area: ', AOI.area)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "searching-idaho", 105 | "metadata": {}, 106 | "source": [ 107 | "### Query for products" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "approved-anxiety", 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "querying...\n" 121 | ] 122 | }, 123 | { 124 | "name": "stderr", 125 | "output_type": "stream", 126 | "text": [ 127 | "Querying products: 100%|██████████| 121/121 [00:02<00:00, 9.53 products/s]\n" 128 | ] 129 | }, 130 | { 131 | "name": "stdout", 132 | "output_type": "stream", 133 | "text": [ 134 | "found tiles overlapping with AOI: 35PPL, 35NPH, 35NPJ, 35PPK, 35PNK, 35NNH, 35NNJ, 35PNL, 35PML, 35PMK, 35NMJ, 35NMH\n", 135 | "finding overlap with AOI:\n", 136 | "----------------------------------------------\n", 137 | "tile id | AOI/Tile overlap | Tile/AOI overlap\n", 138 | "----------------------------------------------\n", 139 | "35PPL | 0.0391 | 0.2733\n", 140 | "35NPH | 0.0281 | 0.1422\n", 141 | "35NPJ | 0.1178 | 0.5962\n", 142 | "35PPK | 0.1007 | 0.5554\n", 143 | "35PNK | 0.0060 | 1.0000\n", 144 | "35NNH | 0.0150 | 0.1894\n", 145 | "35NNJ | 0.0403 | 1.0000\n", 146 | "35PNL | 0.1084 | 0.5458\n", 147 | "35PML | 0.0638 | 0.3213\n", 148 | "35PMK | 0.1169 | 0.5900\n", 149 | "35NMJ | 0.1169 | 0.5911\n", 150 | "35NMH | 0.0280 | 0.1416\n" 151 | ] 152 | } 153 | ], 154 | "source": [ 155 | "poly = make_rect_poly(NW_glob, SE_glob)\n", 156 | "footprint = geojson_to_wkt(poly)\n", 157 | "cred = pd.read_csv(\"pw.csv\", header=None)\n", 158 | "api = SentinelAPI(cred[0][0], cred[0][1], 'https://scihub.copernicus.eu/dhus')\n", 159 | "print(\"querying...\")\n", 160 | "products = api.query(footprint,\n", 161 | " platformname='Sentinel-2',\n", 162 | " cloudcoverpercentage=(0,100),\n", 163 | " area_relation='Intersects',\n", 164 | " date=('20200101', '20200201'),\n", 165 | " processinglevel='Level-1C')\n", 166 | "\n", 167 | "# find unique 
tiles\n", 168 | "tiles = {}\n", 169 | "tileids = []\n", 170 | "for prod in products:\n", 171 | " if products[prod]['tileid'] not in tileids:\n", 172 | " tileids.append(products[prod]['tileid'])\n", 173 | " tiles[prod] = products[prod]\n", 174 | " # print(products[prod].keys())\n", 175 | " # break\n", 176 | "print(\"found tiles overlapping with AOI: %s\" % \", \".join(tileids))\n", 177 | "\n", 178 | "# find overlap with AOI for each tile\n", 179 | "print(\"finding overlap with AOI:\")\n", 180 | "print(\"----------------------------------------------\")\n", 181 | "print(\"tile id | AOI/Tile overlap | Tile/AOI overlap\")\n", 182 | "print(\"----------------------------------------------\")\n", 183 | "for i, pr in enumerate(list(tiles.keys())):\n", 184 | " meta = api.get_product_odata(pr)\n", 185 | " tile = shapely.wkt.loads(meta['footprint'])\n", 186 | " aoi_cover_ratio = AOI.intersection(tile).area/AOI.area\n", 187 | " tile_cover_ratio = AOI.intersection(tile).area/tile.area\n", 188 | " print(\"%s | %.4f | %.4f\" \n", 189 | " % (tileids[i], aoi_cover_ratio, tile_cover_ratio))" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "bridal-active", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [] 199 | } 200 | ], 201 | "metadata": { 202 | "kernelspec": { 203 | "display_name": "Python 3", 204 | "language": "python", 205 | "name": "python3" 206 | }, 207 | "language_info": { 208 | "codemirror_mode": { 209 | "name": "ipython", 210 | "version": 3 211 | }, 212 | "file_extension": ".py", 213 | "mimetype": "text/x-python", 214 | "name": "python", 215 | "nbconvert_exporter": "python", 216 | "pygments_lexer": "ipython3", 217 | "version": "3.8.2" 218 | } 219 | }, 220 | "nbformat": 4, 221 | "nbformat_minor": 5 222 | } 223 | -------------------------------------------------------------------------------- /download/get_downloaded_products_info.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "generic-astronomy", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import os\n", 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "from glob import glob\n", 14 | "from datetime import datetime\n", 15 | "import matplotlib.pyplot as plt\n", 16 | "%matplotlib inline\n", 17 | "if __name__ == \"__main__\" and __package__ is None:\n", 18 | " from sys import path\n", 19 | " from os.path import dirname as dir\n", 20 | " path.append(dir(path[0]))\n", 21 | " __package__ = \"examples\"\n", 22 | "from utils.sentinel_products_utils import get_S2prod_info\n", 23 | "from utils.date_utils import get_doy" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "id": "activated-audit", 29 | "metadata": {}, 30 | "source": [ 31 | "### User input" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 16, 37 | "id": "personal-metallic", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "products_dir = \"/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2017/cloud_0_30\"\n", 42 | "# \"/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2017/cloud_0_70\"\n", 43 | "ext = \".zip\" # \".SAFE\" #" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "flexible-paper", 49 | "metadata": {}, 50 | "source": [ 51 | "### Find products \n", 52 | "\n", 53 | "If a directory contains yest unzipped products we will parse product 
info from the filename following the [Sentinel product naming convention](https://sentinel.esa.int/web/sentinel/user-guides/sentinel-2-msi/naming-convention).\n", 54 | "\n", 55 | "#### Compact Naming Convention\n", 56 | "\n", 57 | "The compact naming convention is arranged as follows:\n", 58 | "\n", 59 | "MMM_MSIXXX_YYYYMMDDHHMMSS_Nxxyy_ROOO_Txxxxx_.ext\n", 60 | "\n", 61 | "The products contain two dates.\n", 62 | "\n", 63 | "The first date (YYYYMMDDHHMMSS) is the datatake sensing time.\n", 64 | "The second date is the \"\" field, which is 15 characters in length, and is used to distinguish between different end user products from the same datatake. Depending on the instance, the time in this field can be earlier or slightly later than the datatake sensing time.\n", 65 | "\n", 66 | "The other components of the filename are:\n", 67 | "\n", 68 | "- MMM: is the mission ID(S2A/S2B)\n", 69 | "- MSIXXX: MSIL1C denotes the Level-1C product level/ MSIL2A denotes the Level-2A product level\n", 70 | "- YYYYMMDDHHMMSS: the datatake sensing start time\n", 71 | "- Nxxyy: the PDGS Processing Baseline number (e.g. N0204)\n", 72 | "- ROOO: Relative Orbit number (R001 - R143)\n", 73 | "- Txxxxx: Tile Number field\n", 74 | "- ext: file extension either zip or SAFE: Product Format (Standard Archive Format for Europe)\n" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 17, 80 | "id": "fifty-wedding", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "name": "stdout", 85 | "output_type": "stream", 86 | "text": [ 87 | "num data: 23\n", 88 | " filename tile platformname \\\n", 89 | "17 S2A_MSIL1C_20170413T104021_N0204_R008_T31TFM_2... 31TFM Sentinel-2 \n", 90 | "18 S2A_MSIL1C_20170423T104021_N0204_R008_T31TFM_2... 31TFM Sentinel-2 \n", 91 | "9 S2A_MSIL1C_20170510T103031_N0205_R108_T31TFM_2... 31TFM Sentinel-2 \n", 92 | "22 S2A_MSIL1C_20170602T104021_N0205_R008_T31TFM_2... 31TFM Sentinel-2 \n", 93 | "16 S2A_MSIL1C_20170619T103021_N0205_R108_T31TFM_2... 
31TFM Sentinel-2 \n", 94 | "\n", 95 | " processinglevel year date Mb doy \n", 96 | "17 Level-1C 2017 20170413 835.608218 103 \n", 97 | "18 Level-1C 2017 20170423 841.460811 113 \n", 98 | "9 Level-1C 2017 20170510 414.762817 130 \n", 99 | "22 Level-1C 2017 20170602 836.677158 153 \n", 100 | "16 Level-1C 2017 20170619 312.798229 170 \n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "filenames = [os.path.basename(fn) for fn in glob(\"%s/*%s\" % (products_dir, ext))]\n", 106 | "# print(filenames)\n", 107 | "prodinfo = []\n", 108 | "for fn in filenames:\n", 109 | " info = fn.split('_')\n", 110 | " year = info[2][:4]\n", 111 | " date = info[2][:8]\n", 112 | " size = int(os.path.getsize(os.path.join(products_dir, fn)))/1e6\n", 113 | " tile = info[5][1:]\n", 114 | " platformname = \"Sentinel-%s\" % info[0][1]\n", 115 | " processinglevel = \"Level-%s\" % info[1][-2:]\n", 116 | " prodinfo.append([fn, tile, platformname, processinglevel, year, date, size])\n", 117 | "prodinfo = pd.DataFrame(\n", 118 | " prodinfo, columns=['filename', 'tile', 'platformname', 'processinglevel', 'year', 'date', 'Mb'])\n", 119 | "prodinfo['doy'] = prodinfo['date'].apply(lambda s: get_doy(s))\n", 120 | "prodinfo = prodinfo.sort_values('doy')\n", 121 | "print(\"num data: \", prodinfo.shape[0])\n", 122 | "print(prodinfo.head(5))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "id": "medium-owner", 128 | "metadata": {}, 129 | "source": [ 130 | "### Visualize downloaded product dates" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 18, 136 | "id": "announced-mongolia", 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "" 143 | ] 144 | }, 145 | "execution_count": 18, 146 | "metadata": {}, 147 | "output_type": "execute_result" 148 | }, 149 | { 150 | "data": { 151 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdAAAAEWCAYAAADW7MapAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAcZ0lEQVR4nO3de3xV5Z3v8e8vIUCQm0C8oGBUhBIQocG+RKFSqk7B+0AdHSui2PF4plJPtdqedjxOX3WOzJnaqW3VjiIqXrCC1to6tiBYsd4KFQUvXKygIJGLgAkIEvI7f6xnwzYmOztPsrN39PN+vfYre6/1PGv91rM3+WZd2MvcXQAAoHmK8l0AAADtEQEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQNEsZrbGzE7Jcw0/NrPNZlbVxuudYmbPZph/rpm9a2Y1ZjaiLWsD0PYI0Dwzs05mNsPM1ppZtZktNbPxafMrzGyxmW0Nj/lmVpHPmluqqSBqom9/SVdLqnD3Q1q3shb7D0nfcveu7v5ySxdmZv9hZqvC5+JNM5tcb/5wM1tiZjvDz+Fp875iZgvNbLuZranXr38I+fSHm9nVLa0Z+DwhQPOvg6R3JZ0sqYekH0r6tZmVh/nvSZokqZekPpJ+K2l2a6zYzDq0xnLaWH9JW9x9Y74LacARkl6L6WhmxQ1M3iHpTCWfi4sl/czMTgztO0p6TNJ9kg6UdI+kx8L0VN+7JH23/kLd/Z0Q8l3dvaukYyXVSZobUzvweUWA5pm773D3G9x9jbvXufvvJL0tqTLM3xbmuSSTtFfSgMaWZ2ZPm9n/NbOXzOxDM3vMzHqFeeVhT2Oqmb0jaYGZFZnZD8Me8EYzu9fMeqQt76Iwb4uZ/aDeuu42sx+nvR5rZuvSXvczs0fMbFPo/wszGyzpdkmjwp7PttB2gpm9Hva21pvZNQ1s2ymS5knqG/reHaafZWavmdm2sP2D0/q4mQ1Ie72v5lS9ZnZ12PYNZnZJWtveZvbbMI4vSTq6kTHvZGY1koolvWJmb4Xpg0M920J9Z9Wr4zYze8LMdkj6Sv3luvv/cfc3w+fiRUmLJI0Ks8cq+ePrP919t7vfouTzMS70fcndZ0n6W0M11zNZ0jPuviaLtgACArTAmNnBkgaq3p5MCJpdkn4u6d+aWMxkSZdKOlRSraRb6s0/WdJgSX8naUp4fEXSUZK6SvpFWGeFpNskXSSpr6Tekg7PcjuKJf1O0lpJ5ZIOkzTb3d+Q9D8kPR/2gHqGLjMkXe7u3SQNlbSg/jLdfb6k8ZLeC32nmNlASQ9KukpSmaQnJD2etifWlEOU7OEdJmmqpF+a2YFh3i+VjPmhSsbz0oYWEAKsa3h5nLsfbWYlkh6X9EdJB0m6UtL9ZjYores/SrpRUjdJGQ9pm1mppOO1/3MxRNKr/snv4nw1TM+amZmSz8s9zekHgAAtKOGX7v2S7nH3N9PnhaDpIelbkpo6vzbL3Ze7+w5J/yLpvHqHCG8Ie74fSbpQ0s3u/jd3r5H0fUnnh8O7kyT9zt2fcffdYVl1WW7Ol5SE7nfDuna5e6aQ2COpwsy6u/tWd/9rluv5B0m/d/d57r5HyXnIUkknZtl/j6Qfufsed39CUo2kQWG8Jkq6PtS/XM0LmROU/DFyk7t/7O4LlPxBcUFam8fc/c9hD3NXE8u7XdIrkv4QXneVtL1em+1Kwrg5Rks6WNKcZvYDPvcI0AJhZkWSZkn6WElIfkoIxNsl3WtmB2VY3Ltpz9dKKlFy/rSh+X1Dm/T2HZT8Uu2b3jasf0tT2xL0k7TW3WuzbD9R0gRJa83sT2Y2qqkOwSfqd/c6JTUflmX/LfVq3KkknMq0//x0Svo4ZVPXu6Ge9P7pdb2rLJjZ/1OyV35e2h5njaTu9Zp2l1TdjBql5Nzq3PDHE4BmIEALQDiMNkNJaE0Me1KNKZLURZkDol/a8/5K9rI2p01LP+z3npKLX9Lb10p6X9KG9GWZWRclh3FTdoRaUtKvin1XUv9GLlT61C2A3P0v7n62ksOdv5H06wb6NeQT9Yex7CdpfZi0M0ONmWxSMg71xzJb70nqF/4wSu+/Pu11k7dCMrN/VXLY+jR3/zBt1muShoXtTRmmZlzEFA4Lf10cvgWiEKCF4TYl5yTPDIdV9zGzU81shJkVm1l3STdL2irpjQzL+4Yl//2li6QfSZrj7nsbafugpP9lZkeaWVcl51cfCntlcySdYWajwznFH+mTn5mlkiaYWS8zO0TJeciUl5QE8E1mdoCZdTazk8K89yUdnjpPaWYdzexCM+sR/nj4UNkfKv61pNPN7KvhEPjVknZLei6txn8M4/c1Jed/mxTG6xFJN5hZl3A++OIsa5KkF5WE97VmVmJmY5VcUZv1FdRm9n0l50lPcff6e/5PK7mgbFq4iCl11GJB6FtkZp2VHH2wMP71zwufq+SztLAZ2wUgIEDzzMyOkHS5pOGSqmz//8u7MDTpqSTktkt6S8mVoF9r4pzZLEl3S6qS1FnStAxt7wrtn1Fy9e8uJRe8yN1fk/TPkh5QEoZbJa1L6ztLyXm5NUoulnkoNSME0JlKrhh+J/T7hzB7gZI9pSozS+0ZXyRpjZl9qOQio9T2Z+TuKyR9Q8nFVZvDOs90949Dk2+HadvCMn+TzXKDbyk5nFulZDxnZtsxrP9MJXuPmyXdKmly/XPbTfg3JXutq9M+F/87bfnnKLkAaJuSC5zOSdvuL0v6SMlFVf3D8z/WW/7FSs6Xc1NgIILxb+ezxcyelnSfu9+Z71oA4LOMPVAAACIQoAAAROAQLgAAEdgDBQAgQrO+TLxPnz5eXl6eo1LQkBUrkp+DBrWsTUvX0dbLTPVPyWY56evMtP7W2t5s19dYv5T0Prl8v1varzEtGceYmtrivW1tS5Ys2ezuZfmuA62rWQFaXl6uxYsX56oWNGDs2OTn00+3rE1L19HWy0z1T8lmOenrzLT+1trebNfXWL+U9D65fL9b2q8xLRnHmJra4r1tbWbWnG+xQjvBIVwAACIQoAAARCBAAQCI0KxzoACAtrVkyZKDOnTocKeSO/Kw09O26iQtr62tvayysnJj/ZkEKAAUsA4dOtx5yCGHDC4rK9taVFTEf9xvQ3V1dbZp06aKqqqqOyWdVX8+f80AQGEbWlZW9iHh2faKioq8rKxsu5K9/0/Pb+N6AADNU0R45k8Y+wazkgAFACACAQoAyNp3vvOdvtdff/3Brb3cFStWdDzmmGOGtMayJk6cWD5z5swDc71uAhQAgAgEKAAgo+uuu+6Q8vLyoZWVlYNWrVrVSZKee+650uOOO+4LAwcOrDj11FOP3rRpU/H69es7DBkyZLAkPf/886VmVrlq1aqOktSvX7+h1dXVRRMnTiyfMmVKvxEjRnzh8MMPP7ahPcWdO3fapEmTygcOHFgxePDgiscff7yblOwpVlZWDqqoqBhcUVExeN68eQdIUl1dnSZPnty/vLx86Iknnjhw8+bN+/6HyaJFi7ocf/zxg4YMGTJ49OjRx6xdu7YkNX3QoEEVgw
YNqrj55psPihkX/hsLALQjX/qSWvWr8l96SRlvF7Bo0aIujz76aK9ly5a9vmfPHg0fPrxixIgRO6dMmXLkT3/603dOP/30mquuuqrvdddd1/euu+56d/fu3UUffPBB0cKFC7sOGTJk5/z587u6e03v3r1ru3XrVidJ77//fsnixYvfXLp0aedzzz13wCWXXLI1fZ3Tp08/yMy0cuXK119++eXOEyZMOOatt95a3rdv39pFixat7NKliy9btqzTBRdccNTy5cvfmDVrVs/Vq1d3Wr169fJ169aVHHvssUOmTJmyZffu3TZt2rT+v//971f37du39o477jjwmmuuOezhhx9eM3Xq1PKf/exn74wfP77m8ssvPzxm7AhQAECjFi5c2HXChAnbUuF32mmnbduxY0dRdXV18emnn14jSd/85je3fP3rXz9KkkaOHFkzf/78rs8++2y3a6+9dsOTTz7Zw911wgkn1KSWedZZZ20rLi5WZWXlri1btpTUX+dzzz3X9corr9woSSNGjNjVt2/fj5ctW9Z5wIABH0+dOvWI119/vbSoqEhr167tJEl/+tOfup133nkfdOjQQeXl5XtGjRpVLUmvvvpqp1WrVpWOGzduoJTsqZaVle3ZvHlzcXV1dfH48eNrJOnSSy/dsmDBgh7NHRsCFADakab2GPNtzJgx1c8880y3devWdbzwwgu3/eQnPzlEkp9xxhnbU206d+6877/luGf/P3RuvPHGgw866KA9c+fOfbuurk6lpaWVmdq7uw0YMOCjpUuXvpk+ffPmzcXZb1HjOAcKAGjUuHHjap544omeNTU1tnXr1qJ58+b1POCAA+q6d+++98knn+wqSTNmzOg9atSoGkk65ZRTaubOndvryCOP3F1cXKyePXvWLly4sMepp55ak3lN+5100kk19913Xy8p2YvcsGFDx2HDhu3avn178aGHHrqnuLhYt956a++9e/dKkk4++eTqOXPm9KqtrdXatWtLXnjhhW6SNGzYsF0ffPBBh/nz5x8gSbt377bFixd37tOnz95u3brt/cMf/tBVku6+++5eMWPDHigAoFGjR4/eee65534wdOjQIb17994zbNiwHZI0c+bMt6+44oojpk2bVtS/f//dDz744BpJGjRo0MfubmPGjKmWpFGjRtVs2LChY1lZ2d5s13nttddunDx58hEDBw6sKC4u1q9+9as1paWlftVVV22cOHHi0bNnz+49bty47aWlpXWSdNFFF2176qmnug8YMGBo3759d48YMaJGSvZ0Z8+e/da0adP6V1dXF+/du9euuOKK90eOHLlrxowZay677LJyM9PYsWM/jBkbAhQAkNH06dOrpk+fXlV/+iuvvPJmQ+2rqqpeTT2/6aabqm666aZ9fefOnbsmve3OnTtflpLgXbVq1WuS1KVLF58zZ84n2knSscceu3vlypWvp17fdttt6yWpqKhI99577zsN1XLiiSd+tHjx4k8d9h4zZszOFStWvJ42aV1D/TPhEC4AABEIUAAAIhCgAFDY6urq6izfRXxehbGva2geAQoAhW35pk2behCibS/cD7SHpOUNzeciIgAoYLW1tZdVVVXdWVVVNVTs9LS1OknLa2trL2toJgEKAAWssrJyo6Sz8l0HPo2/ZgAAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAFACACAQoAQAQCFACACAQoAAARCFAAACIQoAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEQhQAAAiEKAAAEQgQAEAiECAAgAQgQAtcHv2SNXV0qZNLWvT0nW09TJT/XfuzH456evMtP7W2t5s19dYv4a2LZfvd0v7peqt/7Ml4xhTU1u8t0BW3D3rR2VlpaPtPPCAe1GRe3Gxe2lp8jqmTUvXkYu6s+lfVOQuJT+bWk76OktK3M0aXn9rbW+262vOtuXy/W5pv1S9Zp/8mc1705o1ZeqTi89ya5G02Jvxu5ZH+3hY8t5mZ+TIkX7LLbfkLs2xz9at0t//vfTxx/undewoPfKIdOCB2bdp6TpyUXdz+ze1nEx90vtJrbO92a6vOXV27Jj8zMX73Zr9GtPccYypKVMfqfU/y63ppJNOWuLuI/NdB1oXh3ALVFWVVFLyyWkdOiTTm9OmpetorlzU1NRyMvVJ79da25vt+prTr6goeWRaTmz9rdmvMc0dx5iaMvXJxWcZaFJzdlc5hNt2Nm5MDkNJ+x+lpcn05rRp6TpyUXdz+ze1nEx90vu11vZmu77m9OvcOXfvd2v2a+42t2ZNmfrk4rPcmsQh3M/ko1mNCdC29cADyS+B7t0znxNrqk1L15GLurPp37mz7/tFmM25xdQ6S0rcO3ZseP2ttb3Zrq8525bL97ul/VL1lpR88mc2701r1pSpTy4+y62FAP1sPpp9DnTx4sU52xvGp23aJK1ZI5WXS2Vl8W1auo62Xmaqf9euUk1NdstJX6fU+Ppba3uzXV9j/Rratly+3y3tl6q3/s+WjGNMTZn65OKz3BrMj
HOgn0EEKADkGAH62cRFRAAARCBAAQCIQIACABCBAAUAIAIBCgBABAIUAIAIBCgAABEIUAAAIhCgAABEIEABAIhAgAIAEIEABQAgAgEKAEAEAhQAgAgEKAAAEZp1P1Azq5a0Infl5EwfSZvzXUQztceaJepua9TdtmLrPsLdC+gW32gNHZrZfkV7vCmsmS1ub3W3x5ol6m5r1N222mvdyA0O4QIAEIEABQAgQnMD9L9yUkXutce622PNEnW3NepuW+21buRAsy4iAgAACQ7hAgAQgQAFACBCVgFqZl8zsxVmttrMvpfrolrCzNaY2TIzW2pmi8O0XmY2z8xWhZ8HFkCdd5nZRjNbnjatwTotcUsY/1fN7IsFVvcNZrY+jPlSM5uQNu/7oe4VZvZ3eaq5n5ktNLPXzew1M/t2mF7Q452h7kIf785m9pKZvRLq/tcw/UgzezHU95CZdQzTO4XXq8P88gKr+24zezttvIeH6QXxOUEeuXvGh6RiSW9JOkpSR0mvSKpoql++HpLWSOpTb9q/S/peeP49SdMLoM4vS/qipOVN1SlpgqT/lmSSTpD0YoHVfYOkaxpoWxE+L50kHRk+R8V5qPlQSV8Mz7tJWhlqK+jxzlB3oY+3SeoanpdIejGM468lnR+m3y7pivD8f0q6PTw/X9JDeRrvxuq+W9KkBtoXxOeER/4e2eyBfknSanf/m7t/LGm2pLOz6FdIzpZ0T3h+j6Rz8ldKwt2fkfRBvcmN1Xm2pHs98YKknmZ2aJsUWk8jdTfmbEmz3X23u78tabWSz1ObcvcN7v7X8Lxa0huSDlOBj3eGuhtTKOPt7l4TXpaEh0saJ2lOmF5/vFPvwxxJXzUza5tq98tQd2MK4nOC/MkmQA+T9G7a63XK/I8431zSH81siZn9U5h2sLtvCM+rJB2cn9Ka1Fid7eE9+FY4jHVX2iHygqs7HB4coWTvot2Md726pQIfbzMrNrOlkjZKmqdkb3ibu9c2UNu+usP87ZJ6t2nBQf263T013jeG8f6pmXUK0wpmvJEfn8WLiEa7+xcljZf0z2b25fSZ7u7K/FdlQWgvdQa3STpa0nBJGyT9JK/VNMLMukqaK+kqd/8wfV4hj3cDdRf8eLv7XncfLulwJXvBX8hvRdmpX7eZDZX0fSX1Hy+pl6Tr8lchCkk2AbpeUr+014eHaQXJ3deHnxslParkH+/7qUMr4efG/FWYUWN1FvR74O7vh188dZLu0P7DhgVTt5mVKAmh+939kTC54Me7obrbw3inuPs2SQsljVJyiDP1/dvpte2rO8zvIWlL21b6SWl1fy0cSnd33y1ppgp4vNG2sgnQv0g6JlxB11HJSf7f5rasOGZ2gJl1Sz2XdJqk5UrqvTg0u1jSY/mpsEmN1flbSZPDVX8nSNqedugx7+qd9zlXyZhLSd3nh6ssj5R0jKSX8lCfSZoh6Q13vzltVkGPd2N1t4PxLjOznuF5qaRTlZy/XShpUmhWf7xT78MkSQvCEYE21Ujdb6b9kWVKztumj3fePyfIo2yuNFJytdlKJecxfpDvK58y1HmUkqsQX5H0WqpWJedTnpK0StJ8Sb0KoNYHlRx+26Pk3MnUxupUcpXfL8P4L5M0ssDqnhXqelXJL5VD09r/INS9QtL4PNU8Wsnh2VclLQ2PCYU+3hnqLvTxHibp5VDfcknXh+lHKQn01ZIeltQpTO8cXq8O848qsLoXhPFeLuk+7b9StyA+Jzzy9+Cr/AAAiPBZvIgIAICcI0ABAIhAgAIAEIEABQAgAgEKAEAEAhRtItxB5JocLr8s3MnjZTMbk6v1AEBKh6abAO3CVyUtc/fL2mJlZtbB93+vK4DPIfZAkTNm9gMzW2lmz0oalDb9m2b2l3Dfxblm1sXMuoV7LpaENt3TX6f1LTezBeGLvZ8ys/7h/oz/LunscL/G0rT248zsN2mvTzWzR8Pz08zseTP7q5k9HL5zVmZ2fahvuZn9V+rOIGb2tJn9pyX3mf12rsYNQPtAgCInzKxSydc+Dlfy7TnHp81+xN2Pd/fjlHzF21RPbtf1tKTTQ5vzQ7s99Rb9c0n3uPswSfdLusXdl0q6Xsl9JIe7+0dp7RdK+oKZlYXXl0i6y8z6SPqhpFM8ufnAYknfCW1+EeobKqlU0hlpy+vo7iPdveC+wB1A2yJAkStjJD3q7js9uYNI+vcnDzWzRWa2TNKFkoaE6XcqCTiFnzMbWO4oSQ+E57OUfN1dozz5qq1Zkr4Rvud0lJKbIJ+g5AbUfw63r7pY0hGh21fC+dRlSu5hOSRtkQ9lWh+Azw/OgSIf7pZ0jru/YmZTJI2VJHf/czhEO1ZSsbsvb2wBzTRT0uOSdkl62N1rw2HZee5+QXpDM+ss6VYl32v6rpndoOS7WlN2tFJNANo59kCRK89IOsfMSsMdcs5Mm9dN0oZwfvPCev3uVbKH2dDepyQ9p+TwrkLfRU0V4u7vSXpPySHb1HJfkHSSmQ2Q9t3JZ6D2h+XmcE50Uv3lAYDEHihyxN3/amYPKbkzzkYlt8VL+RdJL0raFH52S5t3v6QfK7nrS0OulDTTzL4b+l/SSLv67pdU5u5vhPo2hb3fB82sU2jzQ3dfaWZ3KLnzRlW9ugFgH+7GgoJiZpMkne3uF7Xycn8h6WV3n9GaywXw+cUeKAqGmf1c0nglV+225nKXKDl3eXVrLhfA5xt7oAAAROAiIgAAIhCgAABEIEABAIhAgAIAEIEABQAgwv8HyV+YQ4PBI/UAAAAASUVORK5CYII=\n", 152 | "text/plain": [ 153 | "
" 154 | ] 155 | }, 156 | "metadata": { 157 | "needs_background": "light" 158 | }, 159 | "output_type": "display_data" 160 | } 161 | ], 162 | "source": [ 163 | "plt.title(\"%d products found for %s\" % \n", 164 | " (prodinfo['doy'].shape[0], \",\".join(prodinfo['year'].drop_duplicates().tolist())))\n", 165 | "plt.scatter(prodinfo['doy'].values, np.zeros(prodinfo.shape[0]), s=20, c='b')\n", 166 | "#plt.scatter(uniform_doy_list, np.zeros(selected_doys.shape[0]), s=20, c='r')\n", 167 | "plt.vlines(prodinfo['doy'].values, 0, 1, color='b', label='downloaded')\n", 168 | "# plt.vlines(uniform_doy_list, 0, -1, color='r', label='uniform')\n", 169 | "plt.hlines(0, 1, 365, color='k', alpha=0.3)\n", 170 | "plt.ylim(-0.1, 1)\n", 171 | "plt.xlim(0, 365)\n", 172 | "plt.yticks([], [])\n", 173 | "plt.xlabel('day of year')\n", 174 | "plt.legend(bbox_to_anchor=(1.35, 1))" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "id": "concrete-wildlife", 180 | "metadata": {}, 181 | "source": [ 182 | "### Save downloaded products to disk" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 19, 188 | "id": "thermal-examination", 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "name": "stdout", 193 | "output_type": "stream", 194 | "text": [ 195 | "saving products info to /media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2017/cloud_0_30/downloaded_as_of_20211117_170444.csv\n" 196 | ] 197 | } 198 | ], 199 | "source": [ 200 | "savename = '%s/downloaded_as_of_%s.csv' % \\\n", 201 | " (products_dir, datetime.now().strftime('%Y%m%d_%H%M%S'))\n", 202 | "\n", 203 | "if not os.path.exists(os.path.dirname(savename)):\n", 204 | " print(\"making new directory %s\" % os.path.dirname(savename))\n", 205 | " os.makedirs(os.path.dirname(savename))\n", 206 | "\n", 207 | "print(\"saving products info to %s\" % savename)\n", 208 | "prodinfo.to_csv(savename, index=False)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "id": "happy-defense", 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [] 218 | } 219 | ], 220 | "metadata": { 221 | "kernelspec": { 222 | "display_name": "Python 3", 223 | "language": "python", 224 | "name": "python3" 225 | }, 226 | "language_info": { 227 | "codemirror_mode": { 228 | "name": "ipython", 229 | "version": 3 230 | }, 231 | "file_extension": ".py", 232 | "mimetype": "text/x-python", 233 | "name": "python", 234 | "nbconvert_exporter": "python", 235 | "pygments_lexer": "ipython3", 236 | "version": "3.8.2" 237 | } 238 | }, 239 | "nbformat": 4, 240 | "nbformat_minor": 5 241 | } 242 | -------------------------------------------------------------------------------- /download/sentinelsat_download_tileid.py: -------------------------------------------------------------------------------- 1 | # spatial data processing pipelines 2 | import argparse 3 | import pandas as pd 4 | from sentinelsat import SentinelAPI # , read_geojson, geojson_to_wkt 5 | import os 6 | from glob import glob 7 | from collections import OrderedDict 8 | 9 | 10 | # USER INPUT ----------------------------------------------------------------------------------------------------------- 11 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 12 | parser.add_argument('--products_file', metavar='PRODUCTS FILE', default='', 13 | help='path to file containing all products to be downloaded') 14 | 15 | args = parser.parse_args() 16 | products_file = args.products_file 17 | 18 | # CODE 
----------------------------------------------------------------------------------------------------------------- 19 | # authentication 20 | cred = pd.read_csv("download/pw.csv", header=None) 21 | api = SentinelAPI(cred[0][0], cred[0][1], 'https://apihub.copernicus.eu/apihub') # 'https://scihub.copernicus.eu/dhus') # 22 | 23 | # read products to download from file 24 | if ',' in products_file: 25 | products_file = products_file.split(',') 26 | savedir = os.path.dirname(products_file[0]) 27 | products = pd.concat([pd.read_csv(products_file_) for products_file_ in products_file]) 28 | else: 29 | savedir = os.path.dirname(products_file) 30 | products = pd.read_csv(products_file) 31 | 32 | # make products into ordered dict 33 | products2download = OrderedDict() 34 | for i in range(products.shape[0]): # enumerate(list(products.keys())): 35 | products2download[products['index'].iloc[i]] = products.iloc[i].to_dict() 36 | 37 | # find number of remaining products 38 | down_filenames = [os.path.basename(p).split(".")[0] for p in glob(os.path.join(savedir, "*.zip"))] 39 | N = 0 40 | for key in products2download: 41 | if products2download[key]['identifier'] in down_filenames: 42 | N += 1 43 | print("%d of %d new products already downloaded, %d remaining" % (N, len(products2download), len(products2download)-N)) 44 | 45 | # download 46 | # try: 47 | api.download_all(products2download, directory_path=savedir, n_concurrent_dl=1) 48 | # except: 49 | # p 50 | print("waiting 30min...") 51 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib~=3.3.0 2 | numpy~=1.19.1 3 | pandas~=1.1.0 4 | rasterio~=1.1.5 5 | shapely~=1.7.0 6 | sentinelsat~=0.14 7 | scikit-learn 8 | simplification 9 | pyproj -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaeltrs/DeepSatData/c2774597dc57af46f777ba2212fe026305d2fd1e/utils/__init__.py -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import zipfile 4 | 5 | 6 | def unzip_all(dir_name, extension=".zip"): 7 | for item in os.listdir(dir_name): 8 | if item.endswith(extension): 9 | file_name = os.path.join(dir_name, item) 10 | zip_ref = zipfile.ZipFile(file_name) 11 | zip_ref.extractall(dir_name) 12 | zip_ref.close() 13 | os.remove(file_name) 14 | 15 | 16 | def find_number(text, c, single=True): 17 | val = re.findall(r'%s(\d+)' % c, text) 18 | if single: 19 | val = val[0] 20 | return val 21 | -------------------------------------------------------------------------------- /utils/date_utils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from glob import glob 4 | import pandas as pd 5 | 6 | 7 | def get_doy(date): 8 | Y = date[:4] 9 | m = date[4:6] 10 | d = date[6:] 11 | date = "%s.%s.%s" % (Y, m, d) 12 | dt = datetime.datetime.strptime(date, '%Y.%m.%d') 13 | return dt.timetuple().tm_yday 14 | 15 | 16 | def get_date(day): 17 | """ 18 | :param day: day of the year [0, 365] 19 | :return: sting day_of_month-month, ie. 
"3-Jul" 20 | """ 21 | if day < 31: 22 | m = "Jan" 23 | d = day 24 | elif day < 59: 25 | m = "Feb" 26 | d = day - 31 27 | elif day < 90: 28 | m = "Mar" 29 | d = day - 59 30 | elif day < 120: 31 | m = "Apr" 32 | d = day - 90 33 | elif day < 151: 34 | m = "May" 35 | d = day - 120 36 | elif day < 181: 37 | m = "Jun" 38 | d = day - 151 39 | elif day < 212: 40 | m = "Jul" 41 | d = day - 181 42 | elif day < 243: 43 | m = "Aug" 44 | d = day - 212 45 | elif day < 273: 46 | m = "Sep" 47 | d = day - 243 48 | elif day < 304: 49 | m = "Oct" 50 | d = day - 273 51 | elif day < 334: 52 | m = "Nov" 53 | d = day - 304 54 | else: 55 | m = "Dec" 56 | d = day - 334 57 | return "%d-%s" % (d, m) 58 | 59 | 60 | def get_paths(root_dir, pattern, save_name=None, relative=True): 61 | files = glob(os.path.join(root_dir, pattern)) 62 | N = len(root_dir.split("/")) 63 | if relative: 64 | # base = "/".join(pattern.split("/")[:-1]) 65 | files = ["/".join(x.split("/")[N:]) for x in files] 66 | print("%d files found matching %s" % (len(files), pattern)) 67 | if save_name: 68 | # check if abs path 69 | if not os.path.exists(save_name): 70 | save_name = os.path.join(root_dir, save_name) 71 | pd.DataFrame(files).to_csv(save_name, header=None, index=False) 72 | else: 73 | return files 74 | 75 | 76 | def get_unique_vals(path, col, header=None, name_fn=None): 77 | data = pd.read_csv(path, header=header) 78 | data = data[col] 79 | if name_fn: 80 | data = data.apply(name_fn) 81 | return data.value_counts() 82 | 83 | 84 | def get_lat_lon(loc, loc_type="meters"): 85 | if loc_type == "meters": 86 | lat = 111319.488 87 | lon = 111120.0 88 | return loc[0] / lat, loc[1] / lon 89 | -------------------------------------------------------------------------------- /utils/geospatial_data_utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from shapely import geometry 4 | from shapely.geometry import Polygon 5 | from pyproj import Proj, transform 6 | import re 7 | from simplification.cutil import simplify_coords 8 | from sentinelsat import geojson_to_wkt 9 | 10 | 11 | class GeoTransform: 12 | def __init__(self, intr, outtr, loc2loc=False): 13 | """ 14 | - loc2loc: from local to local coord system. 
In this case tranform remains x, y -> x, y, 15 | otherwise x, y -> y, x 16 | """ 17 | intr = str(intr) 18 | outtr = str(outtr) 19 | if not intr.isnumeric(): intr = get_epsg_code(intr) 20 | if not outtr.isnumeric(): outtr = get_epsg_code(outtr) 21 | self.inProj = Proj(init='epsg:%s' % intr) # %d' % get_epsg_code(country)) 22 | self.outProj = Proj(init='epsg:%s' % outtr) # 2154') 23 | self.loc2loc = loc2loc 24 | 25 | def __call__(self, x, y): 26 | yout, xout = transform(self.inProj, self.outProj, x, y) 27 | if self.loc2loc: 28 | return yout, xout 29 | return xout, yout 30 | 31 | 32 | def make_AOI(coords, transform): 33 | if type(coords) == str: 34 | x = [float(x) for x in re.findall("[+-]?\d+(?:\.\d+)?", coords)] 35 | x = np.array(x).reshape(-1, 2) 36 | 37 | points = [] 38 | for point in x: 39 | points.append(transform(point[0], point[1])) 40 | points.append(transform(x[0][0], x[0][1])) 41 | 42 | poly = make_poly(points[:-1]) 43 | footprint = coords 44 | AOI = Polygon(points) 45 | 46 | elif type(coords) in [list, tuple]: 47 | if len(coords) == 2: # assume NW, SE boxx coords 48 | NW, SE = coords 49 | NW_glob = transform(NW[1], NW[0]) 50 | SE_glob = transform(SE[1], SE[0]) 51 | 52 | poly = make_rect_poly(NW_glob, SE_glob) 53 | footprint = geojson_to_wkt(poly) 54 | AOI = Polygon([[NW_glob[1], NW_glob[0]], 55 | [NW_glob[1], SE_glob[0]], 56 | [SE_glob[1], SE_glob[0]], 57 | [SE_glob[1], NW_glob[0]], 58 | [NW_glob[1], NW_glob[0]]]) 59 | 60 | else: 61 | points = [] 62 | for point in coords: 63 | points.append(transform(point[0], point[1])) 64 | points.append(transform(x[0][0], x[0][1])) 65 | 66 | poly = make_poly(points[:-1]) 67 | footprint = geojson_to_wkt(poly) 68 | AOI = Polygon(points) 69 | 70 | return poly, footprint, AOI 71 | 72 | 73 | def make_poly(points, ret_points=False): 74 | points.append(points[0]) 75 | poly = {"type": "FeatureCollection", 76 | "features": [{"type": "Feature", "properties": {}, "geometry": { 77 | "type": "Polygon", 78 | "coordinates": [[points]]} }]} 79 | if ret_points: 80 | return poly['features'][0]['geometry']['coordinates'] 81 | return poly 82 | 83 | 84 | def make_rect_poly(nw, se, ret_points=False): 85 | # W, N 86 | poly = {"type": "FeatureCollection", 87 | "features": [{"type": "Feature", "properties": {}, "geometry": { 88 | "type": "Polygon", 89 | "coordinates": [[[nw[1], nw[0]], 90 | [nw[1], se[0]], 91 | [se[1], se[0]], 92 | [se[1], nw[0]], 93 | [nw[1], nw[0]]]]} }]} 94 | if ret_points: 95 | return poly['features'][0]['geometry']['coordinates'] 96 | return poly 97 | 98 | 99 | def get_epsg_code(country): 100 | epsg_code = {'germany': 32632, 'senegal': 32628, 'france': 32631} 101 | return epsg_code[country] 102 | 103 | 104 | def plot_poly(points, c=None, newfig=True): 105 | if type(points) in [list, tuple]: 106 | points = np.array(points) 107 | if c is None: 108 | c = "r" 109 | if newfig: 110 | plt.figure() 111 | for i in range(points.shape[0] - 1): 112 | plt.plot(points[i:i + 2, 0], points[i:i + 2, 1], c=c) 113 | plt.scatter(points[i, 0], points[i, 1], c=c) 114 | 115 | 116 | def get_points_from_str_poly(str_poly): 117 | return np.array([[float(j) for j in i.split(" ") if j != ''] for i in str_poly.split("(")[-1].split(")")[0].split(",")]) 118 | 119 | 120 | # eometry 121 | def get_line_eq(p1, p2, h=1e-7): 122 | ''' 123 | P: (x, y) 124 | ''' 125 | denom = p2[0] - p1[0] 126 | if denom == 0: 127 | denom = h 128 | a = (p2[1] - p1[1]) / denom 129 | b = (p1[1] * p2[0] - p2[1] * p1[0]) / denom 130 | return a, b 131 | 132 | 133 | def get_perp_line(p1, p2, p3, h=1e-7): 134 
| a, b = get_line_eq(p1, p2) 135 | if a == 0: 136 | a = h 137 | a_ = - 1. / a 138 | b_ = p3[1] + 1. / a * p3[0] 139 | return a_, b_ 140 | 141 | 142 | def get_perp_bisect(p1, p2, p3, h=1e-7): 143 | ''' 144 | p1, p2: line segment end points 145 | p3: point outside line 146 | ''' 147 | a, b = get_line_eq(p1, p2) 148 | if a == 0: 149 | a = h 150 | a_ = - 1. / a 151 | b_ = p3[1] + 1. / a * p3[0] 152 | x = (b_ - b) / (a - a_) 153 | y = a * x + b 154 | return x, y 155 | 156 | 157 | def dist(p1, p2): 158 | return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2) 159 | 160 | 161 | def is_between(p1, p2, p3): 162 | xmax = max((p1[0]), p2[0]) 163 | xmin = min((p1[0]), p2[0]) 164 | if (p3[0] < xmax) & (p3[0] > xmin): 165 | return True 166 | else: 167 | return False 168 | 169 | 170 | def min_dist(p1, p2, p3): 171 | p4 = get_perp_bisect(p1, p2, p3) 172 | if is_between(p1, p2, p4): 173 | return p4, dist(p3, p4) 174 | d1 = dist(p3, p1) 175 | d2 = dist(p3, p2) 176 | return [p1, p2][np.argmin([d1, d2])], min((d1, d2)) 177 | 178 | 179 | def closest_point_to_poly(poly, point, return_dist=False): 180 | D = [] 181 | P = [] 182 | for i in range(len(poly)): 183 | if i == (len(poly) - 1): 184 | p, d = min_dist(poly[i], poly[0], point) 185 | else: 186 | p, d = min_dist(poly[i], poly[i + 1], point) 187 | D.append(d) 188 | P.append(p) 189 | idx = np.argmin(D) 190 | if return_dist: 191 | return D[idx] 192 | return P[idx] 193 | 194 | 195 | def distance_pix_to_poly(poly, point): 196 | poly_point = closest_point_to_poly(poly, point) 197 | return dist(poly_point, point) 198 | 199 | 200 | 201 | def add_points(poly, numpoints=100): 202 | # increase number of points by splitting largest line segments in half 203 | while poly.shape[0] < numpoints: 204 | idx = np.argmax([dist(poly[i], poly[i+1]) for i in range(poly.shape[0]-1)]) 205 | new_point = (poly[idx] + poly[idx+1]) / 2. 
206 | poly = np.insert(poly, idx+1, new_point, 0) 207 | return poly 208 | 209 | 210 | def interp1d(N, Nmax, Nmin, tmax, tmin): 211 | return (N - Nmin) / (Nmax - Nmin) * (tmax - tmin) + tmin 212 | 213 | 214 | def simplify_poly_points(poly, numpoints=20, iter_max=20): 215 | numpoints_init = poly.shape[0] 216 | 217 | if numpoints_init == numpoints: 218 | return poly 219 | elif numpoints_init < numpoints: 220 | return add_points(poly, numpoints) 221 | else: # get initial values of t that lead to larger and smaller polygons 222 | Nmax = numpoints_init 223 | t = 5 224 | while simplify_coords(poly, t).shape[0] >= numpoints: 225 | t *= 2 226 | Nmin = simplify_coords(poly, t).shape[0] 227 | Tmax, Tmin = 0, t 228 | 229 | iter = 0 230 | while True: 231 | t = interp1d(numpoints, Nmax, Nmin, Tmax, Tmin) 232 | poly_ = simplify_coords(poly, t) 233 | N = poly_.shape[0] 234 | # print(N, t) 235 | if N == numpoints: 236 | break 237 | elif N > numpoints: 238 | Nmax, Tmax = N, t 239 | elif N < numpoints: 240 | Nmin, Tmin = N, t 241 | iter += 1 242 | if iter > iter_max: 243 | poly_ = simplify_coords(poly, Tmin) 244 | return add_points(poly_, numpoints) 245 | return poly_ 246 | 247 | 248 | def is_valid(parcel_poly, pxmin, pymax, res=10): 249 | """ 250 | checks if parcel_poly polygon has valid shape 251 | """ 252 | isvalid = True 253 | i = 0 254 | j = 0 255 | pix_points = [[pxmin + loc[0] * res, pymax - loc[1] * res] for loc in 256 | [[j, i], [j + 1, i], [j + 1, i + 1], [j, i + 1], [j, i]]] 257 | try: 258 | parcel_poly.intersection(geometry.Polygon(pix_points)).area 259 | except: 260 | isvalid = False 261 | return isvalid 262 | 263 | 264 | def str_line_eq(points, h=1e-1): 265 | assert points.shape == (2, 2), 'Two points must be used to derive straight line equation' 266 | x1, y1 = points[0] 267 | x2, y2 = points[1] 268 | denom = x2 - x1 269 | if denom == 0: 270 | denom = h 271 | a = (y2 - y1) / denom # (x2 - x1) 272 | b = (y1 * x2 - x1 * y2) / denom # (x2 - x1) 273 | return a, b 274 | 275 | 276 | # def find_samples_in_poly(N, W, h, w, T, data_df): 277 | # is_in_prod = (N >= data_df['north']) & (N - 10 * h <= (data_df['north'])) & \ 278 | # (W <= data_df['west']) & (W + 10 * w >= (data_df['west'])) 279 | # prod_doy = get_doy(T) 280 | # data_doy = (data_df[[c for c in data_df.columns if c.startswith("doy")]] * 365.0001).round(0).astype(np.int32) 281 | # doy_idx = (data_doy == prod_doy).any(axis=1) 282 | # return data_df[is_in_prod & doy_idx] 283 | -------------------------------------------------------------------------------- /utils/multiprocessing_utils.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | 3 | 4 | def flatten_list(l): 5 | return [item for sublist in l for item in sublist] 6 | 7 | 8 | def run_pool(x, f, num_cores, split=False): 9 | if not split: 10 | x = split_num_segments(x, num_cores) 11 | print(len(x)) 12 | # x = [[x_, i] for i, x_ in enumerate(x)] 13 | pool = Pool(num_cores) 14 | res = pool.map(f, x) 15 | return res 16 | 17 | 18 | def split_num_segments(inlist, num_segments): 19 | res = [[] for _ in range(num_segments)] 20 | i = 0 21 | while len(inlist) > 0: 22 | if i < num_segments: 23 | res[i].append(inlist.pop()) 24 | else: 25 | res[i % num_segments].append(inlist.pop()) 26 | i += 1 27 | return res 28 | 29 | 30 | def split_size_segments(inlist, seg_size): 31 | i = 0 32 | newlist = [] 33 | while len(inlist) - len(newlist) * seg_size > seg_size: 34 | newlist.append(inlist[i * seg_size: (i + 1) * seg_size]) 35 | i += 1 36 
| if len(inlist) - len(newlist) * seg_size > 0: 37 | newlist.append(inlist[i * seg_size:]) 38 | return newlist 39 | 40 | 41 | def split_df(df, num_segments): 42 | idx = df.index.to_list() 43 | idx_segments = split_num_segments(idx, num_segments) 44 | return [df.iloc[idx_seg].reset_index(drop=True) for idx_seg in idx_segments] 45 | -------------------------------------------------------------------------------- /utils/sentinel_products_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from glob import glob 3 | import pandas as pd 4 | import rasterio 5 | 6 | 7 | def get_S2prod_info(imdirs): 8 | data = [] 9 | for imdir in imdirs: 10 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 11 | f = rasterio.open("%s/%s_B02.jp2" % (imdir, imname)) 12 | tile_transform = list(f.meta['transform']) 13 | tile_wn = [tile_transform[2], tile_transform[5]] 14 | 15 | data.append([imdir, imdir.split("/")[-2], imname, tile_wn[0], tile_wn[1], f.meta['height'], 16 | f.meta['width'], imname.split("_")[1][:8], f.crs.to_dict()['init']]) 17 | df = pd.DataFrame(data=data, 18 | columns=["path", "prod_name1", "prod_name2", "West", "North", "height", "width", "Time", 19 | "crs"]) # , 20 | # dtype=[np.str, np.str, np.float32, np.float32, np.float32, np.float32, np.str, np.str]) 21 | return df 22 | 23 | 24 | def get_S2tile_coords(basedir): 25 | """ 26 | basedir: directory containing sentinel-2 products 27 | """ 28 | basedir = '/media/michaeltrs/0a8a5a48-ede5-47d0-8eff-10d11350bf98/Satellite_Data/Sentinel2/PSETAE_repl/2018/cloud_0_30' 29 | if basedir.split('.')[-1] == 'SAFE': 30 | imdir = basedir 31 | elif os.path.dir(basedir): 32 | files = glob('%s/*.SAFE' % basedir) 33 | tile = [s.split('/')[-1].split('_')[5] for s in files] 34 | assert all([t == tile[0] for t in tile]), "not all products in dir correspond to the same tile" 35 | imdir = files[0] 36 | imdir = glob("%s/GRANULE/**/IMG_DATA" % imdir)[0] 37 | # info = get_S2prod_info(filename) 38 | imname = "%s_%s" % (imdir.split("/")[-2].split("_")[-3], imdir.split("/")[-4].split("_")[-5]) 39 | f = rasterio.open("%s/%s_B02.jp2" % (imdir, imname)) 40 | tile_transform = list(f.meta['transform']) 41 | tile_wn = [tile_transform[2], tile_transform[5]] 42 | tile_es = [tile_wn[0] + 10 * f.meta['width'], tile_wn[1] - 10 * f.meta['height']] 43 | return tile_wn, tile_es 44 | 45 | 46 | --------------------------------------------------------------------------------