├── figures └── example.png ├── ai4boundaries ├── __init__.py └── functions.py ├── setup.py ├── LICENSE └── README.md /figures/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/parvvaresh/ai4boundaries/main/figures/example.png -------------------------------------------------------------------------------- /ai4boundaries/__init__.py: -------------------------------------------------------------------------------- 1 | # This is so that you can import ppack or import average from ppack 2 | # in stead of from ppack.functions import average 3 | 4 | from .functions import download_file, url, download_ai4boundaries -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | setuptools.setup( 4 | name='ai4boundaries', 5 | version='0.0.1', 6 | author='Franz Waldner', 7 | author_email='francois.waldner.atwork@gmail.com', 8 | description='Utility package to download AI4Boundaries data', 9 | long_description='Utility package to download AI4Boundaries data', 10 | long_description_content_type="text/markdown", 11 | url='https://github.com/waldnerf/ai4boundaries', 12 | project_urls={ 13 | "Bug Tracker": "https://github.com/waldnerf/ai4boundaries/issues" 14 | }, 15 | license='MIT', 16 | packages=['ai4boundaries'], 17 | install_requires=['requests', 'bs4', 'pathlib', 'tqdm'], 18 | ) 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mike Huls 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /ai4boundaries/functions.py: -------------------------------------------------------------------------------- 1 | import urllib.error 2 | import urllib.request 3 | import requests 4 | from bs4 import BeautifulSoup 5 | from pathlib import Path 6 | from tqdm import tqdm 7 | import time 8 | 9 | # URL of data set 10 | url = 'http://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/DRLL/AI4BOUNDARIES/' 11 | 12 | 13 | def download_file(url, dst_path): 14 | """ 15 | Download files to disk 16 | 17 | :param url: URL of the file to download 18 | :param dst_path: File location on disk after download 19 | 20 | """ 21 | try: 22 | with urllib.request.urlopen(url) as web_file: 23 | data = web_file.read() 24 | with open(dst_path, mode='wb') as local_file: 25 | local_file.write(data) 26 | except urllib.error.URLError as e: 27 | print(e) 28 | 29 | 30 | def download_ai4boundaries(dir): 31 | """ 32 | Download AI4boundaries data set 33 | :param dir: Path to directory where to save the data 34 | 35 | """ 36 | url = 'http://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/DRLL/AI4BOUNDARIES/' 37 | urls = [] 38 | url_fns = [] 39 | 40 | def scrape(site): 41 | """ 42 | Recursively scrape a website 43 | :param site: 44 | :return: 45 | """ 46 | 47 | # getting the request from url 48 | r = requests.get(site) 49 | 50 | # converting the text 51 | s = BeautifulSoup(r.text, "html.parser") 52 | 53 | for i in s.find_all("a"): 54 | href = i.attrs['href'] 55 | 56 | if href.endswith("/"): 57 | 58 | subsite = site + href 59 | 60 | if subsite not in urls: 61 | urls.append(subsite) 62 | 63 | # calling it self 64 | scrape(subsite) 65 | if href.endswith("tif") | href.endswith("nc"): 66 | url_fn_ = site + href 67 | url_fns.append(url_fn_) 68 | 69 | print('Scraping data') 70 | scrape(url) 71 | 72 | print('Creating folder architecture') 73 | if dir.endswith('/'): 74 | subdirs = [i.replace(url, dir) for i in urls if not i.endswith('DRLL/')] 75 | else: 76 | subdirs = [i.replace(url, dir + '/') for i in urls if not i.endswith('DRLL/')] 77 | 78 | subdirs = [subdir.replace('DRLL/', '') for subdir in subdirs if not 'ftp' in subdir] 79 | 80 | for subdir in subdirs: 81 | Path(subdir).mkdir(parents=True, exist_ok=True) 82 | 83 | failed_fns = [] 84 | print('Downloading data') 85 | for url_fn in tqdm(url_fns): 86 | if dir.endswith('/'): 87 | fn = url_fn.replace(url, dir) 88 | else: 89 | fn = url_fn.replace(url, dir + '/') 90 | try: 91 | download_file(url_fn, fn) 92 | except: 93 | time.sleep(20) 94 | failed_fns = url_fn 95 | 96 | # Reprocessing failed downloads 97 | for url_fn in tqdm(failed_fns): 98 | if dir.endswith('/'): 99 | fn = url_fn.replace(url, dir) 100 | else: 101 | fn = url_fn.replace(url, dir + '/') 102 | try: 103 | download_file(url_fn, fn) 104 | except: 105 | continue 106 | 107 | print('Download finished!') 108 | print('Cite the data set:') 109 | print('d\'Andrimont, R., Claverie, M., Kempeneers, P., Muraro, D., Yordanov, M., Peressutti, D., Batič, M., ' 110 | 'and Waldner, F.: AI4Boundaries: an open AI-ready dataset to map field boundaries with Sentinel-2 and aerial ' 111 | 'photography, Earth Syst. Sci. Data Discuss. [preprint], ' 112 | 'https://doi.org/10.5194/essd-2022-298, in review, 2022.') 113 | 114 | 115 | if __name__ == '__main__': 116 | out_dir = r'C:/Users/franc/Downloads/ai4boundaries' 117 | download_ai4boundaries(out_dir) 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AI4Boundaries 2 | 3 | AI4boundaries is a Python package that facilitates download of the AI4boundaries data set. 4 | 5 | Check out [the paper](https://essd.copernicus.org/preprints/essd-2022-298/) 6 | 7 | d'Andrimont, R., Claverie, M., Kempeneers, P., Muraro, D., Yordanov, M., Peressutti, D., Batič, M., and Waldner, F.: AI4Boundaries: an open AI-ready dataset to map field boundaries with Sentinel-2 and aerial photography, Earth Syst. Sci. Data Discuss. [preprint], https://doi.org/10.5194/essd-2022-298, in review, 2022. 8 | 9 | ![Data sample](figures/example.png) 10 | 11 | ## Installation and updating 12 | Use the package manager [pip](https://pip.pypa.io/en/stable/) to install `ai4boundaries` like below. 13 | Rerun this command to check for and install updates. 14 | ```bash 15 | pip install git+https://github.com/waldnerf/ai4boundaries 16 | ``` 17 | 18 | #### Demo of some of the features: 19 | ```python 20 | from ai4boundaries import download_ai4boundaries 21 | 22 | dir = '/path/to/ai4boundaries_data' 23 | download_ai4boundaries(dir) 24 | 25 | 26 | ``` 27 | 28 | ## Abstract 29 | 30 | Field boundaries are at the core of many agricultural applications and are a key enabler for operational monitoring of agricultural production to support food security. Recent scientific progress in deep learning methods has highlighted the capacity to extract field boundaries from satellite and aerial images with a clear improvement from object-based image analysis (e.g., multiresolution segmentation) or conventional filters (e.g., Sobel filters). However, these methods need labels to be trained on. So far, no standard data set exists to easily and robustly benchmark models and progress the state of the art. The absence of such benchmark data further impedes proper comparison against existing methods. Besides, there is no consensus on which evaluation metrics should be reported (both at the pixel and field levels). As a result, it is currently impossible to compare and benchmark new and existing methods. To fill these gaps, we introduce AI4Boundaries, a data set of images and labels readily usable to train and compare models on the task of field boundary detection. AI4Boundaries includes two specific data sets: (i) a 10-m Sentinel-2 monthly composites for large-scale analyses in retrospect, (ii) a 1-m orthophoto data set for regional-scale analyses such as the automatic extraction of Geospatial Aid Application (GSAA). All labels have been sourced from GSAA data that have been made openly available (Austria, Catalonia, France, Luxembourg, the Netherlands, Slovenia, and Sweden) for 2019 representing 14.8 M parcels covering 376 km2. Data were selected following a stratified random sampling drawn based on two landscape fragmentation metrics, the perimeter/area ratio and the area covered by parcels, thus taking into account the diversity of the agricultural landscapes. The resulting “AI4Boundaries” dataset consists of 7,831 samples of 256 by 256 pixels for the 10-m Sentinel-2 dataset and of 512 by 512 pixels for the 1-m aerial orthophoto. Both datasets are provided with the corresponding vector ground-truth parcel delineation (2.5 M parcels covering 47,105 km2) and with a raster version already pre-processed and ready to use. Besides providing this open dataset to foster computer vision developments of parcel delineation methods, we discuss perspectives and limitations of the dataset for various types of applications in the agriculture domain and consider possible further improvements. 31 | 32 | ## CITATION 33 | 34 | @Article{essd-2022-298, 35 | AUTHOR = {d'Andrimont, R. and Claverie, M. and Kempeneers, P. and Muraro, D. and Yordanov, M. and Peressutti, D. and Bati\v{c}, M. and Waldner, F.}, 36 | TITLE = {AI4Boundaries: an open AI-ready dataset to map field boundaries with Sentinel-2 and aerial photography}, 37 | JOURNAL = {Earth System Science Data Discussions}, 38 | VOLUME = {2022}, 39 | YEAR = {2022}, 40 | PAGES = {1--16}, 41 | URL = {https://essd.copernicus.org/preprints/essd-2022-298/}, 42 | DOI = {10.5194/essd-2022-298} 43 | } 44 | 45 | ## License 46 | [MIT](https://choosealicense.com/licenses/mit/) 47 | --------------------------------------------------------------------------------