├── .gitignore
├── CODE_OF_CONDUCT.md
├── LICENSE
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── examples
│   └── example_building_footprints.ipynb
├── images
│   ├── building-height-coverage.png
│   ├── country-overview.png
│   ├── footprints-sample.png
│   ├── polygonization.jpg
│   └── segmentation.jpg
└── scripts
    ├── make-gis-friendly.ps1
    ├── make-gis-friendly.py
    └── read-files.py

/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 | 
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 | 
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 | 
16 | # Mono auto generated files
17 | mono_crash.*
18 | 
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Aa][Rr][Mm]/
27 | [Aa][Rr][Mm]64/
28 | bld/
29 | [Bb]in/
30 | [Oo]bj/
31 | [Ll]og/
32 | [Ll]ogs/
33 | 
34 | # Visual Studio 2015/2017 cache/options directory
35 | .vs/
36 | # Uncomment if you have tasks that create the project's static files in wwwroot
37 | #wwwroot/
38 | 
39 | # Visual Studio 2017 auto generated files
40 | Generated\ Files/
41 | 
42 | # MSTest test Results
43 | [Tt]est[Rr]esult*/
44 | [Bb]uild[Ll]og.*
45 | 
46 | # NUnit
47 | *.VisualState.xml
48 | TestResult.xml
49 | nunit-*.xml
50 | 
51 | # Build Results of an ATL Project
52 | [Dd]ebugPS/
53 | [Rr]eleasePS/
54 | dlldata.c
55 | 
56 | # Benchmark Results
57 | BenchmarkDotNet.Artifacts/
58 | 
59 | # .NET Core
60 | project.lock.json
61 | project.fragment.lock.json
62 | artifacts/
63 | 
64 | # StyleCop
65 | StyleCopReport.xml
66 | 
67 | # Files built by Visual Studio
68 | *_i.c
69 | *_p.c
70 | *_h.h
71 | *.ilk
72 | *.meta
73 | *.obj
74 | *.iobj
75 | *.pch
76 | *.pdb
77 | *.ipdb
78 | *.pgc
79 | *.pgd
80 | *.rsp
81 | *.sbr
82 | *.tlb
83 | *.tli
84 | *.tlh
85 | *.tmp
86 | *.tmp_proj
87 | *_wpftmp.csproj
88 | *.log
89 | *.vspscc
90 | *.vssscc
91 | .builds
92 | *.pidb
93 | *.svclog
94 | *.scc
95 | 
96 | # Chutzpah Test files
97 | _Chutzpah*
98 | 
99 | # Visual C++ cache files
100 | ipch/
101 | *.aps
102 | *.ncb
103 | *.opendb
104 | *.opensdf
105 | *.sdf
106 | *.cachefile
107 | *.VC.db
108 | *.VC.VC.opendb
109 | 
110 | # Visual Studio profiler
111 | *.psess
112 | *.vsp
113 | *.vspx
114 | *.sap
115 | 
116 | # Visual Studio Trace Files
117 | *.e2e
118 | 
119 | # TFS 2012 Local Workspace
120 | $tf/
121 | 
122 | # Guidance Automation Toolkit
123 | *.gpState
124 | 
125 | # ReSharper is a .NET coding add-in
126 | _ReSharper*/
127 | *.[Rr]e[Ss]harper
128 | *.DotSettings.user
129 | 
130 | # TeamCity is a build add-in
131 | _TeamCity*
132 | 
133 | # DotCover is a Code Coverage Tool
134 | *.dotCover
135 | 
136 | # AxoCover is a Code Coverage Tool
137 | .axoCover/*
138 | !.axoCover/settings.json
139 | 
140 | # Visual Studio code coverage results
141 | *.coverage
142 | *.coveragexml
143 | 
144 | # NCrunch
145 | _NCrunch_*
146 | .*crunch*.local.xml
147 | nCrunchTemp_*
148 | 
149 | # MightyMoose
150 | *.mm.*
151 | AutoTest.Net/
152 | 
153 | # Web workbench (sass)
154 | .sass-cache/
155 | 
156 | # Installshield output folder
157 | [Ee]xpress/
158 | 
159 | # DocProject is a documentation generator add-in
160 | DocProject/buildhelp/
161 | DocProject/Help/*.HxT
162 | DocProject/Help/*.HxC
163 | DocProject/Help/*.hhc
164 | DocProject/Help/*.hhk
165 | DocProject/Help/*.hhp
166 | DocProject/Help/Html2
167 | DocProject/Help/html
168 | 
169 | # Click-Once directory
170 | publish/
171 | 
172 | # Publish Web Output
173 | *.[Pp]ublish.xml
174 | *.azurePubxml
175 | # Note: Comment the next line if you want to checkin your web deploy settings,
176 | # but database connection strings (with potential passwords) will be unencrypted
177 | *.pubxml
178 | *.publishproj
179 | 
180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
181 | # checkin your Azure Web App publish settings, but sensitive information contained
182 | # in these scripts will be unencrypted
183 | PublishScripts/
184 | 
185 | # NuGet Packages
186 | *.nupkg
187 | # NuGet Symbol Packages
188 | *.snupkg
189 | # The packages folder can be ignored because of Package Restore
190 | **/[Pp]ackages/*
191 | # except build/, which is used as an MSBuild target.
192 | !**/[Pp]ackages/build/
193 | # Uncomment if necessary however generally it will be regenerated when needed
194 | #!**/[Pp]ackages/repositories.config
195 | # NuGet v3's project.json files produces more ignorable files
196 | *.nuget.props
197 | *.nuget.targets
198 | 
199 | # Microsoft Azure Build Output
200 | csx/
201 | *.build.csdef
202 | 
203 | # Microsoft Azure Emulator
204 | ecf/
205 | rcf/
206 | 
207 | # Windows Store app package directories and files
208 | AppPackages/
209 | BundleArtifacts/
210 | Package.StoreAssociation.xml
211 | _pkginfo.txt
212 | *.appx
213 | *.appxbundle
214 | *.appxupload
215 | 
216 | # Visual Studio cache files
217 | # files ending in .cache can be ignored
218 | *.[Cc]ache
219 | # but keep track of directories ending in .cache
220 | !?*.[Cc]ache/
221 | 
222 | # Others
223 | ClientBin/
224 | ~$*
225 | *~
226 | *.dbmdl
227 | *.dbproj.schemaview
228 | *.jfm
229 | *.pfx
230 | *.publishsettings
231 | orleans.codegen.cs
232 | 
233 | # Including strong name files can present a security risk
234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
235 | #*.snk
236 | 
237 | # Since there are multiple workflows, uncomment next line to ignore bower_components
238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
239 | #bower_components/
240 | 
241 | # RIA/Silverlight projects
242 | Generated_Code/
243 | 
244 | # Backup & report files from converting an old project file
245 | # to a newer Visual Studio version. Backup files are not needed,
246 | # because we have git ;-)
247 | _UpgradeReport_Files/
248 | Backup*/
249 | UpgradeLog*.XML
250 | UpgradeLog*.htm
251 | ServiceFabricBackup/
252 | *.rptproj.bak
253 | 
254 | # SQL Server files
255 | *.mdf
256 | *.ldf
257 | *.ndf
258 | 
259 | # Business Intelligence projects
260 | *.rdl.data
261 | *.bim.layout
262 | *.bim_*.settings
263 | *.rptproj.rsuser
264 | *- [Bb]ackup.rdl
265 | *- [Bb]ackup ([0-9]).rdl
266 | *- [Bb]ackup ([0-9][0-9]).rdl
267 | 
268 | # Microsoft Fakes
269 | FakesAssemblies/
270 | 
271 | # GhostDoc plugin setting file
272 | *.GhostDoc.xml
273 | 
274 | # Node.js Tools for Visual Studio
275 | .ntvs_analysis.dat
276 | node_modules/
277 | 
278 | # Visual Studio 6 build log
279 | *.plg
280 | 
281 | # Visual Studio 6 workspace options file
282 | *.opt
283 | 
284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
285 | *.vbw
286 | 
287 | # Visual Studio LightSwitch build output
288 | **/*.HTMLClient/GeneratedArtifacts
289 | **/*.DesktopClient/GeneratedArtifacts
290 | **/*.DesktopClient/ModelManifest.xml
291 | **/*.Server/GeneratedArtifacts
292 | **/*.Server/ModelManifest.xml
293 | _Pvt_Extensions
294 | 
295 | # Paket dependency manager
296 | .paket/paket.exe
297 | paket-files/
298 | 
299 | # FAKE - F# Make
300 | .fake/
301 | 
302 | # CodeRush personal settings
303 | .cr/personal
304 | 
305 | # Python Tools for Visual Studio (PTVS)
306 | __pycache__/
307 | *.pyc
308 | 
309 | # Cake - Uncomment if you are using it
310 | # tools/**
311 | # !tools/packages.config
312 | 
313 | # Tabs Studio
314 | *.tss
315 | 
316 | # Telerik's JustMock configuration file
317 | *.jmconfig
318 | 
319 | # BizTalk build output
320 | *.btp.cs
321 | *.btm.cs
322 | *.odx.cs
323 | *.xsd.cs
324 | 
325 | # OpenCover UI analysis results
326 | OpenCover/
327 | 
328 | # Azure Stream Analytics local run output
329 | ASALocalRun/
330 | 
331 | # MSBuild Binary and Structured Log
332 | *.binlog
333 | 
334 | # NVidia Nsight GPU debugger configuration file
335 | *.nvuser
336 | 
337 | # MFractors (Xamarin productivity tool) working folder
338 | .mfractor/
339 | 
340 | # Local History for Visual Studio
341 | .localhistory/
342 | 
343 | # BeatPulse healthcheck temp database
344 | healthchecksdb
345 | 
346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
347 | MigrationBackup/
348 | 
349 | # Ionide (cross platform F# VS Code tools) working folder
350 | .ionide/
351 | 
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Microsoft Open Source Code of Conduct
2 | 
3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
4 | 
5 | Resources:
6 | 
7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
10 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Data in this repository has been licensed by Microsoft under the Open Data Commons Open Database License (ODbL).
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Introduction
2 | 
3 | Bing Maps is releasing open building footprints around the world. We have detected **1.4B** buildings from Bing Maps imagery between 2014 and 2024, including Maxar, Airbus, and IGN France imagery. The data is freely available for download and use under ODbL. This dataset includes our [other releases](#will-there-be-more-data-coming-for-other-geographies).
4 | 
5 | ## Updates
6 | * 2025-02-28 - Added **18M** building footprints and **4.2M** height estimates derived from Maxar and Vexcel imagery between 2017 and 2024. Largest contributions are to Turkey (5.8M), Greece (6M), and France (3.8M). [dataset-links.csv](https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv) updated 28 February 2025.
7 | * 2025-02-03 - Added **7.4M** building footprints and **2.4M** height estimates derived from Maxar and Vexcel imagery between 2020 and 2024. The largest contributions are to France (5.8M) and the United States (1.2M). dataset-links.csv updated 3 February 2025.
8 | * 2025-01-06 - Added **9.6M** building footprint edits derived from Maxar imagery between 2021 and 2024. Largest contributions are to Chile (3.4M), Norway (2M), Brazil (2M), and Sweden (1.5M). No new height estimates. dataset-links.csv updated 6 January 2025.
9 | * 2024-12-02 - Added **12M** building footprint edits and **554k** height estimates derived from Maxar and Vexcel imagery between 2018 and 2024. The largest contributions are to Sudan (5.5M), Ethiopia (3.3M), and Saudi Arabia (590k). dataset-links.csv updated on 2 December 2024. Changed merge logic to update tiles at LOD 19 instead of LOD 15, reducing update boundary visibility.
10 | * 2024-11-07 - Dataset hosting is moving! You will notice `dataset-links.csv` has a new URL https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv. All links are updated from https://minedbuildings.**blob**.core.windows.net/ -> https://minedbuildings.**z5.web**.core.windows.net/. Older versions will not be moved, but updates will be available at this new location going forward.
11 | * 2024-11-01 - Added **12M** building footprint edits and **3.2M** height estimates derived from Maxar and Vexcel imagery between 2019 and 2024. The largest contributions are to the US (3.2M), Argentina (2.8M), and Iran (2.4M). dataset-links.csv updated on 1 November 2024.
12 | * 2024-09-25 - Added **4.7M** building footprint edits and **4.4M** height estimates derived from Maxar and Vexcel imagery between 2019 and 2024. The largest contributions are to the US (2.6M) and UK (1.5M). dataset-links.csv updated on 25 September 2024.
13 | * 2024-08-27 - Added **4.8M** building footprint edits and **2.1M** height estimates derived from Vexcel and Maxar imagery between 2019 and 2024. The largest contributions are for the United States (1.7M), South Africa (1.5M), and Brazil (1.1M). dataset-links.csv updated on 28 August 2024.
14 | * 2024-07-30 - Added **1.6M** building footprint edits and **1.5M** height estimates derived from Maxar and Vexcel imagery between 2020 and 2024. The largest contribution was in the United States. dataset-links.csv updated on 6 August 2024.
15 | * 2024-06-25 - Added **380K** building footprint edits and **364K** height estimates derived from Maxar and Vexcel imagery between 2020 and 2024. The largest contribution was in the United States. dataset-links.csv updated on 27 June 2024.
16 | * 2024-06-03 - Added **637K** building footprint edits and **627K** height estimates derived from Maxar and Vexcel imagery between 2020 and 2024. Primary contribution is in the United States (631K). dataset-links.csv was updated on 3 June 2024.
17 | * 2024-05-01 - Added **596K** building footprint edits and **125K** height estimates derived from Maxar and Vexcel imagery between 2017 and 2023. Primary contributions are in France (525K) and Germany (44K). dataset-links.csv was updated on 1 May 2024.
18 | * 2024-03-26 - Added **128M** building footprint edits and **3.5M** height estimates derived from Maxar and Vexcel imagery between 2019 and 2023. Primary contributions are in India (110M) and Nepal (7M). dataset-links.csv was updated on 1 April 2024.
19 | * 2024-03-01 - added **58M** new building footprint edits and **10M** height estimates derived from Maxar and Vexcel imagery between 2019 and 2023. The largest contributions are to Brazil (43M), United States (4.7M), and Germany (3.5M).
20 | * 2024-02-01 - added **47M** new building footprint edits and **26M** height estimates derived from Maxar, Vexcel, and IGN-France imagery between 2016 and 2023. The largest contributions are to Germany (8M) and United States (8M).
21 | * 2024-01-03 - added **35M** new building footprint edits and **15M** height estimates derived from Vexcel and Maxar imagery between 2016 and 2022. The largest contributions are to Brazil (19M) and Italy (15M).
22 | * 2023-12-01 - added **69M** building footprint edits and **2M** height estimates derived from Maxar and Vexcel imagery between 2015 and 2023. Biggest contributions are Egypt (11M), Algeria (8M), and France (7M). We've added confidence scores to new footprints. See [Building confidence scores](#building-confidence-scores).
23 | * 2023-10-20 - Added **95M** building footprint edits and **9M** height estimates derived from Maxar and Vexcel imagery between 2017 and 2023. Updated structures in 52
24 | countries with the largest contributors in Mexico (17M), Ethiopia (16M), and Kenya (15M).
25 | * 2023-10-02 - Added **77M** building footprint edits derived from Maxar and Vexcel imagery between 2018 and 2023. Updated structures in 35 countries with the top 3 contributions in India (24M), Bangladesh (19M), and Pakistan (14M).
26 | * 2023-09-05 - Added **38M** building footprint edits and **21M** height estimates derived from Vexcel and Maxar imagery from 2018 to 2023. Updated structures in 22 countries, primarily in the United States (13M), United Kingdom (7.6M), and Peru (9M).
27 | * 2023-08-22 - Added **730K** buildings from Vexcel imagery between 2020 and 2022. The largest updates are for Spain (369K), Austria (196K), and Germany (69K).
28 | * 2023-07-24 - added **5M** new buildings from Digital Globe and Maxar. Biggest contribution is Australia (5M).
29 | * 2023-06-05 - Combining [all building footprint releases](#will-there-be-more-data-coming-for-other-geographies) into one distribution. Total footprints: **1.2B**. Footprints with height: **174M**. Updated coverage map and associated features. Other data sources will remain unchanged.
30 | * 2023-05-31 - added **49M** updated footprints and **10M** height attributes from Vexcel and Maxar imagery. Biggest contributions are Nigeria (18M), United States (9M), and Burkina Faso (7M).
31 | * 2023-05-18 - added **77M** building heights to the US and Western Europe. See the coverage map.
32 | * 2023-04-28 - Improved near-duplicate and overlapping data detection and removal.
33 | * 2023-03-13 - Added **41M** new buildings in Japan derived from Maxar imagery (FP rate 0.8%). Added **79M** building height estimates for North America structures.
34 | * 2022-11-16 - Added **40M** new and updated buildings across 46 geographies derived from Bing imagery including Maxar, IGN-France, and AirBus between 2015 and 2022. The largest updates are for Pakistan (16M), Turkey (13M), Afghanistan (3M), and Saudi Arabia (2.5M). Added [make-gis-friendly.py](scripts/make-gis-friendly.py) demonstrating how to convert files
35 | into a format friendly to GIS tools (e.g., QGIS, ArcGIS).
36 | * 2022-10-12 - Added **147M** new buildings for North America based on Vexcel and Maxar imagery between 2017 and 2022. This data is a refresh of [US](https://github.com/microsoft/USBuildingFootprints).
37 | Updated the data format from country-partitioned zip files to gzipped files partitioned by country and [L9 quad key](https://learn.microsoft.com/en-us/bingmaps/articles/bing-maps-tile-system#tile-coordinates-and-quadkeys). Each file extension is `.csv.gz`, but the contents are geojsonl. The false positive rate for this dataset is ~1% based on a 4k structure sample. The link table moved
38 | to [dataset-links.csv](https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv).
39 | * 2022-07-08 - Added **78M** buildings in Western EU countries from Maxar imagery between 2014 and 2021, bringing the total structure count to **856M**. Added a link to download buildings coverage.
40 | * 2022-07-05 - The complete building footprints dataset is available on [Microsoft's Planetary Computer](https://planetarycomputer.microsoft.com/dataset/ms-buildings)
41 | 
42 | 
43 | ![sample footprints](images/footprints-sample.png)
44 | 
45 | ### Regions included
46 | 
47 | ![building regions](images/country-overview.png)
48 | 
49 | You can download the layer above as GeoJSON [here](https://minedbuildings.z5.web.core.windows.net/global-buildings/buildings-coverage.geojson).
50 | 
51 | ### Buildings with height coverage
52 | ![building heights](images/building-height-coverage.png)
53 | 
54 | You can download the layer above as GeoJSON [here](https://minedbuildings.z5.web.core.windows.net/global-buildings/buildings-with-height-coverage.geojson).
55 | 
56 | ## License
57 | This data is licensed by Microsoft under the [Open Data Commons Open Database License (ODbL)](https://opendatacommons.org/licenses/odbl/).
58 | 
59 | 
60 | ## FAQ
61 | ### What does the data include?
62 | 1.4B building footprint polygon geometries located around the world in line-delimited GeoJSON format. Due to the way we process the data, file extensions are `.csv.gz`; see [make-gis-friendly.py](scripts/make-gis-friendly.py) for an example of how to decompress the files and change the extension.
63 | 
64 | As of October 2022, we moved the location table to [dataset-links.csv](https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv) since it's over 19k records with country-quadkey partitioning.
65 | 
66 | ### What is the GeoJSON format?
67 | GeoJSON is a format for encoding a variety of geographic data structures.
68 | For documentation and tutorials, see [geojson.org](http://geojson.org/).
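To make that concrete, here is a minimal sketch of parsing one decompressed line with Python's standard `json` module. The coordinates and property values below are invented for illustration; real files contain one GeoJSON Feature like this per line.

```python
import json

# A made-up example of a single decompressed line from a .csv.gz file.
line = '{"type": "Feature", "geometry": {"type": "Polygon", "coordinates": [[[-122.126, 47.640], [-122.125, 47.640], [-122.125, 47.641], [-122.126, 47.641], [-122.126, 47.640]]]}, "properties": {"height": 5.2, "confidence": 0.91}}'

# Each line parses to an ordinary dict with geometry and properties.
feature = json.loads(line)
print(feature["properties"])  # {'height': 5.2, 'confidence': 0.91}
```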
69 | 
70 | ### Why is the data being released?
71 | Microsoft has a continued interest in supporting a thriving OpenStreetMap ecosystem.
72 | 
73 | ### Should we import the data into OpenStreetMap?
74 | Maybe. Never overwrite the hard work of other contributors or blindly import data into OSM without first checking the local quality. While our metrics show that this data meets or exceeds the quality of hand-drawn building footprints, the data does vary in quality from place to place, between rural and urban, mountains and plains, and so on. Inspect quality locally and discuss an import plan with the community. Always follow the [OSM import community guidelines](https://wiki.openstreetmap.org/wiki/Import/Guidelines).
75 | 
76 | ### Will the data be used or made available in the larger OpenStreetMap ecosystem?
77 | Yes. The [HOT Tasking Manager](https://tasks.hotosm.org) has integrated Facebook's [Rapid](https://rapideditor.org/edit) editor, where the data has been made available.
78 | 
79 | ### How did we create the data?
80 | The building extraction is done in two stages:
81 | 1. Semantic Segmentation – Recognizing building pixels on an aerial image using deep neural networks (DNNs)
82 | 2. Polygonization – Converting building pixel detections into polygons
83 | 
84 | #### Stage 1: Semantic Segmentation
85 | ![segmentation diagram](images/segmentation.jpg)
86 | 
87 | #### Stage 2: Polygonization
88 | ![polygonization diagram](images/polygonization.jpg)
89 | 
90 | ### How do we estimate building height?
91 | We trained a neural network to estimate height above ground using imagery paired with height measurements, and then take the
92 | average height within each building polygon. Structures without height estimates are populated with a value of -1. Height estimates are in meters.
93 | 
94 | ### Building confidence scores
95 | Confidence scores are between 0 and 1 and can be read as percent confidence. For structures released before this update, we use -1 as a placeholder value.
96 | A confidence value of 0.8 is read as "80% confidence." Higher values mean higher detection confidence. There are two stages in the building detection process -- first we use a model to classify pixels as either building or not, and next we convert groups of pixels into polygons. Each pixel has a probability of being a building, and a
97 | probability >0.5 is classified as a "building pixel". When we generate the polygons, we then look at the pixels within and average the probability values to give an
98 | overall confidence score. The confidence scores are for the footprint, not the height estimate.
99 | 
100 | ### Were there any modeling improvements used for this release?
101 | We did not apply any modeling improvements for this release. Instead, we focused on scaling our approach to increase coverage, and trained models regionally.
102 | 
103 | ### Evaluation set metrics
104 | The evaluation metrics are computed on a set of building polygon labels for each region. Note that we only have verification results for
105 | Mexico buildings since we did not train a model for that country.
106 | 
107 | Building match metrics on the evaluation set:
108 | 
109 | | Region | Precision | Recall |
110 | |:----------------:|:------------:|:---------:|
111 | | Africa | 94.4% | 70.9% |
112 | | Caribbean | 92.2% | 76.8% |
113 | | Central Asia | 97.17% | 79.47% |
114 | | Europe | 94.3% | 85.9% |
115 | | Middle East | 95.7% | 85.4% |
116 | | South America | 95.4% | 78.0% |
117 | | South Asia | 94.8% | 76.7% |
118 | 
119 | 
120 | 
121 | 
122 | We track the following metrics to measure the quality of matched building polygons in the evaluation set:
123 | 1. Intersection over Union – This is a standard metric measuring the overlap quality against the labels
124 | 2. Dominant angle rotation error – This measures the polygon rotation deviation
125 | 
126 | | Region | IoU | Rotation error [deg] |
127 | |:----------------:|:-------:|:-----------------------:|
128 | | Africa | 64.5% | 5.67 |
129 | | Caribbean | 64.0% | 6.64 |
130 | | Central Asia | 68.2% | 6.91 |
131 | | Europe | 65.1% | 10.28 |
132 | | Middle East | 65.1% | 9.3 |
133 | | South America | 66.7% | 6.34 |
134 | | South Asia | 63.1% | 6.25 |
135 | 
136 | 
137 | 
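To make the first metric concrete, here is a small sketch of computing IoU for two polygons with `shapely` (the same library the example notebook uses); the two squares are made-up inputs for illustration only.

```python
from shapely import geometry

# A hypothetical predicted footprint and its ground-truth label.
pred = geometry.box(0.0, 0.0, 10.0, 10.0)
label = geometry.box(2.0, 0.0, 12.0, 10.0)

# Intersection over Union: overlap area divided by combined area.
iou = pred.intersection(label).area / pred.union(label).area
print(f"IoU = {iou:.3f}")  # 80 / (100 + 100 - 80) = 80/120, i.e. about 0.667
```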
138 | ### False positive ratio in the corpus
139 | 
140 | False positives are estimated per country from randomly sampled building polygon predictions.
141 | 
142 | | Region | Buildings Sampled | False Positive Rate | Run Date |
143 | | :--: | :--: | :--: | :--: |
144 | | Africa | 5,000 | 1.1% | Early 2022 |
145 | | Caribbean | 3,000 | 1.8% | Early 2022 |
146 | | Central Asia | 3,000 | 2.2% | Early 2022 |
147 | | Europe | 5,000 | 1.4% | Early 2022 |
148 | | Mexico | 2,000 | 0.1% | Early 2022 |
149 | | Middle East | 7,000 | 1.8% | Early 2022 |
150 | | South America | 5,000 | 1.7% | Early 2022 |
151 | | South Asia | 7,000 | 1.4% | Early 2022 |
152 | | North America | 4,000 | 1% | Oct 2022 |
153 | | Europe Maxar | 5,000 | 1.4% | July 2022 |
154 | 
155 | 
156 | ### What is the vintage of this data?
157 | The vintage of extracted building footprints depends on the vintage of the underlying imagery. The underlying imagery is from Bing Maps, including Maxar and Airbus, between 2014 and 2024.
158 | 
159 | ### How good is the data?
160 | Our metrics show that in the vast majority of cases the quality is at least as good as hand-digitized buildings in OpenStreetMap. It is not perfect, particularly in dense urban areas, but it provides good recall in rural areas.
161 | 
162 | ### What is the coordinate reference system?
163 | EPSG:4326
164 | 
165 | ### Will there be more data coming for other geographies?
166 | Maybe. This is a work in progress. Also, check out our other building releases!
167 | * [US](https://github.com/microsoft/USBuildingFootprints)
168 | * [Australia](https://github.com/microsoft/AustraliaBuildingFootprints)
169 | * [Canada](https://github.com/microsoft/CanadianBuildingFootprints)
170 | * [Uganda and Tanzania](https://github.com/microsoft/Uganda-Tanzania-Building-Footprints)
171 | * [South America](https://github.com/microsoft/SouthAmericaBuildingFootprints)
172 | * [Kenya and Nigeria](https://github.com/microsoft/KenyaNigeriaBuildingFootprints)
173 | * [Indonesia, Malaysia, and the Philippines](https://github.com/microsoft/IdMyPhBuildingFootprints)
174 | 
175 | ### Why are some locations missing?
176 | We excluded imagery from processing if tiles were dated before 2014 or there was a low probability of detection. Detection probability is loosely defined here as proximity to roads and population centers. This filtering and tile exclusion results in squares of missing data.
177 | 
178 | ### How can I read large files?
179 | Some files are very large, but they are stored in line-delimited format, so you can use parallel processing tools (e.g., [Spark](https://spark.apache.org/), [Dask](https://docs.dask.org/en/stable/dataframe.html)) or a memory-efficient
180 | script to split them into smaller pieces. See `scripts/read-files.py` for a Python example, and the hedged pandas sketch below.
181 | 
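The snippet below is a minimal sketch of that memory-bounded approach using plain pandas. It assumes pandas is installed, that you have already downloaded and decompressed one tile to a local file (the name `Abyei.geojsonl` is only an illustration), and that each feature's `properties` object carries the `confidence` field described in [Building confidence scores](#building-confidence-scores); treat it as an illustrative example rather than a supported API.

```python
import pandas as pd

# Stream the file in fixed-size chunks so peak memory is bounded by
# `chunksize` rather than by the file size. Each chunk is an ordinary
# DataFrame with one GeoJSON feature per row.
reader = pd.read_json("Abyei.geojsonl", lines=True, chunksize=100_000)
for i, chunk in enumerate(reader):
    # Flatten the per-feature properties (e.g., height, confidence) into columns.
    props = pd.json_normalize(chunk["properties"].tolist())
    # Keep reasonably confident detections; -1 marks footprints released
    # before confidence scores were added.
    keep = (props["confidence"] >= 0.8).to_numpy()
    chunk[keep].to_json(f"Abyei_part-{i}.geojsonl", orient="records", lines=True)
```

This is the same memory-bounded idea that `scripts/read-files.py` implements with plain file iteration; drop the confidence filter if your tiles predate the confidence scores.

182 | ## Need roads?
183 | Check out our [ML Road Detections](https://github.com/microsoft/RoadDetections) project page!
184 | 
185 | 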
186 | 
187 | ## Contributing
188 | 
189 | This project welcomes contributions and suggestions. Most contributions require you to agree to a
190 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
191 | the rights to use your contribution. For details, visit https://cla.microsoft.com.
192 | 
193 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
194 | a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
195 | provided by the bot. You will only need to do this once across all repos using our CLA.
196 | 
197 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
198 | For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
199 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
200 | 
201 | ## Legal Notices
202 | 
203 | Microsoft, Windows, Microsoft Azure and/or other Microsoft products and services referenced in the documentation
204 | may be either trademarks or registered trademarks of Microsoft in the United States and/or other countries.
205 | The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks.
206 | Microsoft's general trademark guidelines can be found [here](http://go.microsoft.com/fwlink/?LinkID=254653).
207 | 
208 | Privacy information can be found [here](https://privacy.microsoft.com/en-us/).
209 | 
210 | Microsoft and any contributors reserve all other rights, whether under their respective copyrights, patents,
211 | or trademarks, whether by implication, estoppel or otherwise.
212 | 
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | ## Security
4 | 
5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
6 | 
7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
8 | 
9 | ## Reporting Security Issues
10 | 
11 | **Please do not report security vulnerabilities through public GitHub issues.**
12 | 
13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
14 | 
15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
16 | 
17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
18 | 
19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
20 | 
21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
22 | * Full paths of source file(s) related to the manifestation of the issue
23 | * The location of the affected source code (tag/branch/commit or direct URL)
24 | * Any special configuration required to reproduce the issue
25 | * Step-by-step instructions to reproduce the issue
26 | * Proof-of-concept or exploit code (if possible)
27 | * Impact of the issue, including how an attacker might exploit the issue
28 | 
29 | This information will help us triage your report more quickly.
30 | 
31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
32 | 
33 | ## Preferred Languages
34 | 
35 | We prefer all communications to be in English.
36 | 
37 | ## Policy
38 | 
39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).
40 | 
41 | 
--------------------------------------------------------------------------------
/SUPPORT.md:
--------------------------------------------------------------------------------
1 | # TODO: The maintainer of this repo has not yet edited this file
2 | 
3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
4 | 
5 | - **No CSS support:** Fill out this template with information about how to file issues and get help.
6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
8 | 
9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
10 | 
11 | # Support
12 | 
13 | ## How to file issues and get help
14 | 
15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing
16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or
17 | feature request as a new Issue.
18 | 
19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
22 | 
23 | ## Microsoft Support Policy
24 | 
25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
26 | 
--------------------------------------------------------------------------------
/examples/example_building_footprints.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "import pandas as pd\n",
10 |     "import geopandas as gpd\n",
11 |     "from shapely import geometry\n",
12 |     "import mercantile\n",
13 |     "from tqdm import tqdm\n",
14 |     "import os\n",
15 |     "import tempfile"
16 |    ]
17 |   },
18 |   {
19 |    "cell_type": "markdown",
20 |    "metadata": {},
21 |    "source": [
22 |     "## Step 1 - Define our area of interest (AOI)\n",
23 |     "\n",
24 |     "We define our area of interest (or AOI) as a GeoJSON geometry, then use the `shapely` library to get the bounding box.\n",
25 |     "\n",
26 |     "**Note**: the coordinate reference system for the GeoJSON should be \"EPSG:4326\", i.e., in global lat/lon format."
27 |    ]
28 |   },
29 |   {
30 |    "cell_type": "code",
31 |    "execution_count": null,
32 |    "metadata": {},
33 |    "outputs": [],
34 |    "source": [
35 |     "# Geometry copied from https://geojson.io\n",
36 |     "aoi_geom = {\n",
37 |     "    \"coordinates\": [\n",
38 |     "        [\n",
39 |     "            [-122.16484503187519, 47.69090474454916],\n",
40 |     "            [-122.16484503187519, 47.6217555345674],\n",
41 |     "            [-122.06529607517405, 47.6217555345674],\n",
42 |     "            [-122.06529607517405, 47.69090474454916],\n",
43 |     "            [-122.16484503187519, 47.69090474454916],\n",
44 |     "        ]\n",
45 |     "    ],\n",
46 |     "    \"type\": \"Polygon\",\n",
47 |     "}\n",
48 |     "aoi_shape = geometry.shape(aoi_geom)\n",
49 |     "minx, miny, maxx, maxy = aoi_shape.bounds\n",
50 |     "\n",
51 |     "output_fn = \"example_building_footprints.geojson\""
52 |    ]
53 |   },
54 |   {
55 |    "cell_type": "markdown",
56 |    "metadata": {},
57 |    "source": [
58 |     "## Step 2 - Determine which tiles intersect our AOI"
59 |    ]
60 |   },
61 |   {
62 |    "cell_type": "code",
63 |    "execution_count": null,
64 |    "metadata": {},
65 |    "outputs": [],
66 |    "source": [
67 |     "quad_keys = set()\n",
68 |     "for tile in list(mercantile.tiles(minx, miny, maxx, maxy, zooms=9)):\n",
69 |     "    quad_keys.add(mercantile.quadkey(tile))\n",
70 |     "quad_keys = list(quad_keys)\n",
71 |     "print(f\"The input area spans {len(quad_keys)} tiles: {quad_keys}\")"
72 |    ]
73 |   },
74 |   {
75 |    "cell_type": "markdown",
76 |    "metadata": {},
77 |    "source": [
78 |     "## Step 3 - Download the building footprints for each tile that intersects our AOI and crop the results\n",
79 |     "\n",
80 |     "This is where most of the magic happens. We download all the building footprints for each tile that intersects our AOI, then only keep the footprints that are _contained_ by our AOI.\n",
81 |     "\n",
82 |     "*Note*: this step might take a while depending on how many tiles your AOI covers and how many building footprints are in those tiles."
83 |    ]
84 |   },
85 |   {
86 |    "cell_type": "code",
87 |    "execution_count": null,
88 |    "metadata": {},
89 |    "outputs": [],
90 |    "source": [
91 |     "df = pd.read_csv(\n",
92 |     "    \"https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv\", dtype=str\n",
93 |     ")\n",
94 |     "df.head()"
95 |    ]
96 |   },
97 |   {
98 |    "cell_type": "code",
99 |    "execution_count": null,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "idx = 0\n",
104 |     "combined_gdf = gpd.GeoDataFrame()\n",
105 |     "with tempfile.TemporaryDirectory() as tmpdir:\n",
106 |     "    # Download the GeoJSON files for each tile that intersects the input geometry\n",
107 |     "    tmp_fns = []\n",
108 |     "    for quad_key in tqdm(quad_keys):\n",
109 |     "        rows = df[df[\"QuadKey\"] == quad_key]\n",
110 |     "        if rows.shape[0] == 1:\n",
111 |     "            url = rows.iloc[0][\"Url\"]\n",
112 |     "\n",
113 |     "            df2 = pd.read_json(url, lines=True)\n",
114 |     "            df2[\"geometry\"] = df2[\"geometry\"].apply(geometry.shape)\n",
115 |     "\n",
116 |     "            gdf = gpd.GeoDataFrame(df2, crs=4326)\n",
117 |     "            fn = os.path.join(tmpdir, f\"{quad_key}.geojson\")\n",
118 |     "            tmp_fns.append(fn)\n",
119 |     "            if not os.path.exists(fn):\n",
120 |     "                gdf.to_file(fn, driver=\"GeoJSON\")\n",
121 |     "        elif rows.shape[0] > 1:\n",
122 |     "            raise ValueError(f\"Multiple rows found for QuadKey: {quad_key}\")\n",
123 |     "        else:\n",
124 |     "            raise ValueError(f\"QuadKey not found in dataset: {quad_key}\")\n",
125 |     "\n",
126 |     "    # Merge the GeoJSON files into a single file\n",
127 |     "    for fn in tmp_fns:\n",
128 |     "        gdf = gpd.read_file(fn)  # Read each file into a GeoDataFrame\n",
129 |     "        gdf = gdf[gdf.geometry.within(aoi_shape)]  # Filter geometries within the AOI\n",
130 |     "        gdf['id'] = range(idx, idx + len(gdf))  # Update 'id' based on idx\n",
131 |     "        idx += len(gdf)\n",
132 |     "        combined_gdf = pd.concat([combined_gdf,gdf],ignore_index=True)"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "markdown",
137 |    "metadata": {},
138 |    "source": [
139 |     "## Step 4 - Save the resulting footprints to file"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": null,
145 |    "metadata": {},
146 |    "outputs": [],
147 |    "source": [
148 |     "combined_gdf = combined_gdf.to_crs('EPSG:4326')\n",
149 |     "combined_gdf.to_file(output_fn, driver='GeoJSON')"
150 |    ]
151 |   }
152 |  ],
153 |  "metadata": {
154 |   "kernelspec": {
155 |    "display_name": "Geo",
156 |    "language": "python",
157 |    "name": "geo"
158 |   },
159 |   "language_info": {
160 |    "codemirror_mode": {
161 |     "name": "ipython",
162 |     "version": 3
163 |    },
164 |    "file_extension": ".py",
165 |    "mimetype": "text/x-python",
166 |    "name": "python",
167 |    "nbconvert_exporter": "python",
168 |    "pygments_lexer": "ipython3",
169 |    "version": "3.10.6"
170 |   }
171 |  },
172 |  "nbformat": 4,
173 |  "nbformat_minor": 4
174 | }
175 | 
--------------------------------------------------------------------------------
/images/building-height-coverage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/GlobalMLBuildingFootprints/f1b082e6b7f01b8e0823adfe4eb84f5e46abc249/images/building-height-coverage.png
--------------------------------------------------------------------------------
/images/country-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/GlobalMLBuildingFootprints/f1b082e6b7f01b8e0823adfe4eb84f5e46abc249/images/country-overview.png
--------------------------------------------------------------------------------
/images/footprints-sample.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/GlobalMLBuildingFootprints/f1b082e6b7f01b8e0823adfe4eb84f5e46abc249/images/footprints-sample.png
--------------------------------------------------------------------------------
/images/polygonization.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/GlobalMLBuildingFootprints/f1b082e6b7f01b8e0823adfe4eb84f5e46abc249/images/polygonization.jpg
--------------------------------------------------------------------------------
/images/segmentation.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/microsoft/GlobalMLBuildingFootprints/f1b082e6b7f01b8e0823adfe4eb84f5e46abc249/images/segmentation.jpg
--------------------------------------------------------------------------------
/scripts/make-gis-friendly.ps1:
--------------------------------------------------------------------------------
1 | 
2 | $CountryToProcess = "Abyei"
3 | 
4 | $DatasetLinksUrl = "https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv"
5 | $CompressedData = "compressed"
6 | $ExpandedData = "expanded"
7 | 
8 | mkdir $CompressedData -Force
9 | mkdir $ExpandedData -Force
10 | 
11 | Function Expand-GZip{
12 |     Param(
13 |         $infile,
14 |         $outfile
15 |     )
16 |     $inputData = New-Object System.IO.FileStream $inFile, ([IO.FileMode]::Open), ([IO.FileAccess]::Read), ([IO.FileShare]::Read)
17 |     $output = New-Object System.IO.FileStream $outFile, ([IO.FileMode]::Create), ([IO.FileAccess]::Write), ([IO.FileShare]::None)
18 |     $gzipStream = New-Object System.IO.Compression.GzipStream $inputData, ([IO.Compression.CompressionMode]::Decompress)
19 |     $buffer = New-Object byte[](1024)
20 |     while($true){
21 |         $read = $gzipStream.Read($buffer, 0, 1024)
22 |         if ($read -le 0){break}
23 |         $output.Write($buffer, 0, $read)
24 |     }
25 |     $gzipStream.Close()
26 |     $output.Close()
27 |     $inputData.Close()
28 | }
29 | 
30 | $resp = Invoke-WebRequest -Uri $DatasetLinksUrl
31 | $links = ($resp.ToString() | ConvertFrom-Csv)
32 | 
33 | foreach ($link in $links) {
34 |     if ($link.Location -eq $CountryToProcess) {
35 |         Write-Host "Country: $($link.Location) QuadKey: $($link.QuadKey) Url: $($link.Url)"
36 |         $downloadedGzip = "$CompressedData\$($link.Location)-$($link.QuadKey).csv.gz"
37 |         $decompressedData = "$ExpandedData\$($link.Location)-$($link.QuadKey).geojsonl"
38 |         Invoke-WebRequest -Uri $link.Url -OutFile $downloadedGzip
39 |         Expand-GZip $downloadedGzip $decompressedData
40 |     }
41 | }
--------------------------------------------------------------------------------
/scripts/make-gis-friendly.py:
--------------------------------------------------------------------------------
1 | """
2 | This snippet demonstrates how to access and convert the buildings
3 | data from .csv.gz to geojson for use in common GIS tools. You will
4 | need to install pandas, geopandas, and shapely.
5 | """
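# Note: the dependencies named in the docstring above can be installed
# with, e.g., `pip install pandas geopandas shapely` (environment setup
# may vary; this is a suggestion, not part of the original script).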
6 | 
7 | import pandas as pd
8 | import geopandas as gpd
9 | from shapely.geometry import shape
10 | 
11 | def main():
12 |     # this is the name of the geography you want to retrieve. update to meet your needs
13 |     location = 'Greece'
14 | 
15 |     dataset_links = pd.read_csv("https://minedbuildings.z5.web.core.windows.net/global-buildings/dataset-links.csv")
16 |     greece_links = dataset_links[dataset_links.Location == location]
17 |     for _, row in greece_links.iterrows():
18 |         df = pd.read_json(row.Url, lines=True)
19 |         df['geometry'] = df['geometry'].apply(shape)
20 |         gdf = gpd.GeoDataFrame(df, crs=4326)
21 |         gdf.to_file(f"{row.QuadKey}.geojson", driver="GeoJSON")
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     main()
--------------------------------------------------------------------------------
/scripts/read-files.py:
--------------------------------------------------------------------------------
1 | 
2 | """
3 | This Python script is an example of how to read a large line-delimited file and split it into multiple
4 | parts. This can be helpful when using a machine that cannot load an entire file into memory.
5 | """
6 | import os
7 | 
8 | def main():
9 |     # path to decompressed geojsonl file
10 |     input_file = "Angola.geojsonl"
11 | 
12 |     # check to make sure we can find the input file
13 |     assert os.path.exists(input_file), f"{input_file} not found!"
14 | 
15 |     # template output file path. the script will populate the curly brackets {} with a number
16 |     output_file_template = "Angola_part-{}.geojsonl"
17 | 
18 |     # this is the maximum number of features per file. adjust as desired. 10k features per file yields ~3MB files.
19 |     buildings_per_file = 10_000
20 | 
21 |     # open the large file
22 |     with open(input_file) as inf:
23 |         # read a single line
24 |         line = inf.readline()
25 | 
26 |         # used for updating file numbers
27 |         file_counter = 1
28 | 
29 |         # this is where we count the number of features in a single file
30 |         lines_per_file = 0
31 | 
32 |         # create the actual file path from the template above
33 |         current_target_file_path = output_file_template.format(file_counter)
34 | 
35 |         # prevent overwriting existing files
36 |         assert not os.path.exists(current_target_file_path), f"{current_target_file_path} already exists!"
37 | 
38 |         # open an output file in write mode.
39 |         target = open(current_target_file_path, 'w')
40 | 
41 |         # start iterating through each feature
42 |         while line:
43 | 
44 |             # write a single feature to the current output file
45 |             target.write(line)
46 | 
47 |             # increment the count for number of features in a file
48 |             lines_per_file += 1
49 | 
50 |             # go to next feature in the large file
51 |             line = inf.readline()
52 | 
53 |             # check if we have hit the desired feature limit per file
54 |             if lines_per_file == buildings_per_file:
55 |                 # close the current target file since we've reached the desired feature limit
56 |                 target.close()
57 |                 print(f"wrote {lines_per_file:,} lines to {current_target_file_path}")
58 | 
59 |                 # increment the file counter so we can create a new output
60 |                 file_counter += 1
61 | 
62 |                 # reset the line counter for the new output file
63 |                 lines_per_file = 0
64 | 
65 |                 # create the path for the next output file
66 |                 current_target_file_path = output_file_template.format(file_counter)
67 |                 # prevent overwriting existing files
68 |                 assert not os.path.exists(current_target_file_path), f"{current_target_file_path} already exists!"
69 | 
70 |                 # open the next output file
71 |                 target = open(current_target_file_path, 'w')
72 | 
73 |     # when we get here, there are no more features left in the large file, so we close the last target file
74 |     if not target.closed:
75 |         print(f"wrote {lines_per_file:,} lines to {current_target_file_path}")
76 |         target.close()
77 |     print("Complete!")
78 | 
79 | 
80 | if __name__ == "__main__":
81 |     main()
82 | 
--------------------------------------------------------------------------------