├── .gitignore
├── __pycache__
│   └── ot_utils.cpython-36.pyc
├── pipeline.json
├── ingest_template.org
├── ingest_template.py
├── readme.md
└── ot_utils.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
test_data/
OT_utils_ChangeLog.org
*~*
*#*
--------------------------------------------------------------------------------
/__pycache__/ot_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OpenTopography/Data_QA-QC/main/__pycache__/ot_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/pipeline.json:
--------------------------------------------------------------------------------
{
  "pipeline": [
    {
      "type" : "readers.las"
    },
    {
      "type":"filters.reprojection",
      "in_srs":"EPSG:26910+5703",
      "out_srs":"EPSG:6339+5703"
    },
    {
      "type" : "writers.las",
      "compression": "laszip",
      "a_srs": "EPSG:6339+5703"
    }
  ]
}
--------------------------------------------------------------------------------
/ingest_template.org:
--------------------------------------------------------------------------------
#+TITLE: Notes on Ingest of X
#+OPTIONS: ^:nil

* Org Mode Notes:
# ------------------------------------------------------------------------
# 1. go to Org -> HyperLinks -> Literal Links to show full paths so that I
#    can do a search and replace
# 2. C-U C-U TAB to collapse all
# 3. C-U C-U C-U TAB to reveal all
# 4. To indent and frame a section of text put #+BEGIN_EXAMPLE at the
#    beginning and #+END_EXAMPLE at the end of the section of text.
# 5. C-c C-c toggles check box
# 6. C-c C-l lets you put in links, or rename links
# 7. * Title
# 8. ** SubTitle1
# 9. *** SubTitle2
# 10. * Check List example
#     - [ ] item1
#     - [ ] item2
# 11. [[path of link][link name] add the closing "]" to hide the path
# 12. Table example:
#     |Spacecraft |Type     |time interval|
#     |-----------+---------+-------------|
#     |Meteosat-9 |Channels |3h           |
# ------------------------------------------------------------------------

* Tasks
- [ ] PDAL logs created
- [ ] LAZ files exist?
- [ ] Are there any empty files?
- [ ] QA/QC on CRS and version of LAZ files - is it uniform?
- [ ] plot georeferenced LAZ to verify that the CRS in the metadata is
  actually what the data is in (i.e. don't trust metadata?)
- [ ] Boundary created
- [ ] Area of boundary verified?
- [ ] Header values match input (i.e. nothing was accidentally altered)
- [ ] File counts match (for both rasters and point clouds)
- [ ] Rasters in tiff format?
- [ ] Rasters contain CRS info?
- [ ] Rasters all in same CRS?
- [ ] Rasters in same data type
- [ ] if Orthos present, tile and compress to reduce size
- [ ] Is DSM > DEM?
- [ ] Can VRT be built?
- [ ] Does Raster boundary match lidar boundary? Are there gaps?
- [ ] Put Metadata into PDF format if necessary.
- [ ] Create registration page on xx/xx/xxxx
- [ ] Add logo to registration
- [ ] Uploaded LAZ and rasters with rsync to beta on xx/xx/xxxx
- [ ] When data is made public, notify Minh to turn on "Bulk Download"
  option.
- [ ] When data is public, make sure you can see the boundary in global view
  (especially for LINZ data). If not, notify Minh to update the
  boundaries.
- [ ] When data is made public, add project to PI's registration page so
  they can see stats on their dataset (if applicable)
- [ ] When data is made public, add to Catalog Service for the Web (CSW)


* Summary
** Original Check Before ingest
- Title:
- PI:
- CRS according to metadata:
- HCRS/VCRS:
- Projection:
- Original LAS/LAZ file count:
- Original files are in las/laz version 1.x

* Bounds

** Area
- km^2. My calcs matched QGIS. Metadata reports

* Metadata

* CRS checks
- all files are in EPSG: and version 1.x

* Rasters
* Beta Testing
- [ ] check all links
- [ ] download and check boundary kml
- [ ] check DOI
- [ ] download and check metadata
- [ ] Check LAS Validation report
- [ ] Check Citation
- [ ] Check that las and laz contain proper CRS info
- [ ] Check that OT rasters contain CRS and plot in a GIS

* Prod testing
- [ ] check all links
- [ ] download and check boundary kml
- [ ] check DOI
- [ ] download and check metadata
- [ ] Check LAS Validation report
- [ ] Check Citation
- [ ] Check that las and laz contain proper CRS info
- [ ] Check that OT rasters contain CRS and plot in a GIS
- [ ] Add PIs to list of private users
- [ ] Add dataset to PI DataProvider List for metrics.
- [ ] Make sure Minh: Copies to SDSC Cloud for bulk downloads
- [ ] Make sure Minh: Updates KML for "Find Data" Map
- [ ] Make sure Minh: Inserts record to GeoPortal
- [ ] Update Excel spreadsheet.

--------------------------------------------------------------------------------
/ingest_template.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
from __future__ import print_function

import sys,os,ipdb
import ot_utils as ot

"""Name:
   Description: config file to run QA/QC of datasets
   Notes:
"""

#inputs:
#----------------------------------------------------------------------
#Best to name the working directory as an OT shortname.  Assumes you are
#running this from the scripts directory.
ingestBase = os.path.dirname(os.getcwd())
shortname = os.path.basename(ingestBase)
bounds_base = os.path.join(ingestBase,'bounds')
log_dir = os.path.join(ingestBase,'logs')
scripts_dir = os.path.join(ingestBase,'scripts')
#----------------------------------------------------------------------

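#NOTE: the pattern below repeats for every step: initialize a config dict
#to all nulls with ot.initializeNullConfig(), set only the keywords that
#step needs, then uncomment the matching ot.RunQAQC() call.  Run one step
#at a time (e.g. convert to LAZ first, then do the boundary/QA-QC pass).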

#Config file for Converting LAS files to LAZ
#----------------------------------------------------------------------
#module to initialize the config file to all null values.
config1 = ot.initializeNullConfig()

config1['log_dir'] = log_dir
config1['ingestLog'] = os.path.join(log_dir,shortname+'_LAS2LAZ_QAQCLog.txt')
config1['LAS2LAZ'] = 1
config1['LAS2LAZ_method'] = 'pdal'
config1['getFilesWild'] = r'.*\.las$'
config1['getFilesDir'] = '/Volumes/New Volume/ToOT_HD35/2018_13_265_Feehan/_Deliverables/PCTiles'
config1['ftype'] = 'f'
config1['recursive'] = 0
config1['LAZDir_out'] = '/volumes/OT6TB/CA18_Feehan/LAZ'
config1['pipeline'] = os.path.join(scripts_dir,'pipeline.json')

#Run Module to Convert LAS to LAZ...
#ot.RunQAQC(config1)
#----------------------------------------------------------------------

#Config file for initial check of LAS files....
#----------------------------------------------------------------------
#module to initialize the config file to all null values
config2 = ot.initializeNullConfig()

config2['log_dir'] = log_dir
config2['ingestLog'] = os.path.join(log_dir,shortname+'_initialCheck_QAQCLog.txt')
config2['recursive'] = 0
config2['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/2017_LAS_Tiles'
config2['getFilesWild'] = r'.*\.las$'
config2['ftype'] = 'f'
config2['CreatePDALInfo'] = 1
config2['PDALInfoFile'] = shortname+'_PDALInfoLog_initial.txt'
config2['ReadPDALLog'] = 1
config2['CheckLAZCount'] = 1
config2['MissingHCRS'] = 1
config2['MissingVCRS'] = 1
config2['HCRS_Uniform'] = 1
config2['VCRS_Uniform'] = 1
config2['VersionCheck'] = 1
config2['PointTypeCheck'] = 1
config2['GlobalEncodingCheck'] = 1
config2['PointCountCheck'] = 1
config2['CreatePDALBoundary'] = 1
config2['bounds_PDAL'] = os.path.join(bounds_base,'PDAL.shp')
config2['BufferSize'] = 1
config2['epsg'] = 6339
config2['bounds_PDALmerge'] = os.path.join(bounds_base,'PDALMerged.shp')
config2['bounds_PDALmergeArea'] = os.path.join(bounds_base,'PDALMergedwArea.shp')
config2['bounds_PDALKML'] = os.path.join(bounds_base,'PDALMergedwArea.kml')
config2['winePath'] = '/Applications/LASTools/bin'
config2['CreateLASBoundary'] = 1
config2['bounds_LT'] = os.path.join(bounds_base,'LTBounds.shp')
config2['randFrac'] = 0.25
config2['concavity'] = 100
config2['bounds_LTArea'] = os.path.join(bounds_base,'LTBoundswArea.shp')
config2['bounds_LTKML'] = os.path.join(bounds_base,'LTBoundswArea.kml')

#Run Module to do initial check of LAS files.
#ot.RunQAQC(config2)
#----------------------------------------------------------------------

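#Sketch: if you want to poke at the results of the initial check by hand,
#the PDAL metadata log that config2 creates can be read back with helpers
#from ot_utils:
#  data   = ot.readJSONARRAY(log_dir, shortname+'_PDALInfoLog_initial.txt')
#  crs_df = ot.CountCRS(data)  #pandas table flagging files missing H/V CRS
#  print(crs_df[(crs_df['MissingHCRS'] == 1) | (crs_df['MissingVCRS'] == 1)])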

#Config file for adding CRS to files...
#----------------------------------------------------------------------
#module to initialize the config file to all null values.
config3 = ot.initializeNullConfig()

config3['log_dir'] = log_dir
config3['ingestLog'] = os.path.join(log_dir,shortname+'_ADDCRS_QAQCLog.txt')
config3['AddCRS2Header']= 1
config3['getFilesWild'] = r'.*\.las$'
config3['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/2017_LAS_Tiles'
config3['ftype'] = 'f'
config3['recursive'] = 0
config3['fsuffix'] = '_EPSG6339'
config3['overwrite'] = 0
config3['LAZDir_out'] = '/volumes/OT6TB/CA17_Dietrich/LAZ'
config3['pipeline'] = os.path.join(scripts_dir,'pipeline.json')
config3['LAS2LAZ_method'] = 'pdal'

#Run Module to add CRS to lidar files (LAS or LAZ)
#ot.RunQAQC(config3)
#----------------------------------------------------------------------



#Config file for QA/QC of LAZ and Create Boundaries
#----------------------------------------------------------------------
#module to initialize the config file to all null values
config4 = ot.initializeNullConfig()

config4['log_dir'] = log_dir
config4['ingestLog'] = os.path.join(log_dir,shortname+'_QAQCLog.txt')
config4['recursive'] = 0
config4['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/LAZ'
config4['getFilesWild'] = r'.*\.laz$'
config4['ftype'] = 'f'
config4['CreatePDALInfo'] = 1
config4['PDALInfoFile'] = shortname+'_PDALInfoLog.txt'
config4['ReadPDALLog'] = 1
config4['CheckLAZCount'] = 1
config4['MissingHCRS'] = 1
config4['MissingVCRS'] = 1
config4['HCRS_Uniform'] = 1
config4['VCRS_Uniform'] = 1
config4['VersionCheck'] = 1
config4['PointTypeCheck'] = 1
config4['GlobalEncodingCheck'] = 1
config4['CreatePDALBoundary'] = 1
config4['bounds_PDAL'] = os.path.join(bounds_base,'PDAL.shp')
config4['BufferSize'] = 1
config4['epsg'] = 6339
config4['bounds_PDALmerge'] = os.path.join(bounds_base,'PDALMerged.shp')
config4['bounds_PDALmergeArea'] = os.path.join(bounds_base,'PDALMergedwArea.shp')
config4['bounds_PDALKML'] = os.path.join(bounds_base,'PDALMergedwArea.kml')
config4['winePath'] = '/Applications/LASTools/bin'
config4['CreateLASBoundary'] = 1
config4['bounds_LT'] = os.path.join(bounds_base,'LTBounds.shp')
config4['randFrac'] = 0.25
config4['concavity'] = 100
config4['bounds_LTArea'] = os.path.join(bounds_base,'LTBoundswArea.shp')
config4['bounds_LTKML'] = os.path.join(bounds_base,'LTBoundswArea.kml')

#Run Module to Ingest LAZ, Create Boundaries
#ot.RunQAQC(config4)
#----------------------------------------------------------------------



#Config file for Checking Original Rasters for Metadata
#----------------------------------------------------------------------
#module to initialize the config file to all null values
config5 = ot.initializeNullConfig()

config5['CheckRasMeta'] = 1
config5['log_dir'] = log_dir
config5['ingestLog'] = os.path.join(log_dir,shortname+'_TEST_QAQCLog.txt')
config5['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/2017_ESRI_50cm'
config5['getFilesWild'] = r'.*\.flt$'
config5['ftype'] = 'f'
config5['recursive'] = 1

#Run module to check the original raster metadata
#ot.RunQAQC(config5)
#----------------------------------------------------------------------

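#Sketch: CheckRasMeta presumably drives ot.CheckRasterInfo(), which returns
#a pandas DataFrame (one row per raster) with columns such as MissingCRS,
#PixelRes_EW/NS, DataType and ColorType, so a quick manual uniformity check
#could look like (hypothetical path):
#  import glob
#  df = ot.CheckRasterInfo(glob.glob('/path/to/rasters/*.flt'))
#  print(df[df['MissingCRS'] == 1])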
173 | #---------------------------------------------------------------------- 174 | ##module to initialize the config file to all null values 175 | config6 = ot.initializeNullConfig() 176 | 177 | config6['log_dir'] = log_dir 178 | config6['ingestLog'] = os.path.join(log_dir,shortname+'_TEST_QAQCLog.txt') 179 | config6['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/2017_ESRI_50cm' 180 | config6['getFilesWild'] = '.*\.flt$' 181 | config6['ftype'] = 'f' 182 | config6['recursive'] = 1 183 | config6['Warp2Tiff'] = 1 184 | config6['ras_xBlock'] = 256 185 | config6['ras_yBlock'] = 256 186 | config6['warp_t_srs'] = '6339' 187 | config6['RasOutDir'] = '/path/to/output/rasters' 188 | 189 | #Run module to reproject rasters... 190 | #ot.RunQAQC(config6) 191 | #---------------------------------------------------------------------- 192 | 193 | #Config file for ONLY converting to tiffs. 194 | #---------------------------------------------------------------------- 195 | ##module to initialize the config file to all null values 196 | config6 = ot.initializeNullConfig() 197 | 198 | config6['log_dir'] = log_dir 199 | config6['ingestLog'] = os.path.join(log_dir,shortname+'_FLT2TIF_QAQCLog.txt') 200 | config6['getFilesDir'] = '/Volumes/New Volume/ToOT_HD35/2018_13_265_Feehan/_Deliverables/Rasters' 201 | config6['getFilesWild'] = '.*\.flt$' 202 | config6['ftype'] = 'f' 203 | config6['recursive'] = 1 204 | config6['Translate2Tiff'] = 1 205 | config6['ras_xBlock'] = 256 206 | config6['ras_yBlock'] = 256 207 | config6['RasOutDir'] = '/volumes/OT6TB/CA18_Feehan/Rasters' 208 | 209 | #Run module to reproject rasters... 210 | #ot.RunQAQC(config6) 211 | #---------------------------------------------------------------------- 212 | 213 | 214 | #Make sure the proper CRS info is in the header. 215 | #---------------------------------------------------------------------- 216 | #module to initialize the config file to all null values? 217 | config7 = ot.initializeNullConfig() 218 | config7['SetRasterCRS'] = 1 219 | config7['log_dir'] = log_dir 220 | config7['ingestLog'] = os.path.join(log_dir,shortname+'_TEST_QAQCLog.txt') 221 | config7['getFilesDir'] = '/volumes/OT6TB/CA17_Dietrich/2017_ESRI_50cm' 222 | config7['getFilesWild'] = '.*\.tif$' 223 | config7['ftype'] = 'f' 224 | config7['recursive'] = 1 225 | config7['a_srs']='6339+5703' 226 | 227 | #Run module to re-check the raster metadata 228 | #ot.RunQAQC(config7) 229 | #---------------------------------------------------------------------- 230 | 231 | 232 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | [![NSF-1948997](https://img.shields.io/badge/NSF-1948997-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948997) 2 | [![NSF-1948994](https://img.shields.io/badge/NSF-1948994-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948994) 3 | [![NSF-1948857](https://img.shields.io/badge/NSF-1948857-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948857) 4 | 5 | OT QA/QC Utility Tools 6 | 7 | * Summary 8 | - This repo consists of python scripts and templates to better automate 9 | and standardize the first-level QA/QC of data to be ingested into 10 | OpenTopography. The scripts will check the lidar files for missing 11 | Coordinate Reference System (CRS) info, las versions, etc. 
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
[![NSF-1948997](https://img.shields.io/badge/NSF-1948997-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948997)
[![NSF-1948994](https://img.shields.io/badge/NSF-1948994-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948994)
[![NSF-1948857](https://img.shields.io/badge/NSF-1948857-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948857)

OT QA/QC Utility Tools

* Summary
- This repo consists of Python scripts and templates to better automate
  and standardize the first-level QA/QC of data to be ingested into
  OpenTopography.  The scripts check the lidar files for missing
  Coordinate Reference System (CRS) info, las versions, etc.  They also
  check for uniformity of values among all the files, create boundaries
  of the data, and calculate area.

* How to run the QA/QC Software
- Clone this repo: git clone https://www.unavco.org/gitlab/beckley/otQAQC.git
- Rename the cloned directory to the OT shortname (mv otQAQC/ new_shortname)
- cd into the newly renamed directory
- run the main level code in python (version 3), and specify your working
  directory as the first argument:

  python ot_utils.py $PWD

  This will set up the directory structure with all the necessary
  template files.
- edit the configuration file, ingest_shortname.py, as necessary.
  Specify the location of the data, the names of the log files, which
  modules to run, etc.
- See the section below on configuration details


* Configuration File Description
- To run the QA/QC code, you should initialize a configuration file to
  nulls by doing: config = ot.initializeNullConfig().  Then set
  whatever parameters you want.  It's best to run multiple QA/QCs if
  you have multiple steps to do (ex: convert to LAZ first, then run
  Boundary creation)

- *ingestBase*. This is generally your base working directory and will
  be taken automatically if you are running the scripts with the default
  directory structure.  In general, this will be something like:

  ex: /Users/matt/OT/DataIngest/WA18_Wall

- *shortname*. This is the name you are going to use for the OT ingest.
  In general, your directory structure will contain this name, and you
  will use this throughout the process to identify this project.  By
  default, this is taken automatically from the ingestBase variable.

  ex: WA18_Wall

- *bounds_base*. This is the directory that will hold all the work for
  creating the boundary of the data.  Boundaries can be created with
  either PDAL or LASTools.  They do yield slightly different results, and
  currently LASTools is *MUCH* faster.  If using the default directory
  structure, this variable will be taken automatically from the
  ingestBase variable.

- *log_dir*. This is the directory that will hold all the logs.  There
  is a log that contains messages as the ingest process proceeds, but
  there is also a log that is the PDAL metadata output from all the
  las/laz files.  The majority of error checking is done based on the
  PDAL metadata log file.  If using the default directory structure,
  this variable will be taken automatically from the ingestBase
  variable.  This is usually: /Users/matt/OT/DataIngest/shortname/logs

- *scripts_dir*. This is the directory that holds the ingest script as
  well as a sample PDAL pipeline file if needed.  The pipeline file
  should only be needed if you have to convert the files from LAS to LAZ,
  or do some other PDAL operation.  If using the default directory
  structure, this variable will be taken automatically from the
  ingestBase variable.

- *ingestLog*. Full path to the ingest log.  The filename is usually of
  the form: shortname+'_ingestLog.txt'

- *AddCRS2Header*. Set this to 1 if you want to simply add the CRS
  info to the lidar header files.  Currently uses PDAL for this.

- *ftype*. This is set to either 'f' or 'd' for 'file' or
  'directory'.  This value is fed directly to a unix find call (see the
  sketch below).  This keyword is only used with the module, getFiles.
  The default is set to 'f'.  The only time setting it to 'd' is useful
  is when converting ESRI grid files, which are a set of files in a
  directory.  With ESRI grid files, if you specify the directory name,
  GDAL will be able to convert it.
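  For orientation, getFiles presumably assembles something along these
  lines from getFilesDir, ftype, and getFilesWild (a sketch, not the
  exact call):

  find /path/to/data -type f -regex '.*\.las$'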
- *LAS2LAZ*. Set this to 1 if you want to convert LAS files to LAZ,
  otherwise set it to 0.

- *LAS2LAZ_method*. Set this to 'lastools' or 'pdal'.  With pdal, it
  will read in a pipeline (the path to which you specify), and can do
  multiple operations in one run.  So, you can convert to LAZ and
  reproject in one step, but it is sometimes slower than LASTools.  With
  LASTools, it will just convert to LAZ.  If your original LAS files do
  not have CRS info, you will have to define it in a separate run, so it
  is a two-step process.  Consider adding another EPSG keyword to feed to
  LAStools, but the config file is already confusing.

- *getFilesWild*. Regular expression to find all the files you want to
  work on.  It's best to search by suffix: '.*\.laz$'

- *getFilesDir*. Base directory from where the file search will start.
  If recursive is set, the search will drill down from this directory.

- *recursive*. Set this to 1 if the files to ingest are in a nested
  directory structure.  This is pretty rare.  Mostly all the las/laz
  files will be in a single directory, so generally leave this set to 0.
  This keyword only gets used in the module: getFiles.

- *LAZDir_out*. Directory where LAZ files will be written when converted
  from LAS.

- *pipeline*. Full path to the pipeline file.  This is a JSON file that
  will be used by PDAL to do either a conversion or translation.  If
  LAS2LAZ is set, and you want to use PDAL, then you must supply a
  pipeline.

- *CreatePDALInfo*. Set this to 1 if you want to loop through all the
  LAS/LAZ files and create a PDAL log of all the metadata.  This file is
  usually stored in the logs directory, and is used for most of the QA/QC.

- *PDALInfoFile*. Name of the logfile containing all the PDAL metadata.
  Default is: shortname+'_PDALInfoLog.txt'

- *ReadPDALLog*. Set this to 1 if you want to read the PDAL log into
  an array for doing QAQC.  *You will need this for most operations.*

- *CheckLAZCount*. Set this to 1 if you want to check the count of LAZ
  files.  This is only mildly useful, and will report if there are
  files other than LAZ in the ingest directory.

- *MissingHCRS*. Set this to 1 if you want to check if any of the LAZ
  files are missing the Horizontal Coordinate System info in the
  header.  If at least 1 is missing, it will throw an error.  This is
  a serious error, so the code will enter the debugger if this occurs.
  This will help troubleshoot which file is missing the HCRS.

- *MissingVCRS*. Set this to 1 if you want to check if any of the LAZ
  files are missing the Vertical Coordinate System info in the
  header.  If at least 1 is missing, it will throw a warning.  The code
  will not stop, because many datasets don't have any vertical info.  A
  note is made in the log, but the ingest process does not stop.
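  A minimal config that exercises the two checks above, sketched with
  hypothetical paths (it mirrors config2 in ingest_template.py):

  config = ot.initializeNullConfig()
  config['log_dir']        = '/path/to/logs'
  config['ingestLog']      = '/path/to/logs/crs_check.txt'
  config['getFilesDir']    = '/path/to/laz'
  config['getFilesWild']   = '.*\.laz$'
  config['CreatePDALInfo'] = 1
  config['PDALInfoFile']   = 'PDALInfoLog.txt'
  config['ReadPDALLog']    = 1
  config['MissingHCRS']    = 1
  config['MissingVCRS']    = 1
  ot.RunQAQC(config)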
- *HCRS_Uniform*. Set this to 1 if you want to check that all of the LAZ
  files are in the same Horizontal Coordinate System.  If more than 1
  HCRS is detected, it will throw an error.  This is a serious error, so
  the code will enter the debugger if this occurs.

- *VCRS_Uniform*. Set this to 1 if you want to check that all of the LAZ
  files are in the same Vertical Coordinate System.  If more than 1
  VCRS is detected, it will throw an error.  This is a serious error, so
  the code will enter the debugger if this occurs.

- *VersionCheck*. Set this to 1 if you want to check that all the
  LAS/LAZ files are the same version.

- *PointTypeCheck*. Set this to 1 if you want to check that all the
  LAS/LAZ files have the same 'Point Type' value.

- *GlobalEncodingCheck*. Set this to 1 if you want to check that all the
  LAS/LAZ files have the same 'Global Encoding' value.

- *PointCountCheck*. Set this to 1 if you want to check that all the
  lidar files have points.  If this module finds any files that have a
  point count of 0, it will issue a warning, but will not stop execution
  of the code.

- *CreatePDALBoundary*. Set this to 1 if you want to create a boundary
  of the datasets using PDAL.  PDAL uses a different method than
  LASTools, and there are several steps involved.  It is *MUCH* slower,
  and also seems a bit buggy.

- *bounds_PDAL*. Full path of the shapefile that will be the initial
  boundary created from PDAL.  This file will usually be in segments, and
  needs to be dissolved in a later step.  Example value:

  /Users/matt/OT/DataIngest/shortname/bounds/Boundary_PDAL.shp

- *BufferSize*. When doing the dissolve, sometimes you need to specify a
  small buffer to remove any anomalies.  Enter a value in meters.
  Usually 1 or 2 meters is fine to give good results.  This is only used
  when creating a boundary with PDAL.

- *epsg*. Set this to the EPSG code for the dataset.  This is only used
  when creating a boundary with PDAL.

- *bounds_PDALmerge*. Full path to a shapefile that will contain the
  dissolved/merged version of the initial shapefile that was created.

- *bounds_PDALmergeArea*. Full path to a shapefile that will contain the
  area of the polygon added to the attribute table (in KM^2).

- *bounds_PDALKML*. Full path to the KML version of the final PDAL
  shapefile that is merged and contains the area in the attribute table.

- *CreateLASBoundary*. Set this to 1 if you want to create a boundary of
  the dataset using LASTools (see the sketch below).

- *winePath*. Path to the LASTools executables.  Default is:
  /Applications/LASTools/bin

- *bounds_LT*. Full path to a shapefile that will contain the boundary
  created by LASTools.

- *randFrac*. This is an abbreviation for "Random Fraction", and is a
  parameter that is fed into lasboundary.  This specifies the amount of
  randomly selected data to keep for processing, which speeds the process
  up greatly.  Usually best to keep this set to 0.30 (30%) or less.

- *concavity*. This is another parameter to lasboundary.  The default is
  100, meaning that voids with distances of more than 100 meters are
  considered the exterior (or part of an interior hole).
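  For orientation, the LASTools pass boils down to a lasboundary call.
  A sketch of what that invocation might look like (flag names from the
  LAStools docs; paths hypothetical; on macOS it runs via wine from
  winePath):

  wine /Applications/LASTools/bin/lasboundary.exe -i /path/to/laz/*.laz \
       -merged -keep_random_fraction 0.25 -concavity 100 \
       -o /path/to/bounds/LTBounds.shp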
- *bounds_LTArea*. Full path to the shapefile that will add the area in
  KM^2 to the boundary shapefile initially created by LASTools.

- *bounds_LTKML*. Full path to the KML version of the LASTools-derived
  boundary shapefile that contains the area in the attribute table.

- *CheckRasMeta*. Set this to 1 if you want to get an initial check of
  the raster metadata.  This is good to do as a first check to see if the
  rasters have CRS info, or are in different formats, etc.

- *SetRasterCRS*. Set this to 1 if you just need to add the CRS info
  to the raster header.  Note this does not do any reprojection.  It is
  simply adding the CRS info to the header of the rasters.

- *a_srs*. Set this to an EPSG code string.  This only gets used by the
  module, SetRasterCRS.  Value should be only the numeric code, but in
  string form.  ex: '6339' (a compound horizontal+vertical code such as
  '6339+5703' also works).

- *Translate2Tiff*. Set this to 1 if you want to convert raster files to
  tiffs.  Note you set getFilesWild to get the files you want to
  convert.  This just converts the file type, and *does not* do
  reprojection.

- *RasOutDir*. Directory where you want to write out the newly created
  raster files.  If not set, output files will be written to the same
  directory as the input files.

- *Warp2Tiff*. Set this to 1 if you want to reproject the tiff
  files.  Note you set getFilesWild to get the files you want to convert.
  You can specify a single output directory by setting RasOutDir;
  otherwise, output files will be written to the same directory as the
  input files.

- *ras_xBlock*. This is the size of the tiles that gdal will tile at in
  the X direction.  This is usually: 128, 256, or 512.  The default is
  256 for Translate2Tiff and 128 for Warp2Tiff.  This keyword is only
  used in the modules: Translate2Tiff and Warp2Tiff.

- *ras_yBlock*. This is the size of the tiles that gdal will tile at in
  the Y direction.  This is usually: 128, 256, or 512.  The default is
  256 for Translate2Tiff and 128 for Warp2Tiff.  This keyword is only
  used in the modules: Translate2Tiff and Warp2Tiff.

- *warp_t_srs*. This is the EPSG code that you want the newly
  projected tiff to be in.  The input file must contain SRS info in the
  header.  Value should be only the numeric code, but in string form.
  ex: '6339'.  This keyword is only used by the module, Warp2Tiff.
--------------------------------------------------------------------------------
/ot_utils.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
from __future__ import print_function
import json,glob,os,sys,ipdb,subprocess
import logging,re
import pandas as pd
from datetime import datetime
from osgeo import gdal,osr,ogr
from subprocess import Popen, PIPE
from progress.bar import Bar
from shutil import copyfile,move

# this allows GDAL to throw Python Exceptions
gdal.UseExceptions()



"""Name:
   Description: This is the prototype of code to do all the checking
   for a CRS ingest.

   Date Created: 02/11/2019

   Input(s):
   Output(s):
   Keyword(s):
   Update(s):
   Notes:

"""

__author__ = "Matthew Beckley"

#----------------------------------------------------------------------
def initializeNullConfig():
    #initialize a config dict with every keyword the QA/QC modules
    #understand, all nulled out.
    config = {'log_dir':'',
              'ingestLog':'',
              'AddCRS2Header':0,
              'fsuffix':'',
              'overwrite':0,
              'ftype':'f',
              'LAS2LAZ':0,
              'LAS2LAZ_method':'',
              'getFilesWild':'',
              'getFilesDir': '',
              'recursive':0,
              'LAZDir_out':'',
              'pipeline': '',
              'CreatePDALInfo':0,
              'PDALInfoFile':'',
              'ReadPDALLog':0,
              'CheckLAZCount':0,
              'MissingHCRS':0,
              'MissingVCRS':0,
              'HCRS_Uniform':0,
              'VCRS_Uniform':0,
              'VersionCheck':0,
              'PointTypeCheck':0,
              'GlobalEncodingCheck':0,
              'PointCountCheck':0,
              'CreatePDALBoundary':0,
              'bounds_PDAL':'',
              'BufferSize':0,
              'epsg':'',
              'bounds_PDALmerge':'',
              'bounds_PDALmergeArea':'',
              'bounds_PDALKML':'',
              'CreateLASBoundary':0,
              'winePath':'',
              'bounds_LT':'',
              'randFrac':0,
              'concavity':0,
              'bounds_LTArea':'',
              'bounds_LTKML':'',
              'CheckRasMeta':0,
              'SetRasterCRS':0,
              'a_srs':'',
              'Translate2Tiff':0,
              'RasOutDir':'',
              'Warp2Tiff':0,
              'ras_xBlock':0,
              'ras_yBlock':0,
              'warp_t_srs':''}

    return config
#----------------------------------------------------------------------

#----------------------------------------------------------------------
def ElapsedTime(start_time,end_time):

    print('Program Duration: {}'.format(end_time - start_time))
#----------------------------------------------------------------------

#----------------------------------------------------------------------
def setup_logger(name, log_file,stdout=0, level=logging.INFO):
    """Function to set up as many loggers as you want"""
    formatter = logging.Formatter('%(message)s')

    if stdout:
        handler = logging.StreamHandler(sys.stdout)
    else:
        handler = logging.FileHandler(log_file)

    handler.setFormatter(formatter)

    logger = logging.getLogger(name)
    logger.setLevel(level)
    logger.addHandler(handler)

    return logger
#----------------------------------------------------------------------
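#Usage sketch (hypothetical path): most modules below expect one of these
#loggers as their 'log' argument --
#  log = setup_logger('ingest', '/path/to/logs/ingestLog.txt')
#  log.info('starting QA/QC...')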

#----------------------------------------------------------------------
def LogHeader(log,indir):
    archived_date = str(datetime.now())
    log.info('------------------------------------------------------')
    log.info('Program started on: '+archived_date)
    log.info('\nWorking on files from directory:\n')
    log.info(indir)
    log.info('------------------------------------------------------\n')
#----------------------------------------------------------------------

#--------------------------------------------------
def initDirs(dirBase,readme_template,
             ingest_template='ingest_template.py',
             pipeline_template='pipeline.json'):

    #routine to set up a standard set of directories that I will use for
    #every project

    #check if base exists
    dirCheck = CheckDir(dirBase)
    if dirCheck is False:
        DirWarning(dirBase)

    #check if README template exists
    fcheck = CheckFile(readme_template)
    if fcheck is False:
        FileWarning(readme_template)

    #check if ingest template exists
    fcheck = CheckFile(ingest_template)
    if fcheck is False:
        FileWarning(ingest_template)

    #check if pipeline template exists
    fcheck = CheckFile(pipeline_template)
    if fcheck is False:
        FileWarning(pipeline_template)

    dirs2create= ['scripts','bounds','OTDocs','logs',
                  'testing/prod','testing/beta']

    #if you use makedirs, it will create the subdirectories.
    for dval in dirs2create:
        dirval = os.path.join(dirBase,dval)
        os.makedirs(dirval)

    #move a template of the readme so that I have
    #consistent checks and notes
    bname = os.path.basename(dirBase)
    new_template = 'ingest_'+str(bname)+'.org'
    new_readme = os.path.join(dirBase,new_template)
    move(readme_template,new_readme)

    #move over a template of the ingest script
    bname = os.path.basename(dirBase)
    new_py_template = 'ingest_'+str(bname)+'.py'
    newBase = os.path.join(dirBase,'scripts')
    new_ingest = os.path.join(newBase,new_py_template)
    move(ingest_template,new_ingest)

    #move over a template of the PDAL pipeline
    bname = os.path.basename(pipeline_template)
    newBase = os.path.join(dirBase,'scripts')
    new_pipe = os.path.join(newBase,bname)
    move(pipeline_template,new_pipe)

    #move main routine, ot_utils.py, to scripts
    newBase = os.path.join(dirBase,'scripts')
    newUtils = os.path.join(newBase,'ot_utils.py')
    move('ot_utils.py',newUtils)

#--------------------------------------------------


#--------------------------------------------------
def DirWarning(dirName):
    print( "*******************")
    print( "WARNING! Directory:\n")
    print( dirName+"\n")
    print( "DOES NOT EXIST! CHECK PATH.")
    print( "*******************")
    sys.exit()
#--------------------------------------------------

#--------------------------------------------------
def FileWarning(infile):
    print( "*******************")
    print( "WARNING! FILE:\n")
    print( infile+"\n")
    print( "DOES NOT EXIST! CHECK PATH.")
    print( "*******************")
    sys.exit()
#--------------------------------------------------


#-----------------------------------------------------------------
def CreatePDALInfo(files,outdir,outfile,errors='errors.txt',progress=1):

    #check that outdir exists
    out_check = CheckDir(outdir)

    if out_check is False:
        print("Directory does not exist:\n"+outdir)
        sys.exit()

    out_fpath = os.path.join(outdir,outfile)
    FileOverWrite(out_fpath,ForceOverwrite=1)

    #collect errors in a separate file...
    out_errors = os.path.join(outdir,errors)
    FileOverWrite(out_errors,ForceOverwrite=1)

    #double-quote the output paths so paths with spaces work.
    cmd1 = ['echo [ > "'+out_fpath+'"']
    p1 = subprocess.run(cmd1,shell=True)

    if progress:
        bar = Bar('Creating PDAL Log', max=len(files))
    for f in files:

        cmd2 = ['pdal info \"'+f+'\" --metadata >> "'+out_fpath+'"']

        p2 = subprocess.run(cmd2,shell=True,stderr=subprocess.PIPE)

        #check for errors.  Don't want to stop execution because some
        #errors could be minor.
        if (p2.returncode == 1) or (len(p2.stderr) > 0):
            cmd3 = ['echo Error creating PDAL info for '
                    +f+'.  Standard Error of: '
                    +'\"'+str(p2.stderr)+'\" >> "'+out_errors+'"']

            #adds an extra comma for some errors, and I don't know why
            p3 = subprocess.run(cmd3,shell=True,stderr=subprocess.PIPE)

        #separate JSON files with ',' so I can read in as array
        cmd4 = ['echo "," >> "'+out_fpath+'"']
        p4 = subprocess.run(cmd4,shell=True,stderr=subprocess.PIPE)

        if progress:
            bar.next()

    #remove final comma from file...
    cmd5 = ['sed -i \'\' -e \'$ d \' "'+out_fpath+'"']
    p5 = subprocess.run(cmd5,shell=True)

    #add final bracket to complete the JSON array
    cmd6 = ['echo ] >> "'+out_fpath+'"']
    p6 = subprocess.run(cmd6,shell=True)

    if progress:
        bar.finish()
#-----------------------------------------------------------------


#-----------------------------------------------------------------
def CheckFile(infile):
    #check that the input file exists
    fcheck = os.path.isfile(infile)
    return fcheck
#-----------------------------------------------------------------

#-----------------------------------------------------------------
def CheckDir(indir):
    #check that the input directory exists
    fcheck = os.path.isdir(indir)

    return fcheck
#-----------------------------------------------------------------

#----------------------------------------------------------------------
def absoluteFilePaths(directory):
    for dirpath,_,filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))
#----------------------------------------------------------------------


#----------------------------------------------------------------------
def FileOverWrite(infile,ForceOverwrite=0):
    """
    check if file exists.  If it does, prompt for overwriting...

    Beware that it will remove all files with the basename, so if you
    have a csv file associated with a shapefile, it will remove that too.

    MAB 03.27.2015  Adding feature to print the filename that already
    exists.  Also added the ForceOverwrite keyword.  This will just
    overwrite the file without prompting you.  Also, if you are forcing
    it to overwrite, then I don't print anything to standard output.

    MAB 09.02.2015  I removed the fact that it was removing the filename
    with the wildcard*.  I only really needed this for shapefiles, and I
    fixed that by putting in the shapefile check.  So now, if it is not a
    shapefile, it just removes the infile name only.
    """

    fstat = os.path.exists(infile)
    if fstat == 1:
        if ForceOverwrite:
            #check if the file is a shapefile..
            shapecheck = CheckShape(infile)

            #if file is a shapefile, get all associated files....
            if shapecheck:
                suffs = ["shp","dbf","prj","xml","shx","sbn","sbx"]

                #this will get all the files associated with a shapefile
                #only!  I do this so that I don't accidentally remove a
                #*.csv or *.kml file....
                f2remove = [item for a in suffs for item in glob.glob(infile[:-3]+"*"+a)]

            else:
                #f2remove = glob.glob(infile[:-3]+"*")
                f2remove = [infile]

            for f in f2remove:
                #if forcing, not printing anything to standard out.
                #print "Removing: "+f
                os.remove(f)
        else:
            ansO = input("File: "+infile+ "\nAlready exists. Do you want to overwrite? [Y/n]: ")
[Y/n]: ") 333 | ansO = ansO.lower() 334 | 335 | if ansO == 'n': 336 | print( "Program Exiting - File already exists.") 337 | sys.exit() 338 | else: 339 | #check if the file is a shapefile.. 340 | shapecheck = CheckShape(infile) 341 | 342 | #if file is a shapefile, get all associated files.... 343 | if shapecheck: 344 | suffs = ["shp","dbf","prj","xml","shx","sbn","sbx"] 345 | 346 | #this will get all the files associated with a shapefile 347 | #only! I do this so that I don't accidentally remove a 348 | #*.csv or *.kml file.... 349 | f2remove = [item for a in suffs for item in glob.glob(infile[:-3]+"*"+a)] 350 | 351 | else: 352 | #f2remove = glob.glob(infile[:-3]+"*") 353 | f2remove = [infile] 354 | for f in f2remove: 355 | print( "Removing: "+f) 356 | os.remove(f) 357 | #---------------------------------------------------------------------- 358 | 359 | 360 | #---------------------------------------------------------------------- 361 | def CheckShape(infile): 362 | """ 363 | This module will check that the file exists, that it is a shapefile 364 | and that it has a shp suffix. 365 | 366 | It will return a 1 if the file is o.k., 0 if it is not 367 | 368 | """ 369 | 370 | #return flag 371 | fcheck = 0 372 | 373 | #check that the file exists! 374 | in_fstat = os.path.exists(infile) 375 | if in_fstat == 0: 376 | fcheck = 0 377 | else: 378 | #file exists, but is it a shapefile??? 379 | fsplit = infile.split('.') 380 | if len(fsplit) == 1: 381 | fcheck = 0 382 | else: 383 | #here file has a suffix, but it is not a shapefile 384 | suffix = fsplit[1].lower() 385 | if suffix != "shp": 386 | fcheck = 0 387 | else: 388 | fcheck = 1 389 | 390 | return fcheck 391 | #---------------------------------------------------------------------- 392 | 393 | def Translate2Tiff(files,log,outdir_1="",xblock=256,yblock=256, 394 | progress=0): 395 | 396 | """ 397 | Description: This module will "translate" a tif using 398 | gdal_translate. Note that this doesn't do a reprojection. This 399 | module is for just changing formats, and is useful if that is all 400 | you have to do. I have baked in the ability to tile, and compress 401 | the output 402 | 403 | Date Created: 05/23/2019 404 | 405 | Input(s): 406 | 1. files. this is a list of files that you want to convert. Note 407 | that these files can be in different directories. 408 | 409 | Output(s): 410 | 1. Output will be written to the outdir_1. Otherwise, the tiff for 411 | each file in the list will be written to the same directory as the 412 | path of the inputfile in the list. 413 | 414 | Keyword(s): 415 | 1. outdir. Set this to a single directory if you want all the 416 | output written to a single directory. 417 | 2. xblock. This is the size of the tiles that gdal will tile at in 418 | the X direction. This is usually: 128, 256, or 512. default is set 419 | to 128. 420 | 3. yblock. This is the size of the tiles that gdal will tile at in 421 | the Y direction. This is usually: 128, 256, or 512. default is set 422 | to 128. 423 | 4. progress. Set this to 1 if you want to see a progress bar. 424 | 425 | Update(s): 426 | Notes: 427 | 1. Errors are written to a log in the output directory. I may want 428 | to change this to write to the logs directory for the project, but 429 | that would require me feeding the full path of the error file, and I 430 | don't know if that is really necessary right now. 
431 | """ 432 | 433 | log.info('Convert Raster to TIFF Format...') 434 | log.info('------------------------------------------------------') 435 | 436 | #check that output directory exists... 437 | if len(outdir_1) > 1: 438 | dirCheck = CheckDir(outdir_1) 439 | if dirCheck is False: 440 | DirWarning(outdir_1) 441 | 442 | if progress: 443 | bar = Bar('Translating rasters to Geotiff', max=len(files)) 444 | 445 | 446 | 447 | for infile in files: 448 | 449 | #get basename 450 | outbase = os.path.basename(infile) 451 | 452 | #strip out suffix and add tiff suffix. 453 | outfile = outbase.split(".")[0]+".tif" 454 | 455 | #default is to write in same directory as input file. If you 456 | #want output to another directory, you must set one. 457 | if len(outdir_1) > 1: 458 | outfile = os.path.join(outdir_1,outfile) 459 | errorfile = os.path.join(outdir_1,'Translate2Tiff_errors.txt') 460 | else: 461 | outdir = os.path.dirname(infile) 462 | outfile = os.path.join(outdir,outfile) 463 | errorfile = os.path.join(outdir,'Translate2Tiff_errors.txt') 464 | 465 | errors = [] 466 | if outfile: 467 | #need double quotes for paths with spaces! 468 | cmd = ['gdal_translate -of GTIFF -co \"COMPRESS=LZW\"' 469 | +' -co \"TILED=YES\" -co \"blockxsize='+str(xblock)+'\"' 470 | +' -co \"blockysize='+str(yblock)+'\"' 471 | +' \"'+infile+'\"' 472 | +' '+'\"'+outfile+'\" 2>> \"'+errorfile+'\"'] 473 | 474 | #needed the shell=True for this to work 475 | p = subprocess.run(cmd,shell=True,stderr=subprocess.PIPE) 476 | 477 | #Want to know if there is a problem... 478 | if (p.returncode == 1): 479 | print("\nProblem with the TIFF translation for file:\n"+infile) 480 | print("\nCHECK ERROR LOG:\n"+errorfile+"\n when completed") 481 | cmd2 = ['echo "error with file:" \"'+infile+'\" >> \"'+errorfile+'\"'] 482 | p2 = subprocess.run(cmd2,shell=True,stderr=subprocess.PIPE) 483 | errors.append(1) 484 | 485 | if progress: 486 | bar.next() 487 | 488 | if progress: 489 | bar.finish() 490 | 491 | if any(errors): 492 | log.info("FAIL: Problem Converting Raster(s) to TIFF(s)") 493 | log.info("CHECK ERROR LOG:\n"+errorfile+"\n") 494 | else: 495 | log.info("PASS: Converted Raster(s) to TIFF(s)") 496 | log.info('------------------------------------------------------\n') 497 | 498 | #---------------------------------------------------------------------- 499 | 500 | #---------------------------------------------------------------------- 501 | def Warp2Tiff(files,log,t_srs,outdir_1='',xblock=128,yblock=128, 502 | progress=0): 503 | """ 504 | Description: This module will "transform" a tif using 505 | gdalwarp. Use this module if you want to actually do a reprojection 506 | of the raster. I have baked in the ability to tile, and compress 507 | the output. 508 | 509 | Date Created: 05/23/2019 510 | 511 | Input(s): 512 | 1. files. this is a list of files that you want to convert. Note 513 | that these files can be in different directories. 514 | 2. t_srs. This is the EPSG code for what you want the output to 515 | be. I am assuming that it can get the input CRS, so I don't have 516 | that as an input right now, but that may be necessary in the future? 517 | 518 | Output(s): 519 | 1. Output will be written to the outdir. Otherwise, the tiff for 520 | each file in the list will be written to the same directory as the 521 | path of the inputfile in the list. 522 | 523 | Keyword(s): 524 | 1. outdir. Set this to a single directory if you want all the 525 | output written to a single directory. 526 | 2. xblock. 
    2. xblock.  This is the size of the tiles that gdal will tile at in
    the X direction.  This is usually: 128, 256, or 512.  The default is
    set to 128.
    3. yblock.  This is the size of the tiles that gdal will tile at in
    the Y direction.  This is usually: 128, 256, or 512.  The default is
    set to 128.
    4. progress.  Set this to 1 if you want to see a progress bar.

    Update(s):

    Notes:
    1. Errors are written to a log in the output directory.  I may want
    to change this to write to the logs directory for the project, but
    that would require me feeding the full path of the error file, and I
    don't know if that is really necessary right now.
    """

    log.info('Reprojecting TIFFs...')
    log.info('------------------------------------------------------')

    #check that output directory exists...
    if len(outdir_1) > 1:
        dirCheck = CheckDir(outdir_1)
        if dirCheck is False:
            DirWarning(outdir_1)

    if progress:
        bar = Bar('Transforming rasters to Geotiff', max=len(files))

    if not t_srs:
        log.info('FAIL: Must set a target EPSG')
        print('FAIL: Must set a target EPSG')
        ipdb.set_trace()

    #collect error flags across ALL files (initialized once, outside the
    #loop, so earlier failures aren't lost).
    errors = []
    for infile in files:

        #get basename
        outbase = os.path.basename(infile)

        if len(outdir_1) > 1:
            #strip out suffix and add tiff suffix.
            outfile = outbase.split(".")[0]+".tif"
            outfile = os.path.join(outdir_1,outfile)
            errorfile = os.path.join(outdir_1,'Transform2Tiff_errors.txt')
        else:
            #in GDAL, you cannot overwrite the input file, so need to
            #output a new file with EPSG as suffix
            outfile = outbase.split(".")[0]+"_EPSG"+str(t_srs)+".tif"
            outdir = os.path.dirname(infile)
            outfile = os.path.join(outdir,outfile)
            errorfile = os.path.join(outdir,'Transform2Tiff_errors.txt')


        if outfile:
            #need double quotes for paths with spaces!
            cmd = ['gdalwarp -co \"COMPRESS=LZW\"'
                   +' -co \"TILED=YES\" -co \"blockxsize='+str(xblock)+'\"'
                   +' -co \"blockysize='+str(yblock)+'\"'
                   +' -t_srs \"EPSG:'+str(t_srs)+'\"'+' \"'+infile+'\"'
                   +' '+'\"'+outfile+'\" 2>> \"'+errorfile+'\"']

            #needed the shell=True for this to work
            p = subprocess.run(cmd,shell=True,stderr=subprocess.PIPE)

            #Want to know if there is a problem...
            if (p.returncode == 1):
                print("\nProblem with the TIFF transformation for file:\n"+infile)
                print("\nCHECK ERROR LOG:\n"+errorfile+"\n when completed")
                cmd2 = ['echo "error with file:" \"'+infile+'\" >> \"'+errorfile+'\"']
                p2 = subprocess.run(cmd2,shell=True,stderr=subprocess.PIPE)
                errors.append(1)

        if progress:
            bar.next()

    if progress:
        bar.finish()

    if any(errors):
        log.info("FAIL: Problem Projecting Raster(s)")
        log.info("CHECK ERROR LOG:\n"+errorfile+"\n")
    else:
        log.info("PASS: Projected Raster(s) to TIFF(s)")

    log.info('------------------------------------------------------\n')
#----------------------------------------------------------------------

#----------------------------------------------------------------------
def GetRasterInfo(FileName):
    """
    Description: This module will take in a raster and return the basic
    info about it.

    Date Created: 05/22/2019

    Input(s):
    1. FileName.  Full path to the raster to get info on.

    Output(s):
    1. structure with the following variables:
       NDV         - no data value
       xsize       - xsize in pixels
       ysize       - ysize in pixels
       PixelRes_EW - Pixel resolution in EW direction
       PixelRes_NS - Pixel resolution in NS direction
       Projection  - WKT version of projection
       DataType    - DataType of raster (ex: byte, float32, etc)
       ColorType   - colortype of raster (grayscale, RGB, etc)

    Update(s):

    Notes:
    1.
    """

    SourceDS = gdal.Open(FileName)
    NDV = SourceDS.GetRasterBand(1).GetNoDataValue()
    xsize = SourceDS.RasterXSize
    ysize = SourceDS.RasterYSize
    GeoT = SourceDS.GetGeoTransform()
    Projection = SourceDS.GetProjection()
    DataType = SourceDS.GetRasterBand(1).DataType
    DataType = gdal.GetDataTypeName(DataType)

    ColorType = SourceDS.GetRasterBand(1).GetColorInterpretation()
    ColorType = gdal.GetColorInterpretationName(ColorType)

    PixelRes_EW = abs(GeoT[1])
    PixelRes_NS = abs(GeoT[5])

    output={}
    output["NDV"] = NDV
    output["xsize"] = xsize
    output["ysize"] = ysize
    output["PixelRes_EW"] = PixelRes_EW
    output["PixelRes_NS"] = PixelRes_NS
    output["projection"] = Projection
    output["DataType"] = DataType
    output["ColorType"] = ColorType

    return output
#----------------------------------------------------------------------


#----------------------------------------------------------------------
def CheckRasterInfo(infiles):
    """
    Description: check if a list of rasters is missing CRS info
    Date Created: 02/11/2019

    Input(s): list of raster files to check
    Output(s): pandas dataframe
    Update(s):
    Notes: should I put a check in if the file is a valid raster file?
    """

    output = []
    for f in infiles:
        #reset record to blank each time
        out_struct = {"filename":'',"MissingCRS":0,"ActualCRS":'',
                      "PixelRes_EW":0,"PixelRes_NS":0,
                      "DataType":'',"ColorType":''}

        bname = os.path.basename(f)
        out_struct['filename'] = bname

        rasInfo = GetRasterInfo(f)

        if len(rasInfo['projection']) == 0:
            out_struct['MissingCRS'] = 1

        out_struct['ActualCRS'] = rasInfo['projection']

        #for Pixel res, want to avoid cases where the values are
        #basically the same, but due to float or rounding issues may be
        #technically different (e.g. 1.00005 vs 1.0000499999)
        out_struct['PixelRes_EW'] = round(rasInfo['PixelRes_EW'],1)
        out_struct['PixelRes_NS'] = round(rasInfo['PixelRes_NS'],1)

        out_struct['DataType'] = rasInfo['DataType']
        out_struct['ColorType'] = rasInfo['ColorType']

        output.append(out_struct)

    df = pd.DataFrame(output)

    return df

#----------------------------------------------------------------------

#----------------------------------------------------------------------
def SetRasterCRS(infiles,log,a_srs,progress=0):
    """
    Description: Set the CRS info in the header of raster files.
    Date Created: 07/22/2019

    Input(s): list of raster files
    Notes: Adding the header values in place, so may want to add an
    option to write to a separate location?
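    Usage sketch (hypothetical paths; any EPSG code string works,
    including a compound horizontal+vertical code):
         tifs = glob.glob('/path/to/rasters/*.tif')
         log  = setup_logger('crs','/path/to/logs/SetRasterCRS.txt')
         SetRasterCRS(tifs,log,'6339+5703',progress=1)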
726 | """ 727 | 728 | log.info('Adding CRS Info to Header...') 729 | log.info('------------------------------------------------------') 730 | 731 | if progress: 732 | bar = Bar('Transforming rasters to Geotiff', max=len(infiles)) 733 | 734 | 735 | for f in infiles: 736 | fcheck = CheckFile(f) 737 | if fcheck is False: 738 | FileWarning(f) 739 | 740 | #need double quotes for paths with spaces! 741 | cmd = ['gdal_edit.py -a_srs \"EPSG:'+str(a_srs)+'\" '+f] 742 | 743 | #needed the shell=True for this to work 744 | p = subprocess.run(cmd,shell=True,stderr=subprocess.PIPE) 745 | 746 | errors = [] 747 | #Want to know if there is a problem... 748 | if (p.returncode == 1): 749 | print("WARNING: Problem with Adding CRS to header for file:\n"+f) 750 | log.info("WARNING: Problem with Adding CRS to header for file:\n"+f) 751 | errors.append(1) 752 | 753 | 754 | if progress: 755 | bar.next() 756 | 757 | if progress: 758 | bar.finish() 759 | 760 | if any(errors): 761 | log.info("WARNING: Problem Adding CRS info to some Raster(s)") 762 | else: 763 | log.info("PASS: Added CRS Info to Raster(s)") 764 | 765 | log.info('------------------------------------------------------\n') 766 | 767 | #---------------------------------------------------------------------- 768 | 769 | 770 | #---------------------------------------------------------------------- 771 | def LAZCount(indir): 772 | #report if the files are in laz or not. output could be: 773 | #{totalFileCount,totalLAZCount,totalLASCount, list of file suffixes} 774 | 775 | #don't think I need to worry about case? 776 | 777 | 778 | laz_files = [filename for filename in os.listdir(indir) 779 | if re.search(r'\.laz$', filename, re.IGNORECASE)] 780 | laz_count = len(laz_files) 781 | 782 | las_files = [filename for filename in os.listdir(indir) 783 | if re.search(r'\.las$', filename, re.IGNORECASE)] 784 | las_count = len(las_files) 785 | 786 | #just a list of ALL the files, BUT filter out directories. 787 | onlyfiles = [f for f in os.listdir(indir) if os.path.isfile(os.path.join(indir, f))] 788 | all_count = len(onlyfiles) 789 | 790 | suffixes = [] 791 | for f in onlyfiles: 792 | suffixes.append(f.split(".")[-1]) 793 | 794 | output = {"TotalFileCount":all_count, "TotalLAZCount":laz_count, 795 | "TotalLASCount":las_count,"FileTypes":list(set(suffixes))} 796 | 797 | 798 | return output 799 | #---------------------------------------------------------------------- 800 | 801 | #---------------------------------------------------------------------- 802 | def CreatePDALLog(script_name,las_dir,output_dir,logfile): 803 | #log.info('\nCreating PDAL output for all files...') 804 | #log.info('------------------------------------------------------') 805 | #createPDAL_output = script_name+' '+las_dir+' '+output_dir+' '+logfile 806 | #call the unix script that creates the PDAL output as an array of JSON 807 | #files. 808 | 809 | #check that script exists. 
810 | 811 | fcheck = CheckFile(script_name) 812 | if fcheck is False: 813 | FileWarning(script_name) 814 | 815 | dirCheck = CheckDir(las_dir) 816 | if dirCheck is False: 817 | DirWarning(las_dir) 818 | 819 | #check that la[sz] files exist in las_dir: 820 | absPath = os.path.abspath(las_dir) 821 | 822 | #find all LAS or LAZ files - account for case 823 | files = [f for f in os.listdir(absPath) if re.match(r'.*\.[Ll][Aa][SsZz]$', f)] 824 | 825 | if len(files) == 0: 826 | print("No LA[SZ] files in: \n"+absPath+"\nQuitting!") 827 | sys.exit() 828 | else: 829 | p = subprocess.Popen([script_name,las_dir,output_dir,logfile]) 830 | p.wait() 831 | 832 | #log.info('\nPDAL output created successfully.') 833 | #log.info('\nJSON file written to: \n') 834 | #log.info(os.path.join(output_dir,logfile)) 835 | #log.info('------------------------------------------------------\n') 836 | #---------------------------------------------------------------------- 837 | 838 | #---------------------------------------------------------------------- 839 | def readJSONARRAY(output_dir,logfile): 840 | 841 | PDAL_file = os.path.join(output_dir,logfile) 842 | 843 | #only want to do this once...no need to keep re-reading it. 844 | #read in the PDAL output for ALL the files. 845 | with open(PDAL_file,'r') as read_file: 846 | data = json.load(read_file) 847 | 848 | return data 849 | #---------------------------------------------------------------------- 850 | 851 | 852 | #---------------------------------------------------------------------- 853 | def CountCRS(json): 854 | 855 | #missingCRS: 0/1 856 | #multipleCRS: 0/1 857 | #missingVCRS:0/1 858 | #multipleVCRS:0/1 859 | #looping through thousands of files. may want to know which files 860 | #have different CRS? Maybe have a separate routine for that, so one 861 | #routine to just check if they are all the same and what they are, 862 | #then if there is a problem, have a different routine to isolate 863 | #which files have which CRS? 864 | #get a count of the different CRS fields for each file 865 | 866 | output = [] 867 | for d in json: 868 | #reset record to blank each time 869 | out_struct = {"filename":'',"MissingHCRS":0,"MissingVCRS":0} 870 | 871 | fname = d['filename'] 872 | metadata = d['metadata'] 873 | out_struct["filename"] = fname #record the name for every file, not just flagged ones 874 | HCRS_1 = metadata['comp_spatialreference'] 875 | HCRS_2 = metadata['spatialreference'] 876 | HCRS_3 = metadata['srs']['compoundwkt'] 877 | HCRS_4 = metadata['srs']['horizontal'] 878 | HCRS_5 = metadata['srs']['prettycompoundwkt'] 879 | HCRS_6 = metadata['srs']['prettywkt'] 880 | HCRS_7 = metadata['srs']['proj4'] 881 | 882 | VCRS_1 = metadata['srs']['vertical'] 883 | 884 | #Horizontal CRS check - if ALL of the potential Horizontal CRS 885 | #fields are blank, then warn... 886 | #need to also test if it is just set to "unknown"!
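#For reference, each record in the JSON array consumed here looks
#roughly like this (an abridged sketch built from the fields this
#module actually reads; the values are illustrative, not a real run):
# {"filename": "tile_0001.laz",
#  "metadata": {"comp_spatialreference": "...wkt...",
#               "spatialreference": "...wkt...",
#               "srs": {"compoundwkt": "...", "horizontal": "...",
#                       "prettycompoundwkt": "...", "prettywkt": "...",
#                       "proj4": "...", "vertical": "...", "wkt": "..."},
#               "count": 1234567, "major_version": 1, "minor_version": 2,
#               "dataformat_id": 1, "global_encoding": 0}}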
887 | if (len(HCRS_1) == 0) and (len(HCRS_2) == 0) and (len(HCRS_3) == 0) and \ 888 | (len(HCRS_4) == 0) and (len(HCRS_5) == 0) and (len(HCRS_6) == 0) and \ 889 | (len(HCRS_7) == 0): 890 | #print("Missing Horizontal CRS info: "+fname) 891 | out_struct["filename"] = fname 892 | out_struct["MissingHCRS"] = 1 893 | 894 | if (len(VCRS_1) == 0): 895 | #print("Missing Vertical CRS Info: "+fname) 896 | out_struct["filename"] = fname 897 | out_struct["MissingVCRS"] = 1 898 | 899 | output.append(out_struct) 900 | 901 | 902 | out_pandas = pd.DataFrame(output) 903 | 904 | return out_pandas 905 | 906 | 907 | #---------------------------------------------------------------------- 908 | 909 | #---------------------------------------------------------------------- 910 | def getHCRS_EPSG(json): 911 | 912 | #Need to pull out the horizontal coordinate EPSG code. This is 913 | #probably not foolproof, but worth a shot. It could be that the 914 | #EPSG for the horizontal CRS is in a field that I cannot easily 915 | #check. The comp_spatialreference, for example, will have the VCRS 916 | #at the end, and there is no way to easily pick out the EPSG that 917 | #is unique. 918 | 919 | output = [] 920 | for d in json: 921 | #reset record to blank each time 922 | out_struct = {"filename":'',"HCRS_EPSG":''} 923 | 924 | fname = d['filename'] 925 | out_struct["filename"] = fname 926 | 927 | metadata = d['metadata'] 928 | 929 | HCRS_1 = metadata['srs']['horizontal'] 930 | HCRS_2 = metadata['srs']['prettywkt'] 931 | HCRS_3 = metadata['srs']['wkt'] 932 | 933 | if (len(HCRS_1) == 0) and (len(HCRS_2) == 0) and (len(HCRS_3) == 0): 934 | print("Missing Horizontal CRS info: "+fname) 935 | out_struct["HCRS_EPSG"] = 'None' 936 | else: 937 | #need to slice up the list 938 | wkt2 = HCRS_3.split(',')[-1] 939 | wkt3 = [val for val in wkt2 if val.isdigit()] 940 | epsg = ''.join(wkt3) 941 | 942 | out_struct['HCRS_EPSG'] = epsg 943 | 944 | output.append(out_struct) 945 | 946 | out_pandas = pd.DataFrame(output) 947 | 948 | return out_pandas 949 | #---------------------------------------------------------------------- 950 | 951 | #---------------------------------------------------------------------- 952 | def getVCRS_EPSG(json): 953 | 954 | #Need to pull out the vertical coordinate EPSG code. This is 955 | #probably not foolproof, but worth a shot. It could be that the 956 | #EPSG for the vertical CRS is in a field that I cannot easily 957 | #check. Or there may just not be a Vertical CRS. 958 | 959 | #---------------------------------------------- 960 | def firstNon0(inlist,startindex): 961 | for index in range(startindex,len(inlist)): 962 | if inlist[index]!=0:return index 963 | 964 | return None 965 | #---------------------------------------------- 966 | 967 | output = [] 968 | for d in json: 969 | #reset record to blank each time 970 | out_struct = {"filename":'',"VCRS_EPSG":''} 971 | 972 | fname = d['filename'] 973 | out_struct["filename"] = fname 974 | 975 | metadata = d['metadata'] 976 | 977 | #it could be buried in any of these schemas...
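#Worked example of the digit-slicing used below (abridged, illustrative
#WKT tail; EPSG:5703 really is NAVD88 height):
# tail = 'VERT_DATUM["NAVD88",2005],UNIT["metre",1],AUTHORITY["EPSG","5703"]]'
# tail.split(',')[-1]  -> '"5703"]]'
# keep digits and join -> '5703'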
978 | #['compoundwkt', 'prettycompoundwkt', 'prettywkt', 'vertical','wkt'] 979 | compoundwkt = metadata['srs']['compoundwkt'] 980 | Pcompoundwkt = metadata['srs']['prettycompoundwkt'] 981 | Pwkt = metadata['srs']['prettywkt'] 982 | vertical = metadata['srs']['vertical'] 983 | wkt = metadata['srs']['wkt'] 984 | 985 | VCRS_types = [compoundwkt,Pcompoundwkt,Pwkt,vertical,wkt] 986 | 987 | VCRS_vals = [] 988 | for v in VCRS_types: 989 | 990 | #get the index of VERT_DATUM 991 | VCRS_i = v.find('VERT_DATUM') 992 | if VCRS_i != -1: 993 | VCRS_vals.append(v[VCRS_i:]) 994 | else: 995 | VCRS_vals.append(0) 996 | 997 | if not any(VCRS_vals): 998 | #print("Missing Vertical CRS info: "+fname) 999 | out_struct["VCRS_EPSG"] = 'None' 1000 | else: 1001 | #get the element that had a defined CRS string 1002 | validCRS = firstNon0(VCRS_vals,0) 1003 | #need to slice up the list 1004 | v = VCRS_vals[validCRS] 1005 | wkt2 = v.split(',')[-1] 1006 | wkt3 = [val for val in wkt2 if val.isdigit()] 1007 | epsg = ''.join(wkt3) 1008 | 1009 | out_struct['VCRS_EPSG'] = epsg 1010 | 1011 | output.append(out_struct) 1012 | 1013 | 1014 | out_pandas = pd.DataFrame(output) 1015 | 1016 | return out_pandas 1017 | #---------------------------------------------------------------------- 1018 | 1019 | #---------------------------------------------------------------------- 1020 | def getGlobalEncoding(json): 1021 | 1022 | #Routine to get the global encoding for each of the las files, and 1023 | #see if they are uniform. 1024 | 1025 | output = [] 1026 | for d in json: 1027 | #reset record to blank each time 1028 | out_struct = {"filename":'',"GlobalEncoding":0} 1029 | 1030 | fname = d['filename'] 1031 | out_struct["filename"] = fname 1032 | 1033 | metadata = d['metadata'] 1034 | 1035 | GlobalE = metadata['global_encoding'] 1036 | 1037 | out_struct['GlobalEncoding'] = GlobalE 1038 | 1039 | output.append(out_struct) 1040 | 1041 | 1042 | out_pandas = pd.DataFrame(output) 1043 | 1044 | return out_pandas 1045 | #---------------------------------------------------------------------- 1046 | 1047 | #---------------------------------------------------------------------- 1048 | def getPointType(json): 1049 | 1050 | #Routine to get the point types for each of the las files, and see 1051 | #if they are uniform. 1052 | 1053 | output = [] 1054 | for d in json: 1055 | #reset record to blank each time 1056 | out_struct = {"filename":'',"PointType":0} 1057 | 1058 | fname = d['filename'] 1059 | out_struct["filename"] = fname 1060 | 1061 | metadata = d['metadata'] 1062 | 1063 | pType = metadata['dataformat_id'] 1064 | 1065 | out_struct['PointType'] = pType 1066 | 1067 | output.append(out_struct) 1068 | 1069 | 1070 | out_pandas = pd.DataFrame(output) 1071 | 1072 | return out_pandas 1073 | #---------------------------------------------------------------------- 1074 | 1075 | #---------------------------------------------------------------------- 1076 | def getPointCount(json): 1077 | """ 1078 | Make sure the file has points. If it is just a header, sometimes 1079 | this will mess things up. Want to warn if there are empty files. 1080 | 1081 | Input: 1082 | 1.
PDAL metadata output in JSON of all the files 1083 | 1084 | """ 1085 | 1086 | output = [] 1087 | for d in json: 1088 | #reset record to blank each time 1089 | out_struct = {"filename":'',"PointCount":0} 1090 | 1091 | fname = d['filename'] 1092 | out_struct["filename"] = fname 1093 | 1094 | metadata = d['metadata'] 1095 | ptCount = metadata['count'] 1096 | 1097 | out_struct['PointCount'] = ptCount 1098 | 1099 | output.append(out_struct) 1100 | 1101 | 1102 | out_pandas = pd.DataFrame(output) 1103 | 1104 | return out_pandas 1105 | #---------------------------------------------------------------------- 1106 | 1107 | 1108 | #---------------------------------------------------------------------- 1109 | def checkLASVersion(json): 1110 | 1111 | output = [] 1112 | for d in json: 1113 | #reset record to blank each time 1114 | out_struct = {"filename":'',"Version":0.0} 1115 | 1116 | fname = d['filename'] 1117 | metadata = d['metadata'] 1118 | 1119 | major = metadata["major_version"] 1120 | minor = metadata["minor_version"] 1121 | las_version = float(major)+(float(minor)/10.0) 1122 | 1123 | 1124 | out_struct["filename"] = fname 1125 | out_struct["Version"] = las_version 1126 | 1127 | output.append(out_struct) 1128 | 1129 | #convert to pandas dataframe 1130 | out_pandas = pd.DataFrame(output) 1131 | 1132 | return out_pandas 1133 | 1134 | #---------------------------------------------------------------------- 1135 | 1136 | 1137 | #---------------------------------------------------------------------- 1138 | def Convert2LAZ(files,pipeline,outdir='',progress=1,method='pdal', 1139 | wine_path='/Applications/LASTools/bin'): 1140 | 1141 | #convert from las to laz using pdal OR lastools... 1142 | 1143 | method = method.lower() 1144 | if method not in ['lastools','pdal']: 1145 | print('FAIL: must set method to pdal or lastools') 1146 | ipdb.set_trace() 1147 | 1148 | #check that output directory exists... 1149 | if len(outdir) > 1: 1150 | dirCheck = CheckDir(outdir) 1151 | if dirCheck is False: 1152 | DirWarning(outdir) 1153 | 1154 | if progress: 1155 | bar = Bar('Converting from LAS to LAZ', max=len(files)) 1156 | 1157 | for infile in files: 1158 | 1159 | #check that it is a las file 1160 | suffix = infile.split(".")[-1] 1161 | 1162 | if suffix == 'las': 1163 | outfile = infile.replace('.las','.laz') 1164 | elif suffix == 'LAS': 1165 | outfile = infile.replace('.LAS','.laz') 1166 | else: 1167 | continue #skip non-las input; outfile would otherwise be undefined 1168 | outfile = os.path.basename(outfile) #get base filename 1169 | 1170 | if len(outdir) > 1: 1171 | #write output to a different directory... 1172 | outfile = os.path.join(outdir,outfile) 1173 | else: 1174 | #write output to same as input... 1175 | outdir = os.path.dirname(infile) 1176 | outfile = os.path.join(outdir,outfile) 1177 | 1178 | errorfile = os.path.join(outdir,'LAS2LAZ_errors.txt') 1179 | 1180 | if outfile: 1181 | 1182 | if method == 'pdal': 1183 | #check that pipeline exists: 1184 | fcheck = CheckFile(pipeline) 1185 | if fcheck is False: 1186 | FileWarning(pipeline) 1187 | 1188 | #need double quotes for paths with spaces! 1189 | #Added "forward" to preserve header values. Otherwise PDAL 1190 | #updates them, and I want to keep the file as close to 1191 | #original as possible. 1192 | #Convert using PDAL...
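#For reference, for one hypothetical tile the cmd built below expands
#to something like (paths illustrative):
# pdal pipeline pipeline.json \
#   --readers.las.filename="/data/las/tile_0001.las" \
#   --writers.las.filename="/data/laz/tile_0001.laz" \
#   --writers.las.forward="header" 2>> "/data/laz/LAS2LAZ_errors.txt"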
1193 | cmd = ['pdal pipeline '+pipeline+ 1194 | ' --readers.las.filename="'+infile+ 1195 | '" --writers.las.filename="'+outfile+ 1196 | '" --writers.las.forward="header" 2>> "'+errorfile+'"'] 1197 | 1198 | if method == 'lastools': 1199 | wineCheck = CheckDir(wine_path) 1200 | if wineCheck is False: 1201 | DirWarning(wine_path) 1202 | 1203 | #Convert using LASTools... 1204 | cmd = ['wine '+os.path.join(wine_path,'las2las.exe') 1205 | +' -i "'+infile+'" -o "'+outfile+'" 2>/dev/null'] 1206 | 1207 | 1208 | #needed the shell=True for this to work 1209 | p = subprocess.run(cmd,shell=True,stderr=subprocess.PIPE) 1210 | 1211 | #Want to know if there is a problem... 1212 | if (p.returncode == 1): 1213 | print("\nProblem with the LAS conversion for file:\n"+infile) 1214 | print("\nCHECK ERROR LOG:\n"+errorfile+"\n when completed") 1215 | cmd2 = ['echo "error with file:" "'+infile+'" >> "'+errorfile+'"'] 1216 | p2 = subprocess.run(cmd2,shell=True,stderr=subprocess.PIPE) 1217 | ipdb.set_trace() 1218 | 1219 | if progress: 1220 | bar.next() 1221 | 1222 | if progress: 1223 | bar.finish() 1224 | 1225 | #end of Convert2LAZ 1226 | #---------------------------------------------------------------------- 1227 | 1228 | #---------------------------------------------------------------------- 1229 | def AddCRS2Header(files,log_dir,pipeline,outdir='', 1230 | out_suffix='_wCRS',overwrite=0,progress=1): 1231 | 1232 | #Add CRS to lidar file header... 1233 | 1234 | #test for a blank string as suffix. If so, fall back to '_wCRS' so 1235 | #the module doesn't bomb when overwriting. 1236 | if not out_suffix.strip(): 1237 | out_suffix = '_wCRS' 1238 | 1239 | if progress: 1240 | bar = Bar('Adding CRS to Lidar files:', max=len(files)) 1241 | 1242 | #reads in a pipeline and executes it. This module should add the CRS 1243 | #info to the header of the files... 1244 | 1245 | #check that pipeline exists: 1246 | fcheck = CheckFile(pipeline) 1247 | if fcheck is False: 1248 | FileWarning(pipeline) 1249 | 1250 | #check that output directory exists... 1251 | if len(outdir) > 1: 1252 | dirCheck = CheckDir(outdir) 1253 | if dirCheck is False: 1254 | DirWarning(outdir) 1255 | 1256 | for infile in files: 1257 | 1258 | #isolate filename 1259 | outfile = os.path.basename(infile) 1260 | 1261 | #add suffix to output - not overwriting files....
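#e.g. with the default out_suffix, a hypothetical 'tile_0001.laz'
#becomes 'tile_0001_wCRS.laz' - the suffix goes in front of the extension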
1262 | base,ext = os.path.splitext(outfile) 1263 | outfile = base+out_suffix+ext #append the suffix before the extension only 1264 | if len(outdir) > 1: 1265 | outfile = os.path.join(outdir,outfile) 1266 | else: 1267 | outdir = os.path.dirname(infile) 1268 | outfile = os.path.join(outdir,outfile) 1269 | 1270 | if outfile: 1271 | #abs_outfile = os.path.join(absPath,outfile) 1272 | #abs_infile = os.path.join(absPath,infile) 1273 | #write errors to a file 1274 | cmd = ['pdal pipeline '+pipeline+' --readers.las.filename="' 1275 | +infile+'" --writers.las.filename="'+outfile+'"' 1276 | +' --writers.las.forward="header" 2>> ' 1277 | +os.path.join(log_dir,'CRSDefineErrors.txt')] 1278 | 1279 | #needed the shell=True for this to work 1280 | p = subprocess.run(cmd,shell=True)#,stderr=subprocess.PIPE) 1281 | 1282 | if overwrite: 1283 | #in_suffix = suffix.lower() 1284 | #Over_out_suffix = outfile.split(".")[-1] 1285 | #Over_out_suffix = Over_out_suffix.lower() 1286 | 1287 | #if in_suffix == Over_out_suffix: 1288 | cmd2 = ['mv "'+outfile+'" "'+infile+'"'] 1289 | p2 = subprocess.run(cmd2,shell=True) 1290 | 1291 | if progress: 1292 | bar.next() 1293 | 1294 | if progress: 1295 | bar.finish() 1296 | 1297 | #---------------------------------------------------------------------- 1298 | 1299 | #---------------------------------------------------------------------- 1300 | def CreateBounds(infiles,out_boundary,epsg,edge_size=50): 1301 | #setting edge_size=10 gets a better approximation of the boundary. 1302 | #Edge_size of 1 finds too many gaps. 1303 | #Needed to escape asterisk to avoid getting weird errors.... 1304 | #Need extra "quotes" for paths with spaces... 1305 | #regular expression is a pain in the ass. This will match LAZ 1306 | #files. the .* matches ANY previous characters, and the $ indicates 1307 | #that the text needs to end in LAZ or laz or any combo. Change the 1308 | #Zz to Ss if you want las files. 1309 | 1310 | indir = os.getcwd() 1311 | CreateTempFile(infiles,indir) 1312 | 1313 | cat_cmd = 'cat tmp.txt' 1314 | 1315 | if edge_size == 0: 1316 | cmd = [cat_cmd+'|pdal tindex create '+out_boundary 1317 | +' --stdin' 1318 | +' -f "ESRI Shapefile"' 1319 | +' --t_srs "EPSG:'+str(epsg)+'"'] 1320 | else: 1321 | cmd = [cat_cmd+'|pdal tindex create '+out_boundary 1322 | +' --stdin' 1323 | +' -f "ESRI Shapefile"' 1324 | +' --filters.hexbin.edge_size='+str(edge_size) 1325 | +' --t_srs "EPSG:'+str(epsg)+'"'] 1326 | 1327 | p = subprocess.run(cmd,shell=True,stderr=subprocess.PIPE) 1328 | if (p.returncode == 1): 1329 | print('Error Creating Boundary with PDAL..\n') 1330 | print(p) 1331 | ipdb.set_trace() 1332 | 1333 | #remove the file tmp.txt that contains the list of files... 1334 | p2 = subprocess.run('rm -f tmp.txt',shell=True) 1335 | #---------------------------------------------------------------------- 1336 | 1337 | #---------------------------------------------------------------------- 1338 | def DissolveBounds(inbounds, outbounds,buffer=0): 1339 | """ 1340 | Name: 1341 | Description: Dissolves a boundary so that you have a single polygon. 1342 | You can add a buffer so that it will remove slivers if you have them. 1343 | 1344 | Date Created: 02/11/2019 1345 | 1346 | Input(s): 1347 | inbounds. This is the shapefile that you want to dissolve 1348 | 1349 | Output(s): 1350 | outbounds. This is the output shapefile that will be dissolved 1351 | 1352 | Keyword(s): 1353 | buffer. Set this to remove tiny slivers that sometimes pop up.
1354 | Default is 0, but if you set it to 1 or a low number, it will take 1355 | care of the slivers without altering the boundary too much. 1356 | 1357 | Update(s): 1358 | 1359 | Notes: 1360 | """ 1361 | 1362 | #check that boundary exists: 1363 | fcheck = CheckFile(inbounds) 1364 | if fcheck is False: 1365 | FileWarning(inbounds) 1366 | 1367 | #this will dissolve the file 1368 | 1369 | bname = os.path.basename(inbounds) 1370 | bname_nosuffix = bname.split('.')[0] 1371 | 1372 | if buffer == 0: 1373 | cmd = ['ogr2ogr \"'+outbounds+'\" \"'+inbounds 1374 | +'\" -dialect sqlite -sql \"SELECT ST_UNION(geometry) FROM \'' 1375 | +bname_nosuffix+'\'\"'] 1376 | else: 1377 | #if you get slivers, use a small buffer: 1378 | cmd = ['ogr2ogr \"'+outbounds+'\" \"'+inbounds 1379 | +'\" -dialect sqlite -sql \"SELECT ' 1380 | +'ST_UNION(ST_BUFFER(geometry,'+str(buffer)+')) FROM \''+bname_nosuffix+'\'\"'] 1381 | 1382 | p = subprocess.run(cmd,shell=True,stdout=PIPE) 1383 | if (p.returncode == 1): 1384 | print('Error Dissolving Boundary with OGR..\n') 1385 | print(p) 1386 | ipdb.set_trace() 1387 | #---------------------------------------------------------------------- 1388 | 1389 | #---------------------------------------------------------------------- 1390 | def LASBoundary(files,out_boundary,rand_fract=0.2,concavity=100, 1391 | wine_path='/Users/beckley/Documents/LAStools/bin'): 1392 | 1393 | #Create a Boundary using the lastools lasboundary.exe 1394 | 1395 | wineCheck = CheckDir(wine_path) 1396 | if wineCheck is False: 1397 | DirWarning(wine_path) 1398 | 1399 | numfiles = len(files) 1400 | if numfiles == 0: 1401 | print('Cannot make boundary - file list is empty') 1402 | sys.exit() 1403 | 1404 | print('Creating LAS boundary for: '+str(numfiles)+' files...') 1405 | 1406 | 1407 | bounds_base = os.path.dirname(out_boundary) 1408 | 1409 | baseCheck = CheckDir(bounds_base) 1410 | if baseCheck is False: 1411 | DirWarning(bounds_base) 1412 | 1413 | #need to output a temporary file 1414 | tmpfile = CreateTempFile(files,bounds_base) 1415 | 1416 | #disjoint prevents connecting lines between polygons that are 1417 | #separate. 1418 | #holes will create a separate polygon for where there are holes in 1419 | #the data. 1420 | cmd = ['wine '+os.path.join(wine_path,'lasboundary.exe') 1421 | +' -lof '+tmpfile+' -merged -keep_random_fraction ' 1422 | +str(rand_fract)+' -disjoint -holes -concavity '+str(concavity)+' -o ' 1423 | +out_boundary+' 2>/dev/null'] 1424 | 1425 | p2 = subprocess.run(cmd,shell=True,stdout=PIPE) 1426 | p3 = subprocess.run('rm -f '+tmpfile,shell=True) 1427 | 1428 | if (p2.returncode == 1): 1429 | print('Error Creating Boundary with LASTools..\n') 1430 | print(p2) 1431 | ipdb.set_trace() 1432 | 1433 | #---------------------------------------------------------------------- 1434 | 1435 | #---------------------------------------------------------------------- 1436 | def CreateTempFile(files,indir): 1437 | outfile = os.path.join(indir,'tmp.txt') 1438 | fopen = open(outfile,'w') 1439 | 1440 | for f in files: 1441 | fopen.write(str(f)+'\n') 1442 | 1443 | fopen.close() 1444 | 1445 | return outfile 1446 | #---------------------------------------------------------------------- 1447 | 1448 | 1449 | #---------------------------------------------------------------------- 1450 | def getFiles(indir, wild='.*[LlAaZz]$',ftype='f',recursive=1): 1451 | 1452 | """ 1453 | Description: main routine to get all the files you want to work on. 1454 | Use Regular Expressions to get what you want. 
This routine produces 1455 | the files that are fed into all the other routines. 1456 | 1457 | Input(s): 1458 | indir. The directory that you want to search on. I add 1459 | "" around it within this module, so it should be able to handle 1460 | directories with spaces or other weird characters. 1461 | 1462 | wild. This is the regular expression that you want to use to filter 1463 | the results. 1464 | 1465 | ftype. This will almost always be 'f'. This is what gets fed into 1466 | the unix find command (-type). In rare cases where you want to 1467 | convert ESRI grids, you need to set this to 'd' so that it will just 1468 | find the ESRI grid directory and convert it properly. 1469 | 1470 | recursive. Set this if you want to drill down to all directories. 1471 | Note that if you do this, then when doing conversions, if you do not 1472 | set an output directory, the modules will write the output to the 1473 | same directory as the input. 1474 | 1475 | Output(s): list of filenames that match the regular expression. 1476 | 1477 | Update(s): 1478 | Notes: 1479 | 1480 | """ 1481 | 1482 | 1483 | #type is needed because if they are ESRI GRID files, it will be a 1484 | #directory of files that needs to be converted. 1485 | ftype = ftype.lower() 1486 | ftype = ftype.strip() 1487 | if ftype not in ['f','d']: 1488 | print('ftype for find command must be: f or d') 1489 | ipdb.set_trace() 1490 | 1491 | #get listing of files that match the reg expression and output a 1492 | #list. ireg will ignore case 1493 | if recursive == 0: 1494 | find_cmd = 'find "'+indir+'" -maxdepth 1 -iregex '+wild+' -type '+ftype+' -print' 1495 | if recursive == 1: 1496 | find_cmd = 'find "'+indir+'" -iregex '+wild+' -type '+ftype+' -print' 1497 | 1498 | 1499 | p1 = subprocess.run(find_cmd,shell=True,stdout=PIPE) 1500 | 1501 | #do some error checking... 1502 | if (p1.returncode == 0) and (len(p1.stdout) != 0): 1503 | out = p1.stdout 1504 | 1505 | #convert from bytes to string 1506 | out_str = out.decode() 1507 | 1508 | #remove blank areas 1509 | out_str = out_str.strip() 1510 | 1511 | out = out_str.split('\n') 1512 | elif (len(p1.stdout) == 0) and (p1.returncode == 0): 1513 | print('No files found in '+indir+' with wildcard: '+wild) 1514 | sys.exit() 1515 | elif (p1.returncode == 1): 1516 | print('Error - Check if path exists:\n'+indir) 1517 | sys.exit() 1518 | 1519 | 1520 | return out 1521 | #---------------------------------------------------------------------- 1522 | 1523 | 1524 | 1525 | #---------------------------------------------------------------------- 1526 | def getArea(inpolygon,outpolygon,conv_factor=1.0e6,colName='AREA'): 1527 | #default conversion is to convert m^2 to km^2 1528 | #may need to put in checks for spaces as well... 1529 | #default column name is "AREA" 1530 | 1531 | 1532 | #verify that in/out are shapefiles. 1533 | inpoly_check = CheckShape(inpolygon) 1534 | #out doesn't exist yet, so check that it ends in '.shp' 1535 | outpoly_check = outpolygon.split('.')[-1] 1536 | 1537 | if (inpoly_check == 0) or (outpoly_check.lower() != 'shp'): 1538 | print('input and output must be shapefiles. Quitting') 1539 | ipdb.set_trace() 1540 | sys.exit() 1541 | 1542 | #check for dashes in names - messes with the sql queries.
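#e.g. a layer named 'NZ16-Otago' (illustrative) would make the SQL
#below read: select OGR_GEOM_AREA/1000000.0 AS AREA from NZ16-Otago
#which SQL parses as "NZ16 minus Otago"; underscores avoid that.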
1543 | if '-' in inpolygon: 1544 | print("Filenames with dashes mess up the SQL query\nReplace: " 1545 | +inpolygon+" with underscores...") 1546 | sys.exit() 1547 | 1548 | #don't want "-" in names, it makes it a pain to work with in SQL 1549 | #Also, only want to replace dashes in filename, and not full path... 1550 | if '-' in os.path.basename(outpolygon): 1551 | print("Replacing dashes with underscores..") 1552 | out_new = os.path.basename(outpolygon).replace('-','_') 1553 | outpolygon = os.path.join(os.path.dirname(outpolygon),out_new) 1554 | print("New output file: "+outpolygon) 1555 | 1556 | #get basename of input table.. 1557 | bname = os.path.basename(inpolygon.split('.')[0]) 1558 | 1559 | #get the area. again must be run from OSGEO shell. Also, shapefile 1560 | #needs to be projected! 1561 | cmd1 = ['ogr2ogr "'+outpolygon+'" "'+inpolygon 1562 | +'" -sql "select OGR_GEOM_AREA/'+str(conv_factor) 1563 | +' AS '+colName+' from ' 1564 | +bname+'"'] 1565 | 1566 | p = subprocess.run(cmd1,shell=True) 1567 | if (p.returncode == 1): 1568 | print('Error Calculating Area with OGR..\n') 1569 | print(p) 1570 | ipdb.set_trace() 1571 | 1572 | #need to remove the FID from the shapefile, so that when you convert it 1573 | #to kml, it won't be in there. 1574 | #ogrinfo PDALmerged.shp -sql "alter table NZ16_Otago drop column FID" 1575 | 1576 | #to put it into KMZ: 1577 | #zip -r FINAL_Bounds.kmz FINAL_Bounds.kml 1578 | #---------------------------------------------------------------------- 1579 | 1580 | #----------------------------------------------------------------- 1581 | def shape2KML(infile,outfile): 1582 | """shape2KML makes a call to ogr2ogr and converts an 1583 | ESRI shapefile to a kml. It shouldn't matter if it is a line or point 1584 | file. 1585 | To call it, supply the filename to convert and an output filename. 1586 | 1587 | Ex: 1588 | from ot_utils import shape2KML 1589 | shape2KML("RefGround_Sep24to30_2013_Line.shp","RefGround_Sep24to30_2013_Line.kml") 1590 | 1591 | Quirks: 1592 | 1. code will put output file in current directory. 1593 | 2. Point shapefiles come up with the default "ThumbTack" icon. 1594 | 3. Attributes are there, but they are in an ugly table. 1595 | 1596 | NOTES: 1597 | 1. MAB 10/27/2014. Now need to supply outfile name. 1598 | 2. #to get into a kml format: 1599 | ogr2ogr -f "KML" FINAL_Bounds.kml PDALmerged.shp 1600 | 1601 | """ 1602 | #check for input file 1603 | in_fstat = os.path.exists(infile) 1604 | if in_fstat == 0: 1605 | print("File:\n"+infile+"\n does not exist. Provide a valid input filename!") 1606 | sys.exit() 1607 | 1608 | #check if file exists. If it does prompt for overwriting... 1609 | FileOverWrite(outfile) 1610 | 1611 | #Make sure input is a shape, and output is a kml file 1612 | inShape_check = CheckShape(infile) 1613 | if inShape_check == 0: 1614 | print("Problem with:"+infile+ 1615 | "\nIt either does not exist, or is not a shapefile") 1616 | sys.exit() 1617 | 1618 | #make sure output file is a kml... 1619 | outkml = outfile.split('.')[-1] 1620 | if outkml.lower() != 'kml': 1621 | print('Output file is not a kml!') 1622 | sys.exit() 1623 | 1624 | #this calls the OGR utility from UNIX to do the conversion.
1625 | cmd=['ogr2ogr -f "KML" "'+outfile+'" "'+infile+'"'] 1626 | p = subprocess.run(cmd,shell=True) 1627 | 1628 | 1629 | if (p.returncode == 1): 1630 | print('Error Creating KML with OGR..\n') 1631 | print(p) 1632 | ipdb.set_trace() 1633 | 1634 | 1635 | #End of shape2KML 1636 | #----------------------------------------------------------------- 1637 | 1638 | #----------------------------------------------------------------- 1639 | def RemoveFields(file,fields2delete=[],OnlyKeep=[]): 1640 | """ 1641 | Description: Remove a list of fields or keep only a subset. 1642 | 1643 | Date Created: December 18th 2015 1644 | 1645 | Input(s): 1646 | 1. file. The file that you want to remove columns from. It needs to be 1647 | a shapefile. It has only been tested on a line file. 1648 | 1649 | 2. fields2delete. List of field names you want to delete. Must be 1650 | a list of strings. This is optional. 1651 | 1652 | 3. OnlyKeep. List of fields that you want to keep. Must be a list 1653 | of strings. If this is set, it will remove ALL other field names. 1654 | 1655 | Notes: This code will alter the file, so be cautious. I have not 1656 | done extensive testing on this yet. It worked on a bunch of line 1657 | trajectories. 1658 | 1659 | 1660 | Examples: 1661 | file = "traj57291_LVIS.shp" 1662 | dropFields = ["Id","FID","Region","Length_km","garbage"] 1663 | Fields2Keep = ["MJD", "Date", "Platform"] 1664 | RemoveFields(file,OnlyKeep=Fields2Keep) 1665 | 1666 | """ 1667 | 1668 | __author__ = "Matthew Beckley" 1669 | 1670 | 1671 | if (len(OnlyKeep) > 0) and (len(fields2delete) > 0): 1672 | print( "You cannot set both 'OnlyKeep' AND 'fields2delete'\n") 1673 | print( "Quitting!") 1674 | sys.exit() 1675 | 1676 | 1677 | #check that file exists... 1678 | fcheck = CheckFile(file) 1679 | if fcheck is False: 1680 | FileWarning(file) 1681 | 1682 | driver = ogr.GetDriverByName('ESRI Shapefile') 1683 | 1684 | #set to 0 if want read-only,1 if you want to write/edit 1685 | datasource = driver.Open(file, 1) 1686 | layer = datasource.GetLayer() 1687 | ldef = layer.GetLayerDefn() 1688 | 1689 | 1690 | if len(OnlyKeep) > 0: 1691 | 1692 | numfields = ldef.GetFieldCount() 1693 | count=0 1694 | while (numfields != len(OnlyKeep)): 1695 | field = ldef.GetFieldDefn(count).GetName() 1696 | #print "count: ",str(count) 1697 | #print field 1698 | #print numfields, len(OnlyKeep) 1699 | if field not in OnlyKeep: 1700 | count = 0 1701 | #print "Searching for: ",field 1702 | findex = layer.FindFieldIndex(field,1) 1703 | if findex == -1: 1704 | print( "Field: "+field+" not found!") 1705 | print( "Not removing: "+field) 1706 | 1707 | else: 1708 | layer.DeleteField(findex) 1709 | else: 1710 | count+=1 1711 | 1712 | numfields = ldef.GetFieldCount() 1713 | 1714 | 1715 | elif len(fields2delete) > 0: 1716 | 1717 | for field in fields2delete: 1718 | print( "Searching for: ",field) 1719 | findex = layer.FindFieldIndex(field,1) 1720 | if findex == -1: 1721 | print( "Field: "+field+" not found!") 1722 | print( "Not removing: "+field) 1723 | 1724 | else: 1725 | layer.DeleteField(findex) 1726 | 1727 | else: 1728 | print( "you must set either 'fields2delete' OR 'OnlyKeep'\n") 1729 | print( "Exiting!") 1730 | sys.exit() 1731 | 1732 | # close the data source and text file 1733 | datasource.Destroy() 1734 | 1735 | #End of RemoveFields 1736 | #----------------------------------------------------------------- 1737 | 1738 | def RunQAQC(config): 1739 | """ 1740 | Description: This is the module that reads in the config file and 1741 | runs the modules to do the
actual QAQC. Users can set which modules 1742 | they want to run, and the parameters to use in their config file. 1743 | It's probably easiest to set up multiple config files and run 1744 | multiple times to do specific tasks. 1745 | 1746 | Date Created: Jul 12 2019 1747 | Input(s): Config file in form of dictionary. Refer to the module 1748 | initializeNullConfig in this file to see the list of parameters to 1749 | set. 1750 | 1751 | Update(s): 1752 | Notes: 1753 | 1754 | """ 1755 | 1756 | ingest_start_time = datetime.now() 1757 | 1758 | #For now, always create std out and file logs... 1759 | #------------------------------------------------------------ 1760 | #always start a new log. Check if one exists, and if so 1761 | #delete it. 1762 | 1763 | if os.path.exists(config['ingestLog']): 1764 | os.remove(config['ingestLog']) 1765 | 1766 | #open log 1767 | log = setup_logger('Log1', config['ingestLog']) 1768 | 1769 | #Write header to Log 1770 | LogHeader(log,config['getFilesDir']) 1771 | 1772 | stdout = setup_logger('Log2', '', stdout=1) 1773 | #------------------------------------------------------------ 1774 | 1775 | #There could be a case where you just want to read an existing PDAL 1776 | #log file, and not have to find files? 1777 | if config['getFilesDir']: 1778 | #This is the list of files that you will work on. To do multiple 1779 | #operations, you will need to run RunQAQC multiple times with 1780 | #different configs. 1781 | infiles = getFiles(config['getFilesDir'],wild=config['getFilesWild'], 1782 | ftype=config['ftype'],recursive=config['recursive']) 1783 | 1784 | stdout.info('Working on: '+str(len(infiles))+' files...') 1785 | log.info('\nWorking on: '+str(len(infiles))+' files...\n') 1786 | 1787 | 1788 | if config['AddCRS2Header']: 1789 | stdout.info('Adding CRS to header of lidar files...') 1790 | log.info('------------------------------------------------------') 1791 | log.info('Adding CRS to header of lidar files...') 1792 | AddCRS2Header(infiles,config['log_dir'],config['pipeline'], 1793 | outdir=config['LAZDir_out'], 1794 | out_suffix=config['fsuffix'], 1795 | overwrite=config['overwrite']) 1796 | 1797 | log.info('------------------------------------------------------\n') 1798 | 1799 | if config['LAS2LAZ']: 1800 | stdout.info('Converting files from LAS to LAZ...\n') 1801 | log.info('------------------------------------------------------') 1802 | log.info('Converting files from LAS to LAZ...') 1803 | log.info('LAZ files will be in:\n') 1804 | 1805 | if len(config['LAZDir_out']) > 1: 1806 | log.info(config['LAZDir_out']) 1807 | else: 1808 | log.info('Same directory as input files') 1809 | 1810 | Convert2LAZ(infiles,config['pipeline'],outdir=config['LAZDir_out'], 1811 | method=config['LAS2LAZ_method'],progress=1) 1812 | log.info('------------------------------------------------------\n') 1813 | 1814 | if config['CreatePDALInfo']: 1815 | #create PDAL Info output from all files 1816 | log.info('------------------------------------------------------') 1817 | CreatePDALInfo(infiles,config['log_dir'],config['PDALInfoFile']) 1818 | stdout.info('PASS: Created PDAL info log...') 1819 | log.info('PASS: Created PDAL info log...') 1820 | log.info('------------------------------------------------------\n') 1821 | 1822 | if config['CheckLAZCount']: 1823 | fileCount = LAZCount(config['getFilesDir']) 1824 | stdout.info('Checking if LAZ count matches file count in directory...') 1825 | log.info('------------------------------------------------------') 1826 | log.info('Checking if LAZ count 
matches file count in directory...') 1827 | if fileCount["TotalLAZCount"] != fileCount["TotalFileCount"]: 1828 | log.info('WARNING: LAZ count does not match Total File Count.') 1829 | log.info('Directory contains following filetypes:') 1830 | for f in fileCount['FileTypes']: 1831 | log.info(str(f)) 1832 | log.info('\nCheck path:\n'+config['getFilesDir']) 1833 | else: 1834 | log.info('PASS: LAZ count matches Total File Count.') 1835 | log.info('------------------------------------------------------\n') 1836 | 1837 | if config['ReadPDALLog']: 1838 | #Get the json data of PDAL info 1839 | json = readJSONARRAY(config['log_dir'],config['PDALInfoFile']) 1840 | 1841 | #Check if Horizontal or Vertical CRS is missing on any of the files... 1842 | #---------------------------------------------------------------------- 1843 | if config['MissingHCRS']: 1844 | 1845 | if not config['ReadPDALLog']: 1846 | print("FAIL: You must set config['ReadPDALLog']=1") 1847 | 1848 | CRS_check = CountCRS(json) 1849 | 1850 | htest = CRS_check.MissingHCRS.isin([1]) 1851 | 1852 | stdout.info('Checking for missing horizontal CRS...') 1853 | log.info('------------------------------------------------------') 1854 | log.info('Checking for missing horizontal CRS...') 1855 | if any(htest): 1856 | stdout.info("FAIL: Some of the files are missing Horizontal CRS info") 1857 | log.info("FAIL: Some of the files are missing Horizontal CRS info") 1858 | 1859 | fname = CRS_check[CRS_check.MissingHCRS == 1]['filename'] 1860 | fname_L = fname.to_list() 1861 | 1862 | log.info("The following files are missing Horizontal CRS info:\n") 1863 | for f in fname_L: 1864 | log.info(f) 1865 | 1866 | 1867 | ipdb.set_trace() 1868 | else: 1869 | stdout.info("PASS: All files have a horizontal CRS") 1870 | log.info("PASS: All files have a horizontal CRS") 1871 | log.info('------------------------------------------------------\n') 1872 | 1873 | if config['MissingVCRS']: 1874 | if not config['ReadPDALLog']: 1875 | print("FAIL: You must set config['ReadPDALLog']=1") 1876 | 1877 | CRS_check = CountCRS(json) 1878 | 1879 | vtest = CRS_check.MissingVCRS.isin([1]) 1880 | stdout.info('Checking for missing vertical CRS...') 1881 | log.info('------------------------------------------------------') 1882 | log.info('Checking for missing vertical CRS...') 1883 | 1884 | if any(vtest): 1885 | stdout.info("WARNING: Some (or ALL) of the files are missing Vertical CRS info") 1886 | log.info("WARNING: Some (or ALL) of the files are missing Vertical CRS info") 1887 | 1888 | fname = CRS_check[CRS_check.MissingVCRS == 1]['filename'] 1889 | fname_L = fname.tolist() 1890 | 1891 | log.info("The following files are missing Vertical CRS info:\n") 1892 | 1893 | for f in fname_L: 1894 | log.info(f) 1895 | 1896 | else: 1897 | stdout.info("PASS: All files have a vertical CRS") 1898 | log.info("PASS: All files have a vertical CRS") 1899 | 1900 | log.info('------------------------------------------------------\n') 1901 | #---------------------------------------------------------------------- 1902 | 1903 | #check if CRS is uniform... 
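#For reference, a minimal config sketch that exercises just these CRS
#uniformity checks (keys as used in this module; paths and wildcard are
#illustrative, all other keys left as set by initializeNullConfig):
# config = initializeNullConfig()
# config['log_dir'] = '/data/OT_Dataset/logs'
# config['ingestLog'] = '/data/OT_Dataset/logs/ingest.log'
# config['getFilesDir'] = '/data/OT_Dataset/LAZ'
# config['getFilesWild'] = '.*\.laz$'
# config['ftype'] = 'f'; config['recursive'] = 0
# config['CreatePDALInfo'] = 1; config['PDALInfoFile'] = 'PDAL_info.json'
# config['ReadPDALLog'] = 1
# config['HCRS_Uniform'] = 1; config['VCRS_Uniform'] = 1
# RunQAQC(config)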
1904 | #---------------------------------------------------------------------- 1905 | if config['HCRS_Uniform']: 1906 | stdout.info('Checking if horizontal CRS is uniform...') 1907 | log.info('------------------------------------------------------') 1908 | log.info('Checking if horizontal CRS is uniform...') 1909 | 1910 | if not config['ReadPDALLog']: 1911 | print("FAIL: You must set config['ReadPDALLog']=1") 1912 | 1913 | HCRS_epsgs = getHCRS_EPSG(json) 1914 | unique_epsgs = set(HCRS_epsgs.HCRS_EPSG) 1915 | if len(unique_epsgs) > 1: 1916 | log.info('FAIL: More than 1 EPSG for the Horizontal CRS') 1917 | log.info('There are '+str(len(unique_epsgs))+' different horizontal CRS EPSG values') 1918 | log.info('Dataset contains the following horizontal CRS EPSG codes:') 1919 | for val in unique_epsgs: 1920 | log.info(str(val)) 1921 | 1922 | ipdb.set_trace() 1923 | else: 1924 | stdout.info("PASS: All files in same HCRS: "+str(unique_epsgs)) 1925 | log.info("PASS: All files in same HCRS: "+str(unique_epsgs)) 1926 | log.info('------------------------------------------------------\n') 1927 | 1928 | if config['VCRS_Uniform']: 1929 | stdout.info('Checking if vertical CRS is uniform...') 1930 | log.info('------------------------------------------------------') 1931 | log.info('Checking if vertical CRS is uniform...') 1932 | 1933 | if not config['ReadPDALLog']: 1934 | print("FAIL: You must set config['ReadPDALLog']=1") 1935 | 1936 | VCRS_epsgs = getVCRS_EPSG(json) 1937 | unique_Vepsgs = set(VCRS_epsgs.VCRS_EPSG) 1938 | if len(unique_Vepsgs) > 1: 1939 | log.info('FAIL: More than 1 EPSG for the Vertical CRS') 1940 | log.info('There are '+str(len(unique_Vepsgs))+' different vertical CRS EPSG values') 1941 | log.info('Dataset contains the following vertical CRS EPSG codes:') 1942 | for val in unique_Vepsgs: 1943 | log.info(str(val)) 1944 | 1945 | ipdb.set_trace() 1946 | else: 1947 | stdout.info("PASS: All files in same VCRS: "+str(unique_Vepsgs)) 1948 | log.info("PASS: All files in same VCRS: "+str(unique_Vepsgs)) 1949 | 1950 | log.info('------------------------------------------------------\n') 1951 | #---------------------------------------------------------------------- 1952 | 1953 | #Make sure the files are all in the same LAS version... 
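#checkLASVersion (defined above) folds the two header fields into one
#float, e.g. major_version=1, minor_version=4 -> 1 + 4/10 = LAS 1.4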
1954 | #---------------------------------------------------------------------- 1955 | if config['VersionCheck']: 1956 | if not config['ReadPDALLog']: 1957 | print("FAIL: You must set config['ReadPDALLog']=1") 1958 | 1959 | Version_check = checkLASVersion(json) 1960 | NumVersions = len(Version_check.Version.unique()) 1961 | stdout.info('Checking the LAS version and if it is uniform...') 1962 | log.info('------------------------------------------------------') 1963 | log.info('Checking the LAS version and if it is uniform...') 1964 | if NumVersions > 1: 1965 | log.info("FAIL: Files are in more than one LAS version") 1966 | ipdb.set_trace() 1967 | else: 1968 | stdout.info("PASS: All files are in version: "+str(Version_check.Version.unique())) 1969 | log.info("PASS: All files are in version: "+str(Version_check.Version.unique())) 1970 | 1971 | log.info('------------------------------------------------------\n') 1972 | #---------------------------------------------------------------------- 1973 | 1974 | #Check the point type of all files, and if they are uniform 1975 | #---------------------------------------------------------------------- 1976 | if config['PointTypeCheck']: 1977 | stdout.info('Checking if Point Type is uniform...') 1978 | log.info('------------------------------------------------------') 1979 | log.info('Checking if Point Type is uniform...') 1980 | 1981 | if not config['ReadPDALLog']: 1982 | print("FAIL: You must set config['ReadPDALLog']=1") 1983 | 1984 | pType = getPointType(json) 1985 | unique_pType = set(pType.PointType) 1986 | if len(unique_pType) > 1: 1987 | log.info('WARNING: More than 1 Point Type for the lidar files') 1988 | log.info('There are '+str(len(unique_pType))+' different Point Type values') 1989 | log.info('Dataset contains files with Point Types: ') 1990 | for val in unique_pType: 1991 | log.info(str(val)) 1992 | else: 1993 | stdout.info("PASS: All files have the same Point Type: "+str(unique_pType)) 1994 | log.info("PASS: All files have the same Point Type: "+str(unique_pType)) 1995 | 1996 | log.info('------------------------------------------------------\n') 1997 | #---------------------------------------------------------------------- 1998 | 1999 | #Check the global encoding of all files, and if they are uniform 2000 | #---------------------------------------------------------------------- 2001 | if config['GlobalEncodingCheck']: 2002 | stdout.info('Checking if Global Encoding is uniform...') 2003 | log.info('------------------------------------------------------') 2004 | log.info('Checking if Global Encoding is uniform...') 2005 | 2006 | if not config['ReadPDALLog']: 2007 | print("FAIL: You must set config['ReadPDALLog']=1") 2008 | 2009 | GE = getGlobalEncoding(json) 2010 | unique_GE = set(GE.GlobalEncoding) 2011 | if len(unique_GE) > 1: 2012 | log.info('WARNING: More than 1 Global Encoding for the lidar files') 2013 | log.info('There are '+str(len(unique_GE))+' different Global Encoding values') 2014 | log.info('Dataset contains files with Global Encoding Values of: ') 2015 | for val in unique_GE: 2016 | log.info(str(val)) 2017 | else: 2018 | stdout.info("PASS: All files have the same Global Encoding Value: "+str(unique_GE)) 2019 | log.info("PASS: All files have the same Global Encoding Value: "+str(unique_GE)) 2020 | 2021 | log.info('------------------------------------------------------\n') 2022 | #---------------------------------------------------------------------- 2023 | 2024 | #Check if any of the files have a point count of 0 
2025 | #---------------------------------------------------------------------- 2026 | if config['PointCountCheck']: 2027 | stdout.info('Checking if Point Count is 0') 2028 | log.info('------------------------------------------------------') 2029 | log.info('Checking if Point Count is 0') 2030 | 2031 | if not config['ReadPDALLog']: 2032 | print("FAIL: You must set config['ReadPDALLog']=1") 2033 | 2034 | ptCount_str = getPointCount(json) 2035 | zero_vals = ptCount_str[ptCount_str['PointCount'] == 0] 2036 | 2037 | if len(zero_vals) > 0: 2038 | log.info('WARNING: Some files are empty') 2039 | log.info('There are '+str(len(zero_vals))+' empty files') 2040 | log.info('Following files are empty: ') 2041 | for fname in zero_vals['filename']: 2042 | log.info(fname) 2043 | ipdb.set_trace() 2044 | else: 2045 | stdout.info("PASS: No empty files") 2046 | log.info("PASS: No empty files") 2047 | 2048 | log.info('------------------------------------------------------\n') 2049 | #---------------------------------------------------------------------- 2050 | 2051 | #Create Boundary via PDAL 2052 | #---------------------------------------------------------------------- 2053 | if config['CreatePDALBoundary']: 2054 | start_PDAL = datetime.now() 2055 | stdout.info('Creating Data Boundary with PDAL...') 2056 | log.info('Creating Data Boundary with PDAL...') 2057 | log.info('------------------------------------------------------') 2058 | CreateBounds(infiles,config['bounds_PDAL'], 2059 | config['epsg']) 2060 | stdout.info("PASS: Created initial boundary with PDAL...") 2061 | log.info('PASS: Created initial boundary with PDAL...') 2062 | 2063 | DissolveBounds(config['bounds_PDAL'],config['bounds_PDALmerge'], 2064 | buffer=config['BufferSize']) 2065 | stdout.info('PASS: Dissolved boundary with PDAL...') 2066 | log.info('PASS: Dissolved boundary with PDAL...') 2067 | 2068 | end_PDAL = datetime.now() 2069 | log.info('PASS: PDAL Boundary Creation took:\n') 2070 | log.info('{}\n'.format(end_PDAL - start_PDAL)) 2071 | 2072 | getArea(config['bounds_PDALmerge'],config['bounds_PDALmergeArea']) 2073 | stdout.info('PASS: Calculated Boundary Area with PDAL...') 2074 | log.info('PASS: Calculated Boundary Area with PDAL...') 2075 | 2076 | shape2KML(config['bounds_PDALmergeArea'],config['bounds_PDALKML']) 2077 | stdout.info("PASS: Converted PDAL-derived boundary to KML") 2078 | log.info("PASS: Converted PDAL-derived boundary to KML") 2079 | log.info('------------------------------------------------------\n') 2080 | #---------------------------------------------------------------------- 2081 | 2082 | 2083 | #---------------------------------------------------------------------- 2084 | if config['CreateLASBoundary']: 2085 | start_LAS = datetime.now() 2086 | stdout.info('Creating Data Boundary with LASTools...') 2087 | log.info('Creating Data Boundary with LASTools...') 2088 | log.info('------------------------------------------------------') 2089 | LASBoundary(infiles,config['bounds_LT'], 2090 | rand_fract=config['randFrac'], 2091 | concavity=config['concavity'], 2092 | wine_path=config['winePath']) 2093 | end_LAS = datetime.now() 2094 | 2095 | stdout.info("PASS: Created boundary with LASTools...") 2096 | log.info('PASS: Created boundary with LASTools...') 2097 | log.info('PASS: LASTools Boundary Creation took:\n') 2098 | log.info('{}\n'.format(end_LAS - start_LAS)) 2099 | 2100 | getArea(config['bounds_LT'],config['bounds_LTArea']) 2101 | stdout.info('PASS: Calculated Boundary Area with LASTools...') 2102 | log.info('PASS: Calculated 
Boundary Area with LASTools...') 2103 | 2104 | shape2KML(config['bounds_LTArea'],config['bounds_LTKML']) 2105 | stdout.info("PASS: Converted LASTools-derived boundary to KML") 2106 | log.info("PASS: Converted LASTools-derived boundary to KML") 2107 | log.info('------------------------------------------------------\n') 2108 | #---------------------------------------------------------------------- 2109 | 2110 | #Overall check of raster metadata... 2111 | #---------------------------------------------------------------------- 2112 | if config['CheckRasMeta']: 2113 | stdout.info('Checking Raster Metadata...') 2114 | log.info('Checking Raster Metadata...') 2115 | log.info('------------------------------------------------------') 2116 | 2117 | ras_meta = CheckRasterInfo(infiles) 2118 | 2119 | #-------------------------------------------------- 2120 | CRStest = ras_meta.MissingCRS.isin([1]) 2121 | 2122 | stdout.info('Checking for missing CRS...') 2123 | log.info('--------------------------------') 2124 | log.info('Checking for missing CRS...') 2125 | if any(CRStest): 2126 | print("FAIL: Some of the rasters are missing CRS info") 2127 | log.info("FAIL: Some of the rasters are missing CRS info") 2128 | 2129 | #get data frame of filenames that are missing CRS info. 2130 | fname = ras_meta[ras_meta.MissingCRS == 1]['filename'] 2131 | fname_L = fname.to_list() 2132 | 2133 | log.info("The following rasters are missing CRS info:\n") 2134 | for f in fname_L: 2135 | log.info(f) 2136 | 2137 | ipdb.set_trace() 2138 | 2139 | else: 2140 | stdout.info("PASS: All rasters have a CRS") 2141 | log.info("PASS: All rasters have a CRS") 2142 | log.info('--------------------------------\n') 2143 | #-------------------------------------------------- 2144 | 2145 | #-------------------------------------------------- 2146 | stdout.info('Checking if CRS is uniform for all rasters...') 2147 | log.info('------------------------------------------------------') 2148 | log.info('Checking if CRS is uniform for all rasters...') 2149 | 2150 | unique_WKT = set(ras_meta.ActualCRS) 2151 | if len(unique_WKT) > 1: 2152 | print('FAIL: More than 1 WKT format for the CRS') 2153 | log.info('FAIL: More than 1 WKT format for the CRS') 2154 | log.info('There are '+str(len(unique_WKT))+' different CRS values') 2155 | log.info('Dataset contains the following CRS WKT values:') 2156 | for val in unique_WKT: 2157 | log.info(str(val)) 2158 | 2159 | #ipdb.set_trace() 2160 | else: 2161 | stdout.info("PASS: All files in same CRS: \n"+str(unique_WKT)) 2162 | log.info("PASS: All files in same CRS: \n"+str(unique_WKT)) 2163 | log.info('------------------------------------------------------\n') 2164 | #-------------------------------------------------- 2165 | 2166 | 2167 | #-------------------------------------------------- 2168 | stdout.info('Checking if Color Type is uniform for all rasters...') 2169 | log.info('------------------------------------------------------') 2170 | log.info('Checking if Color Type is uniform for all rasters...') 2171 | 2172 | colortype = set(ras_meta.ColorType) 2173 | if len(colortype) > 1: 2174 | print('FAIL: More than 1 Color Type for the rasters') 2175 | log.info('FAIL: More than 1 Color Type for the rasters') 2176 | log.info('There are '+str(len(colortype))+' different color types') 2177 | log.info('Dataset contains the following color type values:') 2178 | for val in colortype: 2179 | log.info(str(val)) 2180 | 2181 | ipdb.set_trace() 2182 | else: 2183 | stdout.info("PASS: All files have same color type: "+str(colortype)) 
2184 | log.info("PASS: All files have same color type: "+str(colortype)) 2185 | log.info('------------------------------------------------------\n') 2186 | #-------------------------------------------------- 2187 | 2188 | #-------------------------------------------------- 2189 | stdout.info('Checking if Data Type is uniform for all rasters...') 2190 | log.info('------------------------------------------------------') 2191 | log.info('Checking if Data Type is uniform for all rasters...') 2192 | 2193 | datatype = set(ras_meta.DataType) 2194 | if len(datatype) > 1: 2195 | print('FAIL: More than 1 Data Type for the rasters') 2196 | log.info('FAIL: More than 1 Data Type for the rasters') 2197 | log.info('There are '+str(len(datatype))+' different data types') 2198 | log.info('Dataset contains the following data type values:') 2199 | for val in datatype: 2200 | log.info(str(val)) 2201 | 2202 | ipdb.set_trace() 2203 | else: 2204 | stdout.info("PASS: All files have same data type: "+str(datatype)) 2205 | log.info("PASS: All files have same data type: "+str(datatype)) 2206 | log.info('------------------------------------------------------\n') 2207 | #-------------------------------------------------- 2208 | 2209 | #-------------------------------------------------- 2210 | stdout.info('Checking if Pixel Size is uniform for all rasters...') 2211 | log.info('------------------------------------------------------') 2212 | log.info('Checking if Pixel Size is uniform for all rasters...') 2213 | 2214 | #check both pixel res in NS and EW directions. These should 2215 | #always be the same, but GDAL breaks it up like this.... 2216 | pix = pd.concat([ras_meta.PixelRes_EW,ras_meta.PixelRes_NS]) 2217 | 2218 | pix_res = set(pix) 2219 | if len(pix_res) > 1: 2220 | print('FAIL: More than 1 pixel size for the rasters') 2221 | log.info('FAIL: More than 1 pixel size for the rasters') 2222 | log.info('There are '+str(len(pix_res))+' different pixel sizes') 2223 | log.info('Dataset contains the following pixel sizes:') 2224 | for val in pix_res: 2225 | log.info(str(val)) 2226 | 2227 | ipdb.set_trace() 2228 | else: 2229 | stdout.info("PASS: All files have same pixel size: "+str(pix_res)) 2230 | log.info("PASS: All files have same pixel size: "+str(pix_res)) 2231 | log.info('------------------------------------------------------\n') 2232 | #-------------------------------------------------- 2233 | 2234 | 2235 | stdout.info("PASS: Checked Raster Metadata") 2236 | log.info("PASS: Checked Raster Metadata") 2237 | log.info('------------------------------------------------------\n') 2238 | 2239 | #End checking Raster metadata 2240 | #---------------------------------------------------------------------- 2241 | 2242 | #Setting CRS info in header of a raster 2243 | #---------------------------------------------------------------------- 2244 | if config['SetRasterCRS']: 2245 | stdout.info('Adding CRS Info to Raster...') 2246 | SetRasterCRS(infiles,log,config['a_srs'],progress=1) 2247 | 2248 | stdout.info('PASS: Added CRS Info to Raster...') 2249 | #---------------------------------------------------------------------- 2250 | 2251 | #Converting a raster from a Non-TIFF to a tiff. 
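#Translate2Tiff is defined elsewhere in this file; a conversion of this
#sort presumably reduces to a gdal_translate call along these lines
#(illustrative only, not taken from its implementation):
# gdal_translate -of GTiff -co TILED=YES -co BLOCKXSIZE=256 \
#   -co BLOCKYSIZE=256 input.img output.tif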
2252 | #---------------------------------------------------------------------- 2253 | if config['Translate2Tiff']: 2254 | stdout.info('Convert Raster to TIFF Format...') 2255 | 2256 | Translate2Tiff(infiles,log,outdir_1=config['RasOutDir'], 2257 | xblock=config['ras_xBlock'], 2258 | yblock=config['ras_yBlock'], 2259 | progress=0) 2260 | stdout.info("PASS: Converted Raster(s) to TIFF(s)") 2261 | #---------------------------------------------------------------------- 2262 | 2263 | #RE-Projecting a raster into a different Projection... 2264 | #---------------------------------------------------------------------- 2265 | if config['Warp2Tiff']: 2266 | stdout.info('Reprojecting TIFFs...') 2267 | 2268 | Warp2Tiff(infiles,log,config['warp_t_srs'], 2269 | outdir_1=config['RasOutDir'],xblock=config['ras_xBlock'], 2270 | yblock=config['ras_yBlock'], 2271 | progress=0) 2272 | 2273 | stdout.info("PASS: Reprojected TIFFs") 2274 | #---------------------------------------------------------------------- 2275 | 2276 | 2277 | ingest_end_time = datetime.now() 2278 | log.info('Total Time:\n') 2279 | log.info('{}\n'.format(ingest_end_time - ingest_start_time)) 2280 | log.info("\nProgram finished successfully!") 2281 | log.info('------------------------------------------------------\n') 2282 | 2283 | stdout.info("Program finished successfully!") #log this BEFORE closing handlers, or it goes nowhere 2284 | 2285 | # remember to close the handlers. This will close the log. 2286 | for handler in log.handlers.copy(): 2287 | log.removeFilter(handler) 2288 | log.removeHandler(handler) 2289 | handler.flush() 2290 | handler.close() 2291 | 2292 | for handler in stdout.handlers.copy(): 2293 | stdout.removeFilter(handler) 2294 | stdout.removeHandler(handler) 2295 | handler.flush() 2296 | handler.close() 2297 | 2298 | 2299 | #End runQAQC module 2300 | #----------------------------------------------------------------- 2301 | 2302 | if __name__ == "__main__": 2303 | #initialize directories by running: 2304 | #python3 ~/ot/dev/ot_utils.py $PWD 2305 | 2306 | #This will copy over all the necessary files and directories you 2307 | #need for doing the ingest. 2308 | 2309 | if len(sys.argv) < 2: 2310 | print("Need to specify the directory to copy files to.") 2311 | sys.exit() 2312 | else: 2313 | dirBase = sys.argv[1] 2314 | template = os.path.join(dirBase,'ingest_template.org') 2315 | ingest_template = os.path.join(dirBase,'ingest_template.py') 2316 | pipeline_template = os.path.join(dirBase,'pipeline.json') 2317 | 2318 | #This will copy all the templates and set up the directory structure. 2319 | initDirs(dirBase,template,ingest_template=ingest_template, 2320 | pipeline_template=pipeline_template) 2321 | 2322 | 2323 | 2324 | --------------------------------------------------------------------------------