├── 6 result_analysis
│   ├── corr.npy
│   ├── paper_result.npy
│   ├── permute_band.npy
│   ├── permute_time.npy
│   ├── Compare_result.npz
│   ├── variogram_data.mat
│   ├── Compare_result_final.npz
│   ├── Compare_result_ridge.npz
│   ├── variogram.py
│   ├── monthly_read.py
│   ├── colorbar.py
│   ├── corr.py
│   ├── permute.py
│   ├── yield_map.py
│   ├── yield_map_function.py
│   └── GP_crop_v3.py
├── .idea
│   └── .gitignore
├── README.md
├── 1 download data
│   ├── pull_MODIS_world_hist.py
│   ├── pull_MODIS.py
│   ├── pull_MODIS_landcover.py
│   ├── pull_MODIS_entire_county.py
│   ├── pull_MODIS_landcover_entire_county.py
│   ├── pull_MODIS_temperature_entire_county.py
│   ├── pull_MODIS_landcover_entire_county_clip.py
│   ├── pull_MODIS_temperature_entire_county_clip.py
│   ├── pull_MODIS_entire_county_clip.py
│   └── pull_MODIS_world.py
├── 3 model
│   ├── nnet_lstm.py
│   ├── nnet_for_hist_dropout_stride.py
│   ├── train_for_hist_alldata.py
│   ├── train_for_hist_alldata_lstm.py
│   └── GP_crop_v3.py
├── 4 model_batch
│   ├── nnet_lstm.py
│   ├── nnet_for_hist_dropout_stride.py
│   ├── train_for_hist_alldata_loop_permute.py
│   ├── train_for_hist_alldata_loop_lstm.py
│   ├── train_for_hist_alldata_loop_result.py
│   ├── train_for_hist_alldata_loop.py
│   └── train_for_hist_alldata_loop_corn.py
├── 2 clean data
│   └── final_clean_data.py
└── 5 model_semi_supervised
    └── train_for_semi.py

/6 result_analysis/corr.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/corr.npy
--------------------------------------------------------------------------------
/6 result_analysis/paper_result.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/paper_result.npy
--------------------------------------------------------------------------------
/6 result_analysis/permute_band.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/permute_band.npy
--------------------------------------------------------------------------------
/6 result_analysis/permute_time.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/permute_time.npy
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result.npz
--------------------------------------------------------------------------------
/6 result_analysis/variogram_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/variogram_data.mat
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result_final.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result_final.npz
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result_ridge.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result_ridge.npz -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /6 result_analysis/variogram.py: -------------------------------------------------------------------------------- 1 | import scipy.io as io 2 | import numpy as np 3 | 4 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 5 | path_current = save_path+str(0)+str(30)+str(2013)+'result_prediction.npz' 6 | data = np.load(path_current) 7 | year = data['year_out'] 8 | real = data['real_out'] 9 | pred = data['pred_out'] 10 | locations = data['locations_out'] 11 | 12 | err = pred-real 13 | print err.shape,year.shape,locations.shape 14 | 15 | result = np.concatenate((year[:,np.newaxis], locations, err[:,np.newaxis]),axis=1) 16 | 17 | io.savemat('variogram_data.mat', {'result':result}) 18 | print 'saved' -------------------------------------------------------------------------------- /6 result_analysis/monthly_read.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 5 | 6 | 7 | for loop in range(0,1): 8 | RMSE_all = np.zeros([6]) 9 | ME_all = np.zeros([6]) 10 | for predict_year in range(2009,2016): 11 | RMSE = np.zeros([6]) 12 | ME = np.zeros([6]) 13 | for i,time in enumerate(range(10,31,4)): 14 | data = np.load(save_path+str(loop)+str(time)+str(predict_year)+'result_prediction.npz') 15 | year_all = data['year_out'] 16 | real = data['real_out'] 17 | pred = data['pred_out'] 18 | 19 | validate = np.nonzero(year_all == predict_year)[0] 20 | train = np.nonzero(year_all < predict_year)[0] 21 | 22 | rmse=np.sqrt(np.mean((real[validate]-pred[validate])**2)) 23 | me = np.mean(pred[validate]-real[validate]) 24 | RMSE[i]=rmse 25 | ME[i]=me 26 | RMSE_all+=RMSE 27 | ME_all+=np.absolute(ME) 28 | RMSE_all/=7 29 | ME_all/=7 30 | 31 | 32 | 33 | 34 | plt.plot(range(6),RMSE_all) 35 | plt.title(str(predict_year)) 36 | plt.show() 37 | plt.plot(range(6),ME_all) 38 | plt.title(str(predict_year)) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /6 result_analysis/colorbar.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot 2 | import matplotlib as mpl 3 | 4 | # Make a figure and axes with dimensions as desired. 5 | fig = pyplot.figure() 6 | # vertical 7 | # ax2 = fig.add_axes([0.1, 0.1, 0.02, 0.8]) 8 | # horizontal 9 | ax2 = fig.add_axes([0.1, 0.1, 0.8, 0.04]) 10 | 11 | 12 | 13 | # The second example illustrates the use of a ListedColormap, a 14 | # BoundaryNorm, and extended ends to show the "over" and "under" 15 | # value colors. 16 | cmap = mpl.colors.ListedColormap(['#4575b4','#74add1','#abd9e9','#e0f3f8','#ffffbf','#fee090','#fdae61','#f46d43','#d73027']) 17 | cmap.set_over('#a50026') 18 | cmap.set_under('#313695') 19 | 20 | # If a ListedColormap is used, the length of the bounds array must be 21 | # one greater than the length of the color list. 
The bounds must be 22 | # monotonically increasing. 23 | 24 | # # soybean 25 | # bounds = [15,20,25,30,35,40,45,50,55,60] 26 | # corn 27 | bounds = [20,40,60,80,100,120,140,160,180,200] 28 | norm = mpl.colors.BoundaryNorm(bounds, cmap.N) 29 | cb2 = mpl.colorbar.ColorbarBase(ax2, cmap=cmap, 30 | norm=norm, 31 | # to use 'extend', you must 32 | # specify two extra boundaries: 33 | boundaries=[0] + bounds + [220], 34 | extend='both', 35 | ticks=bounds, # optional 36 | spacing='proportional', 37 | orientation='horizontal') 38 | 39 | 40 | 41 | pyplot.show()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Crop Yield Prediction with Deep Learning
2 | The code for our paper, [Deep Gaussian Process for Crop Yield Prediction Based on Remote Sensing Data](http://jiaxuanyou.me/files/Jiaxuan_AAAI17.pdf), AAAI 2017 (Best Student Paper Award in the Computational Sustainability Track). We are also glad to have won "Best Big Data Solution" in the [World Bank Big Data Innovation Challenge](http://bigdatainnovationchallenge.org/).
3 | 
4 | Here is a brief introduction to the purpose of each folder.
5 | 
6 | - **"/1 download data"** How we download data from Google Earth Engine to Google Drive. Users then need to export the data from Google Drive to their local storage, e.g., their clusters. The trick is that we first concatenate all images across all available years (say 2003 to 2015) and then download the resulting huge image at once, which can be hundreds of times faster than downloading year by year.
7 | - **"/2 clean data"** How the raw data is preprocessed, including slicing the huge images to obtain individual images, 3-D histogram calculation, etc. (a minimal sketch of the histogram step is given at the end of this README).
8 | - **"/3 model"** The CNN/LSTM model structures, written in TensorFlow (v0.9), and the Gaussian Process model, written in Python.
9 | - **"/4 model_batch"** Since we train a separate model for each year and each month, batch scripts are used to run the training.
10 | - **"/5 model_semi_supervised"** A recent addition that extends the model with a semi-supervised deep generative model; however, it does not work well yet. We are happy to discuss the model if you can make it work.
11 | - **"/6 result_analysis"** Scripts for plotting results, yield maps, etc.
12 | 
13 | For more information, please contact Jiaxuan You.
14 | 
15 | youjiaxuan@gmail.com
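
Appendix: a minimal sketch of the 3-D histogram step mentioned under "/2 clean data". This is illustrative only: the function name `image_to_histograms`, the array layout, and the per-band normalization are assumptions rather than the repository's exact API; the 32-bin binning over roughly 1–4999 mirrors the `fixedHistogram(1,4999,32)` call in "/1 download data/pull_MODIS_world_hist.py", and the 0 < value < 5000 pixel filter mirrors "/6 result_analysis/corr.py".

```python
import numpy as np

def image_to_histograms(img, n_bands=9, n_bins=32, val_min=1, val_max=4999):
    """Collapse one county's image stack into per-time-step, per-band histograms.

    img: array of shape (height, width, T * n_bands), i.e. the concatenated
         MODIS composites for one county, with bands interleaved per time step.
    Returns an array of shape (n_bins, T, n_bands) -- the "3-D histogram".
    """
    pixels = img.reshape(-1, img.shape[-1])                  # (n_pixels, T * n_bands)
    pixels = pixels.reshape(pixels.shape[0], -1, n_bands)    # (n_pixels, T, n_bands)
    n_times = pixels.shape[1]

    edges = np.linspace(val_min, val_max, n_bins + 1)
    hist = np.zeros((n_bins, n_times, n_bands))
    for t in range(n_times):
        for b in range(n_bands):
            vals = pixels[:, t, b]
            vals = vals[(vals > 0) & (vals < 5000)]          # drop masked / out-of-range pixels
            counts, _ = np.histogram(vals, bins=edges)
            if counts.sum() > 0:
                hist[:, t, b] = counts / float(counts.sum()) # normalize each band/time slice
    return hist
```

A stack of 32 such time steps then matches the `[batch, 32, 32, 9]` input shape declared in `Config` in `nnet_for_hist_dropout_stride.py` and `nnet_lstm.py`.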
-------------------------------------------------------------------------------- /1 download data/pull_MODIS_world_hist.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | # locations = pd.read_csv('locations_remedy.csv') 13 | locations = pd.read_csv('world_locations.csv',header=None) 14 | 15 | def appendBand(current, previous): 16 | # Rename the band 17 | previous=ee.Image(previous) 18 | current = current.select([0,1,2,3,4,5,6]) 19 | # Append it to the result (Note: only return current item on first element/iteration) 20 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 21 | # Return the accumulation 22 | return accum 23 | 24 | # county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 25 | world_region = ee.FeatureCollection('ft:1tdSwUL7MVpOauSgRzqVTOwdfy17KDbw-1d9omPw') 26 | 27 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 28 | .filterDate('2001-12-31','2015-12-31') 29 | img=imgcoll.iterate(appendBand) 30 | img=ee.Image(img) 31 | 32 | for country,index in locations.values: 33 | scale = 500 34 | crs='EPSG:4326' 35 | 36 | # filter for a county 37 | region = world_region.filterMetadata('Country', 'equals', country) 38 | if region==None: 39 | print country,index,'not found' 40 | continue 41 | region = region.first() 42 | # region = region.geometry().coordinates().getInfo()[0] 43 | 44 | img_temp = img.clip(region) 45 | hist = ee.Feature(None, {'mean': img_temp.reduceRegion(ee.Reducer.fixedHistogram(1,4999,32), region, scale, crs,None,False,1e12,16)}) 46 | 47 | hist_info = hist.getInfo()['features'] 48 | print hist_info 49 | -------------------------------------------------------------------------------- /6 result_analysis/corr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import matplotlib.pyplot as plt 4 | 5 | def preprocess_save_data(file): 6 | path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 7 | if file.endswith(".npy"): 8 | path_current=os.path.join(path, file) 9 | image_temp = np.load(path_current) 10 | 11 | image_temp=np.reshape(image_temp,(image_temp.shape[0]*image_temp.shape[1],image_temp.shape[2])) 12 | image_temp=np.reshape(image_temp,(-1,46,9)) 13 | image_temp=np.reshape(image_temp,(-1,9)) 14 | 15 | f_0=image_temp>0 16 | f_5000=image_temp<5000 17 | f=f_0*f_5000 18 | f=np.squeeze(np.prod(f,1).nonzero()) 19 | 20 | # print image_temp.shape 21 | image_temp=image_temp[f,:] 22 | print image_temp.shape 23 | 24 | corr = np.corrcoef(np.transpose(image_temp)) 25 | 26 | # print np.absolute(corr) 27 | # plt.imshow(np.absolute(corr),cmap='Greys_r',interpolation='none') 28 | # plt.show() 29 | 30 | return np.absolute(corr) 31 | 32 | if __name__ == "__main__": 33 | # # save data 34 | corr = np.zeros([9,9]) 35 | path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 36 | count=0 37 | try: 38 | for _, _, files in os.walk(path): 39 | for file in files: 40 | try: 41 | corr += preprocess_save_data(file) 42 | count+=1 43 | except: 44 | continue 45 | except: 46 | print 'break' 47 | np.save('corr.npy', corr) 48 | corr = np.load('corr.npy') 49 | fig, ax = plt.subplots() 50 | img = plt.imshow(corr/count,cmap='Greys_r',interpolation='none',vmin=0,vmax=1) 51 | cbar = fig.colorbar(img, ticks=[0,0.5,1]) 52 | 
cbar.ax.set_yticklabels(['0','0.5','1']) 53 | plt.show() 54 | -------------------------------------------------------------------------------- /1 download data/pull_MODIS.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_final.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,1,2,3,4,5,6]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 46 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23)) 47 | img=imgcoll.iterate(appendBand) 48 | 49 | for loc1, loc2, lat, lon in locations.values: 50 | fname = '{}_{}'.format(int(loc1), int(loc2)) 51 | 52 | offset = 0.11 53 | scale = 500 54 | crs='EPSG:4326' 55 | 56 | region = str([ 57 | [lat - offset, lon + offset], 58 | [lat + offset, lon + offset], 59 | [lat + offset, lon - offset], 60 | [lat - offset, lon - offset]]) 61 | 62 | while True: 63 | try: 64 | export_oneimage(img,'Data',fname,region,scale,crs) 65 | except: 66 | print 'retry' 67 | time.sleep(10) 68 | continue 69 | break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_final.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 46 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23)) 47 | img=imgcoll.iterate(appendBand) 48 | 49 | for loc1, loc2, lat, lon in locations.values: 50 | fname = '{}_{}'.format(int(loc1), int(loc2)) 51 | 52 | offset = 0.11 53 | scale = 500 54 | crs='EPSG:4326' 55 | 56 | region = str([ 57 | [lat - offset, lon + offset], 58 | [lat + offset, lon + offset], 59 | [lat + offset, lon - offset], 60 | [lat - offset, lon - offset]]) 61 | 62 | while True: 63 | try: 64 | export_oneimage(img,'Data_mask',fname,region,scale,crs) 65 | except: 66 | print 'retry' 67 | time.sleep(10) 68 | continue 69 | break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_remedy.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,1,2,3,4,5,6]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2001-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | img_0=ee.Image(ee.Number(0)) 54 | img_5000=ee.Image(ee.Number(5000)) 55 | 56 | img=img.min(img_5000) 57 | img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_major.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2001-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | # img_0=ee.Image(ee.Number(0)) 54 | # img_5000=ee.Image(ee.Number(5000)) 55 | # 56 | # img=img.min(img_5000) 57 | # img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county_mask', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_temperature_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_major.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,4]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/MYD11A2') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2002-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | # img_0=ee.Image(ee.Number(0)) 54 | # img_5000=ee.Image(ee.Number(5000)) 55 | # 56 | # img=img.min(img_5000) 57 | # img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county_temperature', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv') 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | # img_0=ee.Image(ee.Number(0)) 53 | # img_5000=ee.Image(ee.Number(5000)) 54 | # 55 | # img=img.min(img_5000) 56 | # img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | 73 | # region = str([ 74 | # [lat - offset, lon + offset], 75 | # [lat + offset, lon + offset], 76 | # [lat + offset, lon - offset], 77 | # [lat - offset, lon - offset]]) 78 | while True: 79 | try: 80 | export_oneimage(img.clip(region), 'data_mask', fname, scale, crs) 81 | except: 82 | print 'retry' 83 | time.sleep(10) 84 | continue 85 | break 86 | # while True: 87 | # try: 88 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 89 | # except: 90 | # print 'retry' 91 | # time.sleep(10) 92 | # continue 93 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_temperature_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv') 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0,4]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/MYD11A2') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | # img_0=ee.Image(ee.Number(0)) 53 | # img_5000=ee.Image(ee.Number(5000)) 54 | # 55 | # img=img.min(img_5000) 56 | # img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | 73 | # region = str([ 74 | # [lat - offset, lon + offset], 75 | # [lat + offset, lon + offset], 76 | # [lat + offset, lon - offset], 77 | # [lat - offset, lon - offset]]) 78 | while True: 79 | try: 80 | export_oneimage(img.clip(region), 'data_temperature', fname, scale, crs) 81 | except: 82 | print 'retry' 83 | time.sleep(10) 84 | continue 85 | break 86 | # while True: 87 | # try: 88 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 89 | # except: 90 | # print 'retry' 91 | # time.sleep(10) 92 | # continue 93 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv',header=None) 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0,1,2,3,4,5,6]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | img_0=ee.Image(ee.Number(-100)) 53 | img_16000=ee.Image(ee.Number(16000)) 54 | 55 | img=img.min(img_16000) 56 | img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | # region = region.geometry().coordinates().getInfo()[0] 73 | 74 | # region = str([ 75 | # [lat - offset, lon + offset], 76 | # [lat + offset, lon + offset], 77 | # [lat + offset, lon - offset], 78 | # [lat - offset, lon - offset]]) 79 | while True: 80 | try: 81 | export_oneimage(img.clip(region), 'test', fname, scale, crs) 82 | except: 83 | print 'retry' 84 | time.sleep(10) 85 | continue 86 | break 87 | # while True: 88 | # try: 89 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 90 | # except: 91 | # print 'retry' 92 | # time.sleep(10) 93 | # continue 94 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_world.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | # locations = pd.read_csv('locations_remedy.csv') 31 | locations = pd.read_csv('world_locations.csv',header=None) 32 | 33 | 34 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 35 | # Author: Jamie Vleeshouwer 36 | 37 | def appendBand(current, previous): 38 | # Rename the band 39 | previous=ee.Image(previous) 40 | current = current.select([0,1,2,3,4,5,6]) 41 | # Append it to the result (Note: only return current item on first element/iteration) 42 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 43 | # Return the accumulation 44 | return accum 45 | 46 | # county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 47 | world_region = ee.FeatureCollection('ft:1tdSwUL7MVpOauSgRzqVTOwdfy17KDbw-1d9omPw') 48 | 49 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 50 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 51 | .filterDate('2001-12-31','2015-12-31') 52 | img=imgcoll.iterate(appendBand) 53 | img=ee.Image(img) 54 | 55 | img_0=ee.Image(ee.Number(0)) 56 | img_5000=ee.Image(ee.Number(5000)) 57 | 58 | img=img.min(img_5000) 59 | img=img.max(img_0) 60 | 61 | # img=ee.Image(ee.Number(100)) 62 | # img=ee.ImageCollection('LC8_L1T').mosaic() 63 | 64 | for country,index in locations.values: 65 | fname = 'index'+'{}'.format(int(index)) 66 | 67 | # offset = 0.11 68 | scale = 500 69 | crs='EPSG:4326' 70 | 71 | # filter for a county 72 | region = world_region.filterMetadata('Country', 'equals', country) 73 | if region==None: 74 | print country,index,'not found' 75 | continue 76 | region = region.first() 77 | region = region.geometry().coordinates().getInfo()[0] 78 | 79 | # region = str([ 80 | # [lat - offset, lon + offset], 81 | # [lat + offset, lon + offset], 82 | # [lat + offset, lon - offset], 83 | # [lat - offset, lon - offset]]) 84 | while True: 85 | try: 86 | export_oneimage(img, 'Data_world', fname, region, scale, crs) 87 | except: 88 | print 'retry' 89 | time.sleep(10) 90 | continue 91 | break 92 | # while True: 93 | # try: 94 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 95 | # except: 96 | # print 'retry' 97 | # time.sleep(10) 98 | # continue 99 | # break -------------------------------------------------------------------------------- /6 result_analysis/permute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # result = np.load('paper_result.npy') 5 | # result = result[1:,:] 6 | # result_mean = np.mean(result,axis=0,keepdims=True) 7 | # result = np.concatenate((result, result_mean),axis=0) 8 | 9 | # print np.round(result,2) 10 | 11 | permute_band = np.load('permute_band.npy') 12 | permute_band_plot_temp = permute_band[0:9]-permute_band[9] 13 | print permute_band_plot_temp.shape 14 | permute_band_plot = np.zeros([permute_band_plot_temp.shape[0],permute_band_plot_temp.shape[1],permute_band_plot_temp.shape[2],3]) 15 | permute_band_plot[:,:,:,0] = (permute_band_plot_temp[:,:,:,0]+permute_band_plot_temp[:,:,:,1])/2 16 | permute_band_plot[:,:,:,1] = (permute_band_plot_temp[:,:,:,2]+permute_band_plot_temp[:,:,:,3])/2 17 | permute_band_plot[:,:,:,2] = (permute_band_plot_temp[:,:,:,4]+permute_band_plot_temp[:,:,:,5])/2 18 | 19 | # plt.plot(range(10),permute_band_mean[:,0]) 20 | # plt.plot(range(10),permute_band_mean[:,1]) 21 | # 
plt.plot(range(10),permute_band_mean[:,2]) 22 | # plt.plot(range(10),permute_band_mean[:,3]) 23 | # plt.plot(range(10),permute_band_mean[:,4]) 24 | # plt.plot(range(10),permute_band_mean[:,5]) 25 | # plt.legend(['5','6','7','8','9','10']) 26 | # plt.show() 27 | 28 | # bar plot 29 | n_groups = 9 30 | fig, ax = plt.subplots() 31 | index = np.arange(n_groups) 32 | bar_width = 0.22 33 | opacity = 0.6 34 | error_config = {'ecolor': '0.3'} 35 | 36 | rects1 = plt.bar(index, np.mean(permute_band_plot,axis=(1,2))[:,0], bar_width, 37 | alpha=opacity, 38 | color='b', 39 | yerr=0, 40 | error_kw=error_config, 41 | label='May&Jun') 42 | rects2 = plt.bar(index + bar_width, np.mean(permute_band_plot,axis=(1,2))[:,1], bar_width, 43 | alpha=opacity, 44 | color='g', 45 | yerr=0, 46 | error_kw=error_config, 47 | label='Jul&Aug') 48 | rects3 = plt.bar(index + bar_width*2, np.mean(permute_band_plot,axis=(1,2))[:,2], bar_width, 49 | alpha=opacity, 50 | color='r', 51 | yerr=0, 52 | error_kw=error_config, 53 | label='Sept&Oct') 54 | # rects4 = plt.bar(index + bar_width*3, np.mean(permute_band_plot,axis=(1,2))[:,3], bar_width, 55 | # alpha=opacity, 56 | # color='c', 57 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,3], 58 | # error_kw=error_config, 59 | # label='Aug') 60 | # rects5 = plt.bar(index + bar_width*4, np.mean(permute_band_plot,axis=(1,2))[:,4], bar_width, 61 | # alpha=opacity, 62 | # color='m', 63 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,4], 64 | # error_kw=error_config, 65 | # label='Sept') 66 | # rects6 = plt.bar(index + bar_width*5, np.mean(permute_band_plot,axis=(1,2))[:,5], bar_width, 67 | # alpha=opacity, 68 | # color='y', 69 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,5], 70 | # error_kw=error_config, 71 | # label='Oct') 72 | plt.xlabel('Spectral bands in remote sensing image',fontsize=16) 73 | plt.ylabel('Increase of RMSE',fontsize=16) 74 | # plt.title('Root Mean Square Error') 75 | plt.xticks(index + bar_width*1.5, ('1', '2', '3', '4', '5', '6','7','8','9')) 76 | plt.legend(fontsize=14,loc=2) 77 | 78 | axes = plt.gca() 79 | axes.set_ylim([0,3.5]) 80 | 81 | plt.tight_layout() 82 | plt.show() 83 | 84 | 85 | 86 | 87 | permute_time = np.load('permute_time.npy') 88 | permute_time_plot = permute_time[0:30]-permute_time[30] 89 | 90 | x = range(49,282,8) 91 | y = np.mean(permute_time_plot,axis=(1,2)) 92 | # example error bar values that vary with x-position 93 | error = 0 94 | 95 | plt.errorbar(x, y, yerr=error, fmt='-o',ecolor='0.3',linewidth=1,color='b') 96 | plt.xlabel('Day of year',fontsize=16) 97 | plt.ylabel('Increase of RMSE',fontsize=16) 98 | 99 | plt.show() 100 | 101 | -------------------------------------------------------------------------------- /6 result_analysis/yield_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | from BeautifulSoup import BeautifulSoup 4 | from GP_crop_v3 import * 5 | 6 | 7 | # Read CNN_err prediction 8 | CNN = {} 9 | GP = {} 10 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 11 | save_path = 'C:/360Downloads/final/monthly/' 12 | path_current = save_path+str(0)+str(30)+str(2014)+'result_prediction.npz' 13 | data = np.load(path_current) 14 | 15 | year = data['year_out'] 16 | real = data['real_out'] 17 | pred = data['pred_out'] 18 | index=data['index_out'] 19 | 20 | validate = np.nonzero(year == 2014)[0] 21 | year = year[validate] 22 | real = real[validate] 23 | pred = pred[validate] 24 | index = index[validate] 25 | err_CNN = pred-real 26 | 27 | rmse,me,err_GP 
= GaussianProcess(2014,path_current) 28 | 29 | 30 | print 'CNN',err_CNN.min(),err_CNN.max() 31 | print 'GP',err_GP.min(),err_GP.max() 32 | 33 | for i in range(year.shape[0]): 34 | loc1 = str(int(index[i,0])) 35 | loc2 = str(int(index[i,1])) 36 | if len(loc1)==1: 37 | loc1='0'+loc1 38 | if len(loc2)==1: 39 | loc2='00'+loc2 40 | if len(loc2)==2: 41 | loc2='0'+loc2 42 | fips = loc1+loc2 43 | CNN[fips] = err_CNN[i] 44 | GP[fips] = err_GP[i] 45 | 46 | '''CNN''' 47 | # Load the SVG map 48 | svg = open('counties.svg', 'r').read() 49 | # Load into Beautiful Soup 50 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 51 | # Find counties 52 | paths = soup.findAll('path') 53 | # Map colors 54 | colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 55 | 56 | # County style 57 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 58 | # Color the counties based on unemployment rate 59 | for p in paths: 60 | if p['id'] not in ["State_Lines", "separator"]: 61 | try: 62 | rate = CNN[p['id']] 63 | except: 64 | continue 65 | if rate > 15: 66 | color_class = 7 67 | elif rate > 10: 68 | color_class = 6 69 | elif rate > 5: 70 | color_class = 5 71 | elif rate > 0: 72 | color_class = 4 73 | elif rate > -5: 74 | color_class = 3 75 | elif rate > -10: 76 | color_class = 2 77 | elif rate > -15: 78 | color_class = 1 79 | else: 80 | color_class = 0 81 | 82 | color = colors[color_class] 83 | p['style'] = path_style + color 84 | 85 | soup=soup.prettify() 86 | with open('CNN_err.svg', 'wb') as f: 87 | f.write(soup) 88 | 89 | '''GP''' 90 | # Load the SVG map 91 | svg = open('counties.svg', 'r').read() 92 | # Load into Beautiful Soup 93 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 94 | # Find counties 95 | paths = soup.findAll('path') 96 | # Map colors 97 | colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 98 | 99 | # County style 100 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 101 | # Color the counties based on unemployment rate 102 | for p in paths: 103 | if p['id'] not in ["State_Lines", "separator"]: 104 | try: 105 | rate = GP[p['id']] 106 | except: 107 | continue 108 | if rate > 15: 109 | color_class = 7 110 | elif rate > 10: 111 | color_class = 6 112 | elif rate > 5: 113 | color_class = 5 114 | elif rate > 0: 115 | color_class = 4 116 | elif rate > -5: 117 | color_class = 3 118 | elif rate > -10: 119 | color_class = 2 120 | elif rate > -15: 121 | color_class = 1 122 | else: 123 | color_class = 0 124 | 125 | color = colors[color_class] 126 | p['style'] = path_style + color 127 | 128 | soup=soup.prettify() 129 | with open('GP_err.svg', 'wb') as f: 130 | f.write(soup) 131 | -------------------------------------------------------------------------------- /3 model/nnet_lstm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | from fetch_data_county import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | from datetime import datetime 9 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 10 | 11 | class Config(): 12 | B, W, H, C = 32, 32,32, 9 
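# Added note (inferred from how Config is used in this file and the training scripts): B = batch size, W = number of histogram bins, H = number of 8-day time steps, C = number of spectral bands (7 MOD09A1 reflectance + 2 MYD11A2 temperature).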
13 | 14 | lstm_layers = 1 15 | # 64 16 | # hidden 256(default) 17 | lstm_H = 128 18 | 19 | # dense 256(default) 20 | dense = 256 21 | 22 | train_step = 10000 23 | lr = 1e-3 24 | drop_out = 0.75 25 | # weight_decay = 0.005 26 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 27 | save_path = '/atlas/u/jiaxuan/data/train_results/final/lstm/' 28 | # save_path = '~/Downloads/' 29 | 30 | def conv2d(input_data, out_channels, filter_size, in_channels=None, name="conv2d"): 31 | if not in_channels: 32 | in_channels = input_data.get_shape()[-1] 33 | with tf.variable_scope(name): 34 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 35 | initializer=tf.contrib.layers.variance_scaling_initializer()) 36 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 37 | return tf.nn.conv2d(input_data, W, [1, 1, 1, 1], "SAME") + b 38 | 39 | def pool2d(input_data, ksize, name="pool2d"): 40 | with tf.variable_scope(name): 41 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 42 | 43 | 44 | def conv_relu_batch(input_data, out_channels, filter_size, in_channels=None, name="crb"): 45 | with tf.variable_scope(name): 46 | a = conv2d(input_data, out_channels, filter_size, in_channels) 47 | b = batch_normalization(a,axes=[0,1,2]) 48 | r = tf.nn.relu(b) 49 | return r 50 | 51 | def dense(input_data, H, N=None, name="dense"): 52 | if not N: 53 | N = input_data.get_shape()[-1] 54 | with tf.variable_scope(name): 55 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 56 | b = tf.get_variable("b", [1, H]) 57 | return tf.matmul(input_data, W, name="matmul") + b 58 | 59 | def batch_normalization(input_data, axes=[0], name="batch"): 60 | with tf.variable_scope(name): 61 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 62 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 63 | 64 | 65 | def lstm_net(input_data,output_data,config,keep_prob = 1,name='lstm_net'): 66 | with tf.variable_scope(name): 67 | lstm_cell = tf.nn.rnn_cell.LSTMCell(config.lstm_H,state_is_tuple=True) 68 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob) 69 | cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.lstm_layers,state_is_tuple=True) 70 | state = cell.zero_state(config.B, tf.float32) 71 | outputs, final_state = tf.nn.dynamic_rnn(cell, input_data, 72 | initial_state=state, time_major=True) 73 | output_final = tf.squeeze(tf.slice(outputs, [config.H-1,0,0] , [1,-1,-1])) 74 | # print outputs.get_shape().as_list() 75 | fc1 = dense(output_final, config.dense, name="dense") 76 | 77 | logit = tf.squeeze(dense(fc1,1,name='logit')) 78 | loss = tf.nn.l2_loss(logit - output_data) 79 | 80 | return logit,loss,fc1 81 | 82 | class NeuralModel(): 83 | def __init__(self, config, name): 84 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 85 | self.y = tf.placeholder(tf.float32, [None]) 86 | self.lr = tf.placeholder(tf.float32, []) 87 | self.keep_prob = tf.placeholder(tf.float32, []) 88 | 89 | input_data = tf.transpose(self.x, [2,0,1,3]) 90 | dim = input_data.get_shape().as_list() 91 | input_data = tf.reshape(input_data,[dim[0],-1,dim[2]*dim[3]]) 92 | print 'lstm input shape',input_data.get_shape() 93 | 94 | with tf.variable_scope('LSTM') as scope: 95 | self.pred,self.loss,self.feature = lstm_net(input_data, self.y, config, keep_prob=self.keep_prob) 96 | 97 | self.train_op = 
tf.train.AdamOptimizer(self.lr).minimize(self.loss) 98 | with tf.variable_scope('LSTM/lstm_net/logit') as scope: 99 | scope.reuse_variables() 100 | self.dense_W = tf.get_variable('W') 101 | self.dense_B = tf.get_variable('b') 102 | 103 | # if __name__ == '__main__': 104 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 105 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 106 | # config = Config() 107 | # model = NeuralModel(config, "model") 108 | 109 | # dummy_x = np.random.rand(config.B, config.W, config.H, config.C) 110 | # dummy_y = np.random.rand(config.B) 111 | 112 | # sess.run(tf.initialize_all_variables()) 113 | # for i in range(1000): 114 | # # model.state = model.cell.zero_state(config.B, tf.float32) 115 | # if i % 100 == 0: 116 | # config.lr /= 2 117 | # _, loss, pred = sess.run([model.train_op, model.loss, model.pred], feed_dict={ 118 | # model.x: dummy_x, 119 | # model.y: dummy_y, 120 | # model.lr: config.lr, 121 | # model.keep_prob: config.drop_out 122 | # }) 123 | 124 | # print loss 125 | 126 | -------------------------------------------------------------------------------- /4 model_batch/nnet_lstm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | from fetch_data_county import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | from datetime import datetime 9 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 10 | 11 | class Config(): 12 | B, W, H, C = 32, 32,32, 9 13 | 14 | lstm_layers = 1 15 | # 64 16 | # hidden 256(default) 17 | lstm_H = 128 18 | 19 | # dense 256(default) 20 | dense = 256 21 | 22 | train_step = 10000 23 | lr = 1e-3 24 | drop_out = 0.75 25 | # weight_decay = 0.005 26 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 27 | save_path = '/atlas/u/jiaxuan/data/train_results/final/lstm/' 28 | # save_path = '~/Downloads/' 29 | 30 | def conv2d(input_data, out_channels, filter_size, in_channels=None, name="conv2d"): 31 | if not in_channels: 32 | in_channels = input_data.get_shape()[-1] 33 | with tf.variable_scope(name): 34 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 35 | initializer=tf.contrib.layers.variance_scaling_initializer()) 36 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 37 | return tf.nn.conv2d(input_data, W, [1, 1, 1, 1], "SAME") + b 38 | 39 | def pool2d(input_data, ksize, name="pool2d"): 40 | with tf.variable_scope(name): 41 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 42 | 43 | 44 | def conv_relu_batch(input_data, out_channels, filter_size, in_channels=None, name="crb"): 45 | with tf.variable_scope(name): 46 | a = conv2d(input_data, out_channels, filter_size, in_channels) 47 | b = batch_normalization(a,axes=[0,1,2]) 48 | r = tf.nn.relu(b) 49 | return r 50 | 51 | def dense(input_data, H, N=None, name="dense"): 52 | if not N: 53 | N = input_data.get_shape()[-1] 54 | with tf.variable_scope(name): 55 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 56 | b = tf.get_variable("b", [1, H]) 57 | return tf.matmul(input_data, W, name="matmul") + b 58 | 59 | def batch_normalization(input_data, axes=[0], name="batch"): 60 | with tf.variable_scope(name): 61 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 62 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 63 | 64 | 65 | def 
lstm_net(input_data,output_data,config,keep_prob = 1,name='lstm_net'): 66 | with tf.variable_scope(name): 67 | lstm_cell = tf.nn.rnn_cell.LSTMCell(config.lstm_H,state_is_tuple=True) 68 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob) 69 | cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.lstm_layers,state_is_tuple=True) 70 | state = cell.zero_state(config.B, tf.float32) 71 | outputs, final_state = tf.nn.dynamic_rnn(cell, input_data, 72 | initial_state=state, time_major=True) 73 | output_final = tf.squeeze(tf.slice(outputs, [config.H-1,0,0] , [1,-1,-1])) 74 | # print outputs.get_shape().as_list() 75 | fc1 = dense(output_final, config.dense, name="dense") 76 | 77 | logit = tf.squeeze(dense(fc1,1,name='logit')) 78 | loss = tf.nn.l2_loss(logit - output_data) 79 | 80 | return logit,loss,fc1 81 | 82 | class NeuralModel(): 83 | def __init__(self, config, name): 84 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 85 | self.y = tf.placeholder(tf.float32, [None]) 86 | self.lr = tf.placeholder(tf.float32, []) 87 | self.keep_prob = tf.placeholder(tf.float32, []) 88 | 89 | input_data = tf.transpose(self.x, [2,0,1,3]) 90 | dim = input_data.get_shape().as_list() 91 | input_data = tf.reshape(input_data,[dim[0],-1,dim[2]*dim[3]]) 92 | print 'lstm input shape',input_data.get_shape() 93 | 94 | with tf.variable_scope('LSTM') as scope: 95 | self.pred,self.loss,self.feature = lstm_net(input_data, self.y, config, keep_prob=self.keep_prob) 96 | 97 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 98 | with tf.variable_scope('LSTM/lstm_net/logit') as scope: 99 | scope.reuse_variables() 100 | self.dense_W = tf.get_variable('W') 101 | self.dense_B = tf.get_variable('b') 102 | 103 | # if __name__ == '__main__': 104 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 105 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 106 | # config = Config() 107 | # model = NeuralModel(config, "model") 108 | 109 | # dummy_x = np.random.rand(config.B, config.W, config.H, config.C) 110 | # dummy_y = np.random.rand(config.B) 111 | 112 | # sess.run(tf.initialize_all_variables()) 113 | # for i in range(1000): 114 | # # model.state = model.cell.zero_state(config.B, tf.float32) 115 | # if i % 100 == 0: 116 | # config.lr /= 2 117 | # _, loss, pred = sess.run([model.train_op, model.loss, model.pred], feed_dict={ 118 | # model.x: dummy_x, 119 | # model.y: dummy_y, 120 | # model.lr: config.lr, 121 | # model.keep_prob: config.drop_out 122 | # }) 123 | 124 | # print loss 125 | 126 | -------------------------------------------------------------------------------- /4 model_batch/nnet_for_hist_dropout_stride.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | # from fetch_data_histogram import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | import scipy.misc 9 | from datetime import datetime 10 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | class Config(): 13 | B, W, H, C = 32, 32,32, 9 14 | train_step = 25000 15 | lr = 1e-3 16 | weight_decay = 0.005 17 | 18 | keep_prob = 0.25 19 | # load_path = '/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/' 20 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 21 | # load_path = "/atlas/u/jiaxuan/data/google_drive/img_full_output/" 22 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test21/' 
23 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 24 | save_path = '/atlas/u/jiaxuan/data/train_results/final/corn_yearly/' 25 | 26 | 27 | def conv2d(input_data, out_channels, filter_size,stride, in_channels=None, name="conv2d"): 28 | if not in_channels: 29 | in_channels = input_data.get_shape()[-1] 30 | with tf.variable_scope(name): 31 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 32 | initializer=tf.contrib.layers.variance_scaling_initializer()) 33 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 34 | return tf.nn.conv2d(input_data, W, [1, stride, stride, 1], "SAME") + b 35 | 36 | 37 | def pool2d(input_data, ksize, name="pool2d"): 38 | with tf.variable_scope(name): 39 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 40 | 41 | 42 | def conv_relu_batch(input_data, out_channels, filter_size,stride, in_channels=None, name="crb"): 43 | with tf.variable_scope(name): 44 | a = conv2d(input_data, out_channels, filter_size, stride, in_channels) 45 | b = batch_normalization(a,axes=[0,1,2]) 46 | r = tf.nn.relu(b) 47 | return r 48 | 49 | def dense(input_data, H, N=None, name="dense"): 50 | if not N: 51 | N = input_data.get_shape()[-1] 52 | with tf.variable_scope(name): 53 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 54 | b = tf.get_variable("b", [1, H]) 55 | return tf.matmul(input_data, W, name="matmul") + b 56 | 57 | def batch_normalization(input_data, axes=[0], name="batch"): 58 | with tf.variable_scope(name): 59 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 60 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 61 | 62 | class batch_norm(object): 63 | """Code modification of http://stackoverflow.com/a/33950177""" 64 | def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"): 65 | with tf.variable_scope(name): 66 | self.epsilon = epsilon 67 | self.momentum = momentum 68 | 69 | self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum) 70 | self.name = name 71 | 72 | def __call__(self, x, axes=[0,1,2], train=True): 73 | shape = x.get_shape().as_list() 74 | 75 | if train: 76 | with tf.variable_scope(self.name) as scope: 77 | self.beta = tf.get_variable("beta", [shape[-1]], 78 | initializer=tf.constant_initializer(0.)) 79 | self.gamma = tf.get_variable("gamma", [shape[-1]], 80 | initializer=tf.random_normal_initializer(1., 0.02)) 81 | 82 | batch_mean, batch_var = tf.nn.moments(x, axes, name='moments') 83 | ema_apply_op = self.ema.apply([batch_mean, batch_var]) 84 | self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var) 85 | 86 | with tf.control_dependencies([ema_apply_op]): 87 | mean, var = tf.identity(batch_mean), tf.identity(batch_var) 88 | else: 89 | mean, var = self.ema_mean, self.ema_var 90 | 91 | normed = tf.nn.batch_norm_with_global_normalization( 92 | x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True) 93 | 94 | return normed 95 | 96 | class NeuralModel(): 97 | def __init__(self, config, name): 98 | 99 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 100 | self.y = tf.placeholder(tf.float32, [None]) 101 | self.lr = tf.placeholder(tf.float32, []) 102 | self.keep_prob = tf.placeholder(tf.float32, []) 103 | # self.year = tf.placeholder(tf.float32, [None,1]) 104 | # used for max image 105 | # self.image = 
tf.Variable(initial_value=init,name="image") 106 | 107 | self.conv1_1 = conv_relu_batch(self.x, 128, 3,1, name="conv1_1") 108 | conv1_1_d = tf.nn.dropout(self.conv1_1, self.keep_prob) 109 | conv1_2 = conv_relu_batch(conv1_1_d, 256, 3,2, name="conv1_2") 110 | conv1_2_d = tf.nn.dropout(conv1_2, self.keep_prob) 111 | 112 | conv2_1 = conv_relu_batch(conv1_2_d, 256, 3,1, name="conv2_1") 113 | conv2_1_d = tf.nn.dropout(conv2_1, self.keep_prob) 114 | conv2_2 = conv_relu_batch(conv2_1_d, 512, 3,2, name="conv2_2") 115 | conv2_2_d = tf.nn.dropout(conv2_2, self.keep_prob) 116 | 117 | conv3_1 = conv_relu_batch(conv2_2_d, 512, 3,1, name="conv3_1") 118 | conv3_1_d = tf.nn.dropout(conv3_1, self.keep_prob) 119 | conv3_2= conv_relu_batch(conv3_1_d, 1024, 3,2, name="conv3_2") 120 | conv3_2_d = tf.nn.dropout(conv3_2, self.keep_prob) 121 | 122 | 123 | dim = np.prod(conv3_2_d.get_shape().as_list()[1:]) 124 | flattened = tf.reshape(conv3_2_d, [-1, dim]) 125 | # flattened_d = tf.nn.dropout(flattened, 0.25) 126 | 127 | self.fc6 = dense(flattened, 1024, name="fc6") 128 | # self.fc6 = tf.concat(1, [self.fc6_img,self.year]) 129 | 130 | 131 | self.logits = tf.squeeze(dense(self.fc6, 1, name="dense")) 132 | self.loss_err = tf.nn.l2_loss(self.logits - self.y) 133 | 134 | 135 | with tf.variable_scope('dense') as scope: 136 | scope.reuse_variables() 137 | self.dense_W = tf.get_variable('W') 138 | self.dense_B = tf.get_variable('b') 139 | with tf.variable_scope('conv1_1/conv2d') as scope: 140 | scope.reuse_variables() 141 | self.conv_W = tf.get_variable('W') 142 | self.conv_B = tf.get_variable('b') 143 | 144 | # L1 term 145 | self.loss_reg = tf.abs(tf.reduce_sum(self.logits - self.y)) 146 | # soybean 147 | # alpha = 1.5 148 | # corn 149 | alpha = 5 150 | self.loss = self.loss_err+self.loss_reg*alpha 151 | 152 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_permute.py: -------------------------------------------------------------------------------- 1 | from nnet_for_hist_dropout_stride import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | 
list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | image_all_save = np.copy(image_all) 49 | 50 | 51 | # result_band = np.zeros([10,2,7,6]) 52 | # for p in range(10): 53 | # for loop in range(0,2): 54 | # for predict_year in range(2009,2016): 55 | # image_all = np.copy(image_all_save) 56 | # if p!=9: 57 | # np.take(image_all[:,:,:,p],np.random.permutation(image_all.shape[0]),axis=0,out=image_all[:,:,:,p]) 58 | # index_train = np.nonzero(year_all < predict_year)[0] 59 | # index_validate = np.nonzero(year_all == predict_year)[0] 60 | 61 | 62 | # # calc train image mean (for each band), and then detract (broadcast) 63 | # image_mean=np.mean(image_all[index_train],(0,1,2)) 64 | # image_all = image_all - image_mean 65 | 66 | # image_train=image_all[index_train] 67 | # yield_train=yield_all[index_train] 68 | 69 | # for count,time in enumerate(range(10,31,4)): 70 | # g = tf.Graph() 71 | # with g.as_default(): 72 | # # modify config 73 | # config = Config() 74 | # config.H=time 75 | 76 | # model= NeuralModel(config,'net') 77 | 78 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 79 | # # Launch the graph. 80 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 81 | # sess.run(tf.initialize_all_variables()) 82 | # saver=tf.train.Saver() 83 | # if predict_year==2012: 84 | # saver.restore(sess, config.save_path+str(loop+2)+str(time) + str(predict_year)+'CNN_model.ckpt') 85 | # else: 86 | # saver.restore(sess, config.save_path+str(loop)+str(time) + str(predict_year)+'CNN_model.ckpt') 87 | 88 | # # save result 89 | # pred_out = [] 90 | # real_out = [] 91 | # feature_out = [] 92 | # for i in range(image_train.shape[0] / config.B): 93 | # feature,pred = sess.run( 94 | # [model.fc6,model.logits], feed_dict={ 95 | # model.x: image_train[i * config.B:(i + 1) * config.B,:,0:config.H,:], 96 | # model.y: yield_train[i * config.B:(i + 1) * config.B], 97 | # model.keep_prob:1 98 | # }) 99 | # real = yield_train[i * config.B:(i + 1) * config.B] 100 | 101 | # pred_out.append(pred) 102 | # real_out.append(real) 103 | # feature_out.append(feature) 104 | # pred_out=np.concatenate(pred_out) 105 | # real_out=np.concatenate(real_out) 106 | # feature_out=np.concatenate(feature_out) 107 | 108 | # rmse = np.sqrt(np.mean((pred_out-real_out)**2)) 109 | # print 'p',p 110 | # print rmse 111 | # result_band[p,loop,predict_year-2009,count]=rmse 112 | # np.save('permute_band.npy', result_band) 113 | 114 | result_time = np.zeros([31,2,7]) 115 | for p in range(31): 116 | for loop in range(0,2): 117 | for predict_year in range(2009,2016): 118 | image_all = np.copy(image_all_save) 119 | if p!=30: 120 | np.take(image_all[:,:,p,:],np.random.permutation(image_all.shape[0]),axis=0,out=image_all[:,:,p,:]) 121 | index_train = np.nonzero(year_all < predict_year)[0] 122 | index_validate = np.nonzero(year_all == predict_year)[0] 123 | 124 | # calc train image mean (for each band), and then detract (broadcast) 125 | image_mean=np.mean(image_all[index_train],(0,1,2)) 126 | image_all = image_all - image_mean 127 | 128 | image_train=image_all[index_train] 129 | yield_train=yield_all[index_train] 130 | 131 | for time in range(30,31): 132 | g = tf.Graph() 133 | with g.as_default(): 134 | # modify config 135 | config = Config() 136 | config.H=time 137 | 138 | model= NeuralModel(config,'net') 139 | 140 | gpu_options = 
tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 141 | # Launch the graph. 142 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 143 | sess.run(tf.initialize_all_variables()) 144 | saver=tf.train.Saver() 145 | if predict_year==2012: 146 | saver.restore(sess, config.save_path+str(loop+2)+str(time) + str(predict_year)+'CNN_model.ckpt') 147 | else: 148 | saver.restore(sess, config.save_path+str(loop)+str(time) + str(predict_year)+'CNN_model.ckpt') 149 | 150 | # save result 151 | pred_out = [] 152 | real_out = [] 153 | feature_out = [] 154 | for i in range(image_train.shape[0] / config.B): 155 | feature,pred = sess.run( 156 | [model.fc6,model.logits], feed_dict={ 157 | model.x: image_train[i * config.B:(i + 1) * config.B,:,0:config.H,:], 158 | model.y: yield_train[i * config.B:(i + 1) * config.B], 159 | model.keep_prob:1 160 | }) 161 | real = yield_train[i * config.B:(i + 1) * config.B] 162 | 163 | pred_out.append(pred) 164 | real_out.append(real) 165 | feature_out.append(feature) 166 | pred_out=np.concatenate(pred_out) 167 | real_out=np.concatenate(real_out) 168 | feature_out=np.concatenate(feature_out) 169 | 170 | rmse = np.sqrt(np.mean((pred_out-real_out)**2)) 171 | print 'p',p 172 | print rmse 173 | result_time[p,loop,predict_year-2009]=rmse 174 | np.save('permute_time.npy', result_time) 175 | -------------------------------------------------------------------------------- /3 model/nnet_for_hist_dropout_stride.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | # from fetch_data_histogram import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | import scipy.misc 9 | from datetime import datetime 10 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | class Config(): 13 | B, W, H, C = 32, 32,32, 9 14 | train_step = 25000 15 | lr = 1e-3 16 | weight_decay = 0.005 17 | 18 | drop_out = 0.25 19 | # load_path = '/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/' 20 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 21 | # load_path = "/atlas/u/jiaxuan/data/google_drive/img_full_output/" 22 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test21/' 23 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 24 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 25 | 26 | 27 | def conv2d(input_data, out_channels, filter_size,stride, in_channels=None, name="conv2d"): 28 | if not in_channels: 29 | in_channels = input_data.get_shape()[-1] 30 | with tf.variable_scope(name): 31 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 32 | initializer=tf.contrib.layers.variance_scaling_initializer()) 33 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 34 | return tf.nn.conv2d(input_data, W, [1, stride, stride, 1], "SAME") + b 35 | 36 | 37 | def pool2d(input_data, ksize, name="pool2d"): 38 | with tf.variable_scope(name): 39 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 40 | 41 | 42 | def conv_relu_batch(input_data, out_channels, filter_size,stride, in_channels=None, name="crb"): 43 | with tf.variable_scope(name): 44 | a = conv2d(input_data, out_channels, filter_size, stride, in_channels) 45 | b = batch_normalization(a,axes=[0,1,2]) 46 | r = tf.nn.relu(b) 47 | return r 48 | 49 | def dense(input_data, H, N=None, name="dense"): 50 | if not N: 51 | N = input_data.get_shape()[-1] 52 | with 
tf.variable_scope(name): 53 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 54 | b = tf.get_variable("b", [1, H]) 55 | return tf.matmul(input_data, W, name="matmul") + b 56 | 57 | def batch_normalization(input_data, axes=[0], name="batch"): 58 | with tf.variable_scope(name): 59 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 60 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 61 | 62 | class batch_norm(object): 63 | """Code modification of http://stackoverflow.com/a/33950177""" 64 | def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"): 65 | with tf.variable_scope(name): 66 | self.epsilon = epsilon 67 | self.momentum = momentum 68 | 69 | self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum) 70 | self.name = name 71 | 72 | def __call__(self, x, axes=[0,1,2], train=True): 73 | shape = x.get_shape().as_list() 74 | 75 | if train: 76 | with tf.variable_scope(self.name) as scope: 77 | self.beta = tf.get_variable("beta", [shape[-1]], 78 | initializer=tf.constant_initializer(0.)) 79 | self.gamma = tf.get_variable("gamma", [shape[-1]], 80 | initializer=tf.random_normal_initializer(1., 0.02)) 81 | 82 | batch_mean, batch_var = tf.nn.moments(x, axes, name='moments') 83 | ema_apply_op = self.ema.apply([batch_mean, batch_var]) 84 | self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var) 85 | 86 | with tf.control_dependencies([ema_apply_op]): 87 | mean, var = tf.identity(batch_mean), tf.identity(batch_var) 88 | else: 89 | mean, var = self.ema_mean, self.ema_var 90 | 91 | normed = tf.nn.batch_norm_with_global_normalization( 92 | x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True) 93 | 94 | return normed 95 | 96 | class NeuralModel(): 97 | def __init__(self, config, name): 98 | 99 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 100 | self.y = tf.placeholder(tf.float32, [None]) 101 | self.lr = tf.placeholder(tf.float32, []) 102 | self.keep_prob = tf.placeholder(tf.float32, []) 103 | # self.year = tf.placeholder(tf.float32, [None,1]) 104 | # used for max image 105 | # self.image = tf.Variable(initial_value=init,name="image") 106 | 107 | self.conv1_1 = conv_relu_batch(self.x, 128, 3,1, name="conv1_1") 108 | conv1_1_d = tf.nn.dropout(self.conv1_1, self.keep_prob) 109 | conv1_2 = conv_relu_batch(conv1_1_d, 128, 3,2, name="conv1_2") 110 | conv1_2_d = tf.nn.dropout(conv1_2, self.keep_prob) 111 | 112 | conv2_1 = conv_relu_batch(conv1_2_d, 256, 3,1, name="conv2_1") 113 | conv2_1_d = tf.nn.dropout(conv2_1, self.keep_prob) 114 | conv2_2 = conv_relu_batch(conv2_1_d, 256, 3,2, name="conv2_2") 115 | conv2_2_d = tf.nn.dropout(conv2_2, self.keep_prob) 116 | 117 | conv3_1 = conv_relu_batch(conv2_2_d, 512, 3,1, name="conv3_1") 118 | conv3_1_d = tf.nn.dropout(conv3_1, self.keep_prob) 119 | conv3_2= conv_relu_batch(conv3_1_d, 512, 3,1, name="conv3_2") 120 | conv3_2_d = tf.nn.dropout(conv3_2, self.keep_prob) 121 | conv3_3 = conv_relu_batch(conv3_2_d, 512, 3,2, name="conv3_3") 122 | conv3_3_d = tf.nn.dropout(conv3_3, self.keep_prob) 123 | 124 | # conv4_1 = conv_relu_batch(pool3, 512, 3, name="conv4_1") 125 | # conv4_1_d = tf.nn.dropout(conv4_1, self.keep_prob) 126 | # conv4_2 = conv_relu_batch(conv4_1_d, 512, 3, name="conv4_2") 127 | # conv4_2_d = tf.nn.dropout(conv4_2, self.keep_prob) 128 | # conv4_3 = conv_relu_batch(conv4_2_d, 512, 3, name="conv4_3") 129 | # conv4_3_d = 
tf.nn.dropout(conv4_3, self.keep_prob) 130 | # pool4 = pool2d(conv4_3_d, 2, name="pool4") 131 | 132 | # input size=48*48, we can only pool 4 times 133 | # conv5_1 = conv_relu_batch(pool4, 2, 3, name="conv5_1") 134 | # conv5_2 = conv_relu_batch(conv5_1, 2, 3, name="conv5_2") 135 | # conv5_3 = conv_relu_batch(conv5_2, 2, 3, name="conv5_3") 136 | # pool5 = pool2d(conv5_3, 2, name="pool5") 137 | 138 | dim = np.prod(conv3_3_d.get_shape().as_list()[1:]) 139 | flattened = tf.reshape(conv3_3_d, [-1, dim]) 140 | # flattened_d = tf.nn.dropout(flattened, 0.25) 141 | 142 | print flattened.get_shape() 143 | self.fc6 = dense(flattened, 2048, name="fc6") 144 | # self.fc6 = tf.concat(1, [self.fc6_img,self.year]) 145 | 146 | 147 | # fc6_b = batch_normalization(fc6) 148 | # self.fc6_r = tf.nn.relu(fc6_b) 149 | # self.fc6_d = tf.nn.dropout(fc6_r, self.keep_prob) 150 | # 151 | # 152 | # fc7 = dense(fc6_d, 1024, name="fc7") 153 | # fc7_r = tf.nn.relu(fc7) 154 | # fc7_b = batch_normalization(fc7_r) 155 | # fc7_d = tf.nn.dropout(fc7_b, self.keep_prob) 156 | 157 | self.logits = tf.squeeze(dense(self.fc6, 1, name="dense")) 158 | # l2 159 | self.loss_err = tf.nn.l2_loss(self.logits - self.y) 160 | # l1 161 | # self.loss_err = tf.reduce_sum(tf.abs(self.logits - self.y)) 162 | # average 163 | # self.loss_err = tf.abs(tf.reduce_sum(self.logits - self.y)) 164 | 165 | with tf.variable_scope('dense') as scope: 166 | scope.reuse_variables() 167 | self.dense_W = tf.get_variable('W') 168 | self.dense_B = tf.get_variable('b') 169 | with tf.variable_scope('conv1_1/conv2d') as scope: 170 | scope.reuse_variables() 171 | self.conv_W = tf.get_variable('W') 172 | self.conv_B = tf.get_variable('b') 173 | 174 | self.loss_reg = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 175 | self.loss = self.loss_err+self.loss_reg 176 | # self.loss = self.loss_err 177 | 178 | # # learning rate decay 179 | # global_step = tf.Variable(0, name='global_step', trainable=False) 180 | # self.lr = tf.train.exponential_decay(config.lr_start, global_step, 181 | # config.lr_decay_step, config.lr_decay_rate, staircase=False) 182 | 183 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_lstm.py: -------------------------------------------------------------------------------- 1 | from nnet_lstm import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i 
in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | for loop in range(2,3): 49 | for predict_year in range(2009,2016): 50 | logging.basicConfig(filename='train_for_hist_alldata_loop'+str(predict_year)+str(loop)+'.log',level=logging.DEBUG) 51 | # # split into train and validate 52 | # index_train = np.nonzero(year_all < predict_year)[0] 53 | # index_validate = np.nonzero(year_all == predict_year)[0] 54 | # index_test = np.nonzero(year_all == predict_year+1)[0] 55 | 56 | # random choose validation set 57 | index_train = np.nonzero(year_all < predict_year)[0] 58 | index_validate = np.nonzero(year_all == predict_year)[0] 59 | print 'train size',index_train.shape[0] 60 | print 'validate size',index_validate.shape[0] 61 | logging.info('train size %d',index_train.shape[0]) 62 | logging.info('validate size',index_validate.shape[0]) 63 | 64 | # calc train image mean (for each band), and then detract (broadcast) 65 | image_mean=np.mean(image_all[index_train],(0,1,2)) 66 | image_all = image_all - image_mean 67 | 68 | image_validate=image_all[index_validate] 69 | yield_validate=yield_all[index_validate] 70 | 71 | for time in range(10,31,4): 72 | RMSE_min = 100 73 | g = tf.Graph() 74 | with g.as_default(): 75 | # modify config 76 | config = Config() 77 | config.H=time 78 | 79 | model= NeuralModel(config,'net') 80 | 81 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 82 | # Launch the graph. 
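# The training loop below builds each batch by averaging two random draws from the
# training indices (a simple mixup-style augmentation on the histograms and yields).
# A minimal NumPy sketch of that idea, assuming two *distinct* draws; note that as
# written below, index_train_batch_1 is reused in both halves of the average, so the
# second draw has no effect.
import numpy as np

def mixed_batch(image_all, yield_all, index_train, B, H):
    idx_a = np.random.choice(index_train, size=B)   # first random batch
    idx_b = np.random.choice(index_train, size=B)   # second random batch
    image_batch = (image_all[idx_a, :, 0:H, :] + image_all[idx_b, :, 0:H, :]) / 2
    yield_batch = (yield_all[idx_a] + yield_all[idx_b]) / 2
    return image_batch, yield_batch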
83 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 84 | sess.run(tf.initialize_all_variables()) 85 | saver=tf.train.Saver() 86 | for i in range(config.train_step): 87 | if i==3000: 88 | config.lr/=10 89 | 90 | if i==8000: 91 | config.lr/=10 92 | 93 | # index_train_batch = np.random.choice(index_train,size=config.B) 94 | index_validate_batch = np.random.choice(index_validate, size=config.B) 95 | 96 | # try data augmentation while training 97 | index_train_batch_1 = np.random.choice(index_train,size=config.B) 98 | index_train_batch_2 = np.random.choice(index_train,size=config.B) 99 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 100 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 101 | 102 | _, train_loss = sess.run([model.train_op, model.loss], feed_dict={ 103 | model.x:image_train_batch, 104 | model.y:yield_train_batch, 105 | model.lr:config.lr, 106 | model.keep_prob: config.drop_out 107 | }) 108 | 109 | if i%500 == 0: 110 | val_loss = sess.run(model.loss, feed_dict={ 111 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 112 | model.y: yield_all[index_validate_batch], 113 | model.keep_prob: 1 114 | }) 115 | 116 | print str(loop)+str(time)+'predict year'+str(predict_year)+'step'+str(i),train_loss,val_loss,config.lr 117 | logging.info('%d %d %d step %d %f %f %f',loop,time,predict_year,i,train_loss,val_loss,config.lr) 118 | if i%500 == 0: 119 | # do validation 120 | pred = [] 121 | real = [] 122 | for j in range(image_validate.shape[0] / config.B): 123 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 124 | pred_temp= sess.run(model.pred, feed_dict={ 125 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 126 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 127 | model.keep_prob: 1 128 | }) 129 | pred.append(pred_temp) 130 | real.append(real_temp) 131 | pred=np.concatenate(pred) 132 | real=np.concatenate(real) 133 | RMSE=np.sqrt(np.mean((pred-real)**2)) 134 | ME=np.mean(pred-real) 135 | 136 | if RMSE 15: 71 | # color_class = 7 72 | # elif rate > 10: 73 | # color_class = 6 74 | # elif rate > 5: 75 | # color_class = 5 76 | # elif rate > 0: 77 | # color_class = 4 78 | # elif rate > -5: 79 | # color_class = 3 80 | # elif rate > -10: 81 | # color_class = 2 82 | # elif rate > -15: 83 | # color_class = 1 84 | # else: 85 | # color_class = 0 86 | 87 | # # plot soybean yield 88 | # if rate > 60: 89 | # color_class = 0 90 | # elif rate > 55: 91 | # color_class = 1 92 | # elif rate > 50: 93 | # color_class = 2 94 | # elif rate > 45: 95 | # color_class = 3 96 | # elif rate > 40: 97 | # color_class = 4 98 | # elif rate > 35: 99 | # color_class = 5 100 | # elif rate > 30: 101 | # color_class = 6 102 | # elif rate > 25: 103 | # color_class = 7 104 | # elif rate > 20: 105 | # color_class = 8 106 | # elif rate > 15: 107 | # color_class = 9 108 | # else: 109 | # color_class = 10 110 | 111 | # plot corn yield 112 | if rate > 200: 113 | color_class = 0 114 | elif rate > 180: 115 | color_class = 1 116 | elif rate > 160: 117 | color_class = 2 118 | elif rate > 140: 119 | color_class = 3 120 | elif rate > 120: 121 | color_class = 4 122 | elif rate > 100: 123 | color_class = 5 124 | elif rate > 80: 125 | color_class = 6 126 | elif rate > 60: 127 | color_class = 7 128 | elif rate > 40: 129 | color_class = 8 130 | elif rate > 20: 131 | color_class = 9 132 | else: 133 | color_class = 10 134 | 135 | color = colors[color_class] 136 | 
p['style'] = path_style + color 137 | 138 | soup=soup.prettify() 139 | with open(path_save, 'wb') as f: 140 | f.write(soup) 141 | 142 | 143 | def yield_map_raw(real,index,path_save,predict_year): 144 | # Read CNN_err prediction 145 | CNN = {} 146 | err_CNN = real 147 | 148 | print 'CNN',err_CNN.min(),err_CNN.max() 149 | 150 | 151 | 152 | 153 | for i in range(real.shape[0]): 154 | loc1 = str(int(index[i,0])) 155 | loc2 = str(int(index[i,1])) 156 | if len(loc1)==1: 157 | loc1='0'+loc1 158 | if len(loc2)==1: 159 | loc2='00'+loc2 160 | if len(loc2)==2: 161 | loc2='0'+loc2 162 | fips = loc1+loc2 163 | CNN[fips] = err_CNN[i] 164 | 165 | '''CNN''' 166 | # Load the SVG map 167 | svg = open('counties.svg', 'r').read() 168 | # Load into Beautiful Soup 169 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 170 | # Find counties 171 | paths = soup.findAll('path') 172 | # Map colors 173 | # # plot error: 8 classes 174 | # colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 175 | # plot yield: 11 classes 176 | colors = ['#a50026','#d73027','#f46d43','#fdae61','#fee090','#ffffbf','#e0f3f8','#abd9e9','#74add1','#4575b4','#313695'] 177 | 178 | # County style 179 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 180 | # Color the counties based on unemployment rate 181 | for p in paths: 182 | if p['id'] not in ["State_Lines", "separator"]: 183 | try: 184 | rate = CNN[p['id']] 185 | except: 186 | continue 187 | 188 | # # plot error 189 | # if rate > 15: 190 | # color_class = 7 191 | # elif rate > 10: 192 | # color_class = 6 193 | # elif rate > 5: 194 | # color_class = 5 195 | # elif rate > 0: 196 | # color_class = 4 197 | # elif rate > -5: 198 | # color_class = 3 199 | # elif rate > -10: 200 | # color_class = 2 201 | # elif rate > -15: 202 | # color_class = 1 203 | # else: 204 | # color_class = 0 205 | 206 | # plot yield 207 | if rate > 60: 208 | color_class = 0 209 | elif rate > 55: 210 | color_class = 1 211 | elif rate > 50: 212 | color_class = 2 213 | elif rate > 45: 214 | color_class = 3 215 | elif rate > 40: 216 | color_class = 4 217 | elif rate > 35: 218 | color_class = 5 219 | elif rate > 30: 220 | color_class = 6 221 | elif rate > 25: 222 | color_class = 7 223 | elif rate > 20: 224 | color_class = 8 225 | elif rate > 15: 226 | color_class = 9 227 | else: 228 | color_class = 10 229 | 230 | color = colors[color_class] 231 | p['style'] = path_style + color 232 | 233 | soup=soup.prettify() 234 | with open(path_save, 'wb') as f: 235 | f.write(soup) 236 | 237 | if __name__ == "__main__": 238 | path = '/atlas/u/jiaxuan/data/train_results/final/new_L1_L2/' 239 | 240 | # # load baseline 241 | # '''LOAD 2009-2015, no weather''' 242 | # path_data = '/atlas/u/jiaxuan/data/google_drive/img_output/' 243 | # # load mean data 244 | # filename = 'histogram_all_mean.npz' 245 | # content = np.load(path_data + filename) 246 | # image_all = content['output_image'] 247 | # yield_all = content['output_yield'] 248 | # year_all = content['output_year'] 249 | # locations_all = content['output_locations'] 250 | # index_all = content['output_index'] 251 | 252 | # # copy index 253 | # path_load = path+str(0)+str(10)+str(2014)+'result_prediction.npz' 254 | # content_ref=np.load(path_load) 255 | # year_ref=content_ref['year_out'] 256 | # index_ref=content_ref['index_out'] 257 | # 
ref=np.concatenate((year_ref[:,np.newaxis], index_ref),axis=1) 258 | 259 | # print 'before',index_all.shape[0] 260 | # # remove extra index 261 | # list_delete=[] 262 | # for i in range(index_all.shape[0]): 263 | # key = np.array([year_all[i],index_all[i,0],index_all[i,1]]) 264 | # index = np.where(np.all(ref[:,0:3] == key, axis=1)) 265 | # if index[0].shape[0] == 0: 266 | # list_delete.append(i) 267 | # image_all=np.delete(image_all,list_delete,0) 268 | # yield_all=np.delete(yield_all,list_delete,0) 269 | # year_all = np.delete(year_all,list_delete, 0) 270 | # locations_all = np.delete(locations_all, list_delete, 0) 271 | # index_all = np.delete(index_all, list_delete, 0) 272 | # print 'after',index_all.shape[0] 273 | 274 | # # calc NDVI 275 | # image_NDVI = np.zeros([image_all.shape[0],32]) 276 | # for i in range(32): 277 | # image_NDVI[:,i] = (image_all[:,1+9*i]-image_all[:,9*i])/(image_all[:,1+9*i]+image_all[:,9*i]) 278 | 279 | 280 | 281 | 282 | for predict_year in range(2009,2014): 283 | # validate = np.nonzero(year_all == predict_year)[0] 284 | # train = np.nonzero(year_all < predict_year)[0] 285 | for day in range(10,31,4): 286 | # # Ridge regression, NDVI 287 | # feature = image_NDVI[:,0:day] 288 | 289 | # lr = linear_model.Ridge(10) 290 | # lr.fit(feature[train],yield_all[train]) 291 | # Y_pred_reg = lr.predict(feature[validate]) 292 | 293 | # rmse = np.sqrt(np.mean((Y_pred_reg-yield_all[validate])**2)) 294 | # me = np.mean(Y_pred_reg-yield_all[validate])/np.mean(yield_all[validate])*100 295 | # print 'Ridge',predict_year,day,rmse,me 296 | 297 | # # print baseline figure 298 | # path_save = path+'map_baseline/'+str(0)+str(predict_year)+str(day)+'baseline.svg' 299 | # yield_map_raw(Y_pred_reg, index_all[validate], path_save, predict_year) 300 | 301 | 302 | # print CNN figure 303 | path_load = path+str(2)+str(day)+str(predict_year)+'result_prediction.npz' 304 | path_save = path+'map_real/'+str(0)+str(predict_year)+str(day)+'real.svg' 305 | yield_map(path_load, path_save, predict_year,'real') 306 | print predict_year,day 307 | 308 | # print CNN figure 309 | path_load = path+str(2)+str(day)+str(predict_year)+'result_prediction.npz' 310 | path_save = path+'map_pred/'+str(0)+str(predict_year)+str(day)+'pred.svg' 311 | yield_map(path_load, path_save, predict_year,'pred') 312 | print predict_year,day 313 | 314 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_corn.py: -------------------------------------------------------------------------------- 1 | from nnet_for_hist_dropout_stride import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = 
np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | # match corn yield 49 | # load corn yield 50 | corn_yield = np.genfromtxt('/atlas/u/jiaxuan/git/crop-forecasting/corn_yield.csv', delimiter=',') 51 | # keep data with corn yield 52 | list_delete=[] 53 | for i in range(image_all.shape[0]): 54 | key = np.array([year_all[i],index_all[i,0],index_all[i,1]]) 55 | index = np.where(np.all(corn_yield[:,0:3] == key, axis=1)) 56 | # print index[0].shape 57 | if index[0].shape[0] != 0: 58 | yield_all[i]=corn_yield[index,3] 59 | else: 60 | # print 'del' 61 | list_delete.append(i) 62 | image_all=np.delete(image_all,list_delete,0) 63 | yield_all=np.delete(yield_all,list_delete,0) 64 | year_all = np.delete(year_all,list_delete, 0) 65 | locations_all = np.delete(locations_all, list_delete, 0) 66 | index_all = np.delete(index_all, list_delete, 0) 67 | 68 | 69 | 70 | for loop in range(0,1): 71 | for predict_year in range(2009,2016): 72 | logging.basicConfig(filename=config.save_path+'log/train_for_hist_alldata_loop'+str(predict_year)+str(loop)+'.log',level=logging.DEBUG) 73 | # # split into train and validate 74 | # index_train = np.nonzero(year_all < predict_year)[0] 75 | # index_validate = np.nonzero(year_all == predict_year)[0] 76 | # index_test = np.nonzero(year_all == predict_year+1)[0] 77 | 78 | # random choose validation set 79 | index_train = np.nonzero(year_all < predict_year)[0] 80 | index_validate = np.nonzero(year_all == predict_year)[0] 81 | print 'train size',index_train.shape[0] 82 | print 'validate size',index_validate.shape[0] 83 | logging.info('train size %d',index_train.shape[0]) 84 | logging.info('validate size %d',index_validate.shape[0]) 85 | 86 | 87 | # # calc train image mean (for each band), and then detract (broadcast) 88 | # image_mean=np.mean(image_all[index_train],(0,1,2)) 89 | # image_all = image_all - image_mean 90 | 91 | image_validate=image_all[index_validate] 92 | yield_validate=yield_all[index_validate] 93 | 94 | for time in range(30,31): 95 | RMSE_min = 100 96 | g = tf.Graph() 97 | with g.as_default(): 98 | # modify config 99 | config = Config() 100 | config.H=time 101 | 102 | model= NeuralModel(config,'net') 103 | 104 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 105 | # Launch the graph. 
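# The model imported above (nnet_for_hist_dropout_stride) trains on a composite loss:
# an L2 term on per-county errors plus alpha times the absolute summed error of the
# batch (alpha = 5 for corn, per the comment in that file), which pushes the mean batch
# error toward zero. Minimal NumPy sketch of the same quantities; pred and real are
# made-up stand-ins for model.logits and model.y.
import numpy as np

alpha = 5.0
pred = np.array([160.0, 150.0, 170.0])        # illustrative predicted yields
real = np.array([158.0, 155.0, 166.0])        # illustrative observed yields
loss_err = 0.5 * np.sum((pred - real) ** 2)   # equivalent of tf.nn.l2_loss
loss_reg = np.abs(np.sum(pred - real))        # absolute summed batch error
loss = loss_err + alpha * loss_reg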
106 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 107 | sess.run(tf.initialize_all_variables()) 108 | saver=tf.train.Saver() 109 | for i in range(config.train_step): 110 | if i==4000: 111 | config.lr/=10 112 | 113 | if i==20000: 114 | config.lr/=10 115 | 116 | # index_train_batch = np.random.choice(index_train,size=config.B) 117 | index_validate_batch = np.random.choice(index_validate, size=config.B) 118 | 119 | # try data augmentation while training 120 | shift = 1 121 | index_train_batch_1 = np.random.choice(index_train,size=config.B+shift*2) 122 | index_train_batch_2 = np.random.choice(index_train,size=config.B+shift*2) 123 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 124 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 125 | 126 | arg_index = np.argsort(yield_train_batch) 127 | yield_train_batch = yield_train_batch[arg_index][shift:-shift] 128 | image_train_batch = image_train_batch[arg_index][shift:-shift] 129 | 130 | _, train_loss, train_loss_reg = sess.run([model.train_op, model.loss_err, model.loss_reg], feed_dict={ 131 | model.x:image_train_batch, 132 | model.y:yield_train_batch, 133 | model.lr:config.lr, 134 | model.keep_prob: config.keep_prob 135 | }) 136 | 137 | if i%500 == 0: 138 | val_loss,val_loss_reg = sess.run([model.loss_err,model.loss_reg], feed_dict={ 139 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 140 | model.y: yield_all[index_validate_batch], 141 | model.keep_prob: 1 142 | }) 143 | 144 | print str(loop)+str(time)+'predict year'+str(predict_year)+'step'+str(i),train_loss,train_loss_reg,val_loss,val_loss_reg,config.lr 145 | logging.info('%d %d %d step %d %f %f %f %f %f',loop,time,predict_year,i,train_loss,train_loss_reg,val_loss,val_loss_reg,config.lr) 146 | if i%500 == 0: 147 | # do validation 148 | pred = [] 149 | real = [] 150 | for j in range(image_validate.shape[0] / config.B): 151 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 152 | pred_temp= sess.run(model.logits, feed_dict={ 153 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 154 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 155 | model.keep_prob: 1 156 | }) 157 | pred.append(pred_temp) 158 | real.append(real_temp) 159 | pred=np.concatenate(pred) 160 | real=np.concatenate(real) 161 | RMSE=np.sqrt(np.mean((pred-real)**2)) 162 | ME=np.mean(pred-real) 163 | RMSE_ideal = np.sqrt(np.mean((pred-ME-real)**2)) 164 | arg_index = np.argsort(pred) 165 | pred = pred[arg_index][50:-50] 166 | real = real[arg_index][50:-50] 167 | ME_part = np.mean(pred-real) 168 | 169 | if RMSE0 110 | filter_5000=image_temp<5000 111 | filter=filter_0*filter_5000 112 | return float(np.count_nonzero(filter))/image_temp.size 113 | 114 | def preprocess_save_data(): 115 | 116 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image" 117 | MODIS_temperature_dir="/atlas/u/jiaxuan/data/google_drive/data_temperature" 118 | MODIS_mask_dir="/atlas/u/jiaxuan/data/google_drive/data_mask" 119 | 120 | img_output_dir="/atlas/u/jiaxuan/data/google_drive/img_output/" 121 | 122 | # MODIS_processed_dir="C:/360Downloads/6_Data_county_processed_scaled/" 123 | 124 | # MODIS_dir="/atlas/u/jiaxuan/data/MODIS_data_county/3_Data_county" 125 | # MODIS_temperature_dir="/atlas/u/jiaxuan/data/MODIS_data_county_temperature" 126 | # MODIS_mask_dir="/atlas/u/jiaxuan/data/MODIS_data_county_mask" 127 | # 
MODIS_processed_dir="/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/" 128 | 129 | data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float) 130 | count=1 131 | for root, dirs, files in os.walk(MODIS_dir): 132 | for file in files: 133 | if file.endswith(".tif"): 134 | MODIS_path=os.path.join(MODIS_dir, file) 135 | # check file size to see if it's broken 136 | # if os.path.getsize(MODIS_path) < 10000000: 137 | # print 'file broken, continue' 138 | # continue 139 | MODIS_temperature_path=os.path.join(MODIS_temperature_dir,file) 140 | MODIS_mask_path=os.path.join(MODIS_mask_dir,file) 141 | 142 | # get geo location 143 | raw = file.replace('_',' ').replace('.',' ').split() 144 | loc1 = int(raw[0]) 145 | loc2 = int(raw[1]) 146 | # read image 147 | try: 148 | MODIS_img = np.transpose(np.array(gdal.Open(MODIS_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 149 | except ValueError as msg: 150 | print msg 151 | continue 152 | # read temperature 153 | MODIS_temperature_img = np.transpose(np.array(gdal.Open(MODIS_temperature_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 154 | # shift 155 | MODIS_temperature_img = MODIS_temperature_img-12000 156 | # scale 157 | MODIS_temperature_img = MODIS_temperature_img*1.25 158 | # clean 159 | MODIS_temperature_img[MODIS_temperature_img<0]=0 160 | MODIS_temperature_img[MODIS_temperature_img>5000]=5000 161 | # read mask 162 | MODIS_mask_img = np.transpose(np.array(gdal.Open(MODIS_mask_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 163 | # Non-crop = 0, crop = 1 164 | MODIS_mask_img[MODIS_mask_img != 12] = 0 165 | MODIS_mask_img[MODIS_mask_img == 12] = 1 166 | 167 | # Divide image into years 168 | MODIS_img_list=divide_image(MODIS_img, 0, 46 * 7, 14) 169 | MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, 46 * 2, 14) 170 | MODIS_mask_img = extend_mask(MODIS_mask_img, 3) 171 | MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, 14) 172 | 173 | # Merge image and temperature 174 | MODIS_list = merge_image(MODIS_img_list,MODIS_temperature_img_list) 175 | 176 | # Do the mask job 177 | MODIS_list_masked = mask_image(MODIS_list,MODIS_mask_img_list) 178 | 179 | # check if the result is in the list 180 | year_start = 2003 181 | for i in range(0, 14): 182 | year = i+year_start 183 | key = np.array([year,loc1,loc2]) 184 | if np.sum(np.all(data_yield[:,0:3] == key, axis=1))>0: 185 | # save as .npy 186 | filename=img_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 187 | np.save(filename,MODIS_list_masked[i]) 188 | print filename,':written ',str(count) 189 | count+=1 190 | 191 | def preprocess_save_data_parallel(file): 192 | 193 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image_full" 194 | MODIS_temperature_dir="/atlas/u/jiaxuan/data/google_drive/data_temperature" 195 | MODIS_mask_dir="/atlas/u/jiaxuan/data/google_drive/data_mask" 196 | 197 | img_output_dir="/atlas/u/jiaxuan/data/google_drive/img_full_output/" 198 | img_zoom_output_dir="/atlas/u/jiaxuan/data/google_drive/img_zoom_full_output/" 199 | 200 | # MODIS_processed_dir="C:/360Downloads/6_Data_county_processed_scaled/" 201 | 202 | # MODIS_dir="/atlas/u/jiaxuan/data/MODIS_data_county/3_Data_county" 203 | # MODIS_temperature_dir="/atlas/u/jiaxuan/data/MODIS_data_county_temperature" 204 | # MODIS_mask_dir="/atlas/u/jiaxuan/data/MODIS_data_county_mask" 205 | # MODIS_processed_dir="/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/" 206 | 207 | data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float) 208 | if 
file.endswith(".tif"): 209 | MODIS_path=os.path.join(MODIS_dir, file) 210 | # check file size to see if it's broken 211 | # if os.path.getsize(MODIS_path) < 10000000: 212 | # print 'file broken, continue' 213 | # continue 214 | MODIS_temperature_path=os.path.join(MODIS_temperature_dir,file) 215 | MODIS_mask_path=os.path.join(MODIS_mask_dir,file) 216 | 217 | # get geo location 218 | raw = file.replace('_',' ').replace('.',' ').split() 219 | loc1 = int(raw[0]) 220 | loc2 = int(raw[1]) 221 | # read image 222 | try: 223 | MODIS_img = np.transpose(np.array(gdal.Open(MODIS_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 224 | except ValueError as msg: 225 | print msg 226 | # read temperature 227 | MODIS_temperature_img = np.transpose(np.array(gdal.Open(MODIS_temperature_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 228 | # shift 229 | # MODIS_temperature_img = MODIS_temperature_img-12000 230 | # scale 231 | # MODIS_temperature_img = MODIS_temperature_img*1.25 232 | # clean 233 | # MODIS_temperature_img[MODIS_temperature_img<0]=0 234 | # MODIS_temperature_img[MODIS_temperature_img>5000]=5000 235 | # read mask 236 | MODIS_mask_img = np.transpose(np.array(gdal.Open(MODIS_mask_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 237 | # Non-crop = 0, crop = 1 238 | MODIS_mask_img[MODIS_mask_img != 12] = 0 239 | MODIS_mask_img[MODIS_mask_img == 12] = 1 240 | 241 | # Divide image into years 242 | MODIS_img_list=divide_image(MODIS_img, 0, 46 * 7, 14) 243 | MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, 46 * 2, 14) 244 | MODIS_mask_img = extend_mask(MODIS_mask_img, 3) 245 | MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, 14) 246 | 247 | # Merge image and temperature 248 | MODIS_list = merge_image(MODIS_img_list,MODIS_temperature_img_list) 249 | 250 | # Do the mask job 251 | MODIS_list_masked = mask_image(MODIS_list,MODIS_mask_img_list) 252 | 253 | # check if the result is in the list 254 | year_start = 2003 255 | for i in range(0, 14): 256 | year = i+year_start 257 | key = np.array([year,loc1,loc2]) 258 | if np.sum(np.all(data_yield[:,0:3] == key, axis=1))>0: 259 | # # detect quality 260 | # quality = quality_dector(MODIS_list_masked[i]) 261 | # if quality < 0.01: 262 | # print 'omitted' 263 | # print year,loc1,loc2,quality 264 | 265 | # # delete 266 | # yield_all = np.genfromtxt('yield_final_highquality.csv', delimiter=',') 267 | # key = np.array([year,loc1,loc2]) 268 | # index = np.where(np.all(yield_all[:,0:3] == key, axis=1)) 269 | # yield_all=np.delete(yield_all, index, axis=0) 270 | # np.savetxt("yield_final_highquality.csv", yield_all, delimiter=",") 271 | 272 | # continue 273 | 274 | ## 1 save original file 275 | filename=img_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 276 | np.save(filename,MODIS_list_masked[i]) 277 | print filename,':written ' 278 | 279 | ## 2 save zoomed file (48*48) 280 | zoom0 = float(48) / MODIS_list_masked[i].shape[0] 281 | zoom1 = float(48) / MODIS_list_masked[i].shape[1] 282 | output_image = zoom(MODIS_list_masked[i], (zoom0, zoom1, 1)) 283 | 284 | filename=img_zoom_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 285 | np.save(filename,output_image) 286 | print filename,':written ' 287 | 288 | 289 | 290 | 291 | if __name__ == "__main__": 292 | # # save data 293 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image_full" 294 | for _, _, files in os.walk(MODIS_dir): 295 | Parallel(n_jobs=12)(delayed(preprocess_save_data_parallel)(file) for file in files) 296 | 297 | # # clean yield (low quality) 298 | # 
check_data_integrity_del() 299 | # # check integrity 300 | # check_data_integrity() 301 | 302 | -------------------------------------------------------------------------------- /3 model/train_for_hist_alldata_lstm.py: -------------------------------------------------------------------------------- 1 | from nnet_lstm import * 2 | from GP_crop_v3 import * 3 | import logging 4 | 5 | 6 | 7 | if __name__ == "__main__": 8 | predict_year = 2013 9 | logging.basicConfig(filename='train_for_hist_alldata'+str(predict_year)+'.log',level=logging.DEBUG) 10 | # Create a coordinator 11 | config = Config() 12 | 13 | # load data to memory 14 | filename = 'histogram_all' + '.npz' 15 | # filename = 'histogram_all_soilweather' + '.npz' 16 | content = np.load(config.load_path + filename) 17 | image_all = content['output_image'] 18 | yield_all = content['output_yield'] 19 | year_all = content['output_year'] 20 | locations_all = content['output_locations'] 21 | index_all = content['output_index'] 22 | 23 | # delete broken image 24 | list_delete=[] 25 | for i in range(image_all.shape[0]): 26 | if np.sum(image_all[i,:,:,:])<=287: 27 | if year_all[i]<2016: 28 | list_delete.append(i) 29 | image_all=np.delete(image_all,list_delete,0) 30 | yield_all=np.delete(yield_all,list_delete,0) 31 | year_all = np.delete(year_all,list_delete, 0) 32 | locations_all = np.delete(locations_all, list_delete, 0) 33 | index_all = np.delete(index_all, list_delete, 0) 34 | 35 | 36 | # keep major counties 37 | list_keep=[] 38 | for i in range(image_all.shape[0]): 39 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 40 | list_keep.append(i) 41 | image_all=image_all[list_keep,:,:,:] 42 | yield_all=yield_all[list_keep] 43 | year_all = year_all[list_keep] 44 | locations_all = locations_all[list_keep,:] 45 | index_all = index_all[list_keep,:] 46 | 47 | # split into train and validate 48 | index_train = np.nonzero(year_all < predict_year)[0] 49 | index_validate = np.nonzero(year_all == predict_year)[0] 50 | print 'train size',index_train.shape[0] 51 | print 'validate size',index_validate.shape[0] 52 | 53 | # calc train image mean (for each band), and then detract (broadcast) 54 | image_mean=np.mean(image_all[index_train],(0,1,2)) 55 | image_all = image_all - image_mean 56 | 57 | image_validate=image_all[index_validate] 58 | yield_validate=yield_all[index_validate] 59 | 60 | model= NeuralModel(config,'net') 61 | 62 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 63 | # Launch the graph. 64 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 65 | sess.run(tf.initialize_all_variables()) 66 | 67 | summary_train_loss = [] 68 | summary_eval_loss = [] 69 | summary_RMSE = [] 70 | summary_ME = [] 71 | 72 | train_loss=0 73 | val_loss=0 74 | val_prediction = 0 75 | val_deviation = np.zeros([config.B]) 76 | # ######################### 77 | # block when test 78 | # add saver 79 | saver=tf.train.Saver() 80 | # Restore variables from disk. 
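# The split and normalization above follow the pattern used throughout these training
# scripts: all years before predict_year form the training set, predict_year itself is
# the validation set, and the per-band mean is computed on training examples only, then
# broadcast-subtracted from every example. Minimal NumPy sketch with a tiny random
# stand-in array (the real arrays come from histogram_all.npz loaded above).
import numpy as np

year_all = np.array([2010, 2011, 2012, 2013, 2013])
image_all = np.random.rand(5, 32, 32, 9).astype(np.float32)   # stand-in histograms
predict_year = 2013

index_train = np.nonzero(year_all < predict_year)[0]
index_validate = np.nonzero(year_all == predict_year)[0]
image_mean = np.mean(image_all[index_train], (0, 1, 2))   # shape (9,), one mean per band
image_all = image_all - image_mean                        # broadcast over all examples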
81 | try: 82 | saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 83 | # Restore log results 84 | npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 85 | summary_train_loss = npzfile['summary_train_loss'].tolist() 86 | summary_eval_loss = npzfile['summary_eval_loss'].tolist() 87 | summary_RMSE = npzfile['summary_RMSE'].tolist() 88 | summary_ME = npzfile['summary_ME'].tolist() 89 | print("Model restored.") 90 | except: 91 | print 'No history model found' 92 | # ######################### 93 | 94 | 95 | RMSE_min = 100 96 | try: 97 | for i in range(config.train_step): 98 | if i==3000: 99 | config.lr/=10 100 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 101 | # # Restore log results 102 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 103 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 104 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 105 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 106 | # summary_ME = npzfile['summary_ME'].tolist() 107 | # print("Model restored.") 108 | if i==8000: 109 | config.lr/=10 110 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 111 | # # Restore log results 112 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 113 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 114 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 115 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 116 | # summary_ME = npzfile['summary_ME'].tolist() 117 | # print("Model restored.") 118 | # if i==12000: 119 | # config.lr/=10 120 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 121 | # # Restore log results 122 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 123 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 124 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 125 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 126 | # summary_ME = npzfile['summary_ME'].tolist() 127 | # print("Model restored.") 128 | 129 | # No augmentation 130 | # index_train_batch = np.random.choice(index_train,size=config.B) 131 | # image_train_batch = image_all[index_train_batch,:,0:config.H,:] 132 | # yield_train_batch = yield_all[index_train_batch] 133 | # year_train_batch = year_all[index_train_batch,np.newaxis] 134 | 135 | # try data augmentation while training 136 | index_train_batch_1 = np.random.choice(index_train,size=config.B) 137 | index_train_batch_2 = np.random.choice(index_train,size=config.B) 138 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 139 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 140 | # year_train_batch = (year_all[index_train_batch_1,np.newaxis]+year_all[index_train_batch_2,np.newaxis])/2 141 | 142 | index_validate_batch = np.random.choice(index_validate, size=config.B) 143 | 144 | _, train_loss = sess.run([model.train_op, model.loss], feed_dict={ 145 | model.x:image_train_batch, 146 | model.y:yield_train_batch, 147 | model.lr:config.lr, 148 | model.keep_prob: config.drop_out 149 | }) 150 | 151 | if i%200 == 0: 152 | val_loss,fc6,W,B = sess.run([model.loss,model.feature,model.dense_W,model.dense_B], feed_dict={ 153 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 154 | model.y: yield_all[index_validate_batch], 155 | model.keep_prob: 1 156 | }) 157 | 158 | print 'predict 
year'+str(predict_year)+'step'+str(i),train_loss,val_loss,config.lr 159 | logging.info('predict year %d step %d %f %f %f',predict_year,i,train_loss,val_loss,config.lr) 160 | if i%200 == 0: 161 | # do validation 162 | pred = [] 163 | real = [] 164 | for j in range(image_validate.shape[0] / config.B): 165 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 166 | pred_temp= sess.run(model.pred, feed_dict={ 167 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 168 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 169 | model.keep_prob: 1 170 | }) 171 | pred.append(pred_temp) 172 | real.append(real_temp) 173 | pred=np.concatenate(pred) 174 | real=np.concatenate(real) 175 | RMSE=np.sqrt(np.mean((pred-real)**2)) 176 | ME=np.mean(pred-real) 177 | 178 | if RMSEyear_current-6 160 | # c5 = year_out!=2012 161 | # c3 = year_out==year_current-1 162 | ind_train = np.where(c3*c4)[0] 163 | print 'shape of train set',ind_train.shape 164 | index_reg = np.where(year_out!=2016)[0] 165 | 166 | '''4 normalize all features''' 167 | bias = np.ones([feature_out.shape[0],1]) 168 | feature_out = np.concatenate((feature_out,bias),axis=1) 169 | 170 | locations_mean = np.mean(locations_out, axis=0,keepdims=True) 171 | locations_std = np.mean(locations_out,axis=0,keepdims=True) 172 | locations_scale = np.amax(locations_out,axis=0)-np.amin(locations_out,axis=0) 173 | locations_out -= locations_mean 174 | locations_out /= locations_scale 175 | 176 | year_out = year_out[:,np.newaxis] 177 | year_mean = np.mean(year_out, axis=0,keepdims=True) 178 | year_std = np.mean(year_out,axis=0,keepdims=True) 179 | year_scale = np.amax(year_out,axis=0)-np.amin(year_out,axis=0) 180 | year_out -= year_mean 181 | year_out /= year_scale 182 | 183 | real_out = real_out[:,np.newaxis] 184 | # print 'year_out',np.amin(year_out),np.amax(year_out) 185 | 186 | 187 | 188 | # print index_reg.shape 189 | # ''' 190 | # remove yearly effect (optional) 191 | # ''' 192 | # print 'remove yearly effect' 193 | # lr = linear_model.Ridge(alpha=0.1,fit_intercept=True) 194 | # lr.fit(year_out[index_reg,:],real_out[index_reg,:]) 195 | # year_weight = lr.coef_[0] 196 | # print lr.coef_.shape 197 | # print lr.coef_,lr.intercept_ 198 | 199 | # plt.plot(year_out[index_reg,:],real_out[index_reg,:]) 200 | # plt.show() 201 | # real_out = real_out-year_out*year_weight 202 | 203 | 204 | # split dataset 205 | feat_train = feature_out[ind_train,] 206 | feat_test = feature_out[ind_test,] 207 | Y_train = real_out[ind_train,] 208 | Y_test = real_out[ind_test,] 209 | loc_train = locations_out[ind_train,] 210 | loc_test = locations_out[ind_test,] 211 | year_train = year_out[ind_train,] 212 | year_test = year_out[ind_test,] 213 | 214 | 215 | 216 | '''baseline''' 217 | # print "The std deviation of test yield is", np.std(real_out[ind_test,]) 218 | # print "Average yield is", np.mean(real_out[ind_test]) 219 | 220 | # '''Ridge regression''' 221 | # for alpha in np.linspace(-4, 2,num=5): 222 | # '''Ridge regression''' 223 | # print np.power(10,alpha) 224 | # lr = linear_model.Ridge(alpha =np.power(10,alpha),fit_intercept=False) 225 | # lr.fit(np.concatenate((feat_train,year_train),axis=1),Y_train) 226 | # Y_pred_reg = lr.predict(np.concatenate((feat_test,year_test),axis=1)) 227 | 228 | # # lr = linear_model.Ridge(alpha =np.power(10,alpha)) 229 | # # lr.fit(feat_train,Y_train) 230 | # # Y_pred_reg = lr.predict(feat_test) 231 | 232 | # # print lr.coef_.shape 233 | # # print lr.coef_ 234 | # # print lr.intercept_ 235 | 236 | # print "The RMSE 
of ridge regression is", np.sqrt(np.mean((Y_pred_reg-real_out[ind_test,])**2)) 237 | # print "Mean Error of ridge regression is",np.mean(Y_pred_reg-real_out[ind_test,]) 238 | 239 | 240 | '''CNN baseline''' 241 | print "The RMSE of CNN model is", np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test])**2)) 242 | '''CNN weight regression''' 243 | # print "The RMSE of regression, using CNN weight", np.sqrt(np.mean((real_out[ind_test,]-(np.dot(feat_test,W)))**2)) 244 | print "Mean Error of CNN is",np.mean(pred_out[ind_test]-real_out[ind_test,0]) 245 | # print "Average prediction of CNN is", np.mean(pred_out[ind_test]) 246 | 247 | 248 | 249 | ''' 250 | Gaussian Prcoess Model 3, 251 | Linear GP as on page 28 of GP for machine learning 252 | kernel: spatial*time 253 | 254 | ''' 255 | 256 | sigma=1 257 | l_s = 0.5 258 | l_t = 1.5 259 | noise = 0.1 260 | const = 0.01 261 | 262 | X_train = feat_train 263 | X_test = feat_test 264 | n1 = X_train.shape[0] 265 | n2 = X_test.shape[0] 266 | X = np.concatenate((X_train,X_test),axis=0) 267 | LOC = np.concatenate((loc_train,loc_test),axis=0) 268 | YEAR = np.concatenate((year_train,year_test),axis=0) 269 | pairwise_dists_loc = squareform(pdist(LOC, 'euclidean'))**2/l_s**2 270 | pairwise_dists_year = squareform(pdist(YEAR, 'euclidean'))**2/l_t**2 271 | 272 | n=np.zeros([n1+n2,n1+n2]) 273 | n[0:n1,0:n1] += noise*np.identity(n1) 274 | kernel_mat_3 = sigma*(np.exp(-pairwise_dists_loc)*np.exp(-pairwise_dists_year))+n 275 | b = W 276 | B = np.identity(X_train.shape[1]) 277 | 278 | print l_s,l_t,noise,const 279 | B /= const # B is diag, inverse is simplified 280 | K_inv = np.linalg.inv(kernel_mat_3[0:n1,0:n1]) 281 | beta = np.linalg.inv(B+X_train.T.dot(K_inv).dot(X_train)).dot( 282 | X_train.T.dot(K_inv).dot(Y_train.reshape([n1,1]))+B.dot(b)) 283 | Y_pred_3 = X_test.dot(beta) + kernel_mat_3[n1:(n1+n2),0:n1].dot(K_inv\ 284 | ).dot(Y_train.reshape([n1,1])-X_train.dot(beta)) 285 | 286 | RMSE_GP=np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape))**2)) 287 | ME_GP=np.mean(Y_pred_3[:,0]-real_out[ind_test,0]) 288 | Average_GP=np.mean(Y_pred_3[:,0]) 289 | print "The RMSE of GP model is", RMSE_GP 290 | print "Mean Error of GP model is",ME_GP 291 | # print "Average prediction of GP is",Average_GP 292 | 293 | '''If there is no bias''' 294 | print "if there is no bias, the RMSE is" 295 | print "CNN",np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test]+np.mean(pred_out[ind_test]-real_out[ind_test,0]))**2)) 296 | print "GP",np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape)-np.mean(Y_pred_3[:,0]-real_out[ind_test,0]))**2)) 297 | 298 | return (RMSE_GP,ME_GP) 299 | 300 | # '''3 weighted sum''' 301 | # area_total = 0 302 | # yield_real_total = 0 303 | # yield_pred_total_CNN = 0 304 | # yield_pred_total_GP = 0 305 | # for count,i in enumerate(ind_test): 306 | # # get area 307 | # year = year_current 308 | # loc1 = index_out[i,0] 309 | # loc2 = index_out[i,1] 310 | # key = np.array([year,loc1,loc2]) 311 | # index = np.where(np.all(area[:,0:3] == key, axis=1)) 312 | # # print i,key,index 313 | # area_current = area[index,3] 314 | 315 | # area_total+=area_current 316 | # yield_real_total+=area_current*real_out[i,] 317 | # yield_pred_total_CNN+=area_current*pred_out[i] 318 | # yield_pred_total_GP+=area_current*Y_pred_3[count,] 319 | 320 | # real_average = yield_real_total/area_total 321 | # pred_average_CNN = yield_pred_total_CNN/area_total 322 | # pred_average_GP = yield_pred_total_GP/area_total 323 | # print 'real_average',real_average 324 | 
# print 'pred_average_CNN',pred_average_CNN 325 | # print 'pred_average_GP',pred_average_GP 326 | 327 | 328 | 329 | 330 | # if __name__ == "__main__": 331 | # # predict_year = 2013 332 | 333 | # # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 334 | # # path = save_path + str(predict_year)+'result_prediction.npz' 335 | 336 | # # RMSE = Parallel(n_jobs=8)(delayed(GaussianProcess)(2012,path_normal+'2012result_day'+str(i)+'.npz') for i in range(9,33)) 337 | # # Ridge(predict_year,path) 338 | 339 | # for predict_year in range(2012,2016): 340 | # save_path = '/atlas/u/jiaxuan/data/train_results/final/yearly/archive/' 341 | # path = save_path + str(predict_year)+'result_prediction.npz' 342 | # GaussianProcess(predict_year,path) 343 | 344 | 345 | 346 | 347 | -------------------------------------------------------------------------------- /6 result_analysis/GP_crop_v3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | # import pdb 4 | from scipy.spatial.distance import pdist, squareform 5 | from sklearn import linear_model 6 | 7 | from joblib import Parallel, delayed 8 | import multiprocessing 9 | import logging 10 | 11 | def Ridge(year,path): 12 | year_current = year 13 | npzfile = np.load(path) 14 | 15 | # load crop area 16 | area = np.genfromtxt('acres_harvested.csv', delimiter=',') 17 | 18 | # read 19 | pred_out=npzfile['pred_out'] 20 | real_out=npzfile['real_out'] 21 | feature_out=npzfile['feature_out'] 22 | year_out=npzfile['year_out'] 23 | locations_out=npzfile['locations_out'] 24 | index_out=npzfile['index_out'] 25 | W = npzfile['weight_out'] 26 | b = npzfile['b_out'] 27 | W = np.concatenate((W,b)) 28 | 29 | print pred_out.shape,real_out.shape 30 | print year_out.shape,locations_out.shape 31 | # plt.plot(year_out,pred_out-real_out,'.') 32 | # plt.show() 33 | 34 | '''2 divide dataset''' 35 | 36 | # get train, validate, test index 37 | c1 = year_out==year_current 38 | # c2 = (index_out[:,0]==5)+(index_out[:,0]==17)+(index_out[:,0]==18)+(index_out[:,0]==19)+(index_out[:,0]==20)+(index_out[:,0]==27)+(index_out[:,0]==29)+(index_out[:,0]==31)+(index_out[:,0]==38)+(index_out[:,0]==39)+(index_out[:,0]==46) 39 | ind_test = np.where(c1)[0] 40 | print 'shape of test set',ind_test.shape 41 | 42 | c3 = year_outyear_current-6 160 | # c5 = year_out!=2012 161 | # c3 = year_out==year_current-1 162 | ind_train = np.where(c3*c4)[0] 163 | print 'shape of train set',ind_train.shape 164 | index_reg = np.where(year_out!=2016)[0] 165 | 166 | '''4 normalize all features''' 167 | bias = np.ones([feature_out.shape[0],1]) 168 | feature_out = np.concatenate((feature_out,bias),axis=1) 169 | 170 | locations_mean = np.mean(locations_out, axis=0,keepdims=True) 171 | locations_std = np.mean(locations_out,axis=0,keepdims=True) 172 | locations_scale = np.amax(locations_out,axis=0)-np.amin(locations_out,axis=0) 173 | locations_out -= locations_mean 174 | locations_out /= locations_scale 175 | 176 | year_out = year_out[:,np.newaxis] 177 | year_mean = np.mean(year_out, axis=0,keepdims=True) 178 | year_std = np.mean(year_out,axis=0,keepdims=True) 179 | year_scale = np.amax(year_out,axis=0)-np.amin(year_out,axis=0) 180 | year_out -= year_mean 181 | year_out /= year_scale 182 | 183 | real_out = real_out[:,np.newaxis] 184 | # print 'year_out',np.amin(year_out),np.amax(year_out) 185 | 186 | 187 | 188 | # print index_reg.shape 189 | # ''' 190 | # remove yearly effect (optional) 191 | # ''' 192 | # print 
'remove yearly effect' 193 | # lr = linear_model.Ridge(alpha=0.1,fit_intercept=True) 194 | # lr.fit(year_out[index_reg,:],real_out[index_reg,:]) 195 | # year_weight = lr.coef_[0] 196 | # print lr.coef_.shape 197 | # print lr.coef_,lr.intercept_ 198 | 199 | # plt.plot(year_out[index_reg,:],real_out[index_reg,:]) 200 | # plt.show() 201 | # real_out = real_out-year_out*year_weight 202 | 203 | 204 | # split dataset 205 | feat_train = feature_out[ind_train,] 206 | feat_test = feature_out[ind_test,] 207 | Y_train = real_out[ind_train,] 208 | Y_test = real_out[ind_test,] 209 | loc_train = locations_out[ind_train,] 210 | loc_test = locations_out[ind_test,] 211 | year_train = year_out[ind_train,] 212 | year_test = year_out[ind_test,] 213 | 214 | 215 | 216 | '''baseline''' 217 | # print "The std deviation of test yield is", np.std(real_out[ind_test,]) 218 | # print "Average yield is", np.mean(real_out[ind_test]) 219 | 220 | # '''Ridge regression''' 221 | # for alpha in np.linspace(-4, 2,num=5): 222 | # '''Ridge regression''' 223 | # print np.power(10,alpha) 224 | # lr = linear_model.Ridge(alpha =np.power(10,alpha),fit_intercept=False) 225 | # lr.fit(np.concatenate((feat_train,year_train),axis=1),Y_train) 226 | # Y_pred_reg = lr.predict(np.concatenate((feat_test,year_test),axis=1)) 227 | 228 | # # lr = linear_model.Ridge(alpha =np.power(10,alpha)) 229 | # # lr.fit(feat_train,Y_train) 230 | # # Y_pred_reg = lr.predict(feat_test) 231 | 232 | # # print lr.coef_.shape 233 | # # print lr.coef_ 234 | # # print lr.intercept_ 235 | 236 | # print "The RMSE of ridge regression is", np.sqrt(np.mean((Y_pred_reg-real_out[ind_test,])**2)) 237 | # print "Mean Error of ridge regression is",np.mean(Y_pred_reg-real_out[ind_test,]) 238 | 239 | 240 | '''CNN baseline''' 241 | print "The RMSE of CNN model is", np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test])**2)) 242 | '''CNN weight regression''' 243 | # print "The RMSE of regression, using CNN weight", np.sqrt(np.mean((real_out[ind_test,]-(np.dot(feat_test,W)))**2)) 244 | print "Mean Error of CNN is",np.mean(pred_out[ind_test]-real_out[ind_test,0]) 245 | # print "Average prediction of CNN is", np.mean(pred_out[ind_test]) 246 | 247 | 248 | 249 | ''' 250 | Gaussian Prcoess Model 3, 251 | Linear GP as on page 28 of GP for machine learning 252 | kernel: spatial*time 253 | 254 | ''' 255 | 256 | sigma=1 257 | l_s = 0.5 258 | l_t = 1.5 259 | noise = 0.1 260 | const = 0.01 261 | 262 | X_train = feat_train 263 | X_test = feat_test 264 | n1 = X_train.shape[0] 265 | n2 = X_test.shape[0] 266 | X = np.concatenate((X_train,X_test),axis=0) 267 | LOC = np.concatenate((loc_train,loc_test),axis=0) 268 | YEAR = np.concatenate((year_train,year_test),axis=0) 269 | pairwise_dists_loc = squareform(pdist(LOC, 'euclidean'))**2/l_s**2 270 | pairwise_dists_year = squareform(pdist(YEAR, 'euclidean'))**2/l_t**2 271 | 272 | n=np.zeros([n1+n2,n1+n2]) 273 | n[0:n1,0:n1] += noise*np.identity(n1) 274 | kernel_mat_3 = sigma*(np.exp(-pairwise_dists_loc)*np.exp(-pairwise_dists_year))+n 275 | b = W 276 | B = np.identity(X_train.shape[1]) 277 | 278 | print l_s,l_t,noise,const 279 | B /= const # B is diag, inverse is simplified 280 | K_inv = np.linalg.inv(kernel_mat_3[0:n1,0:n1]) 281 | beta = np.linalg.inv(B+X_train.T.dot(K_inv).dot(X_train)).dot( 282 | X_train.T.dot(K_inv).dot(Y_train.reshape([n1,1]))+B.dot(b)) 283 | Y_pred_3 = X_test.dot(beta) + kernel_mat_3[n1:(n1+n2),0:n1].dot(K_inv\ 284 | ).dot(Y_train.reshape([n1,1])-X_train.dot(beta)) 285 | 286 | 
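# The block above computes the posterior mean of a GP with an explicit linear mean function
# ("GP for machine learning", i.e. Rasmussen & Williams, Sec. 2.7):
#   beta_hat = (B_inv + X^T K^{-1} X)^{-1} (X^T K^{-1} y + B_inv b)
#   y_star   = X_star beta_hat + K(X_star, X) K^{-1} (y - X beta_hat)
# where K is the spatial*temporal RBF kernel with observation noise added on the training
# block, and the matrix named B above (after `B /= const`) plays the role of B_inv.
# Below is a sketch of the same computation written with np.linalg.solve instead of explicit
# inverses (numerically safer, same result up to round-off); the function and argument names
# are illustrative, not part of this repository.
import numpy as np
def gp_linear_mean_predict(K_train, K_cross, X_tr, X_te, y_tr, B_inv, b):
    # K_train: (n1,n1) training kernel incl. noise; K_cross: (n2,n1) test/train kernel
    Kinv_X = np.linalg.solve(K_train, X_tr)                    # K^{-1} X
    Kinv_y = np.linalg.solve(K_train, y_tr)                    # K^{-1} y
    A = B_inv + X_tr.T.dot(Kinv_X)
    beta_hat = np.linalg.solve(A, X_tr.T.dot(Kinv_y) + B_inv.dot(b))
    resid = y_tr - X_tr.dot(beta_hat)                          # residuals after the linear mean
    return X_te.dot(beta_hat) + K_cross.dot(np.linalg.solve(K_train, resid))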
RMSE_GP=np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape))**2)) 287 | ME_GP=np.mean(Y_pred_3[:,0]-real_out[ind_test,0]) 288 | Average_GP=np.mean(Y_pred_3[:,0]) 289 | print "The RMSE of GP model is", RMSE_GP 290 | print "Mean Error of GP model is",ME_GP 291 | # print "Average prediction of GP is",Average_GP 292 | 293 | '''If there is no bias''' 294 | print "if there is no bias, the RMSE is" 295 | print "CNN",np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test]+np.mean(pred_out[ind_test]-real_out[ind_test,0]))**2)) 296 | print "GP",np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape)-np.mean(Y_pred_3[:,0]-real_out[ind_test,0]))**2)) 297 | 298 | return (RMSE_GP,ME_GP) 299 | 300 | # '''3 weighted sum''' 301 | # area_total = 0 302 | # yield_real_total = 0 303 | # yield_pred_total_CNN = 0 304 | # yield_pred_total_GP = 0 305 | # for count,i in enumerate(ind_test): 306 | # # get area 307 | # year = year_current 308 | # loc1 = index_out[i,0] 309 | # loc2 = index_out[i,1] 310 | # key = np.array([year,loc1,loc2]) 311 | # index = np.where(np.all(area[:,0:3] == key, axis=1)) 312 | # # print i,key,index 313 | # area_current = area[index,3] 314 | 315 | # area_total+=area_current 316 | # yield_real_total+=area_current*real_out[i,] 317 | # yield_pred_total_CNN+=area_current*pred_out[i] 318 | # yield_pred_total_GP+=area_current*Y_pred_3[count,] 319 | 320 | # real_average = yield_real_total/area_total 321 | # pred_average_CNN = yield_pred_total_CNN/area_total 322 | # pred_average_GP = yield_pred_total_GP/area_total 323 | # print 'real_average',real_average 324 | # print 'pred_average_CNN',pred_average_CNN 325 | # print 'pred_average_GP',pred_average_GP 326 | 327 | 328 | 329 | 330 | # if __name__ == "__main__": 331 | # # predict_year = 2013 332 | 333 | # # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 334 | # # path = save_path + str(predict_year)+'result_prediction.npz' 335 | 336 | # # RMSE = Parallel(n_jobs=8)(delayed(GaussianProcess)(2012,path_normal+'2012result_day'+str(i)+'.npz') for i in range(9,33)) 337 | # # Ridge(predict_year,path) 338 | 339 | # for predict_year in range(2012,2016): 340 | # save_path = '/atlas/u/jiaxuan/data/train_results/final/yearly/archive/' 341 | # path = save_path + str(predict_year)+'result_prediction.npz' 342 | # GaussianProcess(predict_year,path) 343 | 344 | 345 | 346 | 347 | -------------------------------------------------------------------------------- /5 model_semi_supervised/train_for_semi.py: -------------------------------------------------------------------------------- 1 | from nnet_semi import * 2 | from GP_crop_v3 import * 3 | import logging 4 | import time 5 | 6 | predict_year = 2015 7 | 8 | def load_data(filename,config): 9 | content = np.load(config.load_path + filename) 10 | image_all = content['output_image'] 11 | yield_all = content['output_yield'] 12 | year_all = content['output_year'] 13 | locations_all = content['output_locations'] 14 | index_all = content['output_index'] 15 | 16 | # delete broken image 17 | list_delete=[] 18 | for i in range(image_all.shape[0]): 19 | if np.sum(image_all[i,:,:,:])<=287: 20 | if year_all[i]<2016: 21 | list_delete.append(i) 22 | image_all=np.delete(image_all,list_delete,0) 23 | yield_all=np.delete(yield_all,list_delete,0) 24 | year_all = np.delete(year_all,list_delete, 0) 25 | locations_all = np.delete(locations_all, list_delete, 0) 26 | index_all = np.delete(index_all, list_delete, 0) 27 | 28 | 29 | # keep major counties 30 | list_keep=[] 31 | for i 
in range(image_all.shape[0]): 32 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 33 | list_keep.append(i) 34 | image_all=image_all[list_keep,:,:,:] 35 | yield_all=yield_all[list_keep] 36 | year_all = year_all[list_keep] 37 | locations_all = locations_all[list_keep,:] 38 | index_all = index_all[list_keep,:] 39 | 40 | # split into train and validate 41 | index_train = np.nonzero(year_all < predict_year)[0] 42 | index_validate = np.nonzero(year_all == predict_year)[0] 43 | index_train_validate = np.nonzero(year_all <= predict_year)[0] 44 | print 'train size',index_train.shape[0] 45 | print 'validate size',index_validate.shape[0] 46 | 47 | # calc train image mean (for each band), and then detract (broadcast) 48 | image_mean=np.mean(image_all[index_train],(0,1,2)) 49 | image_all = image_all - image_mean 50 | 51 | return image_all,yield_all,year_all,locations_all,index_all,index_train,index_validate,index_train_validate 52 | 53 | 54 | if __name__ == "__main__": 55 | logging.basicConfig(filename='/logging_semi/'+str(predict_year)+'.log',level=logging.DEBUG) 56 | # Create a coordinator 57 | config = Config() 58 | 59 | filename = 'histogram_all' + '.npz' 60 | # filename = 'histogram_all_soilweather' + '.npz' 61 | time1 = time.time() 62 | image_all,yield_all,year_all,locations_all,index_all,index_train,index_validate,_ = load_data(filename, config) 63 | print("load time: %ss" % (time.time() - time1)) 64 | image_validate=image_all[index_validate] 65 | yield_validate=yield_all[index_validate] 66 | 67 | model= NeuralModel(config,'net') 68 | 69 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.48) 70 | # Launch the graph. 71 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 72 | sess.run(tf.initialize_all_variables()) 73 | 74 | # summary_train_loss = [] 75 | # summary_eval_loss = [] 76 | # summary_RMSE = [] 77 | # summary_ME = [] 78 | 79 | train_loss=0 80 | val_loss=0 81 | val_prediction = 0 82 | val_deviation = np.zeros([config.B]) 83 | # # ######################### 84 | # # block when test 85 | # # add saver 86 | # saver=tf.train.Saver() 87 | # # Restore variables from disk. 
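# A minimal, self-contained sketch of the tf.train.Saver save/restore pattern that the
# commented-out block below relies on, assuming the same pre-1.0 TensorFlow API used in
# this file (tf.initialize_all_variables); the variable and checkpoint path are hypothetical.
import tensorflow as tf
w = tf.Variable(tf.zeros([3]), name='w')           # stand-in for the model weights
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    saver.save(sess, '/tmp/CNN_model.ckpt')        # write all variables to a checkpoint
    saver.restore(sess, '/tmp/CNN_model.ckpt')     # reload them in a later run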
88 | # try: 89 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 90 | # # Restore log results 91 | # # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 92 | # # summary_train_loss = npzfile['summary_train_loss'].tolist() 93 | # # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 94 | # # summary_RMSE = npzfile['summary_RMSE'].tolist() 95 | # # summary_ME = npzfile['summary_ME'].tolist() 96 | # print("Model restored.") 97 | # except: 98 | # print 'No history model found' 99 | # # ######################### 100 | 101 | RMSE_min = 100 102 | chkpoint_loop = 500 103 | try: 104 | for i in range(config.train_step): 105 | 106 | # load extra unlabel data 107 | if i%chkpoint_loop ==0: 108 | chkpoint = i/chkpoint_loop + 1 109 | # load unsupervised data 110 | filename = 'histogram_semi_rand_200_20000'+str(chkpoint)+'.npz' 111 | time1 = time.time() 112 | image_all_ulab,_,_,_,_,_,index_validate_ulab,index_ulab = load_data(filename, config) 113 | print("load time: %ss" % (time.time() - time1)) 114 | 115 | # No augmentation 116 | index_train_batch = np.random.choice(index_train,size=config.B) 117 | image_train_batch = image_all[index_train_batch,:,0:config.H,:] 118 | index_train_batch_ulab = np.random.choice(index_ulab,size=config.B) 119 | image_train_batch_ulab = image_all_ulab[index_train_batch_ulab,:,0:config.H,:] 120 | image_train_batch = np.concatenate((image_train_batch, image_train_batch_ulab),axis=0) 121 | # image_train_batch = np.concatenate((image_train_batch, image_train_batch),axis=0) 122 | 123 | yield_train_batch = yield_all[index_train_batch,np.newaxis] 124 | 125 | # # try data augmentation while training 126 | # index_train_batch_1 = np.random.choice(index_train,size=config.B) 127 | # index_train_batch_2 = np.random.choice(index_train,size=config.B) 128 | # image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 129 | # yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 130 | # # year_train_batch = (year_all[index_train_batch_1,np.newaxis]+year_all[index_train_batch_2,np.newaxis])/2 131 | 132 | index_validate_batch = np.random.choice(index_validate, size=config.B) 133 | image_validate_batch = image_all[index_validate_batch,:,0:config.H,:] 134 | index_validate_batch_ulab = np.random.choice(index_validate_ulab,size=config.B) 135 | image_validate_batch_ulab = image_all_ulab[index_validate_batch_ulab,:,0:config.H,:] 136 | image_validate_batch = np.concatenate((image_validate_batch, image_validate_batch_ulab),axis=0) 137 | # image_validate_batch = np.concatenate((image_validate_batch, image_validate_batch),axis=0) 138 | 139 | yield_validate_batch = yield_all[index_validate_batch,np.newaxis] 140 | 141 | _,t_L,t_C,t_U,t_R,t_loss,t_pred,t_real,t_err = sess.run( 142 | [model.train_op,model.L,model.C,model.U,model.R,model.loss,model.pred,model.real,model.pred_err], feed_dict={ 143 | model.x:image_train_batch, 144 | model.y_lab:yield_train_batch, 145 | model.lr:config.lr, 146 | model.keep_prob:config.keep_prob 147 | }) 148 | 149 | if i%10 == 0: 150 | v_L,v_C,v_U,v_R,v_loss,v_pred,v_real,v_err = sess.run( 151 | [model.L,model.C,model.U,model.R,model.loss,model.pred,model.real,model.pred_err], feed_dict={ 152 | model.x: image_validate_batch, 153 | model.y_lab: yield_validate_batch, 154 | model.keep_prob:1 155 | }) 156 | 157 | print 'predict year'+str(predict_year)+'step'+str(i),config.lr 158 | print 
t_L,t_C,t_U,t_R,t_loss,np.mean(t_pred),np.mean(t_real),np.mean(t_pred-t_real),t_err 159 | print v_L,v_C,v_U,v_R,v_loss,np.mean(v_pred),np.mean(v_real),np.mean(v_pred-v_real),v_err 160 | logging.info('predict year %d step %d lr %f',predict_year,i,config.lr) 161 | logging.info('%f %f %f %f %f %f %f %f %f',t_L,t_C,t_U,t_R,t_loss,np.mean(t_pred),np.mean(t_real),np.mean(t_pred-t_real),t_err) 162 | logging.info('%f %f %f %f %f %f %f %f %f',v_L,v_C,v_U,v_R,v_loss,np.mean(v_pred),np.mean(v_real),np.mean(v_pred-v_real),v_err) 163 | if i%10 == 0: 164 | # do validation 165 | pred = [] 166 | real = [] 167 | for j in range(image_validate.shape[0] / config.B): 168 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 169 | image_batch = image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:] 170 | image_batch = np.concatenate((image_batch, image_batch),axis=0) 171 | # image_batch = np.concatenate((image_batch, image_batch),axis=0) 172 | yield_batch = yield_validate[j * config.B:(j + 1) * config.B,np.newaxis] 173 | pred_temp= sess.run(model.y_lab_pred, feed_dict={ 174 | model.x: image_batch, 175 | model.y_lab: yield_batch, 176 | model.keep_prob: 1 177 | }) 178 | pred.append(np.squeeze(pred_temp)) 179 | real.append(np.squeeze(real_temp)) 180 | pred=np.concatenate(pred,axis=0) 181 | real=np.concatenate(real,axis=0) 182 | RMSE=np.sqrt(np.mean((pred-real)**2)) 183 | ME=np.mean(pred-real) 184 | 185 | if RMSE