├── 6 result_analysis
│   ├── corr.npy
│   ├── paper_result.npy
│   ├── permute_band.npy
│   ├── permute_time.npy
│   ├── Compare_result.npz
│   ├── variogram_data.mat
│   ├── Compare_result_final.npz
│   ├── Compare_result_ridge.npz
│   ├── variogram.py
│   ├── monthly_read.py
│   ├── colorbar.py
│   ├── corr.py
│   ├── permute.py
│   ├── yield_map.py
│   ├── yield_map_function.py
│   └── GP_crop_v3.py
├── .idea
│   └── .gitignore
├── README.md
├── 1 download data
│   ├── pull_MODIS_world_hist.py
│   ├── pull_MODIS.py
│   ├── pull_MODIS_landcover.py
│   ├── pull_MODIS_entire_county.py
│   ├── pull_MODIS_landcover_entire_county.py
│   ├── pull_MODIS_temperature_entire_county.py
│   ├── pull_MODIS_landcover_entire_county_clip.py
│   ├── pull_MODIS_temperature_entire_county_clip.py
│   ├── pull_MODIS_entire_county_clip.py
│   └── pull_MODIS_world.py
├── 3 model
│   ├── nnet_lstm.py
│   ├── nnet_for_hist_dropout_stride.py
│   ├── train_for_hist_alldata.py
│   ├── train_for_hist_alldata_lstm.py
│   └── GP_crop_v3.py
├── 4 model_batch
│   ├── nnet_lstm.py
│   ├── nnet_for_hist_dropout_stride.py
│   ├── train_for_hist_alldata_loop_permute.py
│   ├── train_for_hist_alldata_loop_lstm.py
│   ├── train_for_hist_alldata_loop_result.py
│   ├── train_for_hist_alldata_loop.py
│   └── train_for_hist_alldata_loop_corn.py
├── 2 clean data
│   └── final_clean_data.py
└── 5 model_semi_supervised
    └── train_for_semi.py

/6 result_analysis/corr.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/corr.npy
--------------------------------------------------------------------------------
/6 result_analysis/paper_result.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/paper_result.npy
--------------------------------------------------------------------------------
/6 result_analysis/permute_band.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/permute_band.npy
--------------------------------------------------------------------------------
/6 result_analysis/permute_time.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/permute_time.npy
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result.npz
--------------------------------------------------------------------------------
/6 result_analysis/variogram_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/variogram_data.mat
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result_final.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result_final.npz
--------------------------------------------------------------------------------
/6 result_analysis/Compare_result_ridge.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JiaxuanYou/crop_yield_prediction/HEAD/6 result_analysis/Compare_result_ridge.npz -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /6 result_analysis/variogram.py: -------------------------------------------------------------------------------- 1 | import scipy.io as io 2 | import numpy as np 3 | 4 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 5 | path_current = save_path+str(0)+str(30)+str(2013)+'result_prediction.npz' 6 | data = np.load(path_current) 7 | year = data['year_out'] 8 | real = data['real_out'] 9 | pred = data['pred_out'] 10 | locations = data['locations_out'] 11 | 12 | err = pred-real 13 | print err.shape,year.shape,locations.shape 14 | 15 | result = np.concatenate((year[:,np.newaxis], locations, err[:,np.newaxis]),axis=1) 16 | 17 | io.savemat('variogram_data.mat', {'result':result}) 18 | print 'saved' -------------------------------------------------------------------------------- /6 result_analysis/monthly_read.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 5 | 6 | 7 | for loop in range(0,1): 8 | RMSE_all = np.zeros([6]) 9 | ME_all = np.zeros([6]) 10 | for predict_year in range(2009,2016): 11 | RMSE = np.zeros([6]) 12 | ME = np.zeros([6]) 13 | for i,time in enumerate(range(10,31,4)): 14 | data = np.load(save_path+str(loop)+str(time)+str(predict_year)+'result_prediction.npz') 15 | year_all = data['year_out'] 16 | real = data['real_out'] 17 | pred = data['pred_out'] 18 | 19 | validate = np.nonzero(year_all == predict_year)[0] 20 | train = np.nonzero(year_all < predict_year)[0] 21 | 22 | rmse=np.sqrt(np.mean((real[validate]-pred[validate])**2)) 23 | me = np.mean(pred[validate]-real[validate]) 24 | RMSE[i]=rmse 25 | ME[i]=me 26 | RMSE_all+=RMSE 27 | ME_all+=np.absolute(ME) 28 | RMSE_all/=7 29 | ME_all/=7 30 | 31 | 32 | 33 | 34 | plt.plot(range(6),RMSE_all) 35 | plt.title(str(predict_year)) 36 | plt.show() 37 | plt.plot(range(6),ME_all) 38 | plt.title(str(predict_year)) 39 | plt.show() 40 | -------------------------------------------------------------------------------- /6 result_analysis/colorbar.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot 2 | import matplotlib as mpl 3 | 4 | # Make a figure and axes with dimensions as desired. 5 | fig = pyplot.figure() 6 | # vertical 7 | # ax2 = fig.add_axes([0.1, 0.1, 0.02, 0.8]) 8 | # horizontal 9 | ax2 = fig.add_axes([0.1, 0.1, 0.8, 0.04]) 10 | 11 | 12 | 13 | # The second example illustrates the use of a ListedColormap, a 14 | # BoundaryNorm, and extended ends to show the "over" and "under" 15 | # value colors. 16 | cmap = mpl.colors.ListedColormap(['#4575b4','#74add1','#abd9e9','#e0f3f8','#ffffbf','#fee090','#fdae61','#f46d43','#d73027']) 17 | cmap.set_over('#a50026') 18 | cmap.set_under('#313695') 19 | 20 | # If a ListedColormap is used, the length of the bounds array must be 21 | # one greater than the length of the color list. 
The bounds must be 22 | # monotonically increasing. 23 | 24 | # # soybean 25 | # bounds = [15,20,25,30,35,40,45,50,55,60] 26 | # corn 27 | bounds = [20,40,60,80,100,120,140,160,180,200] 28 | norm = mpl.colors.BoundaryNorm(bounds, cmap.N) 29 | cb2 = mpl.colorbar.ColorbarBase(ax2, cmap=cmap, 30 | norm=norm, 31 | # to use 'extend', you must 32 | # specify two extra boundaries: 33 | boundaries=[0] + bounds + [220], 34 | extend='both', 35 | ticks=bounds, # optional 36 | spacing='proportional', 37 | orientation='horizontal') 38 | 39 | 40 | 41 | pyplot.show()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Crop Yield Prediction with Deep Learning
2 | The code for our paper, [Deep Gaussian Process for Crop Yield Prediction Based on Remote Sensing Data](http://jiaxuanyou.me/files/Jiaxuan_AAAI17.pdf), AAAI 2017 (Best Student Paper Award in the Computational Sustainability Track). We are also glad to have won "Best Big Data Solution" in the [World Bank Big Data Innovation Challenge](http://bigdatainnovationchallenge.org/).
3 | 
4 | Here is a brief introduction to the purpose of each folder.
5 | 
6 | - **"/1 download data"** How we download data from Google Earth Engine to Google Drive. Users then need to export the data from Google Drive to their local storage, e.g., their clusters. The trick is that we first concatenate all images across all available years (say 2003 to 2015) and then download the resulting huge image at once, which can be hundreds of times faster than downloading year by year.
7 | - **"/2 clean data"** How the raw data is preprocessed, including slicing the huge images to obtain individual images, 3-D histogram calculation, etc. (a minimal sketch of the histogram step is given at the end of this README).
8 | - **"/3 model"** The CNN/LSTM model structures, written in TensorFlow (v0.9), and the Gaussian Process model, written in Python.
9 | - **"/4 model_batch"** Since we train a separate model for each year and each month, batch scripts are used to run the training.
10 | - **"/5 model_semi_supervised"** A recent addition that extends the model with a semi-supervised deep generative model; however, it does not work well yet. We are happy to discuss the model if you can make it work.
11 | - **"/6 result_analysis"** Scripts for plotting results, yield maps, etc.
12 | 
13 | For more information, please contact Jiaxuan You.
14 | 
15 | youjiaxuan@gmail.com
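
Appendix: a minimal sketch of the 3-D histogram step mentioned under "/2 clean data". This is illustrative only: the function name `image_to_histograms`, the array layout, and the per-band normalization are assumptions rather than the repository's exact API; the 32-bin binning over roughly 1–4999 mirrors the `fixedHistogram(1,4999,32)` call in "/1 download data/pull_MODIS_world_hist.py", and the 0 < value < 5000 pixel filter mirrors "/6 result_analysis/corr.py".

```python
import numpy as np

def image_to_histograms(img, n_bands=9, n_bins=32, val_min=1, val_max=4999):
    """Collapse one county's image stack into per-time-step, per-band histograms.

    img: array of shape (height, width, T * n_bands), i.e. the concatenated
         MODIS composites for one county, with bands interleaved per time step.
    Returns an array of shape (n_bins, T, n_bands) -- the "3-D histogram".
    """
    pixels = img.reshape(-1, img.shape[-1])                  # (n_pixels, T * n_bands)
    pixels = pixels.reshape(pixels.shape[0], -1, n_bands)    # (n_pixels, T, n_bands)
    n_times = pixels.shape[1]

    edges = np.linspace(val_min, val_max, n_bins + 1)
    hist = np.zeros((n_bins, n_times, n_bands))
    for t in range(n_times):
        for b in range(n_bands):
            vals = pixels[:, t, b]
            vals = vals[(vals > 0) & (vals < 5000)]          # drop masked / out-of-range pixels
            counts, _ = np.histogram(vals, bins=edges)
            if counts.sum() > 0:
                hist[:, t, b] = counts / float(counts.sum()) # normalize each band/time slice
    return hist
```

A stack of 32 such time steps then matches the `[batch, 32, 32, 9]` input shape declared in `Config` in `nnet_for_hist_dropout_stride.py` and `nnet_lstm.py`.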
-------------------------------------------------------------------------------- /1 download data/pull_MODIS_world_hist.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | # locations = pd.read_csv('locations_remedy.csv') 13 | locations = pd.read_csv('world_locations.csv',header=None) 14 | 15 | def appendBand(current, previous): 16 | # Rename the band 17 | previous=ee.Image(previous) 18 | current = current.select([0,1,2,3,4,5,6]) 19 | # Append it to the result (Note: only return current item on first element/iteration) 20 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 21 | # Return the accumulation 22 | return accum 23 | 24 | # county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 25 | world_region = ee.FeatureCollection('ft:1tdSwUL7MVpOauSgRzqVTOwdfy17KDbw-1d9omPw') 26 | 27 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 28 | .filterDate('2001-12-31','2015-12-31') 29 | img=imgcoll.iterate(appendBand) 30 | img=ee.Image(img) 31 | 32 | for country,index in locations.values: 33 | scale = 500 34 | crs='EPSG:4326' 35 | 36 | # filter for a county 37 | region = world_region.filterMetadata('Country', 'equals', country) 38 | if region==None: 39 | print country,index,'not found' 40 | continue 41 | region = region.first() 42 | # region = region.geometry().coordinates().getInfo()[0] 43 | 44 | img_temp = img.clip(region) 45 | hist = ee.Feature(None, {'mean': img_temp.reduceRegion(ee.Reducer.fixedHistogram(1,4999,32), region, scale, crs,None,False,1e12,16)}) 46 | 47 | hist_info = hist.getInfo()['features'] 48 | print hist_info 49 | -------------------------------------------------------------------------------- /6 result_analysis/corr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import matplotlib.pyplot as plt 4 | 5 | def preprocess_save_data(file): 6 | path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 7 | if file.endswith(".npy"): 8 | path_current=os.path.join(path, file) 9 | image_temp = np.load(path_current) 10 | 11 | image_temp=np.reshape(image_temp,(image_temp.shape[0]*image_temp.shape[1],image_temp.shape[2])) 12 | image_temp=np.reshape(image_temp,(-1,46,9)) 13 | image_temp=np.reshape(image_temp,(-1,9)) 14 | 15 | f_0=image_temp>0 16 | f_5000=image_temp<5000 17 | f=f_0*f_5000 18 | f=np.squeeze(np.prod(f,1).nonzero()) 19 | 20 | # print image_temp.shape 21 | image_temp=image_temp[f,:] 22 | print image_temp.shape 23 | 24 | corr = np.corrcoef(np.transpose(image_temp)) 25 | 26 | # print np.absolute(corr) 27 | # plt.imshow(np.absolute(corr),cmap='Greys_r',interpolation='none') 28 | # plt.show() 29 | 30 | return np.absolute(corr) 31 | 32 | if __name__ == "__main__": 33 | # # save data 34 | corr = np.zeros([9,9]) 35 | path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 36 | count=0 37 | try: 38 | for _, _, files in os.walk(path): 39 | for file in files: 40 | try: 41 | corr += preprocess_save_data(file) 42 | count+=1 43 | except: 44 | continue 45 | except: 46 | print 'break' 47 | np.save('corr.npy', corr) 48 | corr = np.load('corr.npy') 49 | fig, ax = plt.subplots() 50 | img = plt.imshow(corr/count,cmap='Greys_r',interpolation='none',vmin=0,vmax=1) 51 | cbar = fig.colorbar(img, ticks=[0,0.5,1]) 52 | 
cbar.ax.set_yticklabels(['0','0.5','1']) 53 | plt.show() 54 | -------------------------------------------------------------------------------- /1 download data/pull_MODIS.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_final.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,1,2,3,4,5,6]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 46 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23)) 47 | img=imgcoll.iterate(appendBand) 48 | 49 | for loc1, loc2, lat, lon in locations.values: 50 | fname = '{}_{}'.format(int(loc1), int(loc2)) 51 | 52 | offset = 0.11 53 | scale = 500 54 | crs='EPSG:4326' 55 | 56 | region = str([ 57 | [lat - offset, lon + offset], 58 | [lat + offset, lon + offset], 59 | [lat + offset, lon - offset], 60 | [lat - offset, lon - offset]]) 61 | 62 | while True: 63 | try: 64 | export_oneimage(img,'Data',fname,region,scale,crs) 65 | except: 66 | print 'retry' 67 | time.sleep(10) 68 | continue 69 | break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_final.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 46 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23)) 47 | img=imgcoll.iterate(appendBand) 48 | 49 | for loc1, loc2, lat, lon in locations.values: 50 | fname = '{}_{}'.format(int(loc1), int(loc2)) 51 | 52 | offset = 0.11 53 | scale = 500 54 | crs='EPSG:4326' 55 | 56 | region = str([ 57 | [lat - offset, lon + offset], 58 | [lat + offset, lon + offset], 59 | [lat + offset, lon - offset], 60 | [lat - offset, lon - offset]]) 61 | 62 | while True: 63 | try: 64 | export_oneimage(img,'Data_mask',fname,region,scale,crs) 65 | except: 66 | print 'retry' 67 | time.sleep(10) 68 | continue 69 | break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_remedy.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,1,2,3,4,5,6]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2001-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | img_0=ee.Image(ee.Number(0)) 54 | img_5000=ee.Image(ee.Number(5000)) 55 | 56 | img=img.min(img_5000) 57 | img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_major.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2001-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | # img_0=ee.Image(ee.Number(0)) 54 | # img_5000=ee.Image(ee.Number(5000)) 55 | # 56 | # img=img.min(img_5000) 57 | # img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county_mask', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_temperature_entire_county.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | locations = pd.read_csv('locations_major.csv') 31 | 32 | 33 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 34 | # Author: Jamie Vleeshouwer 35 | 36 | def appendBand(current, previous): 37 | # Rename the band 38 | previous=ee.Image(previous) 39 | current = current.select([0,4]) 40 | # Append it to the result (Note: only return current item on first element/iteration) 41 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 42 | # Return the accumulation 43 | return accum 44 | 45 | county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 46 | 47 | imgcoll = ee.ImageCollection('MODIS/MYD11A2') \ 48 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 49 | .filterDate('2002-12-31','2015-12-31') 50 | img=imgcoll.iterate(appendBand) 51 | img=ee.Image(img) 52 | 53 | # img_0=ee.Image(ee.Number(0)) 54 | # img_5000=ee.Image(ee.Number(5000)) 55 | # 56 | # img=img.min(img_5000) 57 | # img=img.max(img_0) 58 | 59 | # img=ee.Image(ee.Number(100)) 60 | # img=ee.ImageCollection('LC8_L1T').mosaic() 61 | 62 | for loc1, loc2, lat, lon in locations.values: 63 | fname = '{}_{}'.format(int(loc1), int(loc2)) 64 | 65 | offset = 0.11 66 | scale = 500 67 | crs='EPSG:4326' 68 | 69 | # filter for a county 70 | region = county_region.filterMetadata('STATE num', 'equals', loc1) 71 | region = ee.FeatureCollection(region).filterMetadata('COUNTY num', 'equals', loc2) 72 | region = region.first() 73 | region = region.geometry().coordinates().getInfo()[0] 74 | 75 | # region = str([ 76 | # [lat - offset, lon + offset], 77 | # [lat + offset, lon + offset], 78 | # [lat + offset, lon - offset], 79 | # [lat - offset, lon - offset]]) 80 | while True: 81 | try: 82 | export_oneimage(img, 'Data_county_temperature', fname, region, scale, crs) 83 | except: 84 | print 'retry' 85 | time.sleep(10) 86 | continue 87 | break 88 | # while True: 89 | # try: 90 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 91 | # except: 92 | # print 'retry' 93 | # time.sleep(10) 94 | # continue 95 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_landcover_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv') 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/051/MCD12Q1') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | # img_0=ee.Image(ee.Number(0)) 53 | # img_5000=ee.Image(ee.Number(5000)) 54 | # 55 | # img=img.min(img_5000) 56 | # img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | 73 | # region = str([ 74 | # [lat - offset, lon + offset], 75 | # [lat + offset, lon + offset], 76 | # [lat + offset, lon - offset], 77 | # [lat - offset, lon - offset]]) 78 | while True: 79 | try: 80 | export_oneimage(img.clip(region), 'data_mask', fname, scale, crs) 81 | except: 82 | print 'retry' 83 | time.sleep(10) 84 | continue 85 | break 86 | # while True: 87 | # try: 88 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 89 | # except: 90 | # print 'retry' 91 | # time.sleep(10) 92 | # continue 93 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_temperature_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv') 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0,4]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/MYD11A2') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | # img_0=ee.Image(ee.Number(0)) 53 | # img_5000=ee.Image(ee.Number(5000)) 54 | # 55 | # img=img.min(img_5000) 56 | # img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | 73 | # region = str([ 74 | # [lat - offset, lon + offset], 75 | # [lat + offset, lon + offset], 76 | # [lat + offset, lon - offset], 77 | # [lat - offset, lon - offset]]) 78 | while True: 79 | try: 80 | export_oneimage(img.clip(region), 'data_temperature', fname, scale, crs) 81 | except: 82 | print 'retry' 83 | time.sleep(10) 84 | continue 85 | break 86 | # while True: 87 | # try: 88 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 89 | # except: 90 | # print 'retry' 91 | # time.sleep(10) 92 | # continue 93 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_entire_county_clip.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'scale':scale, 17 | 'crs':crs 18 | }) 19 | task.start() 20 | while task.status()['state'] == 'RUNNING': 21 | print 'Running...' 22 | # Perhaps task.cancel() at some point. 
23 | time.sleep(10) 24 | print 'Done.', task.status() 25 | 26 | 27 | 28 | 29 | locations = pd.read_csv('locations_final.csv',header=None) 30 | 31 | 32 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 33 | # Author: Jamie Vleeshouwer 34 | 35 | def appendBand(current, previous): 36 | # Rename the band 37 | previous=ee.Image(previous) 38 | current = current.select([0,1,2,3,4,5,6]) 39 | # Append it to the result (Note: only return current item on first element/iteration) 40 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 41 | # Return the accumulation 42 | return accum 43 | 44 | county_region = ee.FeatureCollection('ft:1S4EB6319wWW2sWQDPhDvmSBIVrD3iEmCLYB7nMM') 45 | 46 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 47 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 48 | .filterDate('2002-12-31','2016-8-4') 49 | img=imgcoll.iterate(appendBand) 50 | img=ee.Image(img) 51 | 52 | img_0=ee.Image(ee.Number(-100)) 53 | img_16000=ee.Image(ee.Number(16000)) 54 | 55 | img=img.min(img_16000) 56 | img=img.max(img_0) 57 | 58 | # img=ee.Image(ee.Number(100)) 59 | # img=ee.ImageCollection('LC8_L1T').mosaic() 60 | 61 | for loc1, loc2, lat, lon in locations.values: 62 | fname = '{}_{}'.format(int(loc1), int(loc2)) 63 | 64 | # offset = 0.11 65 | scale = 500 66 | crs='EPSG:4326' 67 | 68 | # filter for a county 69 | region = county_region.filterMetadata('StateFips', 'equals', int(loc1)) 70 | region = ee.FeatureCollection(region).filterMetadata('CntyFips', 'equals', int(loc2)) 71 | region = ee.Feature(region.first()) 72 | # region = region.geometry().coordinates().getInfo()[0] 73 | 74 | # region = str([ 75 | # [lat - offset, lon + offset], 76 | # [lat + offset, lon + offset], 77 | # [lat + offset, lon - offset], 78 | # [lat - offset, lon - offset]]) 79 | while True: 80 | try: 81 | export_oneimage(img.clip(region), 'test', fname, scale, crs) 82 | except: 83 | print 'retry' 84 | time.sleep(10) 85 | continue 86 | break 87 | # while True: 88 | # try: 89 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 90 | # except: 91 | # print 'retry' 92 | # time.sleep(10) 93 | # continue 94 | # break -------------------------------------------------------------------------------- /1 download data/pull_MODIS_world.py: -------------------------------------------------------------------------------- 1 | import ee 2 | import time 3 | import sys 4 | import numpy as np 5 | import pandas as pd 6 | import itertools 7 | import os 8 | import urllib 9 | 10 | ee.Initialize() 11 | 12 | def export_oneimage(img,folder,name,region,scale,crs): 13 | task = ee.batch.Export.image(img, name, { 14 | 'driveFolder':folder, 15 | 'driveFileNamePrefix':name, 16 | 'region': region, 17 | 'scale':scale, 18 | 'crs':crs 19 | }) 20 | task.start() 21 | while task.status()['state'] == 'RUNNING': 22 | print 'Running...' 23 | # Perhaps task.cancel() at some point. 
24 | time.sleep(10) 25 | print 'Done.', task.status() 26 | 27 | 28 | 29 | 30 | # locations = pd.read_csv('locations_remedy.csv') 31 | locations = pd.read_csv('world_locations.csv',header=None) 32 | 33 | 34 | # Transforms an Image Collection with 1 band per Image into a single Image with items as bands 35 | # Author: Jamie Vleeshouwer 36 | 37 | def appendBand(current, previous): 38 | # Rename the band 39 | previous=ee.Image(previous) 40 | current = current.select([0,1,2,3,4,5,6]) 41 | # Append it to the result (Note: only return current item on first element/iteration) 42 | accum = ee.Algorithms.If(ee.Algorithms.IsEqual(previous,None), current, previous.addBands(ee.Image(current))) 43 | # Return the accumulation 44 | return accum 45 | 46 | # county_region = ee.FeatureCollection('ft:18Ayj5e7JxxtTPm1BdMnnzWbZMrxMB49eqGDTsaSp') 47 | world_region = ee.FeatureCollection('ft:1tdSwUL7MVpOauSgRzqVTOwdfy17KDbw-1d9omPw') 48 | 49 | imgcoll = ee.ImageCollection('MODIS/MOD09A1') \ 50 | .filterBounds(ee.Geometry.Rectangle(-106.5, 50,-64, 23))\ 51 | .filterDate('2001-12-31','2015-12-31') 52 | img=imgcoll.iterate(appendBand) 53 | img=ee.Image(img) 54 | 55 | img_0=ee.Image(ee.Number(0)) 56 | img_5000=ee.Image(ee.Number(5000)) 57 | 58 | img=img.min(img_5000) 59 | img=img.max(img_0) 60 | 61 | # img=ee.Image(ee.Number(100)) 62 | # img=ee.ImageCollection('LC8_L1T').mosaic() 63 | 64 | for country,index in locations.values: 65 | fname = 'index'+'{}'.format(int(index)) 66 | 67 | # offset = 0.11 68 | scale = 500 69 | crs='EPSG:4326' 70 | 71 | # filter for a county 72 | region = world_region.filterMetadata('Country', 'equals', country) 73 | if region==None: 74 | print country,index,'not found' 75 | continue 76 | region = region.first() 77 | region = region.geometry().coordinates().getInfo()[0] 78 | 79 | # region = str([ 80 | # [lat - offset, lon + offset], 81 | # [lat + offset, lon + offset], 82 | # [lat + offset, lon - offset], 83 | # [lat - offset, lon - offset]]) 84 | while True: 85 | try: 86 | export_oneimage(img, 'Data_world', fname, region, scale, crs) 87 | except: 88 | print 'retry' 89 | time.sleep(10) 90 | continue 91 | break 92 | # while True: 93 | # try: 94 | # export_oneimage(img,'Data_test',fname,region,scale,crs) 95 | # except: 96 | # print 'retry' 97 | # time.sleep(10) 98 | # continue 99 | # break -------------------------------------------------------------------------------- /6 result_analysis/permute.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | # result = np.load('paper_result.npy') 5 | # result = result[1:,:] 6 | # result_mean = np.mean(result,axis=0,keepdims=True) 7 | # result = np.concatenate((result, result_mean),axis=0) 8 | 9 | # print np.round(result,2) 10 | 11 | permute_band = np.load('permute_band.npy') 12 | permute_band_plot_temp = permute_band[0:9]-permute_band[9] 13 | print permute_band_plot_temp.shape 14 | permute_band_plot = np.zeros([permute_band_plot_temp.shape[0],permute_band_plot_temp.shape[1],permute_band_plot_temp.shape[2],3]) 15 | permute_band_plot[:,:,:,0] = (permute_band_plot_temp[:,:,:,0]+permute_band_plot_temp[:,:,:,1])/2 16 | permute_band_plot[:,:,:,1] = (permute_band_plot_temp[:,:,:,2]+permute_band_plot_temp[:,:,:,3])/2 17 | permute_band_plot[:,:,:,2] = (permute_band_plot_temp[:,:,:,4]+permute_band_plot_temp[:,:,:,5])/2 18 | 19 | # plt.plot(range(10),permute_band_mean[:,0]) 20 | # plt.plot(range(10),permute_band_mean[:,1]) 21 | # 
plt.plot(range(10),permute_band_mean[:,2]) 22 | # plt.plot(range(10),permute_band_mean[:,3]) 23 | # plt.plot(range(10),permute_band_mean[:,4]) 24 | # plt.plot(range(10),permute_band_mean[:,5]) 25 | # plt.legend(['5','6','7','8','9','10']) 26 | # plt.show() 27 | 28 | # bar plot 29 | n_groups = 9 30 | fig, ax = plt.subplots() 31 | index = np.arange(n_groups) 32 | bar_width = 0.22 33 | opacity = 0.6 34 | error_config = {'ecolor': '0.3'} 35 | 36 | rects1 = plt.bar(index, np.mean(permute_band_plot,axis=(1,2))[:,0], bar_width, 37 | alpha=opacity, 38 | color='b', 39 | yerr=0, 40 | error_kw=error_config, 41 | label='May&Jun') 42 | rects2 = plt.bar(index + bar_width, np.mean(permute_band_plot,axis=(1,2))[:,1], bar_width, 43 | alpha=opacity, 44 | color='g', 45 | yerr=0, 46 | error_kw=error_config, 47 | label='Jul&Aug') 48 | rects3 = plt.bar(index + bar_width*2, np.mean(permute_band_plot,axis=(1,2))[:,2], bar_width, 49 | alpha=opacity, 50 | color='r', 51 | yerr=0, 52 | error_kw=error_config, 53 | label='Sept&Oct') 54 | # rects4 = plt.bar(index + bar_width*3, np.mean(permute_band_plot,axis=(1,2))[:,3], bar_width, 55 | # alpha=opacity, 56 | # color='c', 57 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,3], 58 | # error_kw=error_config, 59 | # label='Aug') 60 | # rects5 = plt.bar(index + bar_width*4, np.mean(permute_band_plot,axis=(1,2))[:,4], bar_width, 61 | # alpha=opacity, 62 | # color='m', 63 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,4], 64 | # error_kw=error_config, 65 | # label='Sept') 66 | # rects6 = plt.bar(index + bar_width*5, np.mean(permute_band_plot,axis=(1,2))[:,5], bar_width, 67 | # alpha=opacity, 68 | # color='y', 69 | # yerr=np.std(permute_band_plot,axis=(1,2))[:,5], 70 | # error_kw=error_config, 71 | # label='Oct') 72 | plt.xlabel('Spectral bands in remote sensing image',fontsize=16) 73 | plt.ylabel('Increase of RMSE',fontsize=16) 74 | # plt.title('Root Mean Square Error') 75 | plt.xticks(index + bar_width*1.5, ('1', '2', '3', '4', '5', '6','7','8','9')) 76 | plt.legend(fontsize=14,loc=2) 77 | 78 | axes = plt.gca() 79 | axes.set_ylim([0,3.5]) 80 | 81 | plt.tight_layout() 82 | plt.show() 83 | 84 | 85 | 86 | 87 | permute_time = np.load('permute_time.npy') 88 | permute_time_plot = permute_time[0:30]-permute_time[30] 89 | 90 | x = range(49,282,8) 91 | y = np.mean(permute_time_plot,axis=(1,2)) 92 | # example error bar values that vary with x-position 93 | error = 0 94 | 95 | plt.errorbar(x, y, yerr=error, fmt='-o',ecolor='0.3',linewidth=1,color='b') 96 | plt.xlabel('Day of year',fontsize=16) 97 | plt.ylabel('Increase of RMSE',fontsize=16) 98 | 99 | plt.show() 100 | 101 | -------------------------------------------------------------------------------- /6 result_analysis/yield_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import csv 3 | from BeautifulSoup import BeautifulSoup 4 | from GP_crop_v3 import * 5 | 6 | 7 | # Read CNN_err prediction 8 | CNN = {} 9 | GP = {} 10 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 11 | save_path = 'C:/360Downloads/final/monthly/' 12 | path_current = save_path+str(0)+str(30)+str(2014)+'result_prediction.npz' 13 | data = np.load(path_current) 14 | 15 | year = data['year_out'] 16 | real = data['real_out'] 17 | pred = data['pred_out'] 18 | index=data['index_out'] 19 | 20 | validate = np.nonzero(year == 2014)[0] 21 | year = year[validate] 22 | real = real[validate] 23 | pred = pred[validate] 24 | index = index[validate] 25 | err_CNN = pred-real 26 | 27 | rmse,me,err_GP 
= GaussianProcess(2014,path_current) 28 | 29 | 30 | print 'CNN',err_CNN.min(),err_CNN.max() 31 | print 'GP',err_GP.min(),err_GP.max() 32 | 33 | for i in range(year.shape[0]): 34 | loc1 = str(int(index[i,0])) 35 | loc2 = str(int(index[i,1])) 36 | if len(loc1)==1: 37 | loc1='0'+loc1 38 | if len(loc2)==1: 39 | loc2='00'+loc2 40 | if len(loc2)==2: 41 | loc2='0'+loc2 42 | fips = loc1+loc2 43 | CNN[fips] = err_CNN[i] 44 | GP[fips] = err_GP[i] 45 | 46 | '''CNN''' 47 | # Load the SVG map 48 | svg = open('counties.svg', 'r').read() 49 | # Load into Beautiful Soup 50 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 51 | # Find counties 52 | paths = soup.findAll('path') 53 | # Map colors 54 | colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 55 | 56 | # County style 57 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 58 | # Color the counties based on unemployment rate 59 | for p in paths: 60 | if p['id'] not in ["State_Lines", "separator"]: 61 | try: 62 | rate = CNN[p['id']] 63 | except: 64 | continue 65 | if rate > 15: 66 | color_class = 7 67 | elif rate > 10: 68 | color_class = 6 69 | elif rate > 5: 70 | color_class = 5 71 | elif rate > 0: 72 | color_class = 4 73 | elif rate > -5: 74 | color_class = 3 75 | elif rate > -10: 76 | color_class = 2 77 | elif rate > -15: 78 | color_class = 1 79 | else: 80 | color_class = 0 81 | 82 | color = colors[color_class] 83 | p['style'] = path_style + color 84 | 85 | soup=soup.prettify() 86 | with open('CNN_err.svg', 'wb') as f: 87 | f.write(soup) 88 | 89 | '''GP''' 90 | # Load the SVG map 91 | svg = open('counties.svg', 'r').read() 92 | # Load into Beautiful Soup 93 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 94 | # Find counties 95 | paths = soup.findAll('path') 96 | # Map colors 97 | colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 98 | 99 | # County style 100 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 101 | # Color the counties based on unemployment rate 102 | for p in paths: 103 | if p['id'] not in ["State_Lines", "separator"]: 104 | try: 105 | rate = GP[p['id']] 106 | except: 107 | continue 108 | if rate > 15: 109 | color_class = 7 110 | elif rate > 10: 111 | color_class = 6 112 | elif rate > 5: 113 | color_class = 5 114 | elif rate > 0: 115 | color_class = 4 116 | elif rate > -5: 117 | color_class = 3 118 | elif rate > -10: 119 | color_class = 2 120 | elif rate > -15: 121 | color_class = 1 122 | else: 123 | color_class = 0 124 | 125 | color = colors[color_class] 126 | p['style'] = path_style + color 127 | 128 | soup=soup.prettify() 129 | with open('GP_err.svg', 'wb') as f: 130 | f.write(soup) 131 | -------------------------------------------------------------------------------- /3 model/nnet_lstm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | from fetch_data_county import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | from datetime import datetime 9 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 10 | 11 | class Config(): 12 | B, W, H, C = 32, 32,32, 9 
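# Added note (inferred from how Config is used in this file and the training scripts): B = batch size, W = number of histogram bins, H = number of 8-day time steps, C = number of spectral bands (7 MOD09A1 reflectance + 2 MYD11A2 temperature).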
13 | 14 | lstm_layers = 1 15 | # 64 16 | # hidden 256(default) 17 | lstm_H = 128 18 | 19 | # dense 256(default) 20 | dense = 256 21 | 22 | train_step = 10000 23 | lr = 1e-3 24 | drop_out = 0.75 25 | # weight_decay = 0.005 26 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 27 | save_path = '/atlas/u/jiaxuan/data/train_results/final/lstm/' 28 | # save_path = '~/Downloads/' 29 | 30 | def conv2d(input_data, out_channels, filter_size, in_channels=None, name="conv2d"): 31 | if not in_channels: 32 | in_channels = input_data.get_shape()[-1] 33 | with tf.variable_scope(name): 34 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 35 | initializer=tf.contrib.layers.variance_scaling_initializer()) 36 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 37 | return tf.nn.conv2d(input_data, W, [1, 1, 1, 1], "SAME") + b 38 | 39 | def pool2d(input_data, ksize, name="pool2d"): 40 | with tf.variable_scope(name): 41 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 42 | 43 | 44 | def conv_relu_batch(input_data, out_channels, filter_size, in_channels=None, name="crb"): 45 | with tf.variable_scope(name): 46 | a = conv2d(input_data, out_channels, filter_size, in_channels) 47 | b = batch_normalization(a,axes=[0,1,2]) 48 | r = tf.nn.relu(b) 49 | return r 50 | 51 | def dense(input_data, H, N=None, name="dense"): 52 | if not N: 53 | N = input_data.get_shape()[-1] 54 | with tf.variable_scope(name): 55 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 56 | b = tf.get_variable("b", [1, H]) 57 | return tf.matmul(input_data, W, name="matmul") + b 58 | 59 | def batch_normalization(input_data, axes=[0], name="batch"): 60 | with tf.variable_scope(name): 61 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 62 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 63 | 64 | 65 | def lstm_net(input_data,output_data,config,keep_prob = 1,name='lstm_net'): 66 | with tf.variable_scope(name): 67 | lstm_cell = tf.nn.rnn_cell.LSTMCell(config.lstm_H,state_is_tuple=True) 68 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob) 69 | cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.lstm_layers,state_is_tuple=True) 70 | state = cell.zero_state(config.B, tf.float32) 71 | outputs, final_state = tf.nn.dynamic_rnn(cell, input_data, 72 | initial_state=state, time_major=True) 73 | output_final = tf.squeeze(tf.slice(outputs, [config.H-1,0,0] , [1,-1,-1])) 74 | # print outputs.get_shape().as_list() 75 | fc1 = dense(output_final, config.dense, name="dense") 76 | 77 | logit = tf.squeeze(dense(fc1,1,name='logit')) 78 | loss = tf.nn.l2_loss(logit - output_data) 79 | 80 | return logit,loss,fc1 81 | 82 | class NeuralModel(): 83 | def __init__(self, config, name): 84 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 85 | self.y = tf.placeholder(tf.float32, [None]) 86 | self.lr = tf.placeholder(tf.float32, []) 87 | self.keep_prob = tf.placeholder(tf.float32, []) 88 | 89 | input_data = tf.transpose(self.x, [2,0,1,3]) 90 | dim = input_data.get_shape().as_list() 91 | input_data = tf.reshape(input_data,[dim[0],-1,dim[2]*dim[3]]) 92 | print 'lstm input shape',input_data.get_shape() 93 | 94 | with tf.variable_scope('LSTM') as scope: 95 | self.pred,self.loss,self.feature = lstm_net(input_data, self.y, config, keep_prob=self.keep_prob) 96 | 97 | self.train_op = 
tf.train.AdamOptimizer(self.lr).minimize(self.loss) 98 | with tf.variable_scope('LSTM/lstm_net/logit') as scope: 99 | scope.reuse_variables() 100 | self.dense_W = tf.get_variable('W') 101 | self.dense_B = tf.get_variable('b') 102 | 103 | # if __name__ == '__main__': 104 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 105 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 106 | # config = Config() 107 | # model = NeuralModel(config, "model") 108 | 109 | # dummy_x = np.random.rand(config.B, config.W, config.H, config.C) 110 | # dummy_y = np.random.rand(config.B) 111 | 112 | # sess.run(tf.initialize_all_variables()) 113 | # for i in range(1000): 114 | # # model.state = model.cell.zero_state(config.B, tf.float32) 115 | # if i % 100 == 0: 116 | # config.lr /= 2 117 | # _, loss, pred = sess.run([model.train_op, model.loss, model.pred], feed_dict={ 118 | # model.x: dummy_x, 119 | # model.y: dummy_y, 120 | # model.lr: config.lr, 121 | # model.keep_prob: config.drop_out 122 | # }) 123 | 124 | # print loss 125 | 126 | -------------------------------------------------------------------------------- /4 model_batch/nnet_lstm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | from fetch_data_county import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | from datetime import datetime 9 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 10 | 11 | class Config(): 12 | B, W, H, C = 32, 32,32, 9 13 | 14 | lstm_layers = 1 15 | # 64 16 | # hidden 256(default) 17 | lstm_H = 128 18 | 19 | # dense 256(default) 20 | dense = 256 21 | 22 | train_step = 10000 23 | lr = 1e-3 24 | drop_out = 0.75 25 | # weight_decay = 0.005 26 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 27 | save_path = '/atlas/u/jiaxuan/data/train_results/final/lstm/' 28 | # save_path = '~/Downloads/' 29 | 30 | def conv2d(input_data, out_channels, filter_size, in_channels=None, name="conv2d"): 31 | if not in_channels: 32 | in_channels = input_data.get_shape()[-1] 33 | with tf.variable_scope(name): 34 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 35 | initializer=tf.contrib.layers.variance_scaling_initializer()) 36 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 37 | return tf.nn.conv2d(input_data, W, [1, 1, 1, 1], "SAME") + b 38 | 39 | def pool2d(input_data, ksize, name="pool2d"): 40 | with tf.variable_scope(name): 41 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 42 | 43 | 44 | def conv_relu_batch(input_data, out_channels, filter_size, in_channels=None, name="crb"): 45 | with tf.variable_scope(name): 46 | a = conv2d(input_data, out_channels, filter_size, in_channels) 47 | b = batch_normalization(a,axes=[0,1,2]) 48 | r = tf.nn.relu(b) 49 | return r 50 | 51 | def dense(input_data, H, N=None, name="dense"): 52 | if not N: 53 | N = input_data.get_shape()[-1] 54 | with tf.variable_scope(name): 55 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 56 | b = tf.get_variable("b", [1, H]) 57 | return tf.matmul(input_data, W, name="matmul") + b 58 | 59 | def batch_normalization(input_data, axes=[0], name="batch"): 60 | with tf.variable_scope(name): 61 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 62 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 63 | 64 | 65 | def 
lstm_net(input_data,output_data,config,keep_prob = 1,name='lstm_net'): 66 | with tf.variable_scope(name): 67 | lstm_cell = tf.nn.rnn_cell.LSTMCell(config.lstm_H,state_is_tuple=True) 68 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob) 69 | cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.lstm_layers,state_is_tuple=True) 70 | state = cell.zero_state(config.B, tf.float32) 71 | outputs, final_state = tf.nn.dynamic_rnn(cell, input_data, 72 | initial_state=state, time_major=True) 73 | output_final = tf.squeeze(tf.slice(outputs, [config.H-1,0,0] , [1,-1,-1])) 74 | # print outputs.get_shape().as_list() 75 | fc1 = dense(output_final, config.dense, name="dense") 76 | 77 | logit = tf.squeeze(dense(fc1,1,name='logit')) 78 | loss = tf.nn.l2_loss(logit - output_data) 79 | 80 | return logit,loss,fc1 81 | 82 | class NeuralModel(): 83 | def __init__(self, config, name): 84 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 85 | self.y = tf.placeholder(tf.float32, [None]) 86 | self.lr = tf.placeholder(tf.float32, []) 87 | self.keep_prob = tf.placeholder(tf.float32, []) 88 | 89 | input_data = tf.transpose(self.x, [2,0,1,3]) 90 | dim = input_data.get_shape().as_list() 91 | input_data = tf.reshape(input_data,[dim[0],-1,dim[2]*dim[3]]) 92 | print 'lstm input shape',input_data.get_shape() 93 | 94 | with tf.variable_scope('LSTM') as scope: 95 | self.pred,self.loss,self.feature = lstm_net(input_data, self.y, config, keep_prob=self.keep_prob) 96 | 97 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 98 | with tf.variable_scope('LSTM/lstm_net/logit') as scope: 99 | scope.reuse_variables() 100 | self.dense_W = tf.get_variable('W') 101 | self.dense_B = tf.get_variable('b') 102 | 103 | # if __name__ == '__main__': 104 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 105 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 106 | # config = Config() 107 | # model = NeuralModel(config, "model") 108 | 109 | # dummy_x = np.random.rand(config.B, config.W, config.H, config.C) 110 | # dummy_y = np.random.rand(config.B) 111 | 112 | # sess.run(tf.initialize_all_variables()) 113 | # for i in range(1000): 114 | # # model.state = model.cell.zero_state(config.B, tf.float32) 115 | # if i % 100 == 0: 116 | # config.lr /= 2 117 | # _, loss, pred = sess.run([model.train_op, model.loss, model.pred], feed_dict={ 118 | # model.x: dummy_x, 119 | # model.y: dummy_y, 120 | # model.lr: config.lr, 121 | # model.keep_prob: config.drop_out 122 | # }) 123 | 124 | # print loss 125 | 126 | -------------------------------------------------------------------------------- /4 model_batch/nnet_for_hist_dropout_stride.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | # from fetch_data_histogram import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | import scipy.misc 9 | from datetime import datetime 10 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | class Config(): 13 | B, W, H, C = 32, 32,32, 9 14 | train_step = 25000 15 | lr = 1e-3 16 | weight_decay = 0.005 17 | 18 | keep_prob = 0.25 19 | # load_path = '/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/' 20 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 21 | # load_path = "/atlas/u/jiaxuan/data/google_drive/img_full_output/" 22 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test21/' 
23 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 24 | save_path = '/atlas/u/jiaxuan/data/train_results/final/corn_yearly/' 25 | 26 | 27 | def conv2d(input_data, out_channels, filter_size,stride, in_channels=None, name="conv2d"): 28 | if not in_channels: 29 | in_channels = input_data.get_shape()[-1] 30 | with tf.variable_scope(name): 31 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 32 | initializer=tf.contrib.layers.variance_scaling_initializer()) 33 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 34 | return tf.nn.conv2d(input_data, W, [1, stride, stride, 1], "SAME") + b 35 | 36 | 37 | def pool2d(input_data, ksize, name="pool2d"): 38 | with tf.variable_scope(name): 39 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 40 | 41 | 42 | def conv_relu_batch(input_data, out_channels, filter_size,stride, in_channels=None, name="crb"): 43 | with tf.variable_scope(name): 44 | a = conv2d(input_data, out_channels, filter_size, stride, in_channels) 45 | b = batch_normalization(a,axes=[0,1,2]) 46 | r = tf.nn.relu(b) 47 | return r 48 | 49 | def dense(input_data, H, N=None, name="dense"): 50 | if not N: 51 | N = input_data.get_shape()[-1] 52 | with tf.variable_scope(name): 53 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 54 | b = tf.get_variable("b", [1, H]) 55 | return tf.matmul(input_data, W, name="matmul") + b 56 | 57 | def batch_normalization(input_data, axes=[0], name="batch"): 58 | with tf.variable_scope(name): 59 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 60 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 61 | 62 | class batch_norm(object): 63 | """Code modification of http://stackoverflow.com/a/33950177""" 64 | def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"): 65 | with tf.variable_scope(name): 66 | self.epsilon = epsilon 67 | self.momentum = momentum 68 | 69 | self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum) 70 | self.name = name 71 | 72 | def __call__(self, x, axes=[0,1,2], train=True): 73 | shape = x.get_shape().as_list() 74 | 75 | if train: 76 | with tf.variable_scope(self.name) as scope: 77 | self.beta = tf.get_variable("beta", [shape[-1]], 78 | initializer=tf.constant_initializer(0.)) 79 | self.gamma = tf.get_variable("gamma", [shape[-1]], 80 | initializer=tf.random_normal_initializer(1., 0.02)) 81 | 82 | batch_mean, batch_var = tf.nn.moments(x, axes, name='moments') 83 | ema_apply_op = self.ema.apply([batch_mean, batch_var]) 84 | self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var) 85 | 86 | with tf.control_dependencies([ema_apply_op]): 87 | mean, var = tf.identity(batch_mean), tf.identity(batch_var) 88 | else: 89 | mean, var = self.ema_mean, self.ema_var 90 | 91 | normed = tf.nn.batch_norm_with_global_normalization( 92 | x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True) 93 | 94 | return normed 95 | 96 | class NeuralModel(): 97 | def __init__(self, config, name): 98 | 99 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 100 | self.y = tf.placeholder(tf.float32, [None]) 101 | self.lr = tf.placeholder(tf.float32, []) 102 | self.keep_prob = tf.placeholder(tf.float32, []) 103 | # self.year = tf.placeholder(tf.float32, [None,1]) 104 | # used for max image 105 | # self.image = 
tf.Variable(initial_value=init,name="image") 106 | 107 | self.conv1_1 = conv_relu_batch(self.x, 128, 3,1, name="conv1_1") 108 | conv1_1_d = tf.nn.dropout(self.conv1_1, self.keep_prob) 109 | conv1_2 = conv_relu_batch(conv1_1_d, 256, 3,2, name="conv1_2") 110 | conv1_2_d = tf.nn.dropout(conv1_2, self.keep_prob) 111 | 112 | conv2_1 = conv_relu_batch(conv1_2_d, 256, 3,1, name="conv2_1") 113 | conv2_1_d = tf.nn.dropout(conv2_1, self.keep_prob) 114 | conv2_2 = conv_relu_batch(conv2_1_d, 512, 3,2, name="conv2_2") 115 | conv2_2_d = tf.nn.dropout(conv2_2, self.keep_prob) 116 | 117 | conv3_1 = conv_relu_batch(conv2_2_d, 512, 3,1, name="conv3_1") 118 | conv3_1_d = tf.nn.dropout(conv3_1, self.keep_prob) 119 | conv3_2= conv_relu_batch(conv3_1_d, 1024, 3,2, name="conv3_2") 120 | conv3_2_d = tf.nn.dropout(conv3_2, self.keep_prob) 121 | 122 | 123 | dim = np.prod(conv3_2_d.get_shape().as_list()[1:]) 124 | flattened = tf.reshape(conv3_2_d, [-1, dim]) 125 | # flattened_d = tf.nn.dropout(flattened, 0.25) 126 | 127 | self.fc6 = dense(flattened, 1024, name="fc6") 128 | # self.fc6 = tf.concat(1, [self.fc6_img,self.year]) 129 | 130 | 131 | self.logits = tf.squeeze(dense(self.fc6, 1, name="dense")) 132 | self.loss_err = tf.nn.l2_loss(self.logits - self.y) 133 | 134 | 135 | with tf.variable_scope('dense') as scope: 136 | scope.reuse_variables() 137 | self.dense_W = tf.get_variable('W') 138 | self.dense_B = tf.get_variable('b') 139 | with tf.variable_scope('conv1_1/conv2d') as scope: 140 | scope.reuse_variables() 141 | self.conv_W = tf.get_variable('W') 142 | self.conv_B = tf.get_variable('b') 143 | 144 | # L1 term 145 | self.loss_reg = tf.abs(tf.reduce_sum(self.logits - self.y)) 146 | # soybean 147 | # alpha = 1.5 148 | # corn 149 | alpha = 5 150 | self.loss = self.loss_err+self.loss_reg*alpha 151 | 152 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_permute.py: -------------------------------------------------------------------------------- 1 | from nnet_for_hist_dropout_stride import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | 
list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | image_all_save = np.copy(image_all) 49 | 50 | 51 | # result_band = np.zeros([10,2,7,6]) 52 | # for p in range(10): 53 | # for loop in range(0,2): 54 | # for predict_year in range(2009,2016): 55 | # image_all = np.copy(image_all_save) 56 | # if p!=9: 57 | # np.take(image_all[:,:,:,p],np.random.permutation(image_all.shape[0]),axis=0,out=image_all[:,:,:,p]) 58 | # index_train = np.nonzero(year_all < predict_year)[0] 59 | # index_validate = np.nonzero(year_all == predict_year)[0] 60 | 61 | 62 | # # calc train image mean (for each band), and then detract (broadcast) 63 | # image_mean=np.mean(image_all[index_train],(0,1,2)) 64 | # image_all = image_all - image_mean 65 | 66 | # image_train=image_all[index_train] 67 | # yield_train=yield_all[index_train] 68 | 69 | # for count,time in enumerate(range(10,31,4)): 70 | # g = tf.Graph() 71 | # with g.as_default(): 72 | # # modify config 73 | # config = Config() 74 | # config.H=time 75 | 76 | # model= NeuralModel(config,'net') 77 | 78 | # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 79 | # # Launch the graph. 80 | # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 81 | # sess.run(tf.initialize_all_variables()) 82 | # saver=tf.train.Saver() 83 | # if predict_year==2012: 84 | # saver.restore(sess, config.save_path+str(loop+2)+str(time) + str(predict_year)+'CNN_model.ckpt') 85 | # else: 86 | # saver.restore(sess, config.save_path+str(loop)+str(time) + str(predict_year)+'CNN_model.ckpt') 87 | 88 | # # save result 89 | # pred_out = [] 90 | # real_out = [] 91 | # feature_out = [] 92 | # for i in range(image_train.shape[0] / config.B): 93 | # feature,pred = sess.run( 94 | # [model.fc6,model.logits], feed_dict={ 95 | # model.x: image_train[i * config.B:(i + 1) * config.B,:,0:config.H,:], 96 | # model.y: yield_train[i * config.B:(i + 1) * config.B], 97 | # model.keep_prob:1 98 | # }) 99 | # real = yield_train[i * config.B:(i + 1) * config.B] 100 | 101 | # pred_out.append(pred) 102 | # real_out.append(real) 103 | # feature_out.append(feature) 104 | # pred_out=np.concatenate(pred_out) 105 | # real_out=np.concatenate(real_out) 106 | # feature_out=np.concatenate(feature_out) 107 | 108 | # rmse = np.sqrt(np.mean((pred_out-real_out)**2)) 109 | # print 'p',p 110 | # print rmse 111 | # result_band[p,loop,predict_year-2009,count]=rmse 112 | # np.save('permute_band.npy', result_band) 113 | 114 | result_time = np.zeros([31,2,7]) 115 | for p in range(31): 116 | for loop in range(0,2): 117 | for predict_year in range(2009,2016): 118 | image_all = np.copy(image_all_save) 119 | if p!=30: 120 | np.take(image_all[:,:,p,:],np.random.permutation(image_all.shape[0]),axis=0,out=image_all[:,:,p,:]) 121 | index_train = np.nonzero(year_all < predict_year)[0] 122 | index_validate = np.nonzero(year_all == predict_year)[0] 123 | 124 | # calc train image mean (for each band), and then detract (broadcast) 125 | image_mean=np.mean(image_all[index_train],(0,1,2)) 126 | image_all = image_all - image_mean 127 | 128 | image_train=image_all[index_train] 129 | yield_train=yield_all[index_train] 130 | 131 | for time in range(30,31): 132 | g = tf.Graph() 133 | with g.as_default(): 134 | # modify config 135 | config = Config() 136 | config.H=time 137 | 138 | model= NeuralModel(config,'net') 139 | 140 | gpu_options = 
tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 141 | # Launch the graph. 142 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 143 | sess.run(tf.initialize_all_variables()) 144 | saver=tf.train.Saver() 145 | if predict_year==2012: 146 | saver.restore(sess, config.save_path+str(loop+2)+str(time) + str(predict_year)+'CNN_model.ckpt') 147 | else: 148 | saver.restore(sess, config.save_path+str(loop)+str(time) + str(predict_year)+'CNN_model.ckpt') 149 | 150 | # save result 151 | pred_out = [] 152 | real_out = [] 153 | feature_out = [] 154 | for i in range(image_train.shape[0] / config.B): 155 | feature,pred = sess.run( 156 | [model.fc6,model.logits], feed_dict={ 157 | model.x: image_train[i * config.B:(i + 1) * config.B,:,0:config.H,:], 158 | model.y: yield_train[i * config.B:(i + 1) * config.B], 159 | model.keep_prob:1 160 | }) 161 | real = yield_train[i * config.B:(i + 1) * config.B] 162 | 163 | pred_out.append(pred) 164 | real_out.append(real) 165 | feature_out.append(feature) 166 | pred_out=np.concatenate(pred_out) 167 | real_out=np.concatenate(real_out) 168 | feature_out=np.concatenate(feature_out) 169 | 170 | rmse = np.sqrt(np.mean((pred_out-real_out)**2)) 171 | print 'p',p 172 | print rmse 173 | result_time[p,loop,predict_year-2009]=rmse 174 | np.save('permute_time.npy', result_time) 175 | -------------------------------------------------------------------------------- /3 model/nnet_for_hist_dropout_stride.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import threading 4 | # from fetch_data_histogram import * 5 | import sys 6 | import matplotlib.pyplot as plt 7 | import time 8 | import scipy.misc 9 | from datetime import datetime 10 | # datetime.now().strftime('%Y-%m-%d %H:%M:%S') 11 | 12 | class Config(): 13 | B, W, H, C = 32, 32,32, 9 14 | train_step = 25000 15 | lr = 1e-3 16 | weight_decay = 0.005 17 | 18 | drop_out = 0.25 19 | # load_path = '/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/' 20 | load_path = "/atlas/u/jiaxuan/data/google_drive/img_output/" 21 | # load_path = "/atlas/u/jiaxuan/data/google_drive/img_full_output/" 22 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test21/' 23 | # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 24 | save_path = '/atlas/u/jiaxuan/data/train_results/final/monthly/' 25 | 26 | 27 | def conv2d(input_data, out_channels, filter_size,stride, in_channels=None, name="conv2d"): 28 | if not in_channels: 29 | in_channels = input_data.get_shape()[-1] 30 | with tf.variable_scope(name): 31 | W = tf.get_variable("W", [filter_size, filter_size, in_channels, out_channels], 32 | initializer=tf.contrib.layers.variance_scaling_initializer()) 33 | b = tf.get_variable("b", [1, 1, 1, out_channels]) 34 | return tf.nn.conv2d(input_data, W, [1, stride, stride, 1], "SAME") + b 35 | 36 | 37 | def pool2d(input_data, ksize, name="pool2d"): 38 | with tf.variable_scope(name): 39 | return tf.nn.max_pool(input_data, [1, ksize, ksize, 1], [1, ksize, ksize, 1], "SAME") 40 | 41 | 42 | def conv_relu_batch(input_data, out_channels, filter_size,stride, in_channels=None, name="crb"): 43 | with tf.variable_scope(name): 44 | a = conv2d(input_data, out_channels, filter_size, stride, in_channels) 45 | b = batch_normalization(a,axes=[0,1,2]) 46 | r = tf.nn.relu(b) 47 | return r 48 | 49 | def dense(input_data, H, N=None, name="dense"): 50 | if not N: 51 | N = input_data.get_shape()[-1] 52 | with 
tf.variable_scope(name): 53 | W = tf.get_variable("W", [N, H], initializer=tf.contrib.layers.variance_scaling_initializer()) 54 | b = tf.get_variable("b", [1, H]) 55 | return tf.matmul(input_data, W, name="matmul") + b 56 | 57 | def batch_normalization(input_data, axes=[0], name="batch"): 58 | with tf.variable_scope(name): 59 | mean, variance = tf.nn.moments(input_data, axes, keep_dims=True, name="moments") 60 | return tf.nn.batch_normalization(input_data, mean, variance, None, None, 1e-6, name="batch") 61 | 62 | class batch_norm(object): 63 | """Code modification of http://stackoverflow.com/a/33950177""" 64 | def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"): 65 | with tf.variable_scope(name): 66 | self.epsilon = epsilon 67 | self.momentum = momentum 68 | 69 | self.ema = tf.train.ExponentialMovingAverage(decay=self.momentum) 70 | self.name = name 71 | 72 | def __call__(self, x, axes=[0,1,2], train=True): 73 | shape = x.get_shape().as_list() 74 | 75 | if train: 76 | with tf.variable_scope(self.name) as scope: 77 | self.beta = tf.get_variable("beta", [shape[-1]], 78 | initializer=tf.constant_initializer(0.)) 79 | self.gamma = tf.get_variable("gamma", [shape[-1]], 80 | initializer=tf.random_normal_initializer(1., 0.02)) 81 | 82 | batch_mean, batch_var = tf.nn.moments(x, axes, name='moments') 83 | ema_apply_op = self.ema.apply([batch_mean, batch_var]) 84 | self.ema_mean, self.ema_var = self.ema.average(batch_mean), self.ema.average(batch_var) 85 | 86 | with tf.control_dependencies([ema_apply_op]): 87 | mean, var = tf.identity(batch_mean), tf.identity(batch_var) 88 | else: 89 | mean, var = self.ema_mean, self.ema_var 90 | 91 | normed = tf.nn.batch_norm_with_global_normalization( 92 | x, mean, var, self.beta, self.gamma, self.epsilon, scale_after_normalization=True) 93 | 94 | return normed 95 | 96 | class NeuralModel(): 97 | def __init__(self, config, name): 98 | 99 | self.x = tf.placeholder(tf.float32, [None, config.W, config.H, config.C], name="x") 100 | self.y = tf.placeholder(tf.float32, [None]) 101 | self.lr = tf.placeholder(tf.float32, []) 102 | self.keep_prob = tf.placeholder(tf.float32, []) 103 | # self.year = tf.placeholder(tf.float32, [None,1]) 104 | # used for max image 105 | # self.image = tf.Variable(initial_value=init,name="image") 106 | 107 | self.conv1_1 = conv_relu_batch(self.x, 128, 3,1, name="conv1_1") 108 | conv1_1_d = tf.nn.dropout(self.conv1_1, self.keep_prob) 109 | conv1_2 = conv_relu_batch(conv1_1_d, 128, 3,2, name="conv1_2") 110 | conv1_2_d = tf.nn.dropout(conv1_2, self.keep_prob) 111 | 112 | conv2_1 = conv_relu_batch(conv1_2_d, 256, 3,1, name="conv2_1") 113 | conv2_1_d = tf.nn.dropout(conv2_1, self.keep_prob) 114 | conv2_2 = conv_relu_batch(conv2_1_d, 256, 3,2, name="conv2_2") 115 | conv2_2_d = tf.nn.dropout(conv2_2, self.keep_prob) 116 | 117 | conv3_1 = conv_relu_batch(conv2_2_d, 512, 3,1, name="conv3_1") 118 | conv3_1_d = tf.nn.dropout(conv3_1, self.keep_prob) 119 | conv3_2= conv_relu_batch(conv3_1_d, 512, 3,1, name="conv3_2") 120 | conv3_2_d = tf.nn.dropout(conv3_2, self.keep_prob) 121 | conv3_3 = conv_relu_batch(conv3_2_d, 512, 3,2, name="conv3_3") 122 | conv3_3_d = tf.nn.dropout(conv3_3, self.keep_prob) 123 | 124 | # conv4_1 = conv_relu_batch(pool3, 512, 3, name="conv4_1") 125 | # conv4_1_d = tf.nn.dropout(conv4_1, self.keep_prob) 126 | # conv4_2 = conv_relu_batch(conv4_1_d, 512, 3, name="conv4_2") 127 | # conv4_2_d = tf.nn.dropout(conv4_2, self.keep_prob) 128 | # conv4_3 = conv_relu_batch(conv4_2_d, 512, 3, name="conv4_3") 129 | # conv4_3_d = 
tf.nn.dropout(conv4_3, self.keep_prob) 130 | # pool4 = pool2d(conv4_3_d, 2, name="pool4") 131 | 132 | # input size=48*48, we can only pool 4 times 133 | # conv5_1 = conv_relu_batch(pool4, 2, 3, name="conv5_1") 134 | # conv5_2 = conv_relu_batch(conv5_1, 2, 3, name="conv5_2") 135 | # conv5_3 = conv_relu_batch(conv5_2, 2, 3, name="conv5_3") 136 | # pool5 = pool2d(conv5_3, 2, name="pool5") 137 | 138 | dim = np.prod(conv3_3_d.get_shape().as_list()[1:]) 139 | flattened = tf.reshape(conv3_3_d, [-1, dim]) 140 | # flattened_d = tf.nn.dropout(flattened, 0.25) 141 | 142 | print flattened.get_shape() 143 | self.fc6 = dense(flattened, 2048, name="fc6") 144 | # self.fc6 = tf.concat(1, [self.fc6_img,self.year]) 145 | 146 | 147 | # fc6_b = batch_normalization(fc6) 148 | # self.fc6_r = tf.nn.relu(fc6_b) 149 | # self.fc6_d = tf.nn.dropout(fc6_r, self.keep_prob) 150 | # 151 | # 152 | # fc7 = dense(fc6_d, 1024, name="fc7") 153 | # fc7_r = tf.nn.relu(fc7) 154 | # fc7_b = batch_normalization(fc7_r) 155 | # fc7_d = tf.nn.dropout(fc7_b, self.keep_prob) 156 | 157 | self.logits = tf.squeeze(dense(self.fc6, 1, name="dense")) 158 | # l2 159 | self.loss_err = tf.nn.l2_loss(self.logits - self.y) 160 | # l1 161 | # self.loss_err = tf.reduce_sum(tf.abs(self.logits - self.y)) 162 | # average 163 | # self.loss_err = tf.abs(tf.reduce_sum(self.logits - self.y)) 164 | 165 | with tf.variable_scope('dense') as scope: 166 | scope.reuse_variables() 167 | self.dense_W = tf.get_variable('W') 168 | self.dense_B = tf.get_variable('b') 169 | with tf.variable_scope('conv1_1/conv2d') as scope: 170 | scope.reuse_variables() 171 | self.conv_W = tf.get_variable('W') 172 | self.conv_B = tf.get_variable('b') 173 | 174 | self.loss_reg = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 175 | self.loss = self.loss_err+self.loss_reg 176 | # self.loss = self.loss_err 177 | 178 | # # learning rate decay 179 | # global_step = tf.Variable(0, name='global_step', trainable=False) 180 | # self.lr = tf.train.exponential_decay(config.lr_start, global_step, 181 | # config.lr_decay_step, config.lr_decay_rate, staircase=False) 182 | 183 | self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss) 184 | 185 | 186 | 187 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_lstm.py: -------------------------------------------------------------------------------- 1 | from nnet_lstm import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i 
in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | for loop in range(2,3): 49 | for predict_year in range(2009,2016): 50 | logging.basicConfig(filename='train_for_hist_alldata_loop'+str(predict_year)+str(loop)+'.log',level=logging.DEBUG) 51 | # # split into train and validate 52 | # index_train = np.nonzero(year_all < predict_year)[0] 53 | # index_validate = np.nonzero(year_all == predict_year)[0] 54 | # index_test = np.nonzero(year_all == predict_year+1)[0] 55 | 56 | # random choose validation set 57 | index_train = np.nonzero(year_all < predict_year)[0] 58 | index_validate = np.nonzero(year_all == predict_year)[0] 59 | print 'train size',index_train.shape[0] 60 | print 'validate size',index_validate.shape[0] 61 | logging.info('train size %d',index_train.shape[0]) 62 | logging.info('validate size',index_validate.shape[0]) 63 | 64 | # calc train image mean (for each band), and then detract (broadcast) 65 | image_mean=np.mean(image_all[index_train],(0,1,2)) 66 | image_all = image_all - image_mean 67 | 68 | image_validate=image_all[index_validate] 69 | yield_validate=yield_all[index_validate] 70 | 71 | for time in range(10,31,4): 72 | RMSE_min = 100 73 | g = tf.Graph() 74 | with g.as_default(): 75 | # modify config 76 | config = Config() 77 | config.H=time 78 | 79 | model= NeuralModel(config,'net') 80 | 81 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 82 | # Launch the graph. 
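# The training loop below builds each batch by averaging two random draws from the
# training indices (a simple mixup-style augmentation on the histograms and yields).
# A minimal NumPy sketch of that idea, assuming two *distinct* draws; note that as
# written below, index_train_batch_1 is reused in both halves of the average, so the
# second draw has no effect.
import numpy as np

def mixed_batch(image_all, yield_all, index_train, B, H):
    idx_a = np.random.choice(index_train, size=B)   # first random batch
    idx_b = np.random.choice(index_train, size=B)   # second random batch
    image_batch = (image_all[idx_a, :, 0:H, :] + image_all[idx_b, :, 0:H, :]) / 2
    yield_batch = (yield_all[idx_a] + yield_all[idx_b]) / 2
    return image_batch, yield_batch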
83 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 84 | sess.run(tf.initialize_all_variables()) 85 | saver=tf.train.Saver() 86 | for i in range(config.train_step): 87 | if i==3000: 88 | config.lr/=10 89 | 90 | if i==8000: 91 | config.lr/=10 92 | 93 | # index_train_batch = np.random.choice(index_train,size=config.B) 94 | index_validate_batch = np.random.choice(index_validate, size=config.B) 95 | 96 | # try data augmentation while training 97 | index_train_batch_1 = np.random.choice(index_train,size=config.B) 98 | index_train_batch_2 = np.random.choice(index_train,size=config.B) 99 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 100 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 101 | 102 | _, train_loss = sess.run([model.train_op, model.loss], feed_dict={ 103 | model.x:image_train_batch, 104 | model.y:yield_train_batch, 105 | model.lr:config.lr, 106 | model.keep_prob: config.drop_out 107 | }) 108 | 109 | if i%500 == 0: 110 | val_loss = sess.run(model.loss, feed_dict={ 111 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 112 | model.y: yield_all[index_validate_batch], 113 | model.keep_prob: 1 114 | }) 115 | 116 | print str(loop)+str(time)+'predict year'+str(predict_year)+'step'+str(i),train_loss,val_loss,config.lr 117 | logging.info('%d %d %d step %d %f %f %f',loop,time,predict_year,i,train_loss,val_loss,config.lr) 118 | if i%500 == 0: 119 | # do validation 120 | pred = [] 121 | real = [] 122 | for j in range(image_validate.shape[0] / config.B): 123 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 124 | pred_temp= sess.run(model.pred, feed_dict={ 125 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 126 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 127 | model.keep_prob: 1 128 | }) 129 | pred.append(pred_temp) 130 | real.append(real_temp) 131 | pred=np.concatenate(pred) 132 | real=np.concatenate(real) 133 | RMSE=np.sqrt(np.mean((pred-real)**2)) 134 | ME=np.mean(pred-real) 135 | 136 | if RMSE 15: 71 | # color_class = 7 72 | # elif rate > 10: 73 | # color_class = 6 74 | # elif rate > 5: 75 | # color_class = 5 76 | # elif rate > 0: 77 | # color_class = 4 78 | # elif rate > -5: 79 | # color_class = 3 80 | # elif rate > -10: 81 | # color_class = 2 82 | # elif rate > -15: 83 | # color_class = 1 84 | # else: 85 | # color_class = 0 86 | 87 | # # plot soybean yield 88 | # if rate > 60: 89 | # color_class = 0 90 | # elif rate > 55: 91 | # color_class = 1 92 | # elif rate > 50: 93 | # color_class = 2 94 | # elif rate > 45: 95 | # color_class = 3 96 | # elif rate > 40: 97 | # color_class = 4 98 | # elif rate > 35: 99 | # color_class = 5 100 | # elif rate > 30: 101 | # color_class = 6 102 | # elif rate > 25: 103 | # color_class = 7 104 | # elif rate > 20: 105 | # color_class = 8 106 | # elif rate > 15: 107 | # color_class = 9 108 | # else: 109 | # color_class = 10 110 | 111 | # plot corn yield 112 | if rate > 200: 113 | color_class = 0 114 | elif rate > 180: 115 | color_class = 1 116 | elif rate > 160: 117 | color_class = 2 118 | elif rate > 140: 119 | color_class = 3 120 | elif rate > 120: 121 | color_class = 4 122 | elif rate > 100: 123 | color_class = 5 124 | elif rate > 80: 125 | color_class = 6 126 | elif rate > 60: 127 | color_class = 7 128 | elif rate > 40: 129 | color_class = 8 130 | elif rate > 20: 131 | color_class = 9 132 | else: 133 | color_class = 10 134 | 135 | color = colors[color_class] 136 | 
p['style'] = path_style + color 137 | 138 | soup=soup.prettify() 139 | with open(path_save, 'wb') as f: 140 | f.write(soup) 141 | 142 | 143 | def yield_map_raw(real,index,path_save,predict_year): 144 | # Read CNN_err prediction 145 | CNN = {} 146 | err_CNN = real 147 | 148 | print 'CNN',err_CNN.min(),err_CNN.max() 149 | 150 | 151 | 152 | 153 | for i in range(real.shape[0]): 154 | loc1 = str(int(index[i,0])) 155 | loc2 = str(int(index[i,1])) 156 | if len(loc1)==1: 157 | loc1='0'+loc1 158 | if len(loc2)==1: 159 | loc2='00'+loc2 160 | if len(loc2)==2: 161 | loc2='0'+loc2 162 | fips = loc1+loc2 163 | CNN[fips] = err_CNN[i] 164 | 165 | '''CNN''' 166 | # Load the SVG map 167 | svg = open('counties.svg', 'r').read() 168 | # Load into Beautiful Soup 169 | soup = BeautifulSoup(svg, selfClosingTags=['defs','sodipodi:namedview']) 170 | # Find counties 171 | paths = soup.findAll('path') 172 | # Map colors 173 | # # plot error: 8 classes 174 | # colors = ["#b2182b", "#d6604d", "#f4a582", "#fddbc7", "#d1e5f0", "#92c5de", "#4393c3", "#2166ac"] 175 | # plot yield: 11 classes 176 | colors = ['#a50026','#d73027','#f46d43','#fdae61','#fee090','#ffffbf','#e0f3f8','#abd9e9','#74add1','#4575b4','#313695'] 177 | 178 | # County style 179 | path_style = 'font-size:12px;fill-rule:nonzero;stroke:#FFFFFF;stroke-opacity:1;stroke-width:0.1;stroke-miterlimit:4;stroke-dasharray:none;stroke-linecap:butt;marker-start:none;stroke-linejoin:bevel;fill:' 180 | # Color the counties based on unemployment rate 181 | for p in paths: 182 | if p['id'] not in ["State_Lines", "separator"]: 183 | try: 184 | rate = CNN[p['id']] 185 | except: 186 | continue 187 | 188 | # # plot error 189 | # if rate > 15: 190 | # color_class = 7 191 | # elif rate > 10: 192 | # color_class = 6 193 | # elif rate > 5: 194 | # color_class = 5 195 | # elif rate > 0: 196 | # color_class = 4 197 | # elif rate > -5: 198 | # color_class = 3 199 | # elif rate > -10: 200 | # color_class = 2 201 | # elif rate > -15: 202 | # color_class = 1 203 | # else: 204 | # color_class = 0 205 | 206 | # plot yield 207 | if rate > 60: 208 | color_class = 0 209 | elif rate > 55: 210 | color_class = 1 211 | elif rate > 50: 212 | color_class = 2 213 | elif rate > 45: 214 | color_class = 3 215 | elif rate > 40: 216 | color_class = 4 217 | elif rate > 35: 218 | color_class = 5 219 | elif rate > 30: 220 | color_class = 6 221 | elif rate > 25: 222 | color_class = 7 223 | elif rate > 20: 224 | color_class = 8 225 | elif rate > 15: 226 | color_class = 9 227 | else: 228 | color_class = 10 229 | 230 | color = colors[color_class] 231 | p['style'] = path_style + color 232 | 233 | soup=soup.prettify() 234 | with open(path_save, 'wb') as f: 235 | f.write(soup) 236 | 237 | if __name__ == "__main__": 238 | path = '/atlas/u/jiaxuan/data/train_results/final/new_L1_L2/' 239 | 240 | # # load baseline 241 | # '''LOAD 2009-2015, no weather''' 242 | # path_data = '/atlas/u/jiaxuan/data/google_drive/img_output/' 243 | # # load mean data 244 | # filename = 'histogram_all_mean.npz' 245 | # content = np.load(path_data + filename) 246 | # image_all = content['output_image'] 247 | # yield_all = content['output_yield'] 248 | # year_all = content['output_year'] 249 | # locations_all = content['output_locations'] 250 | # index_all = content['output_index'] 251 | 252 | # # copy index 253 | # path_load = path+str(0)+str(10)+str(2014)+'result_prediction.npz' 254 | # content_ref=np.load(path_load) 255 | # year_ref=content_ref['year_out'] 256 | # index_ref=content_ref['index_out'] 257 | # 
ref=np.concatenate((year_ref[:,np.newaxis], index_ref),axis=1) 258 | 259 | # print 'before',index_all.shape[0] 260 | # # remove extra index 261 | # list_delete=[] 262 | # for i in range(index_all.shape[0]): 263 | # key = np.array([year_all[i],index_all[i,0],index_all[i,1]]) 264 | # index = np.where(np.all(ref[:,0:3] == key, axis=1)) 265 | # if index[0].shape[0] == 0: 266 | # list_delete.append(i) 267 | # image_all=np.delete(image_all,list_delete,0) 268 | # yield_all=np.delete(yield_all,list_delete,0) 269 | # year_all = np.delete(year_all,list_delete, 0) 270 | # locations_all = np.delete(locations_all, list_delete, 0) 271 | # index_all = np.delete(index_all, list_delete, 0) 272 | # print 'after',index_all.shape[0] 273 | 274 | # # calc NDVI 275 | # image_NDVI = np.zeros([image_all.shape[0],32]) 276 | # for i in range(32): 277 | # image_NDVI[:,i] = (image_all[:,1+9*i]-image_all[:,9*i])/(image_all[:,1+9*i]+image_all[:,9*i]) 278 | 279 | 280 | 281 | 282 | for predict_year in range(2009,2014): 283 | # validate = np.nonzero(year_all == predict_year)[0] 284 | # train = np.nonzero(year_all < predict_year)[0] 285 | for day in range(10,31,4): 286 | # # Ridge regression, NDVI 287 | # feature = image_NDVI[:,0:day] 288 | 289 | # lr = linear_model.Ridge(10) 290 | # lr.fit(feature[train],yield_all[train]) 291 | # Y_pred_reg = lr.predict(feature[validate]) 292 | 293 | # rmse = np.sqrt(np.mean((Y_pred_reg-yield_all[validate])**2)) 294 | # me = np.mean(Y_pred_reg-yield_all[validate])/np.mean(yield_all[validate])*100 295 | # print 'Ridge',predict_year,day,rmse,me 296 | 297 | # # print baseline figure 298 | # path_save = path+'map_baseline/'+str(0)+str(predict_year)+str(day)+'baseline.svg' 299 | # yield_map_raw(Y_pred_reg, index_all[validate], path_save, predict_year) 300 | 301 | 302 | # print CNN figure 303 | path_load = path+str(2)+str(day)+str(predict_year)+'result_prediction.npz' 304 | path_save = path+'map_real/'+str(0)+str(predict_year)+str(day)+'real.svg' 305 | yield_map(path_load, path_save, predict_year,'real') 306 | print predict_year,day 307 | 308 | # print CNN figure 309 | path_load = path+str(2)+str(day)+str(predict_year)+'result_prediction.npz' 310 | path_save = path+'map_pred/'+str(0)+str(predict_year)+str(day)+'pred.svg' 311 | yield_map(path_load, path_save, predict_year,'pred') 312 | print predict_year,day 313 | 314 | -------------------------------------------------------------------------------- /4 model_batch/train_for_hist_alldata_loop_corn.py: -------------------------------------------------------------------------------- 1 | from nnet_for_hist_dropout_stride import * 2 | import logging 3 | 4 | 5 | 6 | if __name__ == "__main__": 7 | config = Config() 8 | summary_train_loss = [] 9 | summary_eval_loss = [] 10 | summary_RMSE = [] 11 | summary_ME = [] 12 | 13 | 14 | # load data to memory 15 | filename = 'histogram_all' + '.npz' 16 | # filename = 'histogram_all_soilweather' + '.npz' 17 | content = np.load(config.load_path + filename) 18 | image_all = content['output_image'] 19 | yield_all = content['output_yield'] 20 | year_all = content['output_year'] 21 | locations_all = content['output_locations'] 22 | index_all = content['output_index'] 23 | 24 | # delete broken image 25 | list_delete=[] 26 | for i in range(image_all.shape[0]): 27 | if np.sum(image_all[i,:,:,:])<=287: 28 | if year_all[i]<2016: 29 | list_delete.append(i) 30 | image_all=np.delete(image_all,list_delete,0) 31 | yield_all=np.delete(yield_all,list_delete,0) 32 | year_all = np.delete(year_all,list_delete, 0) 33 | locations_all = 
np.delete(locations_all, list_delete, 0) 34 | index_all = np.delete(index_all, list_delete, 0) 35 | 36 | 37 | # keep major counties 38 | list_keep=[] 39 | for i in range(image_all.shape[0]): 40 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 41 | list_keep.append(i) 42 | image_all=image_all[list_keep,:,:,:] 43 | yield_all=yield_all[list_keep] 44 | year_all = year_all[list_keep] 45 | locations_all = locations_all[list_keep,:] 46 | index_all = index_all[list_keep,:] 47 | 48 | # match corn yield 49 | # load corn yield 50 | corn_yield = np.genfromtxt('/atlas/u/jiaxuan/git/crop-forecasting/corn_yield.csv', delimiter=',') 51 | # keep data with corn yield 52 | list_delete=[] 53 | for i in range(image_all.shape[0]): 54 | key = np.array([year_all[i],index_all[i,0],index_all[i,1]]) 55 | index = np.where(np.all(corn_yield[:,0:3] == key, axis=1)) 56 | # print index[0].shape 57 | if index[0].shape[0] != 0: 58 | yield_all[i]=corn_yield[index,3] 59 | else: 60 | # print 'del' 61 | list_delete.append(i) 62 | image_all=np.delete(image_all,list_delete,0) 63 | yield_all=np.delete(yield_all,list_delete,0) 64 | year_all = np.delete(year_all,list_delete, 0) 65 | locations_all = np.delete(locations_all, list_delete, 0) 66 | index_all = np.delete(index_all, list_delete, 0) 67 | 68 | 69 | 70 | for loop in range(0,1): 71 | for predict_year in range(2009,2016): 72 | logging.basicConfig(filename=config.save_path+'log/train_for_hist_alldata_loop'+str(predict_year)+str(loop)+'.log',level=logging.DEBUG) 73 | # # split into train and validate 74 | # index_train = np.nonzero(year_all < predict_year)[0] 75 | # index_validate = np.nonzero(year_all == predict_year)[0] 76 | # index_test = np.nonzero(year_all == predict_year+1)[0] 77 | 78 | # random choose validation set 79 | index_train = np.nonzero(year_all < predict_year)[0] 80 | index_validate = np.nonzero(year_all == predict_year)[0] 81 | print 'train size',index_train.shape[0] 82 | print 'validate size',index_validate.shape[0] 83 | logging.info('train size %d',index_train.shape[0]) 84 | logging.info('validate size %d',index_validate.shape[0]) 85 | 86 | 87 | # # calc train image mean (for each band), and then detract (broadcast) 88 | # image_mean=np.mean(image_all[index_train],(0,1,2)) 89 | # image_all = image_all - image_mean 90 | 91 | image_validate=image_all[index_validate] 92 | yield_validate=yield_all[index_validate] 93 | 94 | for time in range(30,31): 95 | RMSE_min = 100 96 | g = tf.Graph() 97 | with g.as_default(): 98 | # modify config 99 | config = Config() 100 | config.H=time 101 | 102 | model= NeuralModel(config,'net') 103 | 104 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 105 | # Launch the graph. 
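# The model imported above (nnet_for_hist_dropout_stride) trains on a composite loss:
# an L2 term on per-county errors plus alpha times the absolute summed error of the
# batch (alpha = 5 for corn, per the comment in that file), which pushes the mean batch
# error toward zero. Minimal NumPy sketch of the same quantities; pred and real are
# made-up stand-ins for model.logits and model.y.
import numpy as np

alpha = 5.0
pred = np.array([160.0, 150.0, 170.0])        # illustrative predicted yields
real = np.array([158.0, 155.0, 166.0])        # illustrative observed yields
loss_err = 0.5 * np.sum((pred - real) ** 2)   # equivalent of tf.nn.l2_loss
loss_reg = np.abs(np.sum(pred - real))        # absolute summed batch error
loss = loss_err + alpha * loss_reg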
106 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 107 | sess.run(tf.initialize_all_variables()) 108 | saver=tf.train.Saver() 109 | for i in range(config.train_step): 110 | if i==4000: 111 | config.lr/=10 112 | 113 | if i==20000: 114 | config.lr/=10 115 | 116 | # index_train_batch = np.random.choice(index_train,size=config.B) 117 | index_validate_batch = np.random.choice(index_validate, size=config.B) 118 | 119 | # try data augmentation while training 120 | shift = 1 121 | index_train_batch_1 = np.random.choice(index_train,size=config.B+shift*2) 122 | index_train_batch_2 = np.random.choice(index_train,size=config.B+shift*2) 123 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 124 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 125 | 126 | arg_index = np.argsort(yield_train_batch) 127 | yield_train_batch = yield_train_batch[arg_index][shift:-shift] 128 | image_train_batch = image_train_batch[arg_index][shift:-shift] 129 | 130 | _, train_loss, train_loss_reg = sess.run([model.train_op, model.loss_err, model.loss_reg], feed_dict={ 131 | model.x:image_train_batch, 132 | model.y:yield_train_batch, 133 | model.lr:config.lr, 134 | model.keep_prob: config.keep_prob 135 | }) 136 | 137 | if i%500 == 0: 138 | val_loss,val_loss_reg = sess.run([model.loss_err,model.loss_reg], feed_dict={ 139 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 140 | model.y: yield_all[index_validate_batch], 141 | model.keep_prob: 1 142 | }) 143 | 144 | print str(loop)+str(time)+'predict year'+str(predict_year)+'step'+str(i),train_loss,train_loss_reg,val_loss,val_loss_reg,config.lr 145 | logging.info('%d %d %d step %d %f %f %f %f %f',loop,time,predict_year,i,train_loss,train_loss_reg,val_loss,val_loss_reg,config.lr) 146 | if i%500 == 0: 147 | # do validation 148 | pred = [] 149 | real = [] 150 | for j in range(image_validate.shape[0] / config.B): 151 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 152 | pred_temp= sess.run(model.logits, feed_dict={ 153 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 154 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 155 | model.keep_prob: 1 156 | }) 157 | pred.append(pred_temp) 158 | real.append(real_temp) 159 | pred=np.concatenate(pred) 160 | real=np.concatenate(real) 161 | RMSE=np.sqrt(np.mean((pred-real)**2)) 162 | ME=np.mean(pred-real) 163 | RMSE_ideal = np.sqrt(np.mean((pred-ME-real)**2)) 164 | arg_index = np.argsort(pred) 165 | pred = pred[arg_index][50:-50] 166 | real = real[arg_index][50:-50] 167 | ME_part = np.mean(pred-real) 168 | 169 | if RMSE0 110 | filter_5000=image_temp<5000 111 | filter=filter_0*filter_5000 112 | return float(np.count_nonzero(filter))/image_temp.size 113 | 114 | def preprocess_save_data(): 115 | 116 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image" 117 | MODIS_temperature_dir="/atlas/u/jiaxuan/data/google_drive/data_temperature" 118 | MODIS_mask_dir="/atlas/u/jiaxuan/data/google_drive/data_mask" 119 | 120 | img_output_dir="/atlas/u/jiaxuan/data/google_drive/img_output/" 121 | 122 | # MODIS_processed_dir="C:/360Downloads/6_Data_county_processed_scaled/" 123 | 124 | # MODIS_dir="/atlas/u/jiaxuan/data/MODIS_data_county/3_Data_county" 125 | # MODIS_temperature_dir="/atlas/u/jiaxuan/data/MODIS_data_county_temperature" 126 | # MODIS_mask_dir="/atlas/u/jiaxuan/data/MODIS_data_county_mask" 127 | # 
MODIS_processed_dir="/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/" 128 | 129 | data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float) 130 | count=1 131 | for root, dirs, files in os.walk(MODIS_dir): 132 | for file in files: 133 | if file.endswith(".tif"): 134 | MODIS_path=os.path.join(MODIS_dir, file) 135 | # check file size to see if it's broken 136 | # if os.path.getsize(MODIS_path) < 10000000: 137 | # print 'file broken, continue' 138 | # continue 139 | MODIS_temperature_path=os.path.join(MODIS_temperature_dir,file) 140 | MODIS_mask_path=os.path.join(MODIS_mask_dir,file) 141 | 142 | # get geo location 143 | raw = file.replace('_',' ').replace('.',' ').split() 144 | loc1 = int(raw[0]) 145 | loc2 = int(raw[1]) 146 | # read image 147 | try: 148 | MODIS_img = np.transpose(np.array(gdal.Open(MODIS_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 149 | except ValueError as msg: 150 | print msg 151 | continue 152 | # read temperature 153 | MODIS_temperature_img = np.transpose(np.array(gdal.Open(MODIS_temperature_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 154 | # shift 155 | MODIS_temperature_img = MODIS_temperature_img-12000 156 | # scale 157 | MODIS_temperature_img = MODIS_temperature_img*1.25 158 | # clean 159 | MODIS_temperature_img[MODIS_temperature_img<0]=0 160 | MODIS_temperature_img[MODIS_temperature_img>5000]=5000 161 | # read mask 162 | MODIS_mask_img = np.transpose(np.array(gdal.Open(MODIS_mask_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 163 | # Non-crop = 0, crop = 1 164 | MODIS_mask_img[MODIS_mask_img != 12] = 0 165 | MODIS_mask_img[MODIS_mask_img == 12] = 1 166 | 167 | # Divide image into years 168 | MODIS_img_list=divide_image(MODIS_img, 0, 46 * 7, 14) 169 | MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, 46 * 2, 14) 170 | MODIS_mask_img = extend_mask(MODIS_mask_img, 3) 171 | MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, 14) 172 | 173 | # Merge image and temperature 174 | MODIS_list = merge_image(MODIS_img_list,MODIS_temperature_img_list) 175 | 176 | # Do the mask job 177 | MODIS_list_masked = mask_image(MODIS_list,MODIS_mask_img_list) 178 | 179 | # check if the result is in the list 180 | year_start = 2003 181 | for i in range(0, 14): 182 | year = i+year_start 183 | key = np.array([year,loc1,loc2]) 184 | if np.sum(np.all(data_yield[:,0:3] == key, axis=1))>0: 185 | # save as .npy 186 | filename=img_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 187 | np.save(filename,MODIS_list_masked[i]) 188 | print filename,':written ',str(count) 189 | count+=1 190 | 191 | def preprocess_save_data_parallel(file): 192 | 193 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image_full" 194 | MODIS_temperature_dir="/atlas/u/jiaxuan/data/google_drive/data_temperature" 195 | MODIS_mask_dir="/atlas/u/jiaxuan/data/google_drive/data_mask" 196 | 197 | img_output_dir="/atlas/u/jiaxuan/data/google_drive/img_full_output/" 198 | img_zoom_output_dir="/atlas/u/jiaxuan/data/google_drive/img_zoom_full_output/" 199 | 200 | # MODIS_processed_dir="C:/360Downloads/6_Data_county_processed_scaled/" 201 | 202 | # MODIS_dir="/atlas/u/jiaxuan/data/MODIS_data_county/3_Data_county" 203 | # MODIS_temperature_dir="/atlas/u/jiaxuan/data/MODIS_data_county_temperature" 204 | # MODIS_mask_dir="/atlas/u/jiaxuan/data/MODIS_data_county_mask" 205 | # MODIS_processed_dir="/atlas/u/jiaxuan/data/MODIS_data_county_processed_compressed/" 206 | 207 | data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float) 208 | if 
file.endswith(".tif"): 209 | MODIS_path=os.path.join(MODIS_dir, file) 210 | # check file size to see if it's broken 211 | # if os.path.getsize(MODIS_path) < 10000000: 212 | # print 'file broken, continue' 213 | # continue 214 | MODIS_temperature_path=os.path.join(MODIS_temperature_dir,file) 215 | MODIS_mask_path=os.path.join(MODIS_mask_dir,file) 216 | 217 | # get geo location 218 | raw = file.replace('_',' ').replace('.',' ').split() 219 | loc1 = int(raw[0]) 220 | loc2 = int(raw[1]) 221 | # read image 222 | try: 223 | MODIS_img = np.transpose(np.array(gdal.Open(MODIS_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 224 | except ValueError as msg: 225 | print msg 226 | # read temperature 227 | MODIS_temperature_img = np.transpose(np.array(gdal.Open(MODIS_temperature_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 228 | # shift 229 | # MODIS_temperature_img = MODIS_temperature_img-12000 230 | # scale 231 | # MODIS_temperature_img = MODIS_temperature_img*1.25 232 | # clean 233 | # MODIS_temperature_img[MODIS_temperature_img<0]=0 234 | # MODIS_temperature_img[MODIS_temperature_img>5000]=5000 235 | # read mask 236 | MODIS_mask_img = np.transpose(np.array(gdal.Open(MODIS_mask_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0)) 237 | # Non-crop = 0, crop = 1 238 | MODIS_mask_img[MODIS_mask_img != 12] = 0 239 | MODIS_mask_img[MODIS_mask_img == 12] = 1 240 | 241 | # Divide image into years 242 | MODIS_img_list=divide_image(MODIS_img, 0, 46 * 7, 14) 243 | MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, 46 * 2, 14) 244 | MODIS_mask_img = extend_mask(MODIS_mask_img, 3) 245 | MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, 14) 246 | 247 | # Merge image and temperature 248 | MODIS_list = merge_image(MODIS_img_list,MODIS_temperature_img_list) 249 | 250 | # Do the mask job 251 | MODIS_list_masked = mask_image(MODIS_list,MODIS_mask_img_list) 252 | 253 | # check if the result is in the list 254 | year_start = 2003 255 | for i in range(0, 14): 256 | year = i+year_start 257 | key = np.array([year,loc1,loc2]) 258 | if np.sum(np.all(data_yield[:,0:3] == key, axis=1))>0: 259 | # # detect quality 260 | # quality = quality_dector(MODIS_list_masked[i]) 261 | # if quality < 0.01: 262 | # print 'omitted' 263 | # print year,loc1,loc2,quality 264 | 265 | # # delete 266 | # yield_all = np.genfromtxt('yield_final_highquality.csv', delimiter=',') 267 | # key = np.array([year,loc1,loc2]) 268 | # index = np.where(np.all(yield_all[:,0:3] == key, axis=1)) 269 | # yield_all=np.delete(yield_all, index, axis=0) 270 | # np.savetxt("yield_final_highquality.csv", yield_all, delimiter=",") 271 | 272 | # continue 273 | 274 | ## 1 save original file 275 | filename=img_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 276 | np.save(filename,MODIS_list_masked[i]) 277 | print filename,':written ' 278 | 279 | ## 2 save zoomed file (48*48) 280 | zoom0 = float(48) / MODIS_list_masked[i].shape[0] 281 | zoom1 = float(48) / MODIS_list_masked[i].shape[1] 282 | output_image = zoom(MODIS_list_masked[i], (zoom0, zoom1, 1)) 283 | 284 | filename=img_zoom_output_dir+str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy' 285 | np.save(filename,output_image) 286 | print filename,':written ' 287 | 288 | 289 | 290 | 291 | if __name__ == "__main__": 292 | # # save data 293 | MODIS_dir="/atlas/u/jiaxuan/data/google_drive/data_image_full" 294 | for _, _, files in os.walk(MODIS_dir): 295 | Parallel(n_jobs=12)(delayed(preprocess_save_data_parallel)(file) for file in files) 296 | 297 | # # clean yield (low quality) 298 | # 
check_data_integrity_del() 299 | # # check integrity 300 | # check_data_integrity() 301 | 302 | -------------------------------------------------------------------------------- /3 model/train_for_hist_alldata_lstm.py: -------------------------------------------------------------------------------- 1 | from nnet_lstm import * 2 | from GP_crop_v3 import * 3 | import logging 4 | 5 | 6 | 7 | if __name__ == "__main__": 8 | predict_year = 2013 9 | logging.basicConfig(filename='train_for_hist_alldata'+str(predict_year)+'.log',level=logging.DEBUG) 10 | # Create a coordinator 11 | config = Config() 12 | 13 | # load data to memory 14 | filename = 'histogram_all' + '.npz' 15 | # filename = 'histogram_all_soilweather' + '.npz' 16 | content = np.load(config.load_path + filename) 17 | image_all = content['output_image'] 18 | yield_all = content['output_yield'] 19 | year_all = content['output_year'] 20 | locations_all = content['output_locations'] 21 | index_all = content['output_index'] 22 | 23 | # delete broken image 24 | list_delete=[] 25 | for i in range(image_all.shape[0]): 26 | if np.sum(image_all[i,:,:,:])<=287: 27 | if year_all[i]<2016: 28 | list_delete.append(i) 29 | image_all=np.delete(image_all,list_delete,0) 30 | yield_all=np.delete(yield_all,list_delete,0) 31 | year_all = np.delete(year_all,list_delete, 0) 32 | locations_all = np.delete(locations_all, list_delete, 0) 33 | index_all = np.delete(index_all, list_delete, 0) 34 | 35 | 36 | # keep major counties 37 | list_keep=[] 38 | for i in range(image_all.shape[0]): 39 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 40 | list_keep.append(i) 41 | image_all=image_all[list_keep,:,:,:] 42 | yield_all=yield_all[list_keep] 43 | year_all = year_all[list_keep] 44 | locations_all = locations_all[list_keep,:] 45 | index_all = index_all[list_keep,:] 46 | 47 | # split into train and validate 48 | index_train = np.nonzero(year_all < predict_year)[0] 49 | index_validate = np.nonzero(year_all == predict_year)[0] 50 | print 'train size',index_train.shape[0] 51 | print 'validate size',index_validate.shape[0] 52 | 53 | # calc train image mean (for each band), and then detract (broadcast) 54 | image_mean=np.mean(image_all[index_train],(0,1,2)) 55 | image_all = image_all - image_mean 56 | 57 | image_validate=image_all[index_validate] 58 | yield_validate=yield_all[index_validate] 59 | 60 | model= NeuralModel(config,'net') 61 | 62 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.22) 63 | # Launch the graph. 64 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 65 | sess.run(tf.initialize_all_variables()) 66 | 67 | summary_train_loss = [] 68 | summary_eval_loss = [] 69 | summary_RMSE = [] 70 | summary_ME = [] 71 | 72 | train_loss=0 73 | val_loss=0 74 | val_prediction = 0 75 | val_deviation = np.zeros([config.B]) 76 | # ######################### 77 | # block when test 78 | # add saver 79 | saver=tf.train.Saver() 80 | # Restore variables from disk. 
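# The split and normalization above follow the pattern used throughout these training
# scripts: all years before predict_year form the training set, predict_year itself is
# the validation set, and the per-band mean is computed on training examples only, then
# broadcast-subtracted from every example. Minimal NumPy sketch with a tiny random
# stand-in array (the real arrays come from histogram_all.npz loaded above).
import numpy as np

year_all = np.array([2010, 2011, 2012, 2013, 2013])
image_all = np.random.rand(5, 32, 32, 9).astype(np.float32)   # stand-in histograms
predict_year = 2013

index_train = np.nonzero(year_all < predict_year)[0]
index_validate = np.nonzero(year_all == predict_year)[0]
image_mean = np.mean(image_all[index_train], (0, 1, 2))   # shape (9,), one mean per band
image_all = image_all - image_mean                        # broadcast over all examples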
81 | try: 82 | saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 83 | # Restore log results 84 | npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 85 | summary_train_loss = npzfile['summary_train_loss'].tolist() 86 | summary_eval_loss = npzfile['summary_eval_loss'].tolist() 87 | summary_RMSE = npzfile['summary_RMSE'].tolist() 88 | summary_ME = npzfile['summary_ME'].tolist() 89 | print("Model restored.") 90 | except: 91 | print 'No history model found' 92 | # ######################### 93 | 94 | 95 | RMSE_min = 100 96 | try: 97 | for i in range(config.train_step): 98 | if i==3000: 99 | config.lr/=10 100 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 101 | # # Restore log results 102 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 103 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 104 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 105 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 106 | # summary_ME = npzfile['summary_ME'].tolist() 107 | # print("Model restored.") 108 | if i==8000: 109 | config.lr/=10 110 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 111 | # # Restore log results 112 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 113 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 114 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 115 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 116 | # summary_ME = npzfile['summary_ME'].tolist() 117 | # print("Model restored.") 118 | # if i==12000: 119 | # config.lr/=10 120 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 121 | # # Restore log results 122 | # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 123 | # summary_train_loss = npzfile['summary_train_loss'].tolist() 124 | # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 125 | # summary_RMSE = npzfile['summary_RMSE'].tolist() 126 | # summary_ME = npzfile['summary_ME'].tolist() 127 | # print("Model restored.") 128 | 129 | # No augmentation 130 | # index_train_batch = np.random.choice(index_train,size=config.B) 131 | # image_train_batch = image_all[index_train_batch,:,0:config.H,:] 132 | # yield_train_batch = yield_all[index_train_batch] 133 | # year_train_batch = year_all[index_train_batch,np.newaxis] 134 | 135 | # try data augmentation while training 136 | index_train_batch_1 = np.random.choice(index_train,size=config.B) 137 | index_train_batch_2 = np.random.choice(index_train,size=config.B) 138 | image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 139 | yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 140 | # year_train_batch = (year_all[index_train_batch_1,np.newaxis]+year_all[index_train_batch_2,np.newaxis])/2 141 | 142 | index_validate_batch = np.random.choice(index_validate, size=config.B) 143 | 144 | _, train_loss = sess.run([model.train_op, model.loss], feed_dict={ 145 | model.x:image_train_batch, 146 | model.y:yield_train_batch, 147 | model.lr:config.lr, 148 | model.keep_prob: config.drop_out 149 | }) 150 | 151 | if i%200 == 0: 152 | val_loss,fc6,W,B = sess.run([model.loss,model.feature,model.dense_W,model.dense_B], feed_dict={ 153 | model.x: image_all[index_validate_batch, :, 0:config.H, :], 154 | model.y: yield_all[index_validate_batch], 155 | model.keep_prob: 1 156 | }) 157 | 158 | print 'predict 
year'+str(predict_year)+'step'+str(i),train_loss,val_loss,config.lr 159 | logging.info('predict year %d step %d %f %f %f',predict_year,i,train_loss,val_loss,config.lr) 160 | if i%200 == 0: 161 | # do validation 162 | pred = [] 163 | real = [] 164 | for j in range(image_validate.shape[0] / config.B): 165 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 166 | pred_temp= sess.run(model.pred, feed_dict={ 167 | model.x: image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:], 168 | model.y: yield_validate[j * config.B:(j + 1) * config.B], 169 | model.keep_prob: 1 170 | }) 171 | pred.append(pred_temp) 172 | real.append(real_temp) 173 | pred=np.concatenate(pred) 174 | real=np.concatenate(real) 175 | RMSE=np.sqrt(np.mean((pred-real)**2)) 176 | ME=np.mean(pred-real) 177 | 178 | if RMSEyear_current-6 160 | # c5 = year_out!=2012 161 | # c3 = year_out==year_current-1 162 | ind_train = np.where(c3*c4)[0] 163 | print 'shape of train set',ind_train.shape 164 | index_reg = np.where(year_out!=2016)[0] 165 | 166 | '''4 normalize all features''' 167 | bias = np.ones([feature_out.shape[0],1]) 168 | feature_out = np.concatenate((feature_out,bias),axis=1) 169 | 170 | locations_mean = np.mean(locations_out, axis=0,keepdims=True) 171 | locations_std = np.mean(locations_out,axis=0,keepdims=True) 172 | locations_scale = np.amax(locations_out,axis=0)-np.amin(locations_out,axis=0) 173 | locations_out -= locations_mean 174 | locations_out /= locations_scale 175 | 176 | year_out = year_out[:,np.newaxis] 177 | year_mean = np.mean(year_out, axis=0,keepdims=True) 178 | year_std = np.mean(year_out,axis=0,keepdims=True) 179 | year_scale = np.amax(year_out,axis=0)-np.amin(year_out,axis=0) 180 | year_out -= year_mean 181 | year_out /= year_scale 182 | 183 | real_out = real_out[:,np.newaxis] 184 | # print 'year_out',np.amin(year_out),np.amax(year_out) 185 | 186 | 187 | 188 | # print index_reg.shape 189 | # ''' 190 | # remove yearly effect (optional) 191 | # ''' 192 | # print 'remove yearly effect' 193 | # lr = linear_model.Ridge(alpha=0.1,fit_intercept=True) 194 | # lr.fit(year_out[index_reg,:],real_out[index_reg,:]) 195 | # year_weight = lr.coef_[0] 196 | # print lr.coef_.shape 197 | # print lr.coef_,lr.intercept_ 198 | 199 | # plt.plot(year_out[index_reg,:],real_out[index_reg,:]) 200 | # plt.show() 201 | # real_out = real_out-year_out*year_weight 202 | 203 | 204 | # split dataset 205 | feat_train = feature_out[ind_train,] 206 | feat_test = feature_out[ind_test,] 207 | Y_train = real_out[ind_train,] 208 | Y_test = real_out[ind_test,] 209 | loc_train = locations_out[ind_train,] 210 | loc_test = locations_out[ind_test,] 211 | year_train = year_out[ind_train,] 212 | year_test = year_out[ind_test,] 213 | 214 | 215 | 216 | '''baseline''' 217 | # print "The std deviation of test yield is", np.std(real_out[ind_test,]) 218 | # print "Average yield is", np.mean(real_out[ind_test]) 219 | 220 | # '''Ridge regression''' 221 | # for alpha in np.linspace(-4, 2,num=5): 222 | # '''Ridge regression''' 223 | # print np.power(10,alpha) 224 | # lr = linear_model.Ridge(alpha =np.power(10,alpha),fit_intercept=False) 225 | # lr.fit(np.concatenate((feat_train,year_train),axis=1),Y_train) 226 | # Y_pred_reg = lr.predict(np.concatenate((feat_test,year_test),axis=1)) 227 | 228 | # # lr = linear_model.Ridge(alpha =np.power(10,alpha)) 229 | # # lr.fit(feat_train,Y_train) 230 | # # Y_pred_reg = lr.predict(feat_test) 231 | 232 | # # print lr.coef_.shape 233 | # # print lr.coef_ 234 | # # print lr.intercept_ 235 | 236 | # print "The RMSE 
of ridge regression is", np.sqrt(np.mean((Y_pred_reg-real_out[ind_test,])**2)) 237 | # print "Mean Error of ridge regression is",np.mean(Y_pred_reg-real_out[ind_test,]) 238 | 239 | 240 | '''CNN baseline''' 241 | print "The RMSE of CNN model is", np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test])**2)) 242 | '''CNN weight regression''' 243 | # print "The RMSE of regression, using CNN weight", np.sqrt(np.mean((real_out[ind_test,]-(np.dot(feat_test,W)))**2)) 244 | print "Mean Error of CNN is",np.mean(pred_out[ind_test]-real_out[ind_test,0]) 245 | # print "Average prediction of CNN is", np.mean(pred_out[ind_test]) 246 | 247 | 248 | 249 | ''' 250 | Gaussian Prcoess Model 3, 251 | Linear GP as on page 28 of GP for machine learning 252 | kernel: spatial*time 253 | 254 | ''' 255 | 256 | sigma=1 257 | l_s = 0.5 258 | l_t = 1.5 259 | noise = 0.1 260 | const = 0.01 261 | 262 | X_train = feat_train 263 | X_test = feat_test 264 | n1 = X_train.shape[0] 265 | n2 = X_test.shape[0] 266 | X = np.concatenate((X_train,X_test),axis=0) 267 | LOC = np.concatenate((loc_train,loc_test),axis=0) 268 | YEAR = np.concatenate((year_train,year_test),axis=0) 269 | pairwise_dists_loc = squareform(pdist(LOC, 'euclidean'))**2/l_s**2 270 | pairwise_dists_year = squareform(pdist(YEAR, 'euclidean'))**2/l_t**2 271 | 272 | n=np.zeros([n1+n2,n1+n2]) 273 | n[0:n1,0:n1] += noise*np.identity(n1) 274 | kernel_mat_3 = sigma*(np.exp(-pairwise_dists_loc)*np.exp(-pairwise_dists_year))+n 275 | b = W 276 | B = np.identity(X_train.shape[1]) 277 | 278 | print l_s,l_t,noise,const 279 | B /= const # B is diag, inverse is simplified 280 | K_inv = np.linalg.inv(kernel_mat_3[0:n1,0:n1]) 281 | beta = np.linalg.inv(B+X_train.T.dot(K_inv).dot(X_train)).dot( 282 | X_train.T.dot(K_inv).dot(Y_train.reshape([n1,1]))+B.dot(b)) 283 | Y_pred_3 = X_test.dot(beta) + kernel_mat_3[n1:(n1+n2),0:n1].dot(K_inv\ 284 | ).dot(Y_train.reshape([n1,1])-X_train.dot(beta)) 285 | 286 | RMSE_GP=np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape))**2)) 287 | ME_GP=np.mean(Y_pred_3[:,0]-real_out[ind_test,0]) 288 | Average_GP=np.mean(Y_pred_3[:,0]) 289 | print "The RMSE of GP model is", RMSE_GP 290 | print "Mean Error of GP model is",ME_GP 291 | # print "Average prediction of GP is",Average_GP 292 | 293 | '''If there is no bias''' 294 | print "if there is no bias, the RMSE is" 295 | print "CNN",np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test]+np.mean(pred_out[ind_test]-real_out[ind_test,0]))**2)) 296 | print "GP",np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape)-np.mean(Y_pred_3[:,0]-real_out[ind_test,0]))**2)) 297 | 298 | return (RMSE_GP,ME_GP) 299 | 300 | # '''3 weighted sum''' 301 | # area_total = 0 302 | # yield_real_total = 0 303 | # yield_pred_total_CNN = 0 304 | # yield_pred_total_GP = 0 305 | # for count,i in enumerate(ind_test): 306 | # # get area 307 | # year = year_current 308 | # loc1 = index_out[i,0] 309 | # loc2 = index_out[i,1] 310 | # key = np.array([year,loc1,loc2]) 311 | # index = np.where(np.all(area[:,0:3] == key, axis=1)) 312 | # # print i,key,index 313 | # area_current = area[index,3] 314 | 315 | # area_total+=area_current 316 | # yield_real_total+=area_current*real_out[i,] 317 | # yield_pred_total_CNN+=area_current*pred_out[i] 318 | # yield_pred_total_GP+=area_current*Y_pred_3[count,] 319 | 320 | # real_average = yield_real_total/area_total 321 | # pred_average_CNN = yield_pred_total_CNN/area_total 322 | # pred_average_GP = yield_pred_total_GP/area_total 323 | # print 'real_average',real_average 324 | 
# print 'pred_average_CNN',pred_average_CNN 325 | # print 'pred_average_GP',pred_average_GP 326 | 327 | 328 | 329 | 330 | # if __name__ == "__main__": 331 | # # predict_year = 2013 332 | 333 | # # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 334 | # # path = save_path + str(predict_year)+'result_prediction.npz' 335 | 336 | # # RMSE = Parallel(n_jobs=8)(delayed(GaussianProcess)(2012,path_normal+'2012result_day'+str(i)+'.npz') for i in range(9,33)) 337 | # # Ridge(predict_year,path) 338 | 339 | # for predict_year in range(2012,2016): 340 | # save_path = '/atlas/u/jiaxuan/data/train_results/final/yearly/archive/' 341 | # path = save_path + str(predict_year)+'result_prediction.npz' 342 | # GaussianProcess(predict_year,path) 343 | 344 | 345 | 346 | 347 | -------------------------------------------------------------------------------- /6 result_analysis/GP_crop_v3.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | # import pdb 4 | from scipy.spatial.distance import pdist, squareform 5 | from sklearn import linear_model 6 | 7 | from joblib import Parallel, delayed 8 | import multiprocessing 9 | import logging 10 | 11 | def Ridge(year,path): 12 | year_current = year 13 | npzfile = np.load(path) 14 | 15 | # load crop area 16 | area = np.genfromtxt('acres_harvested.csv', delimiter=',') 17 | 18 | # read 19 | pred_out=npzfile['pred_out'] 20 | real_out=npzfile['real_out'] 21 | feature_out=npzfile['feature_out'] 22 | year_out=npzfile['year_out'] 23 | locations_out=npzfile['locations_out'] 24 | index_out=npzfile['index_out'] 25 | W = npzfile['weight_out'] 26 | b = npzfile['b_out'] 27 | W = np.concatenate((W,b)) 28 | 29 | print pred_out.shape,real_out.shape 30 | print year_out.shape,locations_out.shape 31 | # plt.plot(year_out,pred_out-real_out,'.') 32 | # plt.show() 33 | 34 | '''2 divide dataset''' 35 | 36 | # get train, validate, test index 37 | c1 = year_out==year_current 38 | # c2 = (index_out[:,0]==5)+(index_out[:,0]==17)+(index_out[:,0]==18)+(index_out[:,0]==19)+(index_out[:,0]==20)+(index_out[:,0]==27)+(index_out[:,0]==29)+(index_out[:,0]==31)+(index_out[:,0]==38)+(index_out[:,0]==39)+(index_out[:,0]==46) 39 | ind_test = np.where(c1)[0] 40 | print 'shape of test set',ind_test.shape 41 | 42 | c3 = year_outyear_current-6 160 | # c5 = year_out!=2012 161 | # c3 = year_out==year_current-1 162 | ind_train = np.where(c3*c4)[0] 163 | print 'shape of train set',ind_train.shape 164 | index_reg = np.where(year_out!=2016)[0] 165 | 166 | '''4 normalize all features''' 167 | bias = np.ones([feature_out.shape[0],1]) 168 | feature_out = np.concatenate((feature_out,bias),axis=1) 169 | 170 | locations_mean = np.mean(locations_out, axis=0,keepdims=True) 171 | locations_std = np.mean(locations_out,axis=0,keepdims=True) 172 | locations_scale = np.amax(locations_out,axis=0)-np.amin(locations_out,axis=0) 173 | locations_out -= locations_mean 174 | locations_out /= locations_scale 175 | 176 | year_out = year_out[:,np.newaxis] 177 | year_mean = np.mean(year_out, axis=0,keepdims=True) 178 | year_std = np.mean(year_out,axis=0,keepdims=True) 179 | year_scale = np.amax(year_out,axis=0)-np.amin(year_out,axis=0) 180 | year_out -= year_mean 181 | year_out /= year_scale 182 | 183 | real_out = real_out[:,np.newaxis] 184 | # print 'year_out',np.amin(year_out),np.amax(year_out) 185 | 186 | 187 | 188 | # print index_reg.shape 189 | # ''' 190 | # remove yearly effect (optional) 191 | # ''' 192 | # print 
'remove yearly effect' 193 | # lr = linear_model.Ridge(alpha=0.1,fit_intercept=True) 194 | # lr.fit(year_out[index_reg,:],real_out[index_reg,:]) 195 | # year_weight = lr.coef_[0] 196 | # print lr.coef_.shape 197 | # print lr.coef_,lr.intercept_ 198 | 199 | # plt.plot(year_out[index_reg,:],real_out[index_reg,:]) 200 | # plt.show() 201 | # real_out = real_out-year_out*year_weight 202 | 203 | 204 | # split dataset 205 | feat_train = feature_out[ind_train,] 206 | feat_test = feature_out[ind_test,] 207 | Y_train = real_out[ind_train,] 208 | Y_test = real_out[ind_test,] 209 | loc_train = locations_out[ind_train,] 210 | loc_test = locations_out[ind_test,] 211 | year_train = year_out[ind_train,] 212 | year_test = year_out[ind_test,] 213 | 214 | 215 | 216 | '''baseline''' 217 | # print "The std deviation of test yield is", np.std(real_out[ind_test,]) 218 | # print "Average yield is", np.mean(real_out[ind_test]) 219 | 220 | # '''Ridge regression''' 221 | # for alpha in np.linspace(-4, 2,num=5): 222 | # '''Ridge regression''' 223 | # print np.power(10,alpha) 224 | # lr = linear_model.Ridge(alpha =np.power(10,alpha),fit_intercept=False) 225 | # lr.fit(np.concatenate((feat_train,year_train),axis=1),Y_train) 226 | # Y_pred_reg = lr.predict(np.concatenate((feat_test,year_test),axis=1)) 227 | 228 | # # lr = linear_model.Ridge(alpha =np.power(10,alpha)) 229 | # # lr.fit(feat_train,Y_train) 230 | # # Y_pred_reg = lr.predict(feat_test) 231 | 232 | # # print lr.coef_.shape 233 | # # print lr.coef_ 234 | # # print lr.intercept_ 235 | 236 | # print "The RMSE of ridge regression is", np.sqrt(np.mean((Y_pred_reg-real_out[ind_test,])**2)) 237 | # print "Mean Error of ridge regression is",np.mean(Y_pred_reg-real_out[ind_test,]) 238 | 239 | 240 | '''CNN baseline''' 241 | print "The RMSE of CNN model is", np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test])**2)) 242 | '''CNN weight regression''' 243 | # print "The RMSE of regression, using CNN weight", np.sqrt(np.mean((real_out[ind_test,]-(np.dot(feat_test,W)))**2)) 244 | print "Mean Error of CNN is",np.mean(pred_out[ind_test]-real_out[ind_test,0]) 245 | # print "Average prediction of CNN is", np.mean(pred_out[ind_test]) 246 | 247 | 248 | 249 | ''' 250 | Gaussian Prcoess Model 3, 251 | Linear GP as on page 28 of GP for machine learning 252 | kernel: spatial*time 253 | 254 | ''' 255 | 256 | sigma=1 257 | l_s = 0.5 258 | l_t = 1.5 259 | noise = 0.1 260 | const = 0.01 261 | 262 | X_train = feat_train 263 | X_test = feat_test 264 | n1 = X_train.shape[0] 265 | n2 = X_test.shape[0] 266 | X = np.concatenate((X_train,X_test),axis=0) 267 | LOC = np.concatenate((loc_train,loc_test),axis=0) 268 | YEAR = np.concatenate((year_train,year_test),axis=0) 269 | pairwise_dists_loc = squareform(pdist(LOC, 'euclidean'))**2/l_s**2 270 | pairwise_dists_year = squareform(pdist(YEAR, 'euclidean'))**2/l_t**2 271 | 272 | n=np.zeros([n1+n2,n1+n2]) 273 | n[0:n1,0:n1] += noise*np.identity(n1) 274 | kernel_mat_3 = sigma*(np.exp(-pairwise_dists_loc)*np.exp(-pairwise_dists_year))+n 275 | b = W 276 | B = np.identity(X_train.shape[1]) 277 | 278 | print l_s,l_t,noise,const 279 | B /= const # B is diag, inverse is simplified 280 | K_inv = np.linalg.inv(kernel_mat_3[0:n1,0:n1]) 281 | beta = np.linalg.inv(B+X_train.T.dot(K_inv).dot(X_train)).dot( 282 | X_train.T.dot(K_inv).dot(Y_train.reshape([n1,1]))+B.dot(b)) 283 | Y_pred_3 = X_test.dot(beta) + kernel_mat_3[n1:(n1+n2),0:n1].dot(K_inv\ 284 | ).dot(Y_train.reshape([n1,1])-X_train.dot(beta)) 285 | 286 | 
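# The block above computes the posterior mean of a GP with an explicit linear mean function
# ("GP for machine learning", i.e. Rasmussen & Williams, Sec. 2.7):
#   beta_hat = (B_inv + X^T K^{-1} X)^{-1} (X^T K^{-1} y + B_inv b)
#   y_star   = X_star beta_hat + K(X_star, X) K^{-1} (y - X beta_hat)
# where K is the spatial*temporal RBF kernel with observation noise added on the training
# block, and the matrix named B above (after `B /= const`) plays the role of B_inv.
# Below is a sketch of the same computation written with np.linalg.solve instead of explicit
# inverses (numerically safer, same result up to round-off); the function and argument names
# are illustrative, not part of this repository.
import numpy as np
def gp_linear_mean_predict(K_train, K_cross, X_tr, X_te, y_tr, B_inv, b):
    # K_train: (n1,n1) training kernel incl. noise; K_cross: (n2,n1) test/train kernel
    Kinv_X = np.linalg.solve(K_train, X_tr)                    # K^{-1} X
    Kinv_y = np.linalg.solve(K_train, y_tr)                    # K^{-1} y
    A = B_inv + X_tr.T.dot(Kinv_X)
    beta_hat = np.linalg.solve(A, X_tr.T.dot(Kinv_y) + B_inv.dot(b))
    resid = y_tr - X_tr.dot(beta_hat)                          # residuals after the linear mean
    return X_te.dot(beta_hat) + K_cross.dot(np.linalg.solve(K_train, resid))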
RMSE_GP=np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape))**2)) 287 | ME_GP=np.mean(Y_pred_3[:,0]-real_out[ind_test,0]) 288 | Average_GP=np.mean(Y_pred_3[:,0]) 289 | print "The RMSE of GP model is", RMSE_GP 290 | print "Mean Error of GP model is",ME_GP 291 | # print "Average prediction of GP is",Average_GP 292 | 293 | '''If there is no bias''' 294 | print "if there is no bias, the RMSE is" 295 | print "CNN",np.sqrt(np.mean((real_out[ind_test,0]-pred_out[ind_test]+np.mean(pred_out[ind_test]-real_out[ind_test,0]))**2)) 296 | print "GP",np.sqrt(np.mean((Y_pred_3-real_out[ind_test,].reshape(Y_pred_3.shape)-np.mean(Y_pred_3[:,0]-real_out[ind_test,0]))**2)) 297 | 298 | return (RMSE_GP,ME_GP) 299 | 300 | # '''3 weighted sum''' 301 | # area_total = 0 302 | # yield_real_total = 0 303 | # yield_pred_total_CNN = 0 304 | # yield_pred_total_GP = 0 305 | # for count,i in enumerate(ind_test): 306 | # # get area 307 | # year = year_current 308 | # loc1 = index_out[i,0] 309 | # loc2 = index_out[i,1] 310 | # key = np.array([year,loc1,loc2]) 311 | # index = np.where(np.all(area[:,0:3] == key, axis=1)) 312 | # # print i,key,index 313 | # area_current = area[index,3] 314 | 315 | # area_total+=area_current 316 | # yield_real_total+=area_current*real_out[i,] 317 | # yield_pred_total_CNN+=area_current*pred_out[i] 318 | # yield_pred_total_GP+=area_current*Y_pred_3[count,] 319 | 320 | # real_average = yield_real_total/area_total 321 | # pred_average_CNN = yield_pred_total_CNN/area_total 322 | # pred_average_GP = yield_pred_total_GP/area_total 323 | # print 'real_average',real_average 324 | # print 'pred_average_CNN',pred_average_CNN 325 | # print 'pred_average_GP',pred_average_GP 326 | 327 | 328 | 329 | 330 | # if __name__ == "__main__": 331 | # # predict_year = 2013 332 | 333 | # # save_path = '/atlas/u/jiaxuan/data/train_results/histogram_new/test22_optimize/' 334 | # # path = save_path + str(predict_year)+'result_prediction.npz' 335 | 336 | # # RMSE = Parallel(n_jobs=8)(delayed(GaussianProcess)(2012,path_normal+'2012result_day'+str(i)+'.npz') for i in range(9,33)) 337 | # # Ridge(predict_year,path) 338 | 339 | # for predict_year in range(2012,2016): 340 | # save_path = '/atlas/u/jiaxuan/data/train_results/final/yearly/archive/' 341 | # path = save_path + str(predict_year)+'result_prediction.npz' 342 | # GaussianProcess(predict_year,path) 343 | 344 | 345 | 346 | 347 | -------------------------------------------------------------------------------- /5 model_semi_supervised/train_for_semi.py: -------------------------------------------------------------------------------- 1 | from nnet_semi import * 2 | from GP_crop_v3 import * 3 | import logging 4 | import time 5 | 6 | predict_year = 2015 7 | 8 | def load_data(filename,config): 9 | content = np.load(config.load_path + filename) 10 | image_all = content['output_image'] 11 | yield_all = content['output_yield'] 12 | year_all = content['output_year'] 13 | locations_all = content['output_locations'] 14 | index_all = content['output_index'] 15 | 16 | # delete broken image 17 | list_delete=[] 18 | for i in range(image_all.shape[0]): 19 | if np.sum(image_all[i,:,:,:])<=287: 20 | if year_all[i]<2016: 21 | list_delete.append(i) 22 | image_all=np.delete(image_all,list_delete,0) 23 | yield_all=np.delete(yield_all,list_delete,0) 24 | year_all = np.delete(year_all,list_delete, 0) 25 | locations_all = np.delete(locations_all, list_delete, 0) 26 | index_all = np.delete(index_all, list_delete, 0) 27 | 28 | 29 | # keep major counties 30 | list_keep=[] 31 | for i 
in range(image_all.shape[0]): 32 | if (index_all[i,0]==5)or(index_all[i,0]==17)or(index_all[i,0]==18)or(index_all[i,0]==19)or(index_all[i,0]==20)or(index_all[i,0]==27)or(index_all[i,0]==29)or(index_all[i,0]==31)or(index_all[i,0]==38)or(index_all[i,0]==39)or(index_all[i,0]==46): 33 | list_keep.append(i) 34 | image_all=image_all[list_keep,:,:,:] 35 | yield_all=yield_all[list_keep] 36 | year_all = year_all[list_keep] 37 | locations_all = locations_all[list_keep,:] 38 | index_all = index_all[list_keep,:] 39 | 40 | # split into train and validate 41 | index_train = np.nonzero(year_all < predict_year)[0] 42 | index_validate = np.nonzero(year_all == predict_year)[0] 43 | index_train_validate = np.nonzero(year_all <= predict_year)[0] 44 | print 'train size',index_train.shape[0] 45 | print 'validate size',index_validate.shape[0] 46 | 47 | # calc train image mean (for each band), and then detract (broadcast) 48 | image_mean=np.mean(image_all[index_train],(0,1,2)) 49 | image_all = image_all - image_mean 50 | 51 | return image_all,yield_all,year_all,locations_all,index_all,index_train,index_validate,index_train_validate 52 | 53 | 54 | if __name__ == "__main__": 55 | logging.basicConfig(filename='/logging_semi/'+str(predict_year)+'.log',level=logging.DEBUG) 56 | # Create a coordinator 57 | config = Config() 58 | 59 | filename = 'histogram_all' + '.npz' 60 | # filename = 'histogram_all_soilweather' + '.npz' 61 | time1 = time.time() 62 | image_all,yield_all,year_all,locations_all,index_all,index_train,index_validate,_ = load_data(filename, config) 63 | print("load time: %ss" % (time.time() - time1)) 64 | image_validate=image_all[index_validate] 65 | yield_validate=yield_all[index_validate] 66 | 67 | model= NeuralModel(config,'net') 68 | 69 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.48) 70 | # Launch the graph. 71 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 72 | sess.run(tf.initialize_all_variables()) 73 | 74 | # summary_train_loss = [] 75 | # summary_eval_loss = [] 76 | # summary_RMSE = [] 77 | # summary_ME = [] 78 | 79 | train_loss=0 80 | val_loss=0 81 | val_prediction = 0 82 | val_deviation = np.zeros([config.B]) 83 | # # ######################### 84 | # # block when test 85 | # # add saver 86 | # saver=tf.train.Saver() 87 | # # Restore variables from disk. 
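# A minimal, self-contained sketch of the tf.train.Saver save/restore pattern that the
# commented-out block below relies on, assuming the same pre-1.0 TensorFlow API used in
# this file (tf.initialize_all_variables); the variable and checkpoint path are hypothetical.
import tensorflow as tf
w = tf.Variable(tf.zeros([3]), name='w')           # stand-in for the model weights
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    saver.save(sess, '/tmp/CNN_model.ckpt')        # write all variables to a checkpoint
    saver.restore(sess, '/tmp/CNN_model.ckpt')     # reload them in a later run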
88 | # try: 89 | # saver.restore(sess, config.save_path+str(predict_year)+"CNN_model.ckpt") 90 | # # Restore log results 91 | # # npzfile = np.load(config.save_path + str(predict_year)+'result.npz') 92 | # # summary_train_loss = npzfile['summary_train_loss'].tolist() 93 | # # summary_eval_loss = npzfile['summary_eval_loss'].tolist() 94 | # # summary_RMSE = npzfile['summary_RMSE'].tolist() 95 | # # summary_ME = npzfile['summary_ME'].tolist() 96 | # print("Model restored.") 97 | # except: 98 | # print 'No history model found' 99 | # # ######################### 100 | 101 | RMSE_min = 100 102 | chkpoint_loop = 500 103 | try: 104 | for i in range(config.train_step): 105 | 106 | # load extra unlabel data 107 | if i%chkpoint_loop ==0: 108 | chkpoint = i/chkpoint_loop + 1 109 | # load unsupervised data 110 | filename = 'histogram_semi_rand_200_20000'+str(chkpoint)+'.npz' 111 | time1 = time.time() 112 | image_all_ulab,_,_,_,_,_,index_validate_ulab,index_ulab = load_data(filename, config) 113 | print("load time: %ss" % (time.time() - time1)) 114 | 115 | # No augmentation 116 | index_train_batch = np.random.choice(index_train,size=config.B) 117 | image_train_batch = image_all[index_train_batch,:,0:config.H,:] 118 | index_train_batch_ulab = np.random.choice(index_ulab,size=config.B) 119 | image_train_batch_ulab = image_all_ulab[index_train_batch_ulab,:,0:config.H,:] 120 | image_train_batch = np.concatenate((image_train_batch, image_train_batch_ulab),axis=0) 121 | # image_train_batch = np.concatenate((image_train_batch, image_train_batch),axis=0) 122 | 123 | yield_train_batch = yield_all[index_train_batch,np.newaxis] 124 | 125 | # # try data augmentation while training 126 | # index_train_batch_1 = np.random.choice(index_train,size=config.B) 127 | # index_train_batch_2 = np.random.choice(index_train,size=config.B) 128 | # image_train_batch = (image_all[index_train_batch_1,:,0:config.H,:]+image_all[index_train_batch_1,:,0:config.H,:])/2 129 | # yield_train_batch = (yield_all[index_train_batch_1]+yield_all[index_train_batch_1])/2 130 | # # year_train_batch = (year_all[index_train_batch_1,np.newaxis]+year_all[index_train_batch_2,np.newaxis])/2 131 | 132 | index_validate_batch = np.random.choice(index_validate, size=config.B) 133 | image_validate_batch = image_all[index_validate_batch,:,0:config.H,:] 134 | index_validate_batch_ulab = np.random.choice(index_validate_ulab,size=config.B) 135 | image_validate_batch_ulab = image_all_ulab[index_validate_batch_ulab,:,0:config.H,:] 136 | image_validate_batch = np.concatenate((image_validate_batch, image_validate_batch_ulab),axis=0) 137 | # image_validate_batch = np.concatenate((image_validate_batch, image_validate_batch),axis=0) 138 | 139 | yield_validate_batch = yield_all[index_validate_batch,np.newaxis] 140 | 141 | _,t_L,t_C,t_U,t_R,t_loss,t_pred,t_real,t_err = sess.run( 142 | [model.train_op,model.L,model.C,model.U,model.R,model.loss,model.pred,model.real,model.pred_err], feed_dict={ 143 | model.x:image_train_batch, 144 | model.y_lab:yield_train_batch, 145 | model.lr:config.lr, 146 | model.keep_prob:config.keep_prob 147 | }) 148 | 149 | if i%10 == 0: 150 | v_L,v_C,v_U,v_R,v_loss,v_pred,v_real,v_err = sess.run( 151 | [model.L,model.C,model.U,model.R,model.loss,model.pred,model.real,model.pred_err], feed_dict={ 152 | model.x: image_validate_batch, 153 | model.y_lab: yield_validate_batch, 154 | model.keep_prob:1 155 | }) 156 | 157 | print 'predict year'+str(predict_year)+'step'+str(i),config.lr 158 | print 
t_L,t_C,t_U,t_R,t_loss,np.mean(t_pred),np.mean(t_real),np.mean(t_pred-t_real),t_err 159 | print v_L,v_C,v_U,v_R,v_loss,np.mean(v_pred),np.mean(v_real),np.mean(v_pred-v_real),v_err 160 | logging.info('predict year %d step %d lr %f',predict_year,i,config.lr) 161 | logging.info('%f %f %f %f %f %f %f %f %f',t_L,t_C,t_U,t_R,t_loss,np.mean(t_pred),np.mean(t_real),np.mean(t_pred-t_real),t_err) 162 | logging.info('%f %f %f %f %f %f %f %f %f',v_L,v_C,v_U,v_R,v_loss,np.mean(v_pred),np.mean(v_real),np.mean(v_pred-v_real),v_err) 163 | if i%10 == 0: 164 | # do validation 165 | pred = [] 166 | real = [] 167 | for j in range(image_validate.shape[0] / config.B): 168 | real_temp = yield_validate[j * config.B:(j + 1) * config.B] 169 | image_batch = image_validate[j * config.B:(j + 1) * config.B,:,0:config.H,:] 170 | image_batch = np.concatenate((image_batch, image_batch),axis=0) 171 | # image_batch = np.concatenate((image_batch, image_batch),axis=0) 172 | yield_batch = yield_validate[j * config.B:(j + 1) * config.B,np.newaxis] 173 | pred_temp= sess.run(model.y_lab_pred, feed_dict={ 174 | model.x: image_batch, 175 | model.y_lab: yield_batch, 176 | model.keep_prob: 1 177 | }) 178 | pred.append(np.squeeze(pred_temp)) 179 | real.append(np.squeeze(real_temp)) 180 | pred=np.concatenate(pred,axis=0) 181 | real=np.concatenate(real,axis=0) 182 | RMSE=np.sqrt(np.mean((pred-real)**2)) 183 | ME=np.mean(pred-real) 184 | 185 | if RMSE