├── LICENSE.txt ├── runTest.script ├── tools ├── README ├── build.xml └── src │ └── edu │ └── ucsc │ └── srl │ └── tools │ └── NetcdfFileGenerator.java ├── README ├── src └── edu │ └── ucsc │ └── srl │ └── damasc │ └── netcdf │ ├── NCTool.java │ ├── combine │ ├── AverageCombiner.java │ ├── MaxCombiner.java │ ├── IdentityCombiner.java │ ├── SimpleMaxCombiner.java │ ├── MedianCombiner.java │ └── SimpleMedianCombiner.java │ ├── map │ ├── NullMapper.java │ ├── MaxMapper.java │ ├── SimpleMaxMapper.java │ ├── SimpleMedianMapper.java │ ├── AverageMapper.java │ ├── IdentityMapper.java │ └── MedianMapper.java │ ├── reduce │ ├── NullReducer.java │ ├── MaxReducer.java │ ├── IdentityReducer.java │ ├── SimpleMaxReducer.java │ ├── AverageReducer.java │ ├── SimpleMedianReducer.java │ └── MedianReducer.java │ ├── NetCDFUtils.java │ ├── io │ ├── SHFileStatus.java │ ├── NcHdfsRaf.java │ ├── input │ │ ├── NetCDFFileSplit.java │ │ ├── ArrayBasedFileSplit.java │ │ ├── NetCDFRecordReader.java │ │ └── ArrayBasedFileInputFormat.java │ ├── AverageResult.java │ ├── Result.java │ ├── GroupID.java │ ├── ArraySpec.java │ └── HolisticResult.java │ └── tools │ ├── Average.java │ ├── Median.java │ ├── Max.java │ └── Identity.java └── netcdf_patch ├── INSTALL ├── netcdf43.diff └── netcdf42.diff /LICENSE.txt: -------------------------------------------------------------------------------- 1 | All code in this project, unless otherwise stated, is GPLv2 licensed. 2 | If you are interested in another license, please contact me at buck@soe.ucsc.edu. 3 | -------------------------------------------------------------------------------- /runTest.script: -------------------------------------------------------------------------------- 1 | hadoop jar build/jar/hadoop-scidata.jar average -D damasc.extraction_shape=1,36,36,10 -D damasc.partition_mode=record -D damasc.placement_mode=sampling -D damasc.query_dependant=false -D damasc.number_reducers=1 -D damasc.variable_name=windspeed1 -D damasc.buffer_size=134217728 -D damasc.logfile=/tmp/damasc_log.txt test_data/* test_output 2 | 3 | -------------------------------------------------------------------------------- /tools/README: -------------------------------------------------------------------------------- 1 | A jar file that generates a valid NetCDF-3 file 2 | 3 | To generate a file do the following: 4 | 5 | ant jar 6 | ant run 7 | 8 | this will generate a .nc file with the timestamp, in milliseconds since the epoch, being the file name. This should allow using the file name to seed the random number generator as a means of checking the data somewhere down the line. 9 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This represents the code base used to run the experiments for the SC '11 SciHadoop paper. 2 | See our website for links to the paper and more details: http://systems.soe.ucsc.edu/projects/damasc 3 | 4 | The NetCDF patches need to be applied to a 4.2 version of the NetCDF Java library for the SciHadoop code to work. Directions can be found in the netcdf_patches directory. 
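In brief, the patching step looks like the following (a sketch only; netcdf_patch/INSTALL has the authoritative steps, and /path/to/thredds stands in for wherever you unpacked the NetCDF 4.2 source, i.e. the directory that contains "cdm"):

cp netcdf_patch/netcdf42.diff /path/to/thredds/
cd /path/to/thredds
patch -p1 < netcdf42.diff
cd cdm
ant makeMainComplete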
5 | 6 | Installation instructions can be found on our github page, here https://github.com/four2five/SciHadoop/wiki/How-to-Build-SciHadoop 7 | That link has instructions for installing Hadoop, patching and building NetCDF and running a SciHadoop job 8 | 9 | Questions can be sent to buck@soe.ucsc.edu 10 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/NCTool.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf; 2 | 3 | import org.apache.hadoop.util.ProgramDriver; 4 | 5 | import edu.ucsc.srl.damasc.netcdf.tools.Identity; 6 | import edu.ucsc.srl.damasc.netcdf.tools.Average; 7 | import edu.ucsc.srl.damasc.netcdf.tools.Max; 8 | import edu.ucsc.srl.damasc.netcdf.tools.Median; 9 | 10 | /** 11 | * Helper class that registers the various functions that 12 | * SciHadoop supports 13 | */ 14 | public class NCTool { 15 | public static void main(String[] args) { 16 | int exitCode = -1; 17 | ProgramDriver pgd = new ProgramDriver(); 18 | try { 19 | pgd.addClass("average", Average.class, "NetCDF average job"); 20 | pgd.addClass("identity", Identity.class, "NetCDF identity job"); 21 | pgd.addClass("max", Max.class, "NetCDF max job"); 22 | pgd.addClass("median", Median.class, "NetCDF median job"); 23 | 24 | //exitCode = pgd.driver(args); 25 | pgd.driver(args); 26 | } catch (Throwable e) { 27 | e.printStackTrace(); 28 | } 29 | 30 | System.exit(exitCode); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /netcdf_patch/INSTALL: -------------------------------------------------------------------------------- 1 | March 6, 2013 2 | Joe Buck ( buck@soe.ucsc.edu ) 3 | 4 | We've also added a patch for NetCDF Java 4.3.15 as netcdf43.diff. 5 | from the "cdm" directory, apply the patch like so: 6 | patch -p2 < netcdf43.diff 7 | 8 | Nov 3, 2011 9 | Joe Buck ( buck@soe.ucsc.edu) 10 | 11 | 1) download a source verison of NetCDF 4.2. This patch was built against the package from Oct 24, 2011 12 | 13 | 2) unzip the NetCDF source somewhere 14 | 15 | 3) copy the netcdf42.diff file into the same directory as "cdm" (normally the "thredds directory"). 16 | 17 | 4) patch with this command 'patch -p1 < netcdf42.diff 18 | 19 | 5) this should apply cleanly. If it does, then cd into the cdm directory and build the jar with this command: 20 | 'ant makeMainComplete' 21 | 22 | 6) there should now be a directory under 'cdm' called 'target'. The jar file in there called 'netcdfAll-4.2.jar is likely the one you want. You can try using netcdf-4.2.jar but we just use the "All" variant ant it works out pretty well. 23 | 24 | 7) make sure this jar is in both your CLASSPATH and HADOOP_CLASSPATH (configured in the conf/hadoop-env.sh script in your hadoop installation) for every node. 
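As an example of step 7 (a sketch; /path/to/netcdfAll-4.2.jar is a placeholder for wherever you put the jar built in step 6):

export CLASSPATH=$CLASSPATH:/path/to/netcdfAll-4.2.jar
# and in conf/hadoop-env.sh on every node:
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/path/to/netcdfAll-4.2.jar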
25 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/AverageCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.AverageResult; 15 | 16 | /** 17 | * Combiner class for the Average function 18 | */ 19 | public class AverageCombiner extends 20 | Reducer { 21 | 22 | private static final Log LOG = LogFactory.getLog(AverageCombiner.class); 23 | 24 | /** 25 | * Reduces values for a given key 26 | * @param key the Key for the given values being passed in 27 | * @param values a List of AverageResult objects to combine 28 | * @param context the Context object for the currently executing job 29 | */ 30 | public void reduce(LongWritable key, Iterable values, 31 | Context context) 32 | throws IOException, InterruptedException { 33 | 34 | 35 | AverageResult avgResult = new AverageResult(); 36 | 37 | for (AverageResult value : values) { 38 | avgResult.addAverageResult(value); 39 | } 40 | 41 | context.write(key, avgResult); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/MaxCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.Result; 15 | 16 | /** 17 | * Combiner for the max operator 18 | */ 19 | public class MaxCombiner extends 20 | Reducer { 21 | 22 | private static final Log LOG = LogFactory.getLog(MaxCombiner.class); 23 | 24 | /** 25 | * Reduces values for a given key 26 | * @param key the Key for the given values being passed in 27 | * @param values a List of IntWritable objects to combine 28 | * @param context the Context object for the currently executing job 29 | */ 30 | public void reduce(GroupID key, Iterable values, 31 | Context context) 32 | throws IOException, InterruptedException { 33 | 34 | 35 | IntWritable maxVal = new IntWritable(); 36 | maxVal.set(Integer.MIN_VALUE); 37 | 38 | for (IntWritable value : values) { 39 | if ( value.get() > maxVal.get() ) 40 | maxVal.set(value.get()); 41 | } 42 | 43 | context.write(key, maxVal); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/map/NullMapper.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.map; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | import org.apache.hadoop.io.IntWritable; 8 | import org.apache.hadoop.io.LongWritable; 9 | import 
org.apache.hadoop.mapreduce.Mapper; 10 | 11 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 12 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 13 | 14 | import ucar.ma2.Array; 15 | import ucar.ma2.ArrayInt; 16 | 17 | /** 18 | * Dummy mapper, just passed data through with a dummy key. 19 | * This is used for testing purposes 20 | */ 21 | public class NullMapper extends Mapper { 22 | 23 | /** 24 | * Reduces values for a given key 25 | * @param key ArraySpec representing the given Array being passed in 26 | * @param value an Array to process that corresponds to the given key 27 | * @param context the Context object for the currently executing job 28 | */ 29 | public void map(ArraySpec key, Array value, Context context) 30 | throws IOException, InterruptedException { 31 | try { 32 | ArrayInt intArray = (ArrayInt)value; 33 | int[] dummyGID = {0}; 34 | GroupID groupID = new GroupID(dummyGID, "nullData"); 35 | IntWritable intW = new IntWritable(Integer.MIN_VALUE); 36 | 37 | context.write(groupID, intW); 38 | } catch ( Exception e ) { 39 | System.out.println("Caught an exception in NullMapper.map()" + e.toString() ); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/IdentityCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | 12 | /** 13 | * A combiner that finds the max value for a given key 14 | */ 15 | public class IdentityCombiner extends 16 | Reducer { 17 | 18 | private static final Log LOG = LogFactory.getLog(IdentityCombiner.class); 19 | 20 | /** 21 | * Reduces values for a given key 22 | * @param key the Key for the given values being passed in 23 | * @param values a List of IntWritable objects to combine 24 | * @param context the Context object for the currently executing job 25 | */ 26 | public void reduce(LongWritable key, Iterable values, 27 | Context context) 28 | throws IOException, InterruptedException { 29 | long timer = System.currentTimeMillis(); 30 | 31 | 32 | IntWritable maxVal = new IntWritable(); 33 | maxVal.set(Integer.MIN_VALUE); 34 | 35 | for (IntWritable value : values) { 36 | if ( value.get() > maxVal.get() ) 37 | maxVal.set(value.get()); 38 | 39 | } 40 | 41 | context.write(key, maxVal); 42 | timer = System.currentTimeMillis() - timer; 43 | 44 | LOG.info("Entire combiner took " + timer + " ms"); 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/SimpleMaxCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.Result; 15 | 16 | /** 17 | * Combiner for the Max function that uses simple data structures as keys 18 | * and 
values 19 | */ 20 | public class SimpleMaxCombiner extends 21 | Reducer { 22 | 23 | private static final Log LOG = LogFactory.getLog(SimpleMaxCombiner.class); 24 | 25 | /** 26 | * Reduces values for a given key 27 | * @param key the Key for the given values being passed in 28 | * @param values a List of IntWritable objects to combine 29 | * @param context the Context object for the currently executing job 30 | */ 31 | public void reduce(LongWritable key, Iterable values, 32 | Context context) 33 | throws IOException, InterruptedException { 34 | 35 | 36 | IntWritable maxVal = new IntWritable(); 37 | maxVal.set(Integer.MIN_VALUE); 38 | 39 | //for (Result value : values) { 40 | for (IntWritable value : values) { 41 | if ( value.get() > maxVal.get() ) 42 | maxVal.set(value.get()); 43 | } 44 | context.write(key, maxVal); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/NullReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | 12 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 13 | import edu.ucsc.srl.damasc.netcdf.Utils; 14 | 15 | /** 16 | * Reducer that simply iterates through the data it is passed 17 | */ 18 | public class NullReducer extends 19 | Reducer { 20 | 21 | private static final Log LOG = LogFactory.getLog(NullReducer.class); 22 | 23 | /** 24 | * Iterates through the data it is passed, doing nothing to it. 
Outputs a 25 | * Integer.MINIMUM_VALUE as the value for its key 26 | * @param key the flattened corner for this instance of the extraction shape 27 | * in the global logical space 28 | * @param values an Iterable list of IntWritable objects that represent all the inputs 29 | * for this key 30 | * @param context the Context object for the executing program 31 | */ 32 | public void reduce(GroupID key, Iterable values, 33 | Context context) 34 | throws IOException, InterruptedException { 35 | 36 | long timer = System.currentTimeMillis(); 37 | 38 | IntWritable maxVal = new IntWritable(); 39 | maxVal.set(Integer.MIN_VALUE); 40 | 41 | // empty loop 42 | for (IntWritable value : values) { 43 | } 44 | 45 | context.write(key, maxVal); 46 | 47 | timer = System.currentTimeMillis() - timer; 48 | LOG.info("total reducer took: " + timer + " ms"); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/MaxReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | 12 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 13 | import edu.ucsc.srl.damasc.netcdf.Utils; 14 | 15 | /** 16 | * Reducer for the Max operator 17 | */ 18 | public class MaxReducer extends 19 | Reducer { 20 | 21 | private static final Log LOG = LogFactory.getLog(IdentityReducer.class); 22 | 23 | /** 24 | * Reduces all the values for the given key, produces the maximum of the 25 | * IntWritable objects in values 26 | * @param key the flattened corner for this instance of the extraction shape 27 | * in the global logical space 28 | * @param values an Iterable list of IntWritable objects that represent all the inputs 29 | * for this key 30 | * @param context the Context object for the executing program 31 | */ 32 | public void reduce(GroupID key, Iterable values, 33 | Context context) 34 | throws IOException, InterruptedException { 35 | 36 | long timer = System.currentTimeMillis(); 37 | 38 | // now we need to parse the variable dimensions out 39 | //int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 40 | 41 | IntWritable maxVal = new IntWritable(); 42 | maxVal.set(Integer.MIN_VALUE); 43 | 44 | for (IntWritable value : values) { 45 | if ( value.get() > maxVal.get() ) 46 | maxVal.set(value.get()); 47 | } 48 | 49 | //tempID.setGroupID( tempID.unflatten(variableShape, key.get() ) ); 50 | context.write(key, maxVal); 51 | 52 | timer = System.currentTimeMillis() - timer; 53 | LOG.info("total reducer took: " + timer + " ms"); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/IdentityReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | 12 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 13 | import edu.ucsc.srl.damasc.netcdf.Utils; 14 | 15 | 
public class IdentityReducer extends 16 | Reducer { 17 | 18 | private static final Log LOG = LogFactory.getLog(IdentityReducer.class); 19 | 20 | /** 21 | * Reduces all the values for the given key, produces the Identity of the 22 | * IntWritable objects in values 23 | * @param key the flattened corner for this instance of the extraction shape 24 | * in the global logical space 25 | * @param values an Iterable list of IntWritable objects that represent all the inputs 26 | * for this key 27 | * @param context the Context object for the executing program 28 | */ 29 | public void reduce(LongWritable key, Iterable values, 30 | Context context) 31 | throws IOException, InterruptedException { 32 | 33 | long timer = System.currentTimeMillis(); 34 | GroupID tempID = new GroupID(); 35 | 36 | // now we need to parse the variable dimensions out 37 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 38 | 39 | IntWritable maxVal = new IntWritable(); 40 | maxVal.set(Integer.MIN_VALUE); 41 | 42 | for (IntWritable value : values) { 43 | if ( value.get() > maxVal.get() ) 44 | maxVal.set(value.get()); 45 | } 46 | 47 | tempID.setGroupID( tempID.unflatten(variableShape, key.get() ) ); 48 | context.write(tempID, maxVal); 49 | 50 | timer = System.currentTimeMillis() - timer; 51 | LOG.info("total reducer took: " + timer + " ms"); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/SimpleMaxReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.Result; 15 | 16 | import edu.ucsc.srl.damasc.netcdf.Utils; 17 | 18 | /** 19 | * A reducer that applies the maximum function. 
Uses a simple data structure 20 | * as its key 21 | */ 22 | public class SimpleMaxReducer extends 23 | Reducer { 24 | 25 | private static final Log LOG = LogFactory.getLog(SimpleMaxReducer.class); 26 | 27 | /** 28 | * Reduces all the values for the given key, produces the average of the 29 | * IntWritable objects in values 30 | * @param key the flattened corner for this instance of the extraction shape 31 | * in the global logical space 32 | * @param values an Iterable list of IntWritable objects that represent all the inputs 33 | * for this key 34 | * @param context the Context object for the executing program 35 | */ 36 | public void reduce(LongWritable key, Iterable values, 37 | Context context) 38 | throws IOException, InterruptedException { 39 | 40 | GroupID tempID = new GroupID(); 41 | 42 | // now we need to parse the variable dimensions out 43 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 44 | 45 | IntWritable maxVal = new IntWritable(); 46 | maxVal.set(Integer.MIN_VALUE); 47 | 48 | for (IntWritable value : values) { 49 | if ( value.get() > maxVal.get() ) 50 | maxVal.set(value.get()); 51 | } 52 | 53 | tempID.setGroupID( tempID.unflatten(variableShape, key.get() ) ); 54 | context.write(tempID, maxVal); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/NetCDFUtils.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.File; 5 | import java.io.FileWriter; 6 | import java.io.IOException; 7 | import java.util.HashMap; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | import org.apache.commons.logging.Log; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 14 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 15 | 16 | import ucar.ma2.Array; 17 | 18 | /** 19 | * Utility methods that are specific to NetCDF files / data 20 | */ 21 | public class NetCDFUtils { 22 | 23 | /** 24 | * Logs the IDs generated for a given Key / Value pair 25 | * (ArraySpec / Array objects) 26 | * @param debugFileName file to log this data to 27 | * @param ncArray a NetCDF Array object that is the data 28 | * for the "value" part of the 29 | * key / value pair 30 | * @param key an ArraySpec object that is the key 31 | * for the "key" value pair. 
An ArraySpec 32 | * @param extractionShape the extraction shape specified 33 | * for this query 34 | * @param groupSubArrayMap mapping from GroupIDs to data 35 | * @param LOG the log object to write to in case of an exception 36 | */ 37 | public static void logGIDs( String debugFileName, Array ncArray, 38 | ArraySpec key, int[] extractionShape, 39 | HashMap groupSubArrayMap, 40 | Log LOG) { 41 | try { 42 | File outputFile = new File( debugFileName ); 43 | BufferedWriter writer = 44 | new BufferedWriter( new FileWriter(outputFile, true)); 45 | 46 | Set> set = groupSubArrayMap.entrySet(); 47 | 48 | writer.write("InputSplit: " + key); 49 | writer.newLine(); 50 | for( Map.Entry me : set ) { 51 | writer.write("\tgid: " + me.getKey().toString(extractionShape) + 52 | "\tspec: " + Utils.arrayToString(me.getValue().getShape()) ); 53 | writer.newLine(); 54 | } 55 | writer.close(); 56 | 57 | } catch ( IOException ioe ) { 58 | LOG.error("Caught an ioe in MedianMapper.logGIDS()\n" + ioe.toString()); 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/AverageReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Reducer; 11 | 12 | import edu.ucsc.srl.damasc.netcdf.io.AverageResult; 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.Utils; 15 | 16 | /** 17 | * A reducer for the Average function 18 | */ 19 | public class AverageReducer extends 20 | Reducer { 21 | 22 | private static final Log LOG = LogFactory.getLog(AverageReducer.class); 23 | 24 | /** 25 | * Reduces all the values for the given key, produces the average of the 26 | * AverageResult objects in values 27 | * @param key the flattened corner for this instance of the extraction shape 28 | * in the global logical space 29 | * @param values an Iterable list of AverageResult objects that represent all the inputs 30 | * for this key 31 | * @param context the Context object for the executing program 32 | */ 33 | public void reduce(LongWritable key, Iterable values, 34 | Context context) 35 | throws IOException, InterruptedException { 36 | 37 | long timer = System.currentTimeMillis(); 38 | 39 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 40 | AverageResult avgResult = new AverageResult(); 41 | GroupID myGroupID = new GroupID(); 42 | IntWritable myIntW = new IntWritable(); 43 | 44 | //for (Result value : values) { 45 | for (AverageResult value : values) { 46 | //currentAverage = (int) ((((long)currentAverage * currentSamplesInAverage) + tempValue) / (currentSamplesInAverage++)); 47 | avgResult.addAverageResult(value); 48 | } 49 | 50 | myGroupID.setGroupID( myGroupID.unflatten(variableShape, key.get()) ); 51 | myIntW.set(avgResult.getCurrentValue()); 52 | context.write(myGroupID, myIntW); 53 | 54 | timer = System.currentTimeMillis() - timer; 55 | LOG.info("total reducer took: " + timer + " ms"); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/io/SHFileStatus.java: -------------------------------------------------------------------------------- 1 | package 
edu.ucsc.srl.damasc.netcdf.io; 2 | 3 | import org.apache.hadoop.fs.FileStatus; 4 | 5 | /** 6 | * This class represents a FileStatus an a logical offset. It's used 7 | * to map multiple files into a single logical space 8 | */ 9 | public class SHFileStatus implements Comparable { 10 | private FileStatus _fileStatus; 11 | private int[] _startOffset; 12 | 13 | /** 14 | * Constructor 15 | * @param fileStatus the FileStatus to wrap 16 | * @param startOffset the offset, in the global logical space, where 17 | * this file starts 18 | */ 19 | public SHFileStatus( FileStatus fileStatus, int[] startOffset) { 20 | this.setFileStatus( fileStatus); 21 | this.setStartOffset(startOffset); 22 | } 23 | 24 | /** 25 | * Sets the FileStatus object wrapped by this SHFileStatus object 26 | * @param newFileStatus a FileStatus object to wrap 27 | */ 28 | public void setFileStatus( FileStatus newFileStatus ){ 29 | _fileStatus = newFileStatus; 30 | } 31 | 32 | /** 33 | * Sets the offset, in the global space, for this SHFileStatus object 34 | * @param newStartOffset an offset in the global logical space 35 | */ 36 | public void setStartOffset(int[] newStartOffset) { 37 | _startOffset = new int[newStartOffset.length]; 38 | 39 | for ( int i=0; i < newStartOffset.length; i++) { 40 | _startOffset[i] = newStartOffset[i]; 41 | } 42 | 43 | } 44 | 45 | /** 46 | * Returns the offset, in the global space, for this SHFileStatus object 47 | * @return the offset, in the global logical space, for this SHFileStatus 48 | * object 49 | */ 50 | public int[] getStartOffset() { 51 | return _startOffset; 52 | } 53 | 54 | /** 55 | * Gets the FileStatus object wrapped by this SHFileStatus object 56 | * @return the FileStatus object wrapped by this SHFileStatus object 57 | */ 58 | public FileStatus getFileStatus() { 59 | return _fileStatus; 60 | } 61 | 62 | /** 63 | * Compare this SHFileStatus object to another. 
64 | * Use the encapsulated FileStatus compareTo method 65 | * @param other the SHFileStatus object to compare to this one 66 | * @return a value that is less than, equal to, or greater than zero 67 | * if the object passed in is, respectively, less than, equal to, or 68 | * greater than this object 69 | */ 70 | public int compareTo( SHFileStatus other ) { 71 | return this._fileStatus.compareTo(other.getFileStatus()); 72 | } 73 | 74 | } 75 | 76 | -------------------------------------------------------------------------------- /tools/build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/MedianCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 15 | import edu.ucsc.srl.damasc.netcdf.io.Result; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.Utils; 18 | 19 | /** 20 | * Combiner class for the Median operator 21 | */ 22 | public class MedianCombiner extends 23 | Reducer { 24 | 25 | private static final Log LOG = LogFactory.getLog(MedianCombiner.class); 26 | static enum MedianCombinerStatus { FULL, NOTFULL, MERGED } 27 | 28 | /** 29 | * Reduces values for a given key 30 | * @param key the Key for the given values being passed in 31 | * @param values a List of HolisticResult objects to combine 32 | * @param context the Context object for the currently executing job 33 | */ 34 | 35 | public void reduce(GroupID key, Iterable values, 36 | Context context) 37 | throws IOException, InterruptedException { 38 | 39 | 40 | // now we need to parse the variable dimensions out 41 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 42 | int[] extractionShape = 43 | Utils.getExtractionShape(context.getConfiguration(), 44 | variableShape.length); 45 | int neededValues = Utils.calcTotalSize(extractionShape); 46 | GroupID tempID = new GroupID(); 47 | 48 | HolisticResult holVal = new HolisticResult(); 49 | holVal.setNeededValueCount( neededValues ); 50 | 51 | for (HolisticResult value : values) { 52 | if ( holVal.isFull() ) { 53 | LOG.warn("Adding an element to an already full HR. Key: " + 54 | key.toString() + 55 | " array size: " + holVal.getNeededValueCount() + 56 | " current elems: " + 57 | holVal.getCurrentValueCount() ); 58 | } 59 | 60 | holVal.merge(value); 61 | context.getCounter(MedianCombinerStatus.MERGED).increment(value.getCurrentValueCount()); 62 | } 63 | 64 | // now, the remainig holistic result should be full. 
Check though 65 | if( holVal.isFull() ) { 66 | // apply whatever function you want, in this case we 67 | // sort and then pull the median out 68 | holVal.sort(); 69 | holVal.setFinal( holVal.getValues()[(holVal.getValues().length)/2] ); 70 | context.getCounter(MedianCombinerStatus.FULL).increment(1); 71 | } else { 72 | context.getCounter(MedianCombinerStatus.NOTFULL).increment(1); 73 | } 74 | 75 | context.write(key, holVal); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/tools/Average.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.tools; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | //import org.apache.hadoop.mapred.JobConf; 12 | import org.apache.hadoop.mapreduce.*; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.util.Tool; 15 | import org.apache.hadoop.util.ToolRunner; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.combine.AverageCombiner; 18 | import edu.ucsc.srl.damasc.netcdf.io.input.NetCDFFileInputFormat; 19 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 20 | import edu.ucsc.srl.damasc.netcdf.io.AverageResult; 21 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 22 | import edu.ucsc.srl.damasc.netcdf.map.AverageMapper; 23 | import edu.ucsc.srl.damasc.netcdf.reduce.AverageReducer; 24 | import edu.ucsc.srl.damasc.netcdf.Utils; 25 | import edu.ucsc.srl.damasc.netcdf.Utils.Operator; 26 | 27 | public class Average extends Configured implements Tool { 28 | 29 | public int run(String[] args) throws Exception { 30 | if (args.length != 2) { 31 | System.err.println("Usage: identity "); 32 | System.exit(2); 33 | } 34 | 35 | Configuration conf = getConf(); 36 | Job job = new Job(conf); 37 | String jobNameString = ""; 38 | 39 | // get the buffer size 40 | int bufferSize = Utils.getBufferSize(conf); 41 | jobNameString += " buffersize: " + bufferSize + " "; 42 | 43 | jobNameString += " average "; 44 | job.setJarByClass(Average.class); 45 | job.setMapperClass(AverageMapper.class); 46 | job.setReducerClass(AverageReducer.class); 47 | 48 | // reducer output 49 | job.setOutputKeyClass(GroupID.class); 50 | //job.setOutputKeyClass(Text.class); 51 | job.setOutputValueClass(IntWritable.class); 52 | 53 | // mapper output 54 | job.setMapOutputKeyClass(LongWritable.class); 55 | job.setMapOutputValueClass(AverageResult.class); 56 | 57 | if( Utils.noScanEnabled(conf) ) 58 | jobNameString += " with noscan "; 59 | 60 | if( Utils.queryDependantEnabled(conf) ) 61 | jobNameString += " and query dependant"; 62 | 63 | jobNameString += Utils.getPartModeString(conf) + ", " + 64 | Utils.getPlacementModeString(conf); 65 | jobNameString += " with " + Utils.getNumberReducers(conf) + 66 | " reducers "; 67 | 68 | job.setJobName(jobNameString); 69 | 70 | job.setInputFormatClass(NetCDFFileInputFormat.class); 71 | job.setNumReduceTasks( Utils.getNumberReducers(conf) ); 72 | 73 | NetCDFFileInputFormat.addInputPath(job, new Path(args[0])); 74 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 75 | 76 | job.waitForCompletion(true); 77 | 78 | return 0; 79 | } 80 | 81 | public static void main(String[] args) throws Exception { 82 | int res = ToolRunner.run(new 
Configuration(), new Average(), args); 83 | System.exit(res); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/combine/SimpleMedianCombiner.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.combine; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 15 | import edu.ucsc.srl.damasc.netcdf.io.Result; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.Utils; 18 | 19 | /** 20 | * Combiner class for the Median operator that uses simple data structures for 21 | * keys and values 22 | */ 23 | public class SimpleMedianCombiner extends 24 | Reducer { 25 | 26 | private static final Log LOG = LogFactory.getLog(SimpleMedianCombiner.class); 27 | static enum SimpleMedianCombinerStatus { FULL, NOTFULL, MERGED } 28 | 29 | /** 30 | * Reduces values for a given key 31 | * @param key the Key for the given values being passed in 32 | * @param values a List of HolisiticResult objects to combine 33 | * @param context the Context object for the currently executing job 34 | */ 35 | public void reduce(LongWritable key, Iterable values, 36 | Context context) 37 | throws IOException, InterruptedException { 38 | 39 | 40 | // now we need to parse the variable dimensions out 41 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 42 | int[] extractionShape = 43 | Utils.getExtractionShape(context.getConfiguration(), 44 | variableShape.length); 45 | int neededValues = Utils.calcTotalSize(extractionShape); 46 | GroupID tempID = new GroupID(); 47 | 48 | HolisticResult holVal = new HolisticResult(); 49 | holVal.setNeededValueCount( neededValues ); 50 | 51 | for (HolisticResult value : values) { 52 | if ( holVal.isFull() ) { 53 | LOG.warn("Adding an element to an already full HR. Key: " + 54 | key.toString() + 55 | " array size: " + holVal.getNeededValueCount() + 56 | " current elems: " + 57 | holVal.getCurrentValueCount() ); 58 | } 59 | 60 | holVal.merge(value); 61 | context.getCounter(SimpleMedianCombinerStatus.MERGED).increment(value.getCurrentValueCount()); 62 | } 63 | 64 | // now, the remainig holistic result should be full. 
Check though 65 | if( holVal.isFull() ) { 66 | // apply whatever function you want, in this case we 67 | // sort and then pull the median out 68 | holVal.sort(); 69 | holVal.setFinal( holVal.getValues()[(holVal.getValues().length)/2] ); 70 | context.getCounter(SimpleMedianCombinerStatus.FULL).increment(1); 71 | } else { 72 | context.getCounter(SimpleMedianCombinerStatus.NOTFULL).increment(1); 73 | } 74 | 75 | context.write(key, holVal); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/tools/Median.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.tools; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | //import org.apache.hadoop.mapred.JobConf; 12 | import org.apache.hadoop.mapreduce.*; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.util.Tool; 15 | import org.apache.hadoop.util.ToolRunner; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.combine.MedianCombiner; 18 | import edu.ucsc.srl.damasc.netcdf.io.input.NetCDFFileInputFormat; 19 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 20 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 21 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 22 | import edu.ucsc.srl.damasc.netcdf.map.MedianMapper; 23 | import edu.ucsc.srl.damasc.netcdf.reduce.MedianReducer; 24 | import edu.ucsc.srl.damasc.netcdf.Utils; 25 | import edu.ucsc.srl.damasc.netcdf.Utils.Operator; 26 | 27 | public class Median extends Configured implements Tool { 28 | 29 | public int run(String[] args) throws Exception { 30 | if (args.length != 2) { 31 | System.err.println("Usage: median "); 32 | System.exit(2); 33 | } 34 | 35 | Configuration conf = getConf(); 36 | Job job = new Job(conf); 37 | String jobNameString = ""; 38 | 39 | // get the buffer size 40 | int bufferSize = Utils.getBufferSize(conf); 41 | jobNameString += " buffersize: " + bufferSize + " "; 42 | 43 | jobNameString += "Median"; 44 | job.setJarByClass(Median.class); 45 | 46 | job.setMapperClass(MedianMapper.class); 47 | if ( Utils.useCombiner(conf) ) { 48 | jobNameString += " with combiner "; 49 | job.setCombinerClass(MedianCombiner.class); 50 | } 51 | job.setReducerClass(MedianReducer.class); 52 | 53 | // mapper output 54 | job.setMapOutputKeyClass(GroupID.class); 55 | job.setMapOutputValueClass(HolisticResult.class); 56 | 57 | // reducer output 58 | job.setOutputKeyClass(GroupID.class); 59 | job.setOutputValueClass(IntWritable.class); 60 | 61 | if( Utils.noScanEnabled(conf) ) 62 | jobNameString += " with noscan "; 63 | 64 | if( Utils.queryDependantEnabled(conf) ) 65 | jobNameString += " and query dependant"; 66 | 67 | jobNameString += Utils.getPartModeString(conf) + ", " + 68 | Utils.getPlacementModeString(conf); 69 | jobNameString += " with " + Utils.getNumberReducers(conf) + 70 | " reducers "; 71 | 72 | job.setJobName(jobNameString); 73 | 74 | job.setInputFormatClass(NetCDFFileInputFormat.class); 75 | job.setNumReduceTasks( Utils.getNumberReducers(conf) ); 76 | 77 | NetCDFFileInputFormat.addInputPath(job, new Path(args[0])); 78 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 79 | 80 | job.waitForCompletion(true); 81 | 82 | return 0; 83 | } 84 | 85 
| public static void main(String[] args) throws Exception { 86 | int res = ToolRunner.run(new Configuration(), new Median(), args); 87 | System.exit(res); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/tools/Max.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.tools; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.conf.Configured; 7 | import org.apache.hadoop.fs.Path; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | //import org.apache.hadoop.mapred.JobConf; 12 | import org.apache.hadoop.mapreduce.*; 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 14 | import org.apache.hadoop.util.Tool; 15 | import org.apache.hadoop.util.ToolRunner; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.combine.MaxCombiner; 18 | import edu.ucsc.srl.damasc.netcdf.io.input.NetCDFFileInputFormat; 19 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 20 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 21 | import edu.ucsc.srl.damasc.netcdf.io.AverageResult; 22 | import edu.ucsc.srl.damasc.netcdf.io.Result; 23 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 24 | import edu.ucsc.srl.damasc.netcdf.map.MaxMapper; 25 | import edu.ucsc.srl.damasc.netcdf.reduce.MaxReducer; 26 | import edu.ucsc.srl.damasc.netcdf.Utils; 27 | import edu.ucsc.srl.damasc.netcdf.Utils.Operator; 28 | 29 | public class Max extends Configured implements Tool { 30 | 31 | public int run(String[] args) throws Exception { 32 | if (args.length != 2) { 33 | System.err.println("Usage: identity "); 34 | System.exit(2); 35 | } 36 | 37 | Configuration conf = getConf(); 38 | Job job = new Job(conf); 39 | String jobNameString = ""; 40 | 41 | // get the buffer size 42 | int bufferSize = Utils.getBufferSize(conf); 43 | jobNameString += " buffersize: " + bufferSize + " "; 44 | 45 | jobNameString += "max"; 46 | job.setJarByClass(Max.class); 47 | job.setMapperClass(MaxMapper.class); 48 | 49 | if ( Utils.useCombiner(conf) ) { 50 | jobNameString += " with combiner "; 51 | job.setCombinerClass(MaxCombiner.class); 52 | } 53 | 54 | job.setReducerClass(MaxReducer.class); 55 | 56 | // mapper output 57 | job.setMapOutputKeyClass(GroupID.class); 58 | job.setMapOutputValueClass(IntWritable.class); 59 | 60 | // reducer output 61 | job.setOutputKeyClass(GroupID.class); 62 | job.setOutputValueClass(IntWritable.class); 63 | 64 | if( Utils.noScanEnabled(conf) ) 65 | jobNameString += " with noscan "; 66 | 67 | if( Utils.queryDependantEnabled(conf) ) 68 | jobNameString += " and query dependant"; 69 | 70 | jobNameString += Utils.getPartModeString(conf) + ", " + 71 | Utils.getPlacementModeString(conf); 72 | jobNameString += " with " + Utils.getNumberReducers(conf) + 73 | " reducers "; 74 | 75 | job.setJobName(jobNameString); 76 | 77 | job.setInputFormatClass(NetCDFFileInputFormat.class); 78 | job.setNumReduceTasks( Utils.getNumberReducers(conf) ); 79 | 80 | NetCDFFileInputFormat.addInputPath(job, new Path(args[0])); 81 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 82 | 83 | job.waitForCompletion(true); 84 | 85 | return 0; 86 | } 87 | 88 | public static void main(String[] args) throws Exception { 89 | int res = ToolRunner.run(new Configuration(), new Max(), args); 90 | System.exit(res); 91 | } 92 | } 93 | 
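For reference, this job is launched the same way runTest.script launches the average job, using the "max" keyword registered in NCTool; the line below is a sketch that reuses that script's options and test paths, which you would adjust for your own data:

hadoop jar build/jar/hadoop-scidata.jar max -D damasc.extraction_shape=1,36,36,10 -D damasc.partition_mode=record -D damasc.placement_mode=sampling -D damasc.number_reducers=1 -D damasc.variable_name=windspeed1 -D damasc.buffer_size=134217728 test_data/* test_output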
-------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/SimpleMedianReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 15 | import edu.ucsc.srl.damasc.netcdf.io.Result; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.Utils; 18 | 19 | /** 20 | * Reducer that applies the median function. Uses simple data structures as keys 21 | */ 22 | public class SimpleMedianReducer extends 23 | Reducer { 24 | 25 | private static final Log LOG = LogFactory.getLog(SimpleMedianReducer.class); 26 | 27 | static enum SimpleMedianReducerStatus { FULL, NOTFULL } 28 | 29 | /** 30 | * Reduces all the values for the given key, produces the median of the 31 | * HolisticResult objects in values 32 | * @param key the flattened corner for this instance of the extraction shape 33 | * in the global logical space 34 | * @param values an Iterable list of HolisticResult objects that represent all the inputs 35 | * for this key 36 | * @param context the Context object for the executing program 37 | */ 38 | public void reduce(LongWritable key, Iterable values, 39 | Context context) 40 | throws IOException, InterruptedException { 41 | 42 | GroupID tempID = new GroupID(); 43 | IntWritable outputInt = new IntWritable(Integer.MIN_VALUE); 44 | 45 | // now we need to parse the variable dimensions out 46 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 47 | int[] extractionShape = 48 | Utils.getExtractionShape(context.getConfiguration(), 49 | variableShape.length); 50 | 51 | int neededSize = Utils.calcTotalSize( extractionShape ); 52 | 53 | HolisticResult maxVal = new HolisticResult(); 54 | maxVal.setNeededValueCount( neededSize ); 55 | 56 | for (HolisticResult value : values) { 57 | // sanity check 58 | if ( maxVal.isFull() ) { 59 | LOG.warn("Adding an element to an already full HR. Key: " + 60 | key.toString() + 61 | " array size: " + maxVal.getNeededValueCount() + 62 | " current elems: " + 63 | maxVal.getCurrentValueCount() ); 64 | } 65 | 66 | maxVal.merge(value); 67 | } 68 | 69 | // now, the remainig holistic result should be full. 
Check though 70 | // and make sure it wasn't already finalized 71 | 72 | if( maxVal.isFull() && !maxVal.isFinal() ) { 73 | // apply whatever function you want, 74 | // in this case we sort and then pull the median out 75 | maxVal.sort(); 76 | maxVal.setFinal( maxVal.getValues()[(maxVal.getValues().length)/2] ); 77 | LOG.info("gid: " + key + " is full at " + 78 | maxVal.getCurrentValueCount() + " elements"); 79 | context.getCounter(SimpleMedianReducerStatus.FULL).increment(1); 80 | } else if (!maxVal.isFull() ) { 81 | LOG.info("gid: " + key + " has " + maxVal.getCurrentValueCount() + 82 | " elements" + 83 | " but should be full"); 84 | } else if (maxVal.isFinal() ) { 85 | LOG.info("gid: " + key + " has already been set to final"); 86 | } 87 | 88 | 89 | tempID.setGroupID( tempID.unflatten(variableShape, key.get() ) ); 90 | 91 | outputInt.set(maxVal.getValue(0)); 92 | context.write(tempID, outputInt); 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/map/MaxMapper.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.map; 2 | 3 | import java.io.IOException; 4 | 5 | import java.lang.Integer; 6 | import java.lang.Long; 7 | import java.util.HashMap; 8 | import java.util.Iterator; 9 | import java.util.Map; 10 | 11 | import org.apache.commons.logging.Log; 12 | import org.apache.commons.logging.LogFactory; 13 | import org.apache.hadoop.io.IntWritable; 14 | import org.apache.hadoop.io.LongWritable; 15 | import org.apache.hadoop.mapreduce.Mapper; 16 | 17 | import ucar.ma2.Array; 18 | import ucar.ma2.ArrayInt; 19 | import ucar.ma2.IndexIterator; 20 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 21 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 22 | import edu.ucsc.srl.damasc.netcdf.io.GroupIDGen; 23 | import edu.ucsc.srl.damasc.netcdf.Utils; 24 | 25 | /** 26 | * A Mapper class that applies the Max function 27 | */ 28 | public class MaxMapper extends Mapper { 29 | 30 | private static final Log LOG = LogFactory.getLog(IdentityMapper.class); 31 | private static boolean _benchmarkArraySpec = true; 32 | 33 | public static enum InvalidCell { INVALID_CELL_COUNT } ; 34 | 35 | /** 36 | * Reduces values for a given key 37 | * @param key ArraySpec representing the given Array being passed in 38 | * @param value an Array to process that corresponds to the given key 39 | * @param context the Context object for the currently executing job 40 | */ 41 | public void map(ArraySpec key, Array value, Context context) 42 | throws IOException, InterruptedException { 43 | 44 | long timerA = System.currentTimeMillis(); 45 | 46 | ArrayInt ncArray = (ArrayInt)value; 47 | 48 | int[] allOnes = new int[key.getShape().length]; 49 | for( int i=0; i groupSubArrayMap = new HashMap(); 61 | 62 | GroupIDGen myGIDG = new GroupIDGen(); 63 | GroupIDGen.pullOutSubArrays( myGIDG, ncArray, key, extractionShape, 64 | allOnes, groupSubArrayMap); 65 | 66 | ArrayInt localArray; 67 | GroupID localGID; 68 | // now roll through all the entries in the HashMap 69 | 70 | int invalidMapperCounter = 0; 71 | int currentMax = Integer.MIN_VALUE; 72 | int tempValue = Integer.MIN_VALUE; 73 | 74 | IntWritable myIntW = new IntWritable(); 75 | LongWritable myLongW = new LongWritable(); 76 | 77 | Iterator> gidItr = 78 | groupSubArrayMap.entrySet().iterator(); 79 | 80 | while (gidItr.hasNext() ) { 81 | currentMax = Integer.MIN_VALUE; 82 | Map.Entry pairs = gidItr.next(); 83 | localGID = pairs.getKey(); 84 | 
localArray = (ArrayInt)pairs.getValue(); 85 | 86 | // TODO sort out how to do filtering with this new GroupID based setup 87 | // -jbuck 88 | 89 | IndexIterator valItr = localArray.getIndexIterator(); 90 | 91 | while( valItr.hasNext() ) { 92 | 93 | tempValue = valItr.getIntNext(); 94 | if ( tempValue > currentMax ) { 95 | currentMax = tempValue; 96 | } 97 | } 98 | 99 | // write out the current groupID and the max value found for it 100 | //Utils.flatten(localGID); 101 | myIntW.set(currentMax); 102 | context.write(localGID, myIntW); 103 | 104 | } 105 | 106 | timerA = System.currentTimeMillis() - timerA; 107 | LOG.info("for corner " + Utils.arrayToString(key.getCorner()) + 108 | " map loop time: " + 109 | timerA + " ms with " ); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/reduce/MedianReducer.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.reduce; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.io.IntWritable; 10 | import org.apache.hadoop.io.LongWritable; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 14 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 15 | import edu.ucsc.srl.damasc.netcdf.io.Result; 16 | 17 | import edu.ucsc.srl.damasc.netcdf.Utils; 18 | 19 | /** 20 | * Reducer for the Median operator 21 | */ 22 | public class MedianReducer extends 23 | Reducer { 24 | 25 | private static final Log LOG = LogFactory.getLog(MedianReducer.class); 26 | 27 | static enum MedianReducerStatus { FULL, NOTFULL, NOTFINAL } 28 | 29 | /** 30 | * Reduces all the values for the given key, produces the median of the 31 | * HolisticResult objects in values 32 | * @param key the flattened corner for this instance of the extraction shape 33 | * in the global logical space 34 | * @param values an Iterable list of HolisticResult objects that represent all the inputs 35 | * for this key 36 | * @param context the Context object for the executing program 37 | */ 38 | public void reduce(GroupID key, Iterable values, 39 | Context context) 40 | throws IOException, InterruptedException { 41 | 42 | //GroupID tempID = new GroupID(); 43 | IntWritable outputInt = new IntWritable(Integer.MIN_VALUE); 44 | 45 | // now we need to parse the variable dimensions out 46 | int[] variableShape = Utils.getVariableShape( context.getConfiguration()); 47 | int[] extractionShape = 48 | Utils.getExtractionShape(context.getConfiguration(), 49 | variableShape.length); 50 | 51 | int neededSize = Utils.calcTotalSize( extractionShape ); 52 | 53 | HolisticResult maxVal = new HolisticResult(); 54 | maxVal.setNeededValueCount( neededSize ); 55 | 56 | for (HolisticResult value : values) { 57 | // sanity check 58 | if ( maxVal.isFull() ) { 59 | LOG.warn("Adding an element to an already full HR. Key: " + 60 | key.toString() + 61 | " array size: " + maxVal.getNeededValueCount() + 62 | " current elems: " + 63 | maxVal.getCurrentValueCount() ); 64 | } 65 | 66 | LOG.info("GID: " + key + " merging in " + value.getCurrentValueCount() + 67 | " keys, already " + maxVal.getCurrentValueCount() + " present"); 68 | 69 | maxVal.merge(value); 70 | } 71 | 72 | // now, the remainig holistic result should be full. 
Check though 73 | // and make sure it wasn't already finalized 74 | 75 | if( maxVal.isFull() && !maxVal.isFinal() ) { 76 | // apply whatever function you want, 77 | // in this case we sort and then pull the median out 78 | maxVal.sort(); 79 | 80 | if ( !Utils.isSorted( maxVal.getValues() )) { 81 | LOG.error("Holistic result for GID: " + key + " has unsorted results"); 82 | } 83 | 84 | maxVal.setFinal( maxVal.getValues()[(maxVal.getValues().length)/2] ); 85 | //LOG.info("gid: " + key + " is full at " + 86 | // maxVal.getCurrentValueCount() + " elements"); 87 | context.getCounter(MedianReducerStatus.FULL).increment(1); 88 | } else if (!maxVal.isFull() ) { 89 | LOG.info("gid: " + key + " has " + maxVal.getCurrentValueCount() + 90 | " elements" + 91 | " but should be full"); 92 | context.getCounter(MedianReducerStatus.NOTFULL).increment(1); 93 | } else if (maxVal.isFinal() ) { 94 | LOG.info("gid: " + key + " has already been set to final"); 95 | context.getCounter(MedianReducerStatus.NOTFINAL).increment(1); 96 | } 97 | 98 | 99 | //tempID.setGroupID( tempID.unflatten(variableShape, key.get() ) ); 100 | 101 | outputInt.set(maxVal.getValue(0)); 102 | context.write(key, outputInt); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/map/SimpleMaxMapper.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.map; 2 | 3 | import java.io.IOException; 4 | 5 | import org.apache.commons.logging.Log; 6 | import org.apache.commons.logging.LogFactory; 7 | import org.apache.hadoop.io.Text; 8 | import org.apache.hadoop.io.IntWritable; 9 | import org.apache.hadoop.io.LongWritable; 10 | import org.apache.hadoop.mapreduce.Mapper; 11 | 12 | 13 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 14 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 15 | import edu.ucsc.srl.damasc.netcdf.io.Result; 16 | import edu.ucsc.srl.damasc.netcdf.Utils; 17 | 18 | import ucar.ma2.Array; 19 | import ucar.ma2.ArrayInt; 20 | import ucar.ma2.Index; 21 | import ucar.ma2.IndexIterator; 22 | 23 | /** 24 | * Mapper for the Max function that uses simple data structures as 25 | * keys 26 | */ 27 | public class SimpleMaxMapper 28 | extends Mapper { 29 | 30 | private static final Log LOG = LogFactory.getLog(SimpleMaxMapper.class); 31 | private static boolean _benchmarkArraySpec = true; 32 | 33 | public static enum InvalidCell { INVALID_CELL_COUNT } ; 34 | 35 | /** 36 | * Reduces values for a given key 37 | * @param key ArraySpec representing the given Array being passed in 38 | * @param value an Array to process that corresponds to the given key 39 | * @param context the Context object for the currently executing job 40 | */ 41 | public void map(ArraySpec key, Array value, Context context) 42 | throws IOException, InterruptedException { 43 | 44 | ArrayInt intArray = (ArrayInt)value; 45 | 46 | int[] globalCoord = new int[key.getShape().length]; 47 | int[] groupIDArray = new int[key.getShape().length]; 48 | 49 | int[] allOnes = new int[key.getShape().length]; 50 | for( int i=0; i { 28 | 29 | private static final Log LOG = LogFactory.getLog(SimpleMedianMapper.class); 30 | private static boolean _benchmarkArraySpec = true; 31 | 32 | static enum MapOutputsCreated{ MAP } 33 | 34 | public static enum InvalidCell { INVALID_CELL_COUNT } ; 35 | 36 | 37 | /** 38 | * Reduces values for a given key 39 | * @param key ArraySpec representing the given Array being passed in 40 | * @param value an Array to 
process that corresponds to the given key 41 | * @param context the Context object for the currently executing job 42 | */ 43 | public void map(ArraySpec key, Array value, Context context) 44 | throws IOException, InterruptedException { 45 | 46 | ArrayInt intArray = (ArrayInt)value; 47 | 48 | int[] globalCoord = new int[key.getShape().length]; 49 | int[] groupIDArray = new int[key.getShape().length]; 50 | 51 | int[] allOnes = new int[key.getShape().length]; 52 | for( int i=0; i { 30 | 31 | private static final Log LOG = LogFactory.getLog(AverageMapper.class); 32 | private static boolean _benchmarkArraySpec = true; 33 | 34 | public static enum InvalidCell { INVALID_CELL_COUNT } ; 35 | 36 | /** 37 | * Reduces values for a given key 38 | * @param key the Key for the given value being passed in 39 | * @param value an Array to process that corresponds to the given key 40 | * @param context the Context object for the currently executing job 41 | */ 42 | public void map(ArraySpec key, Array value, Context context) 43 | throws IOException, InterruptedException { 44 | try { 45 | 46 | long timerA = System.currentTimeMillis(); 47 | 48 | ArrayInt ncArray = (ArrayInt)value; 49 | 50 | int[] allOnes = new int[key.getShape().length]; 51 | for( int i=0; i groupSubArrayMap = new HashMap(); 62 | 63 | GroupIDGen myGIDG = new GroupIDGen(); 64 | GroupIDGen.pullOutSubArrays( myGIDG, ncArray, key, extractionShape, 65 | allOnes, groupSubArrayMap); 66 | LOG.info("pullOutSubArrays returned " + groupSubArrayMap.size() + " elements"); 67 | 68 | ArrayInt localArray; 69 | GroupID localGID = new GroupID(); 70 | localGID.setName(key.getVarName()); 71 | int tempInt = 0; 72 | 73 | int[] zeroArray = new int[extractionShape.length]; 74 | for( int i = 0; i < zeroArray.length; i++) { 75 | zeroArray[i] = 0; 76 | } 77 | 78 | int[] helpArray = new int[extractionShape.length]; 79 | for( int i = 0; i < helpArray.length; i++) { 80 | helpArray[i] = 0; 81 | } 82 | 83 | GroupID zeroGID = new GroupID(zeroArray, "windspeed1"); 84 | 85 | int invalidMapperCounter = 0; 86 | 87 | LongWritable myLongW = new LongWritable(); 88 | AverageResult avgResult = new AverageResult(); 89 | 90 | // debugging bit here 91 | Iterator> gidItr2 = 92 | groupSubArrayMap.entrySet().iterator(); 93 | 94 | System.out.println("ArraySpec corner: " + Utils.arrayToString(key.getCorner()) + 95 | " shape: " + Utils.arrayToString(key.getShape())); 96 | while (gidItr2.hasNext() ) { 97 | System.out.println("gid: " + Utils.arrayToString(gidItr2.next().getKey().getGroupID())); 98 | } 99 | 100 | Iterator> gidItr = 101 | groupSubArrayMap.entrySet().iterator(); 102 | 103 | while (gidItr.hasNext() ) { 104 | Map.Entry pairs = gidItr.next(); 105 | localGID = pairs.getKey(); 106 | localArray = (ArrayInt)pairs.getValue(); 107 | avgResult.clear(); // reset this variable 108 | 109 | // TODO sort out how to do filtering with this new GroupID based setup 110 | // -jbuck 111 | 112 | IndexIterator valItr = localArray.getIndexIterator(); 113 | 114 | while( valItr.hasNext() ) { 115 | tempInt = valItr.getIntNext(); 116 | avgResult.addValue(tempInt); 117 | } 118 | 119 | Utils.adjustGIDForLogicalOffset(localGID, key.getLogicalStartOffset(), extractionShape ); 120 | 121 | myLongW.set( localGID.flatten(variableShape) ); 122 | 123 | context.write(myLongW, avgResult); 124 | 125 | LOG.info("ArraySpec corner: " + Utils.arrayToString(key.getCorner()) + 126 | " shape: " + Utils.arrayToString(key.getShape()) + 127 | " logical start: " + Utils.arrayToString(key.getLogicalStartOffset()) + 128 | " 
extraction shape: " + Utils.arrayToString(extractionShape) + 129 | " localGID: " + Utils.arrayToString(localGID.getGroupID()) 130 | ); 131 | 132 | } 133 | 134 | timerA = System.currentTimeMillis() - timerA; 135 | LOG.info("for corner " + Utils.arrayToString(key.getCorner()) + 136 | " map loop time: " + 137 | timerA + " ms with " ); 138 | } catch ( Exception e ) { 139 | System.out.println( " Exception caught in Average.map(). " + e.toString() ); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/map/IdentityMapper.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.map; 2 | 3 | import java.io.IOException; 4 | 5 | import java.lang.Integer; 6 | import java.lang.Long; 7 | import java.util.HashMap; 8 | import java.util.Iterator; 9 | import java.util.Map; 10 | 11 | import org.apache.commons.logging.Log; 12 | import org.apache.commons.logging.LogFactory; 13 | import org.apache.hadoop.io.IntWritable; 14 | import org.apache.hadoop.io.LongWritable; 15 | import org.apache.hadoop.mapreduce.Mapper; 16 | 17 | import ucar.ma2.Array; 18 | import ucar.ma2.ArrayInt; 19 | import ucar.ma2.IndexIterator; 20 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 21 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 22 | import edu.ucsc.srl.damasc.netcdf.Utils; 23 | 24 | /** 25 | * Mapper for the Identity function 26 | * Currently uses an in-memory combiner 27 | */ 28 | public class IdentityMapper extends Mapper { 29 | 30 | private static final Log LOG = LogFactory.getLog(IdentityMapper.class); 31 | private static boolean _benchmarkArraySpec = true; 32 | 33 | public static enum InvalidCell { INVALID_CELL_COUNT } ; 34 | 35 | /** 36 | * Reduces values for a given key 37 | * @param key ArraySpec representing the given Array being passed in 38 | * @param value an Array to process that corresponds to the given key 39 | * @param context the Context object for the currently executing job 40 | */ 41 | public void map(ArraySpec key, Array value, Context context) 42 | throws IOException, InterruptedException { 43 | 44 | long timerA = System.currentTimeMillis(); 45 | 46 | ArrayInt ncArray = (ArrayInt)value; 47 | int numCells = (int)ncArray.getSize(); 48 | 49 | // this is a fair bit of memeory. 
Better to do it in one allocation, than a lot of them, 50 | // but we should still try to optimize this out 51 | 52 | int[] globalCoord = new int[key.getShape().length]; 53 | int[] groupIDArray = new int[key.getShape().length]; 54 | 55 | int[] allOnes = new int[key.getShape().length]; 56 | for( int i=0; i inMapperCombiner = new HashMap(); 63 | 64 | long[] longArray = new long[numCells]; 65 | int[] intArray = new int[numCells]; 66 | 67 | IndexIterator iter = ncArray.getIndexIterator(); 68 | 69 | GroupID myGroupID = new GroupID(); 70 | myGroupID.setName(key.getVarName()); 71 | 72 | IntWritable myIntW = new IntWritable(); 73 | LongWritable myLongW = new LongWritable(); 74 | 75 | //Integer myInt = new Integer(); 76 | //Long myLong = new Long(); 77 | 78 | int[] extractionShape = 79 | Utils.getExtractionShape(context.getConfiguration(), 80 | key.getShape().length); 81 | int[] variableShape = 82 | Utils.getVariableShape(context.getConfiguration()); 83 | 84 | int invalidMapperCounter = 0; 85 | int val = 0; 86 | int counter = 0; 87 | long startup = System.currentTimeMillis() - timerA; 88 | LOG.info("Startup time: " + startup + " ms"); 89 | 90 | timerA = System.currentTimeMillis(); 91 | 92 | while( iter.hasNext() ) { 93 | 94 | val = iter.getIntNext(); 95 | 96 | // orient this cell's location in the global space 97 | globalCoord = Utils.mapToGlobal(iter.getCurrentCounter(), 98 | key.getCorner(), globalCoord ); 99 | 100 | // track cells that are invalid. This is typically due 101 | // to filtering of cells by the query 102 | // it's only applied at the mapper for optB. 103 | // optC applies it at split generation 104 | 105 | if ( !Utils.noScanEnabled(context.getConfiguration()) ) 106 | { 107 | if ( !Utils.isValid(globalCoord, context.getConfiguration()) ) { 108 | invalidMapperCounter++; 109 | continue; 110 | } 111 | } 112 | 113 | // figure out the "bucket" for this (determined by the 114 | // extraction shape and position of this cell 115 | myGroupID = Utils.mapToLocal(globalCoord, groupIDArray, 116 | myGroupID, extractionShape); 117 | 118 | longArray[counter] = myGroupID.flatten(variableShape); 119 | intArray[counter] = val; 120 | 121 | if ( inMapperCombiner.containsKey(longArray[counter]) ){ 122 | if ( intArray[counter] > inMapperCombiner.get(longArray[counter]) ){ 123 | inMapperCombiner.put(longArray[counter], intArray[counter]); 124 | } 125 | } else { 126 | inMapperCombiner.put(longArray[counter], intArray[counter]); 127 | } 128 | 129 | counter++; 130 | } 131 | 132 | timerA = System.currentTimeMillis() - timerA; 133 | LOG.info("for corner " + Utils.arrayToString(key.getCorner()) + 134 | " map loop time: " + 135 | timerA + " ms with " + invalidMapperCounter + 136 | " invalid ArraySpecs" ); 137 | 138 | // stop the timer 139 | timerA = System.currentTimeMillis(); 140 | 141 | // write out all results of the in-mapper combiner here 142 | Iterator> itr = inMapperCombiner.entrySet().iterator(); 143 | while(itr.hasNext()){ 144 | Map.Entry pairs = itr.next(); 145 | myLongW.set(pairs.getKey()); 146 | myIntW.set(pairs.getValue() ); 147 | context.write(myLongW, myIntW); 148 | } 149 | 150 | context.getCounter(InvalidCell.INVALID_CELL_COUNT).increment(invalidMapperCounter); 151 | timerA = System.currentTimeMillis() - timerA; 152 | LOG.info("writing data and increment counte took " + 153 | timerA + " ms" ); 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/map/MedianMapper.java: 
-------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.map; 2 | 3 | import java.io.IOException; 4 | import java.io.BufferedWriter; 5 | import java.io.File; 6 | import java.io.FileWriter; 7 | import java.util.HashMap; 8 | import java.util.Iterator; 9 | import java.util.Map; 10 | import java.util.Set; 11 | 12 | import org.apache.commons.logging.Log; 13 | import org.apache.commons.logging.LogFactory; 14 | import org.apache.hadoop.io.Text; 15 | import org.apache.hadoop.io.IntWritable; 16 | import org.apache.hadoop.io.LongWritable; 17 | import org.apache.hadoop.mapreduce.Mapper; 18 | 19 | import ucar.ma2.Array; 20 | import ucar.ma2.ArrayInt; 21 | import ucar.ma2.Index; 22 | import ucar.ma2.IndexIterator; 23 | 24 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 25 | import edu.ucsc.srl.damasc.netcdf.io.GroupID; 26 | import edu.ucsc.srl.damasc.netcdf.io.GroupIDGen; 27 | import edu.ucsc.srl.damasc.netcdf.io.HolisticResult; 28 | import edu.ucsc.srl.damasc.netcdf.io.Result; 29 | import edu.ucsc.srl.damasc.netcdf.Utils; 30 | import edu.ucsc.srl.damasc.netcdf.NetCDFUtils; 31 | 32 | /** 33 | * Mapper that prepares data for the Median operation 34 | */ 35 | public class MedianMapper extends 36 | Mapper { 37 | 38 | private static final Log LOG = LogFactory.getLog(MedianMapper.class); 39 | private static boolean _benchmarkArraySpec = true; 40 | 41 | public static enum MedianMapStatus { FULL, NOTFULL } 42 | 43 | /** 44 | * Reduces values for a given key 45 | * @param key ArraySpec representing the given Array being passed in 46 | * @param value an Array to process that corresponds to the given key 47 | * @param context the Context object for the currently executing job 48 | */ 49 | public void map(ArraySpec key, Array value, Context context) 50 | throws IOException, InterruptedException { 51 | 52 | long timerA = System.currentTimeMillis(); 53 | 54 | ArrayInt ncArray = (ArrayInt)value; 55 | 56 | int[] allOnes = new int[key.getShape().length]; 57 | for( int i=0; i groupSubArrayMap = new HashMap(); 66 | 67 | GroupIDGen myGIDG = new GroupIDGen(); 68 | GroupIDGen.pullOutSubArrays( myGIDG, ncArray, key, extractionShape, 69 | allOnes, groupSubArrayMap); 70 | 71 | String debugFileName = Utils.getDebugLogFileName(context.getConfiguration()); 72 | if ( "" != debugFileName ) { 73 | LOG.info("Trying to log to " + debugFileName); 74 | NetCDFUtils.logGIDs( debugFileName, ncArray, key, 75 | extractionShape, groupSubArrayMap, LOG ); 76 | } 77 | 78 | ArrayInt localArray; 79 | GroupID localGID; 80 | 81 | int currentMax = Integer.MIN_VALUE; 82 | int tempValue = Integer.MIN_VALUE; 83 | int invalidMapperCounter = 0; 84 | long totalCellsSeen = 0; 85 | long counter = 0; 86 | int finalValue = Integer.MIN_VALUE; 87 | 88 | // the two output classes 89 | HolisticResult outputHolisticResult = new HolisticResult(); 90 | 91 | Iterator> gidItr = 92 | groupSubArrayMap.entrySet().iterator(); 93 | 94 | 95 | while( gidItr.hasNext() ) { 96 | counter = 0; 97 | Map.Entry pair = gidItr.next(); 98 | localGID = pair.getKey(); 99 | localArray = (ArrayInt)pair.getValue(); 100 | 101 | outputHolisticResult.clear(); 102 | 103 | // create a holistic result big enough to hold all the data that it 104 | // may see (note, we know how many it will see but 105 | // this seems like a safe move) 106 | outputHolisticResult.setNeededValueCount( 107 | Utils.calcTotalSize(extractionShape) ); 108 | 109 | IndexIterator valItr = localArray.getIndexIterator(); 110 | 111 | LOG.info("exShapeSize: " + 
Utils.calcTotalSize(extractionShape) + 112 | " holisticResultSize: " + localArray.getSize() ); 113 | 114 | while( valItr.hasNext() ) { 115 | //LOG.info("gid: " + Utils.arrayToString( localGID.getGroupID()) + 116 | // " adding element " + counter + " at: " + 117 | // outputHolisticResult.getCurrentValueCount()); 118 | // get a legit value first 119 | outputHolisticResult.setValue(valItr.getIntNext()); 120 | counter++; 121 | } 122 | 123 | // if we have a full HolisticResult, 124 | // we should consolidate it into a final answer 125 | // prior to writing it out 126 | if( outputHolisticResult.isFull() ){ 127 | outputHolisticResult.sort(); 128 | 129 | if ( !Utils.isSorted( outputHolisticResult.getValues())) { 130 | LOG.error("Holistic result for GID: " + key + " has unsorted results"); 131 | } 132 | 133 | finalValue = outputHolisticResult.getValues()[outputHolisticResult.getValues().length/2 ]; 134 | outputHolisticResult.setFinal(finalValue); 135 | context.getCounter(MedianMapStatus.FULL).increment(1); 136 | LOG.info("GID " + localGID + " is full in the mapper"); 137 | } else { 138 | context.getCounter(MedianMapStatus.NOTFULL).increment(1); 139 | LOG.info("GID " + localGID + " is NOT full in the mapper, has " + 140 | outputHolisticResult.getCurrentValueCount() + " elements" ); 141 | } 142 | 143 | context.write(localGID, outputHolisticResult); 144 | 145 | totalCellsSeen += localArray.getSize(); 146 | } 147 | 148 | timerA = System.currentTimeMillis() - timerA; 149 | LOG.info("for corner " + Utils.arrayToString(key.getCorner()) + 150 | " map loop time: " + 151 | timerA + " ms with " + 152 | + totalCellsSeen + " total cells seen"); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/io/input/NetCDFFileSplit.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.io.input; 2 | 3 | import java.io.IOException; 4 | import java.io.DataInput; 5 | import java.io.DataOutput; 6 | import java.util.ArrayList; 7 | 8 | import org.apache.commons.logging.Log; 9 | import org.apache.commons.logging.LogFactory; 10 | import org.apache.hadoop.mapreduce.InputSplit; 11 | import org.apache.hadoop.io.Writable; 12 | import org.apache.hadoop.io.Text; 13 | import org.apache.hadoop.fs.Path; 14 | 15 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 16 | 17 | /** 18 | * This class represents an InputSplit which corresponds to a 19 | * NetCDF file. 
20 | */ 21 | public class NetCDFFileSplit extends InputSplit implements Writable { 22 | 23 | private Path _path = null; 24 | private String[] _hosts = null; 25 | private long _totalDataElements = 0; 26 | private ArrayList _arraySpecList; 27 | private static final Log LOG = LogFactory.getLog(NetCDFFileSplit.class); 28 | 29 | public NetCDFFileSplit() {} 30 | 31 | /** 32 | * NOTE: start and shape must be the same length 33 | * 34 | * @param file the file name 35 | * @param variable the name of the variable this data belongs to 36 | * @param shape an n-dimensional array representing the length, 37 | * in each dimension, of this array 38 | * @param dimToIterateOver which dimension to iterate over 39 | * @param startStep which step in the dimension being iterated over to start on 40 | * @param numSteps how many steps this split represents 41 | * @param hosts the list of hosts containing this block, possibly null 42 | */ 43 | public NetCDFFileSplit( Path path, ArrayList arraySpecList, 44 | String[] hosts) { 45 | this._path = new Path(path.toString()); 46 | this._arraySpecList = new ArrayList(arraySpecList); 47 | 48 | this._hosts = new String[hosts.length]; 49 | for (int i=0; i getArraySpecList() { 72 | return this._arraySpecList; 73 | } 74 | 75 | /** 76 | * Returns the number of cells represented by the set of ArraySpecs 77 | * in this Split 78 | * @return the number of total cells represented by this split 79 | */ 80 | @Override 81 | public long getLength() throws IOException, InterruptedException { 82 | return this._totalDataElements; 83 | } 84 | 85 | /** 86 | * Returns the list of hosts which contain, locally, the file system 87 | * block that this InputSplit is assigned to 88 | * @return an array of Strings that are hostnames. The scheduler should attempt 89 | * to place this InputSplit on one of those hosts 90 | */ 91 | @Override 92 | public String[] getLocations() throws IOException, InterruptedException { 93 | if (null == this._hosts) 94 | return new String[]{}; 95 | else 96 | return this._hosts; 97 | } 98 | 99 | /** 100 | * Serialze this structure to a DataOutput object 101 | * @param out The DataOutput object to write the context of this split to 102 | */ 103 | public void write(DataOutput out) throws IOException { 104 | Text.writeString(out, this._path.toString()); 105 | 106 | // first, write the number of entires in the list 107 | out.writeInt(this._arraySpecList.size() ); 108 | 109 | // then, for each entry, write it 110 | for (int i=0; i(listSize); 126 | 127 | // for each element, create a corner and shape 128 | for (int i=0; i< listSize; i++) { 129 | ArraySpec array = new ArraySpec(); 130 | array.readFields(in); 131 | this._arraySpecList.add(array); 132 | } 133 | } 134 | 135 | /** 136 | * Compares this NetCDFFileSplit to another. 137 | * @return an int that is less than, equal to, or greater than 0, 138 | * depending on whether the object passed in is lesser than, equal to, 139 | * or greater than this NetCDFFileSplit (respectively) 140 | */ 141 | public int compareTo(Object o) { 142 | NetCDFFileSplit temp = (NetCDFFileSplit)o; 143 | int retVal = 0; 144 | 145 | // we'll compare the first entry in the ArrayList 146 | // from each object. 
If they are 147 | // the same, then we'll assume these are the same element 148 | // (since each ArraySpec should exist in one and 149 | // only one entry 150 | ArraySpec arrayA = this._arraySpecList.get(0); 151 | ArraySpec arrayB = temp.getArraySpecList().get(0); 152 | 153 | if (arrayA == null) { 154 | return -1; 155 | } else if ( arrayB == null) { 156 | return 1; 157 | } 158 | 159 | for (int i=0; i < arrayA.getCorner().length; i++) { 160 | retVal = new Integer(arrayA.getCorner()[i] - 161 | arrayB.getCorner()[i]).intValue(); 162 | if (retVal != 0) { 163 | return retVal; 164 | } 165 | } 166 | 167 | return 0; 168 | } 169 | 170 | /** 171 | * Writes the contents of this NetCDFFileSplit to a string 172 | * @return a String representing the content of this NetCDFFileSplit 173 | */ 174 | public String toString() { 175 | String tempStr = ""; 176 | 177 | tempStr += "file: " + this._path.getName() + " with ArraySpecs:\n"; 178 | 179 | for ( int i=0; i _arraySpecList; // List of ArraySpecs for this split 29 | 30 | private static final Log LOG = LogFactory.getLog(ArrayBasedFileSplit.class); 31 | 32 | public ArrayBasedFileSplit() {} 33 | 34 | /** 35 | * NOTE: start and shape must be the same length 36 | * 37 | * Constructor for an ArrayBasedFileSplit 38 | * @param file the file name 39 | * @param variable the name of the variable this data belongs to 40 | * @param shape an n-dimensional array representing the length, in each dimension, of this array 41 | * @param dimToIterateOver which dimension to iterate over 42 | * @param startStep which step in the dimension being iterated over to start on 43 | * @param numSteps how many steps this split represents 44 | * @param hosts the list of hosts containing this block, possibly null 45 | */ 46 | public ArrayBasedFileSplit( Path path, ArrayList arraySpecList, 47 | String[] hosts) { 48 | this._path = new Path(path.toString()); 49 | this._arraySpecList = new ArrayList(arraySpecList); 50 | 51 | this._hosts = new String[hosts.length]; 52 | for (int i=0; i embedded within this split 71 | * @return ArrayList with specific ArraySpecs 72 | */ 73 | public ArrayList getArraySpecList() { 74 | return this._arraySpecList; 75 | } 76 | 77 | @Override 78 | /** 79 | * Returns the total number of cells represented by the group of 80 | * ArraySpec entries in this split 81 | * @return total number of cells represented by this split 82 | */ 83 | public long getLength() throws IOException, InterruptedException { 84 | return this._totalDataElements; 85 | } 86 | 87 | @Override 88 | /** 89 | * Returns a list of hosts that locally possess a copy of the file system 90 | * block that this Split corresponds to 91 | * @return array of hostnames as Strings 92 | */ 93 | public String[] getLocations() throws IOException, InterruptedException { 94 | if (null == this._hosts) 95 | return new String[]{}; 96 | else 97 | return this._hosts; 98 | } 99 | 100 | /** 101 | * Serializes this Split to the DataOutput object 102 | * @param out DataOutput object to serialize this Split to 103 | */ 104 | public void write(DataOutput out) throws IOException { 105 | Text.writeString(out, this._path.toString()); 106 | 107 | // first, write the number of entires in the list 108 | out.writeInt(this._arraySpecList.size() ); 109 | 110 | // then, for each entry, write it 111 | for (int i=0; i(listSize); 127 | 128 | // for each element, create a corner and shape 129 | for (int i=0; i< listSize; i++) { 130 | ArraySpec array = new ArraySpec(); 131 | array.readFields(in); 132 | this._arraySpecList.add(array); 133 | } 134 
| } 135 | 136 | /** 137 | * Compares this object to another and returns a negative value, 138 | * zero or a positive value if this object is less than, equal to 139 | * or greater than the object passed in. 140 | * @param o the Object to compare this Split to. Assumed to bed another 141 | * ArrayBasedFileSplit 142 | * @return a negative value, zero or positive value if this object 143 | * is less than, equal to or greater than, respectively, the object 144 | * passed in 145 | */ 146 | public int compareTo(Object o) { 147 | ArrayBasedFileSplit temp = (ArrayBasedFileSplit)o; 148 | int retVal = 0; 149 | 150 | // we'll compare the first entry in the ArrayList 151 | // from each object. If they are 152 | // the same, then we'll assume these are the same element 153 | // (since each ArraySpec should exist in one and 154 | // only one entry 155 | ArraySpec arrayA = this._arraySpecList.get(0); 156 | ArraySpec arrayB = temp.getArraySpecList().get(0); 157 | 158 | if (arrayA == null) { 159 | return -1; 160 | } else if ( arrayB == null) { 161 | return 1; 162 | } 163 | 164 | for (int i=0; i < arrayA.getCorner().length; i++) { 165 | retVal = new Integer(arrayA.getCorner()[i] - 166 | arrayB.getCorner()[i]).intValue(); 167 | if (retVal != 0) { 168 | return retVal; 169 | } 170 | } 171 | 172 | return 0; 173 | } 174 | 175 | /** 176 | * Handy function to print out the contents of this split 177 | * @return string versions of the data contained in this split 178 | */ 179 | public String toString() { 180 | String tempStr = ""; 181 | 182 | tempStr += "file: " + this._path.getName() + " with ArraySpecs:\n"; 183 | 184 | for ( int i=0; i "); 48 | System.exit(2); 49 | } 50 | 51 | //Configuration conf = new Configuration(); 52 | Configuration conf = getConf(); 53 | //JobConf jc = new JobConf(conf, Identity.class); 54 | //Cluster cluster = new Cluster(conf); 55 | //Job job = Job.getInstance(cluster); 56 | Job job = new Job(conf); 57 | String jobNameString = ""; 58 | 59 | // get the buffer size 60 | int bufferSize = Utils.getBufferSize(conf); 61 | jobNameString += " buffersize: " + bufferSize + " "; 62 | 63 | if( Utils.getOperator(conf) == Operator.simpleMedian) { 64 | jobNameString += "Simple Median"; 65 | job.setJarByClass(Identity.class); 66 | 67 | job.setMapperClass(SimpleMedianMapper.class); 68 | if ( Utils.useCombiner(conf) ) { 69 | jobNameString += " with combiner "; 70 | job.setCombinerClass(SimpleMedianCombiner.class); 71 | } 72 | job.setReducerClass(SimpleMedianReducer.class); 73 | 74 | // mapper output 75 | job.setMapOutputKeyClass(LongWritable.class); 76 | job.setMapOutputValueClass(HolisticResult.class); 77 | 78 | // reducer output 79 | job.setOutputKeyClass(GroupID.class); 80 | job.setOutputValueClass(IntWritable.class); 81 | 82 | } else if( Utils.getOperator(conf) == Operator.median) { 83 | jobNameString += "Median"; 84 | job.setJarByClass(Identity.class); 85 | 86 | job.setMapperClass(MedianMapper.class); 87 | if ( Utils.useCombiner(conf) ) { 88 | jobNameString += " with combiner "; 89 | job.setCombinerClass(MedianCombiner.class); 90 | } 91 | job.setReducerClass(MedianReducer.class); 92 | 93 | // mapper output 94 | job.setMapOutputKeyClass(GroupID.class); 95 | job.setMapOutputValueClass(HolisticResult.class); 96 | 97 | // reducer output 98 | job.setOutputKeyClass(GroupID.class); 99 | job.setOutputValueClass(IntWritable.class); 100 | } else if( Utils.getOperator(conf) == Operator.simpleMax) { 101 | jobNameString += "Simple Max"; 102 | job.setJarByClass(Identity.class); 103 | 
job.setMapperClass(SimpleMaxMapper.class); 104 | job.setReducerClass(SimpleMaxReducer.class); 105 | 106 | if ( Utils.useCombiner(conf) ) { 107 | jobNameString += " with combiner "; 108 | job.setCombinerClass(SimpleMaxCombiner.class); 109 | } 110 | 111 | // mapper output 112 | job.setMapOutputKeyClass(LongWritable.class); 113 | job.setMapOutputValueClass(IntWritable.class); 114 | 115 | // reducer output 116 | job.setOutputKeyClass(GroupID.class); 117 | job.setOutputValueClass(IntWritable.class); 118 | 119 | } else if( Utils.getOperator(conf) == Operator.max) { 120 | jobNameString += "max"; 121 | job.setJarByClass(Identity.class); 122 | job.setMapperClass(MaxMapper.class); 123 | 124 | if ( Utils.useCombiner(conf) ) { 125 | jobNameString += " with combiner "; 126 | job.setCombinerClass(MaxCombiner.class); 127 | } 128 | 129 | job.setReducerClass(MaxReducer.class); 130 | 131 | // mapper output 132 | job.setMapOutputKeyClass(GroupID.class); 133 | job.setMapOutputValueClass(IntWritable.class); 134 | 135 | // reducer output 136 | job.setOutputKeyClass(GroupID.class); 137 | job.setOutputValueClass(IntWritable.class); 138 | 139 | } else if( Utils.getOperator(conf) == Operator.nulltest) { 140 | jobNameString += "null test"; 141 | job.setJarByClass(Identity.class); 142 | job.setMapperClass(NullMapper.class); 143 | job.setReducerClass(NullReducer.class); 144 | 145 | // reducer output 146 | job.setOutputKeyClass(GroupID.class); 147 | job.setOutputValueClass(IntWritable.class); 148 | 149 | // mapper output 150 | job.setMapOutputKeyClass(GroupID.class); 151 | job.setMapOutputValueClass(IntWritable.class); 152 | } else if( Utils.getOperator(conf) == Operator.average) { 153 | jobNameString += " average "; 154 | job.setJarByClass(Identity.class); 155 | job.setMapperClass(AverageMapper.class); 156 | job.setReducerClass(AverageReducer.class); 157 | 158 | // reducer output 159 | job.setOutputKeyClass(GroupID.class); 160 | //job.setOutputKeyClass(Text.class); 161 | job.setOutputValueClass(IntWritable.class); 162 | 163 | // mapper output 164 | job.setMapOutputKeyClass(LongWritable.class); 165 | job.setMapOutputValueClass(AverageResult.class); 166 | } else { // TODO -jbuck error out here, do NOT assume a default functor 167 | 168 | System.err.println("No operator specified. 
Try again"); 169 | System.exit(2); 170 | } 171 | 172 | if( Utils.noScanEnabled(conf) ) 173 | jobNameString += " with noscan "; 174 | 175 | if( Utils.queryDependantEnabled(conf) ) 176 | jobNameString += " and query dependant"; 177 | 178 | jobNameString += Utils.getPartModeString(conf) + ", " + 179 | Utils.getPlacementModeString(conf); 180 | jobNameString += " with " + Utils.getNumberReducers(conf) + 181 | " reducers "; 182 | 183 | job.setJobName(jobNameString); 184 | 185 | job.setInputFormatClass(NetCDFFileInputFormat.class); 186 | job.setNumReduceTasks( Utils.getNumberReducers(conf) ); 187 | 188 | NetCDFFileInputFormat.addInputPath(job, new Path(args[0])); 189 | FileOutputFormat.setOutputPath(job, new Path(args[1])); 190 | 191 | job.waitForCompletion(true); 192 | 193 | return 0; 194 | } 195 | 196 | public static void main(String[] args) throws Exception { 197 | int res = ToolRunner.run(new Configuration(), new Identity(), args); 198 | System.exit(res); 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/edu/ucsc/srl/damasc/netcdf/io/GroupID.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.damasc.netcdf.io; 2 | 3 | import java.util.Arrays; 4 | import java.io.IOException; 5 | import java.io.DataInput; 6 | import java.io.DataOutput; 7 | 8 | import edu.ucsc.srl.damasc.netcdf.io.ArraySpec; 9 | import edu.ucsc.srl.damasc.netcdf.Utils; 10 | 11 | import org.apache.commons.logging.Log; 12 | import org.apache.commons.logging.LogFactory; 13 | 14 | import org.apache.hadoop.io.WritableComparable; 15 | import org.apache.hadoop.io.Text; 16 | 17 | /** 18 | * Stores the instance of the extraction shape that corresponds to an 19 | * array. Serves as a key for the shuffle phase of MapReduce jobs. 
20 | */ 21 | public class GroupID implements WritableComparable { 22 | private String _name; // the variable name this data came from 23 | private int[] _groupID; // the corner of a given extraction shape 24 | 25 | public GroupID() {} 26 | 27 | private static final Log LOG = LogFactory.getLog(GroupID.class); 28 | 29 | 30 | /** 31 | * Constructor that sets the GroupID and the name of the variable 32 | * the ID corresponds to 33 | * @param groupID GroupID, as an n-dimensional variable 34 | * @param name Variable name that this GroupID belongs to 35 | */ 36 | public GroupID(int[] groupID, String name) throws Exception { 37 | 38 | this._groupID = new int[groupID.length]; 39 | for (int i=0; i < groupID.length; i++) { 40 | this._groupID[i] = groupID[i]; 41 | } 42 | 43 | this._name = new String(name); 44 | } 45 | 46 | /** 47 | * Return the number of dimensions for this GroupID 48 | * @return the number of dimensions for the variable 49 | * that this GroupID belongs to 50 | */ 51 | public int getRank() { 52 | return this._groupID.length; 53 | } 54 | 55 | /** 56 | * Returns the GroupID 57 | * @return The GroupID as an n-dimensional array 58 | */ 59 | public int[] getGroupID() { 60 | return this._groupID; 61 | } 62 | 63 | /** 64 | * Returns the name of the variable that this GroupID corresponds to 65 | * @return variable name as a String 66 | */ 67 | public String getName() { 68 | return this._name; 69 | } 70 | 71 | /** 72 | * Sets the group ID for this GroupID object 73 | * @param newGroupID the ID for this object 74 | */ 75 | public void setGroupID( int[] newGroupID) { 76 | this._groupID = newGroupID; 77 | } 78 | 79 | /** 80 | * Makes it possible to set the ID for a specific dimension 81 | * @param dim The dimension to set 82 | * @param val The value to set indicated dimension to 83 | */ 84 | public void setDimension( int dim, int val) 85 | throws ArrayIndexOutOfBoundsException { 86 | if ( dim < 0 || dim >= this._groupID.length) 87 | throw new ArrayIndexOutOfBoundsException("setDimension called with " + 88 | "dimension " + dim + " on groupID with dimensions " + 89 | this._groupID.length); 90 | 91 | this._groupID[dim] = val; 92 | } 93 | 94 | /** 95 | * Sets the variable name for this GroupID object 96 | * @param newName the name of the Variable for this object 97 | */ 98 | public void setName( String newName ) { 99 | this._name = newName; 100 | } 101 | 102 | /** 103 | * Returns the contents of this GroupID object as a String 104 | * @return a String version of this GroupID object 105 | */ 106 | public String toString() { 107 | return _name + ": groupID = " + Arrays.toString(this._groupID); 108 | } 109 | 110 | /** 111 | * Projects this GroupID from the local logical space 112 | * into the global logical space via the extraction shape 113 | * @param exShape The extraction shape to use to project this 114 | * GroupID into the global space 115 | * @return the group ID for this object, in the global logical space 116 | */ 117 | public String toString(int[] exShape) { 118 | int[] tempArray = new int[this._groupID.length]; 119 | for ( int i=0; i { 42 | 43 | private static final Log LOG = LogFactory.getLog(NetCDFRecordReader.class); 44 | private long _timer; 45 | private int _numArraySpecs; 46 | 47 | //this will cause the library to use its default size 48 | private int _bufferSize = -1; 49 | 50 | private NetcdfFile _ncfile = null; 51 | private NcHdfsRaf _raf = null; 52 | private Variable _curVar; // actual Variable object 53 | private String _curVarName; // name of the current variable that is open 54 | 
private String _curFileName; 55 | 56 | // how many data elements were read the last step 57 | private long _totalDataElements = 1; 58 | 59 | // how many data elements have been read so far (used to track work done) 60 | private long _elementsSeenSoFar = 0; 61 | 62 | private ArrayList _arraySpecArrayList = null; 63 | 64 | private ArraySpec _currentArraySpec = null; // this also serves as key 65 | private Array _value = null; 66 | private int _currentArraySpecIndex = 0; 67 | 68 | /** 69 | * Resets a RecordReader each time it is passed a new InputSplit to read 70 | * @param genericSplit an InputSplit (really an ArrayBasedFileSplit) that 71 | * needs its data read 72 | * @param context TaskAttemptContext for the currently executing progrma 73 | */ 74 | @Override 75 | public void initialize(InputSplit genericSplit, TaskAttemptContext context) 76 | throws IOException { 77 | 78 | this._timer = System.currentTimeMillis(); 79 | ArrayBasedFileSplit split = (ArrayBasedFileSplit)genericSplit; 80 | this._numArraySpecs = split.getArraySpecList().size(); 81 | Configuration job = context.getConfiguration(); 82 | 83 | Path path = split.getPath(); 84 | FileSystem fs = path.getFileSystem(job); 85 | 86 | this._arraySpecArrayList = split.getArraySpecList(); 87 | 88 | // calculate the total data elements in this split 89 | this._totalDataElements = 0; 90 | 91 | for ( int j=0; j < this._arraySpecArrayList.size(); j++) { 92 | this._totalDataElements += this._arraySpecArrayList.get(j).getSize(); 93 | } 94 | 95 | // get the buffer size 96 | this._bufferSize = Utils.getBufferSize(job); 97 | 98 | this._raf = new NcHdfsRaf(fs.getFileStatus(path), job, this._bufferSize); 99 | this._ncfile = NetcdfFile.open(this._raf, path.toString()); 100 | 101 | } 102 | 103 | /** this is called to load the next key/value in. The actual data is retrieved 104 | * via getCurrent[Key|Value] calls 105 | * @return a boolean that is true if there is more data to be read, 106 | * false otherwise 107 | */ 108 | @Override 109 | public boolean nextKeyValue() throws IOException, InterruptedException { 110 | if ( !this._arraySpecArrayList.isEmpty() ) { 111 | // set the current element 112 | this._currentArraySpec = this._arraySpecArrayList.get(0); 113 | 114 | // then delete it from the ArrayList 115 | this._arraySpecArrayList.remove(0); 116 | 117 | // fixing an entirely random bug -jbuck TODO FIXME 118 | if ( this._currentArraySpec.getCorner().length <= 1 ) { 119 | return this.nextKeyValue(); 120 | } 121 | 122 | // transfer the data 123 | loadDataFromFile(); 124 | 125 | return true; 126 | } else { 127 | this._timer = System.currentTimeMillis() - this._timer; 128 | LOG.debug("from init() to nextKeyValue() returning false, " + 129 | "this record reader took: " + this._timer + 130 | " ms. It had " + this._numArraySpecs + 131 | " ArraySpecs to process" ); 132 | 133 | return false; 134 | } 135 | } 136 | 137 | /** 138 | * Load data into the value element from disk. 139 | * Currently this only supports IntWritable. 
Extend this 140 | * to support other data types TODO 141 | */ 142 | private void loadDataFromFile() throws IOException { 143 | try { 144 | 145 | // reuse the open variable if it's the correct one 146 | if ( this._curVarName == null || 147 | 0 != (this._currentArraySpec.getVarName()).compareTo(this._curVarName)){ 148 | LOG.debug("calling getVar on " + this._currentArraySpec.getVarName() ); 149 | this._curVar = 150 | this._ncfile.findVariable(this._currentArraySpec.getVarName()); 151 | } 152 | 153 | 154 | if ( this._curVar ==null ) { 155 | LOG.warn("this._curVar is null. BAD NEWS"); 156 | LOG.warn( "file: " + this._currentArraySpec.getFileName() + 157 | "corner: " + 158 | Arrays.toString(this._currentArraySpec.getCorner() ) + 159 | " shape: " + Arrays.toString(this._currentArraySpec.getShape() ) ); 160 | } 161 | 162 | LOG.warn( " File: " + this._currentArraySpec.getFileName() + 163 | " startOffset: " + Utils.arrayToString(this._currentArraySpec.getLogicalStartOffset()) + 164 | "corner: " + 165 | Arrays.toString(this._currentArraySpec.getCorner()) + 166 | " shape: " + 167 | Arrays.toString(this._currentArraySpec.getShape())); 168 | 169 | // this next bit is to be able to set the dimensions of the variable 170 | // for this ArraySpec. Needed for flattening the groupID to a long 171 | ArrayList varDims = 172 | new ArrayList(this._curVar.getDimensions()); 173 | int[] varDimLengths = new int[varDims.size()]; 174 | 175 | for( int i=0; i 1 && 168 | (this._values.length == this._currentValueCount) ) { 169 | this._full = true; 170 | } 171 | 172 | return this._full; 173 | } 174 | 175 | /** 176 | * Adds a value to this result object 177 | * @param value the value to add to this object 178 | */ 179 | public void setValue(int value) throws IOException { 180 | if( this._values.length > 1 && this.isFull()) { 181 | throw new IOException("ERROR: adding an element to an already " + 182 | "full HolisticResult object." + 183 | "Length: " + this._values.length); 184 | } 185 | 186 | if ( this._final == true ) { 187 | throw new IOException("ERROR: adding a value to a " + 188 | "HolisticResult that has been marked final"); 189 | } 190 | 191 | this._values[this._currentValueCount] = value; 192 | this._currentValueCount++; 193 | } 194 | 195 | /** 196 | * This means that the result for this object 197 | * has been calculated. This generates a new array, holding only the 198 | * result, and sets the "final" status to true. 199 | * @param value the result for this result object 200 | */ 201 | public void setFinal( int value ) throws IOException { 202 | this._values = new int[1]; // free up the now useless ._values array 203 | this._currentValueCount = 0; 204 | this.setValue(value); 205 | this._final = true; 206 | } 207 | 208 | 209 | /** 210 | * Returns the contents of this result object in String form 211 | * @return the contents of this object as a String 212 | */ 213 | public String toString() { 214 | return "values = " + Utils.arrayToString(this._values); 215 | } 216 | 217 | /** 218 | * Initializes this object to having a single value in it. 219 | * This is used to reset a result object 220 | * @param value the single value to seed this result object with 221 | */ 222 | public void setHolisticResult( int value ) throws IOException { 223 | this._values = new int[1]; 224 | this._currentValueCount = 0; 225 | this.setValue(value); 226 | } 227 | 228 | /** 229 | * Initializes a result object with an array of results to 230 | * add in to this object. 
231 | * TODO: optimize this by allocating _values here 232 | * @param values the array of values to add to this object 233 | */ 234 | public void setHolisticResult( int[] values ) throws IOException { 235 | 236 | for ( int i=0; i BlockLocation mappings 60 | * @param blockToAS HashMap that stores the mappings being added to 61 | * @param offset The offset, in bytes, in the file that this ArraySpec starts 62 | * at 63 | * @param as The ArraySpec to add to the Map 64 | */ 65 | public static void insertNewAs( HashMap> blockToAS, 67 | long offset, ArraySpec as) { 68 | 69 | // search for the correct BlockLocation 70 | // (TODO this is inefficient, fix it) 71 | Iterator iter = blockToAS.keySet().iterator(); 72 | 73 | while( iter.hasNext() ) { 74 | BlockLocation tempKey = (BlockLocation)(iter.next()); 75 | if( tempKey.getOffset() == offset ) { 76 | (blockToAS.get(tempKey)).add(as); 77 | } 78 | } 79 | } 80 | 81 | /** 82 | * Partitions the data represented by dims into groups of records where records 83 | * are whole subarrays with size 1 on the zero-th dimension. 84 | * This may not work for all formats, revisit this later TODO 85 | * @param dims List of Dimension objects representing the dimensions of the input 86 | * data that we are generating partitions for 87 | * @param varName Name of the variable we are generating partitions for 88 | * @param fileName name of the file that contains the variable we are generating 89 | * partitions for 90 | * @param partMode the partitioning mode being used to generate the partitions 91 | * @param startOffset the logical offset in the input data to start creating 92 | * partitions at 93 | * @param conf Configuration object for this execution the given MR program 94 | * @return an array of ArraySpec objects that represent the partitions this 95 | * function generated 96 | */ 97 | protected ArraySpec[] recordBasedPartition( 98 | int[] dims, 99 | String varName, String fileName, 100 | PartMode partMode, 101 | int[] startOffset, 102 | Configuration conf) throws IOException { 103 | 104 | int ndims = dims.length; 105 | int recDimLen = dims[0]; 106 | 107 | int[] recordShape = new int[ndims]; 108 | int[] recordCorner = new int[ndims]; 109 | 110 | ArrayList records = new ArrayList(recDimLen); 111 | 112 | for (int i = 0; i < ndims; i++) { 113 | recordShape[i] = dims[i]; 114 | recordCorner[i] = 0; 115 | } 116 | 117 | recordShape[0] = 1; 118 | 119 | if ( Utils.queryDependantEnabled(conf) ) { 120 | LOG.info("Query Dependant enabled"); 121 | recordShape[0] = Utils.getExtractionShape(conf, recordShape.length)[0]; 122 | } else { 123 | LOG.info("Query Dependant NOT enabled"); 124 | } 125 | 126 | 127 | ArraySpec tempSpec = null; 128 | for (int i = 0; i < recDimLen; i+=recordShape[0]) { 129 | recordCorner[0] = i; 130 | // FIXME: this is clunky 131 | try { 132 | // if this is optC and the record is not valid, do not add it, 133 | if ( Utils.noScanEnabled(conf)) { 134 | if ( Utils.isValid(recordCorner, conf) ) { 135 | tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); 136 | tempSpec.setLogicalStartOffset(startOffset); 137 | records.add(tempSpec); 138 | //records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); 139 | } 140 | } else { // else wise do add it 141 | tempSpec = new ArraySpec(recordCorner, recordShape, varName, fileName); 142 | tempSpec.setLogicalStartOffset(startOffset); 143 | records.add(tempSpec); 144 | //records.add(new ArraySpec(recordCorner, recordShape, varName, fileName)); 145 | } 146 | } catch (Exception e) { 147 | throw new 
IOException(e); 148 | } 149 | } 150 | 151 | ArraySpec[] returnArray = new ArraySpec[records.size()]; 152 | returnArray = records.toArray(returnArray); 153 | 154 | return returnArray; 155 | } 156 | 157 | /** 158 | * Calculates the size of each parition when the proportional 159 | * partitioning scheme is used. 160 | * @param dims represent the logical input space 161 | * @param blockSize the size, in bytes, of the blocks used to 162 | * store the file that contains the data being partitioned 163 | * @param numBlocks the number of blocks to use for generating 164 | * the per-partition size 165 | * @param fileLen the length of the file, in bytes 166 | * @param dataTypeSize the size, in bytes, of a single cell for 167 | * the given data type stored in the file for which partitions 168 | * are being generated 169 | * @param conf Configuration object for this current MR program 170 | * @return an int array that is the same length as dims, where each 171 | * element is the length, in cells, that the step shape is in the 172 | * given dimension 173 | */ 174 | 175 | private int[] calcStepShape( int[] dims, long blockSize, 176 | long numBlocks, long fileLen, 177 | int dataTypeSize, Configuration conf ) { 178 | 179 | int[] stepShape = new int[dims.length]; // sort out the max space 180 | 181 | for ( int i=0; i records = new ArrayList(); 236 | 237 | // this next bit is fairly hard-coded and specific to our tests. 238 | // it represents a naive split that a human might come up with 239 | 240 | // sort out the step size 241 | int[] stepShape = calcStepShape(dims, blockSize, numBlocks, 242 | fileLen, dataTypeSize, conf); 243 | 244 | int[] tempCorner = new int[ndims]; 245 | int[] tempStep = new int[ndims]; 246 | 247 | // initialize the temporary step shape to be the first step 248 | for( int i=0; i> blockToArrays, 325 | long totalArraySpecCount, String fileName, 326 | Configuration conf ) 327 | throws IOException { 328 | 329 | // place each ArraySpec in the correct block 330 | for (ArraySpec record : records) { 331 | int blockToInsertTo = 332 | (int) (totalArraySpecCount % blockToArrays.size()); 333 | /* 334 | System.out.println("record corner: " + 335 | Utils.arrayToString(record.getCorner() ) + 336 | " going into block " + 337 | blockToInsertTo + " which starts at offset " + 338 | blocks[blockToInsertTo] ); 339 | */ 340 | 341 | blockToArrays.get( blocks[blockToInsertTo] ).add(record); 342 | totalArraySpecCount++; 343 | } 344 | 345 | return totalArraySpecCount; 346 | } 347 | 348 | /* 349 | public List orderMultiFileInput( List files/ 350 | Configuration conf ) { 351 | List retList = new ArrayList(); 352 | 353 | // first, sort the files in alphanumeric order 354 | Collections.sort(files); 355 | 356 | int[] startOffset = null; 357 | // now go through them, in order 358 | for (FileStatus file: files) { 359 | 360 | Variable var = getVariable(file, conf); 361 | 362 | if ( startOffset == null ){ 363 | startOffset = new int[var.getDimensions().size()]; 364 | for( int i=0; i getSplits(JobContext job) throws IOException { 393 | List splits = new ArrayList(); 394 | //List files = listStatus(job); 395 | 396 | // HashMap> blockToSlab = 397 | // new HashMap>(); 398 | 399 | /* 400 | FileStatus ncfileStatus = null; 401 | 402 | 403 | for (FileStatus file: files) { 404 | if (ncfileStatus == null) { 405 | ncfileStatus = file; 406 | LOG.info("Using input: " + file.getPath().toString()); 407 | } else { 408 | LOG.warn("Skipping input: " + file.getPath().toString()); 409 | } 410 | } 411 | 412 | if (ncfileStatus == null) 413 | 
return splits; 414 | */ 415 | /* 416 | PartMode partMode = Utils.getPartMode(job.getConfiguration()); 417 | PlacementMode placementMode = 418 | Utils.getPlacementMode(job.getConfiguration()); 419 | */ 420 | 421 | /* 422 | if (Utils.getMultiFileMode(job.getConfiguration()) == MultiFileMode.concat) { 423 | orderMultiFileInput( files, shFiles); 424 | } 425 | */ 426 | 427 | // set the starting offset for each file (depends on damasc.multi_file_mode 428 | /* 429 | shFiles = orderMultiFileInput( files, job.getConfiguration() ); 430 | 431 | for (SHFileStatus shFile: shFiles) { 432 | LOG.info("Parsing file: " + shFile.getFileStatus().getPath().toString()); 433 | Utils.addFileName(shFile.getFileStatus().getPath().toString(), job.getConfiguration()); 434 | genFileSplits(job, shFile, splits, partMode, placementMode); 435 | } 436 | 437 | 438 | // debug: log splits to a file if the debug log files is set 439 | String debugFileName = Utils.getDebugLogFileName(job.getConfiguration()); 440 | if ( "" != debugFileName ) { 441 | LOG.info("Trying to log to " + debugFileName); 442 | File outputFile = new File( debugFileName ); 443 | BufferedWriter writer = new BufferedWriter( new FileWriter(outputFile)); 444 | 445 | int i = 0; 446 | for (InputSplit split : splits) { 447 | ArrayBasedFileSplit tempSplit = (ArrayBasedFileSplit)split; 448 | //LOG.info("Split " + i); 449 | writer.write("Splits " + i); 450 | writer.newLine(); 451 | for ( ArraySpec spec : tempSplit.getArraySpecList() ) { 452 | writer.write("File: " + spec.getFileName() + 453 | "\tvar: " + spec.getVarName() + 454 | "\tcorner: " + Utils.arrayToString( spec.getCorner()) + 455 | "\t shape: " + Utils.arrayToString( spec.getShape() ) + 456 | "\t startOffset: " + Utils.arrayToString( spec.getLogicalStartOffset()) ); 457 | writer.newLine(); 458 | } 459 | i++; 460 | } 461 | writer.close(); 462 | } else { 463 | LOG.info("No debugFileName set"); 464 | } 465 | */ 466 | 467 | return splits; 468 | } 469 | } 470 | -------------------------------------------------------------------------------- /tools/src/edu/ucsc/srl/tools/NetcdfFileGenerator.java: -------------------------------------------------------------------------------- 1 | package edu.ucsc.srl.tools; 2 | 3 | import java.util.Date; 4 | import java.util.Random; 5 | import java.io.IOException; 6 | import java.util.List; 7 | import java.util.ArrayList; 8 | 9 | import ucar.nc2.NetcdfFileWriteable; 10 | 11 | import ucar.nc2.Dimension; 12 | import ucar.nc2.Variable; 13 | import ucar.ma2.Array; 14 | import ucar.ma2.ArrayObject; 15 | import ucar.ma2.ArrayInt; 16 | import ucar.ma2.ArrayLong; 17 | import ucar.ma2.ArrayFloat; 18 | import ucar.ma2.Index; 19 | import ucar.ma2.IteratorFast; 20 | import ucar.ma2.DataType; 21 | import ucar.ma2.InvalidRangeException; 22 | 23 | import ucar.ma2.IndexIterator; 24 | 25 | /** 26 | * This class generates a exemplary netcdf data set 27 | */ 28 | public class NetcdfFileGenerator { 29 | 30 | // keep the memory usage below this 31 | // let's start with 64 megs 32 | 33 | private static int maxMemory = 67108864; // bytes 34 | 35 | private Dimension _unlimitedDim; 36 | 37 | public NetcdfFileGenerator() { 38 | this._unlimitedDim = null ; 39 | } 40 | 41 | private void writeCoordinateVariable( VariableEntry variable, 42 | NetcdfFileWriteable ncFile ) 43 | throws IOException, InvalidRangeException { 44 | 45 | int[] dimensions = {variable.getSize()}; 46 | Array array; 47 | 48 | // so far, everything is a float or an int 49 | // way too much code duplication but I'm done fightin java for now 
50 | if ( variable.getType() == DataType.INT ) { 51 | array = new ArrayInt(dimensions); 52 | int tempInt = 0; 53 | IndexIterator iter = array.getIndexIterator(); 54 | 55 | while (iter.hasNext() ) { 56 | iter.getIntNext(); 57 | iter.setIntCurrent(tempInt); 58 | tempInt++; 59 | } 60 | 61 | ncFile.write(variable.getVariableName(),array); 62 | } else if ( variable.getType() == DataType.FLOAT ) { 63 | array = new ArrayFloat(dimensions); 64 | float tempFloat = 0; 65 | IndexIterator iter = array.getIndexIterator(); 66 | 67 | while (iter.hasNext() ) { 68 | iter.getFloatNext(); 69 | iter.setFloatCurrent(tempFloat); 70 | tempFloat++; 71 | } 72 | 73 | ncFile.write(variable.getVariableName(),array); 74 | } else if ( variable.getType() == DataType.LONG) { 75 | array = new ArrayLong(dimensions); 76 | long tempLong = 0; 77 | IndexIterator iter = array.getIndexIterator(); 78 | 79 | while (iter.hasNext() ) { 80 | iter.getLongNext(); 81 | iter.setLongCurrent(tempLong); 82 | tempLong++; 83 | } 84 | 85 | ncFile.write(variable.getVariableName(),array); 86 | } 87 | 88 | //ncFile.write(variable.getVariableName(),array); 89 | 90 | } 91 | 92 | private NetcdfFileWriteable writeNetcdfMetadata( NetcdfFileWriteable ncFile, 93 | String variableName, 94 | ArrayList> variableListList ) 95 | throws IOException { 96 | 97 | int suffixInt = 1; 98 | 99 | for ( ArrayList variableList : variableListList ) { 100 | ArrayList dims = new ArrayList(); 101 | 102 | // add coordinate variables for each dimension 103 | for ( int i=0; i dims = new ArrayList(); 142 | Dimension tempDim; 143 | String localName = "dim"; 144 | 145 | for( int i = 0; i < dimensions.length; i++){ 146 | if ( (0 == i) && variableDim) { 147 | tempDim = ncFile.addUnlimitedDimension("dim" + Integer.toString(i)); 148 | } else { 149 | tempDim = ncFile.addDimension("dim" + Integer.toString(i), dimensions[i]); 150 | } 151 | dims.add( tempDim); 152 | } 153 | 154 | ncFile.addVariable(varName, dataType, dims); 155 | */ 156 | 157 | return ncFile; 158 | } 159 | 160 | // this is the highest dimension that will be a one for writings sake 161 | private int determineHighestNonWriteDimension( int[] dimensions, long maxWrite ) { 162 | int firstStepDim = dimensions.length - 1; 163 | 164 | long totalStepSize = 1; 165 | 166 | for(int i = dimensions.length - 1; i >= 0; i--) { 167 | 168 | if ( (dimensions[i] * totalStepSize) < maxWrite ) { 169 | firstStepDim = i-1; 170 | totalStepSize *= dimensions[i]; 171 | } else { 172 | break; 173 | } 174 | } 175 | 176 | return Math.max(firstStepDim,0); 177 | } 178 | 179 | private int[] createWriteStep(int[] dimensions, long maxWrite, long highestNonWriteDimension) { 180 | 181 | int[] returnArray = new int[dimensions.length]; 182 | int i; 183 | for ( i=0; i<= highestNonWriteDimension; i++ ) { 184 | returnArray[i] = 1; 185 | } 186 | 187 | for (; i < returnArray.length; i++) { 188 | returnArray[i] = dimensions[i]; 189 | } 190 | 191 | return returnArray; 192 | } 193 | 194 | public int determineNumberOfStepWrites(int[] stepSize, long writeSize) { 195 | long tempLong = 0; 196 | 197 | for (int i=0; i= 0) && 241 | ((origin.getCurrentCounter()[highestNonWriteDim] + numStepWrites) <= dimensions[highestNonWriteDim]) 242 | ) { 243 | singleStep[highestNonWriteDim] = numStepWrites; 244 | System.out.println("JUST OPTIMIZED. 
New write step: " + arrayToString(singleStep) ); 245 | // keep 'i' right 246 | i += numStepWrites - 1; 247 | } else { 248 | singleStep[highestNonWriteDim] = 1; 249 | } 250 | 251 | 252 | Array array = new ArrayInt( singleStep ); 253 | IndexIterator iter = array.getIndexIterator(); 254 | 255 | while( iter.hasNext() ) { 256 | iter.getIntNext(); 257 | // uncomment the following line for a random distribution 258 | //iter.setIntCurrent((int) (Math.abs(generator.nextGaussian()) * 40) ); 259 | // uncomment this line for an incrementing value 260 | iter.setIntCurrent(valueCounter); 261 | valueCounter++; 262 | 263 | // book keeping 264 | writtenSoFar++; 265 | //origin.incr(); 266 | } 267 | 268 | System.out.println("Writing to file: " + ncFile.getLocation() + ". var_name: " + varName + 269 | " origin: " + arrayToString(origin.getCurrentCounter()) + 270 | " writeSize: " + array.getSize() + 271 | " write shape: " + arrayToString(singleStep) ); 272 | 273 | // write to the actual file 274 | ncFile.write(varName, origin.getCurrentCounter(), array); 275 | 276 | // update origin accordingly 277 | 278 | for( int j=0; j= totalSize ) { 289 | done = true; 290 | break; 291 | } 292 | } 293 | } 294 | 295 | return valueCounter; 296 | } 297 | 298 | // return the last value written to the file 299 | private int createFile( String variableName, DataType dataType, int writeSeed ){ 300 | 301 | // hacky init 302 | this._unlimitedDim = null; 303 | 304 | ArrayList> variableListList = new ArrayList>(); 305 | 306 | // seperate method to define the variables for this file 307 | /* 308 | ArrayList variableList3 = defineDataForThisFile3(); 309 | variableListList.add(variableList3); 310 | 311 | ArrayList variableList1 = defineDataForThisFile1(); 312 | variableListList.add(variableList1); 313 | */ 314 | 315 | ArrayList variableList2 = defineDataForThisFile2(); 316 | variableListList.add(variableList2); 317 | 318 | Date now = new Date(); 319 | Random generator = new Random( now.getTime() ); 320 | int valueCounter = writeSeed; 321 | 322 | String filename = Long.toString(now.getTime()) + ".nc"; 323 | 324 | try { 325 | // create the file 326 | NetcdfFileWriteable ncfile = NetcdfFileWriteable.createNew(filename, false); 327 | 328 | // this loop needs to define all the meta-data prior to any data being written 329 | // set the metadata 330 | System.out.println("\t calling writeNetcdfMetadata for file " + ncfile.getLocation() ); 331 | ncfile = this.writeNetcdfMetadata(ncfile, variableName, variableListList); 332 | 333 | // this is only called once per file 334 | ncfile.create(); 335 | 336 | int suffixInt = 1; 337 | for ( ArrayList variableList : variableListList ) { 338 | // write out coordinate variables 339 | for (VariableEntry entry : variableList) { 340 | writeCoordinateVariable( entry, ncfile ); 341 | } 342 | // pull out the dimensions of the variables from variableList 343 | int[] dimensions = new int[variableList.size()]; 344 | for (int i=0; i0) 371 | tempStr += ","; 372 | tempStr += array[i]; 373 | } 374 | 375 | return tempStr; 376 | } 377 | 378 | // This is where the dimensions and shape of the file are configured 379 | public static void main(String args[]) { 380 | 381 | // list out the dimensions for this file 382 | 383 | String variableName = "windspeed"; // this is for the measurement actually in this data set 384 | DataType dataType = DataType.INT; // data type for the actual data 385 | 386 | NetcdfFileGenerator myGen = new NetcdfFileGenerator(); 387 | int numFiles = 1; 388 | int writeSeed = 0; 389 | 390 | for ( int i = 
0; i < numFiles; i++) { 391 | writeSeed = myGen.createFile(variableName, dataType, writeSeed); 392 | } 393 | 394 | } 395 | 396 | private ArrayList defineDataForThisFile1 () { 397 | ArrayList variableList = new ArrayList(); 398 | 399 | variableList.add(new VariableEntry("time", 20, DataType.INT, true, 400 | "days", "time since midnight, 1,1,1980") ); 401 | variableList.add(new VariableEntry("lat1", 720, DataType.FLOAT, false, 402 | "latitude", "half_degrees_from_north" ) ); 403 | variableList.add(new VariableEntry("lon1", 720, DataType.FLOAT, false, 404 | "longitude", "quarter_degrees_going_east" ) ); 405 | variableList.add(new VariableEntry("elev1", 100, DataType.FLOAT, false, 406 | "elevation", "meters") ); 407 | 408 | return variableList; 409 | } 410 | 411 | /** 412 | * This will generate a data file that is 413 | */ 414 | private ArrayList defineDataForThisFile2() { 415 | ArrayList variableList = new ArrayList(); 416 | 417 | //variableList.add(new VariableEntry("time", 14600, DataType.INT, true, 418 | variableList.add(new VariableEntry("time", 80, DataType.INT, true, 419 | "days", "time since midnight, 1,1,1970") ); 420 | //variableList.add(new VariableEntry("lat2", 180, DataType.FLOAT, false, 421 | variableList.add(new VariableEntry("lat2", 180, DataType.FLOAT, false, 422 | "latitude", "half_degrees_from_north" ) ); 423 | variableList.add(new VariableEntry("lon2", 360, DataType.FLOAT, false, 424 | "longitude", "quarter_degrees_going_east" ) ); 425 | //variableList.add(new VariableEntry("elev2", 35, DataType.FLOAT, false, 426 | variableList.add(new VariableEntry("elev2", 50, DataType.FLOAT, false, 427 | "elevation", "meters") ); 428 | 429 | return variableList; 430 | } 431 | 432 | // non-record dimension data 433 | private ArrayList defineDataForThisFile3() { 434 | ArrayList variableList = new ArrayList(); 435 | 436 | variableList.add(new VariableEntry("time3", 100, DataType.INT, false, 437 | "days", "time since midnight, 1,1,1980") ); 438 | variableList.add(new VariableEntry("lat3", 180, DataType.FLOAT, false, 439 | "latitude", "half_degrees_from_north" ) ); 440 | variableList.add(new VariableEntry("lon3", 180, DataType.FLOAT, false, 441 | "longitude", "quarter_degrees_going_east" ) ); 442 | variableList.add(new VariableEntry("elev3", 50, DataType.FLOAT, false, 443 | "elevation", "meters") ); 444 | 445 | return variableList; 446 | } 447 | 448 | public class VariableEntry { 449 | private String _variableName; 450 | private int _length; 451 | private DataType _type; 452 | private boolean _isUnlimited; 453 | private String _longName; 454 | private String _units; 455 | 456 | public VariableEntry( String variableName, int length, DataType type, boolean isUnlimited, 457 | String longName, String units) { 458 | _variableName = variableName; 459 | _length = length; 460 | _type = type; 461 | _isUnlimited = isUnlimited; 462 | _longName = longName; 463 | _units = units; 464 | } 465 | 466 | public String getVariableName() { 467 | return _variableName; 468 | } 469 | 470 | public int getSize() { 471 | return _length; 472 | } 473 | 474 | public DataType getType() { 475 | return _type; 476 | } 477 | 478 | public boolean isUnlimited() { 479 | return _isUnlimited; 480 | } 481 | 482 | public String getLongName() { 483 | return _longName; 484 | } 485 | 486 | public String getUnits() { 487 | return _units; 488 | } 489 | } 490 | 491 | } 492 | --------------------------------------------------------------------------------
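A note on flattened group-ID keys:

Several of the mappers above (SimpleMaxMapper, IdentityMapper, AverageMapper) key their map output by collapsing an n-dimensional group ID into a single LongWritable via GroupID.flatten(variableShape), and the median path sorts each group's accumulated values and takes the element at values.length/2 as the result. The sketch below is a minimal, standalone illustration of the usual row-major flattening idea and its inverse; it is not the project's GroupID implementation, the exact ordering GroupID uses may differ, and the class and method names here (FlattenDemo, flatten, unflatten) are made up for the example. The example shape matches the windspeed variable produced by NetcdfFileGenerator's defineDataForThisFile2 (time=80, lat=180, lon=360, elev=50).

// Standalone sketch of row-major flattening of an n-dimensional
// coordinate into a single long, plus the inverse operation.
// Illustrative only; not the GroupID API used by SciHadoop.
public class FlattenDemo {

  // Flatten coord into one long, treating shape as row-major:
  // the last dimension varies fastest.
  static long flatten(int[] coord, int[] shape) {
    long flat = 0;
    for (int i = 0; i < shape.length; i++) {
      flat = flat * shape[i] + coord[i];
    }
    return flat;
  }

  // Recover the n-dimensional coordinate from a flattened value.
  static int[] unflatten(long flat, int[] shape) {
    int[] coord = new int[shape.length];
    for (int i = shape.length - 1; i >= 0; i--) {
      coord[i] = (int) (flat % shape[i]);
      flat /= shape[i];
    }
    return coord;
  }

  public static void main(String[] args) {
    int[] shape = {80, 180, 360, 50};   // e.g. time, lat, lon, elev
    int[] coord = {3, 17, 200, 42};
    long flat = flatten(coord, shape);
    int[] back = unflatten(flat, shape);
    System.out.println("flat = " + flat);
    System.out.println("round trip ok = " + java.util.Arrays.equals(coord, back));
  }
}

Keying the shuffle on a single long rather than a serialized int[] keeps the map-output keys small and cheap to compare; the reducer can recover the n-dimensional group ID from the flattened value and the variable shape when it needs to emit results.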