├── MR-plan.png ├── Tez-plan.png ├── jira shot.png ├── README.md └── PIG-5210.patch /MR-plan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ly16/Apache-Open-Source-Project/HEAD/MR-plan.png -------------------------------------------------------------------------------- /Tez-plan.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ly16/Apache-Open-Source-Project/HEAD/Tez-plan.png -------------------------------------------------------------------------------- /jira shot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ly16/Apache-Open-Source-Project/HEAD/jira shot.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Apache-Open-Source-Project 2 | 3 | ### Option to print MR/Tez plan before launching [PIG-5210] 4 | An Improvement for Apache Open Source Software Pig 5 | 6 | ### Why do we add this patch? 7 | For a Pig script, users need to run pig -e ```explain -script test.pig``` to print out the MR/Tez plan. For a Python script, however, it is hard to get Pig to explain the plan automatically. This option prints the MR/Tez plan automatically before the job is launched. 
8 | 9 | ### Steps to create a Jira issue and upload the patch 10 | - Get a clone of the 0.17.0 version of Pig by git pull  11 | - Set up Eclipse ```ant build.xml``` 12 | - Import the Pig src into Eclipse, and in JobControlCompiler.java and TezJobCompiler.java add a check that logs the plan when pig.print.exec.plan is "true", before MapReduce starts 13 | ``` 14 | // Print the plan when pig.print.exec.plan is "true" in the MapReduce engine 15 | if (conf.getBoolean(PigConfiguration.PIG_PRINT_EXEC_PLAN, false)) { log.info(mro.toString()); } 16 | ``` 17 | ``` 18 | // Print the plan when pig.print.exec.plan is "true" in the Tez engine 19 | if (conf.getBoolean(PigConfiguration.PIG_PRINT_EXEC_PLAN, false)) { log.info(tezPlanNode.getTezOperPlan()); } 20 | ``` 21 | - Check that it compiles with ```ant``` 22 | - Start the remote debugger in Eclipse 23 | ``` 24 | export PIG_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=8000" 25 | ``` 26 | - Or, to run Pig only in the terminal without the debugger 27 | ``` 28 | unset PIG_OPTS 29 | ``` 30 | - Test for MR engine `-x local test.pig`; Test for Tez engine `-x tez_local test.pig` 31 | - The plan is printed as expected for the MR/Tez engines 32 | 33 | ![mr](https://github.com/ly16/Apache-Open-Source-Project/blob/master/MR-plan.png) 34 | ![tez](https://github.com/ly16/Apache-Open-Source-Project/blob/master/Tez-plan.png) 35 | 36 | - Upload the patch to Jira 37 | 38 | ![screenshot](https://github.com/ly16/Apache-Open-Source-Project/blob/master/jira%20shot.png) 39 | 40 | 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /PIG-5210.patch: -------------------------------------------------------------------------------- 1 | Index: src/org/apache/pig/PigConfiguration.java 2 | =================================================================== 3 | --- src/org/apache/pig/PigConfiguration.java (revision 1790033) 4 | +++ src/org/apache/pig/PigConfiguration.java (working copy) 5 | @@ -508,6 +508,8 @@ 6 | 7 | public static final String PIG_STORE_SCHEMA_DISAMBIGUATE_DEFAULT = "true"; 8 | 9 | + public 
static final String PIG_PRINT_EXEC_PLAN = "pig.print.exec.plan"; 10 | + 11 | // Deprecated settings of Pig 0.13 12 | 13 | /** 14 | Index: src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java 15 | =================================================================== 16 | --- src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (revision 1790033) 17 | +++ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (working copy) 18 | @@ -321,6 +321,12 @@ 19 | if(mro instanceof NativeMapReduceOper) { 20 | return null; 21 | } 22 | + 23 | + //Print MR plan before launching if needed 24 | + if (conf.getBoolean(PigConfiguration.PIG_PRINT_EXEC_PLAN, false)) { 25 | + log.info(mro.toString()); 26 | + } 27 | + 28 | Job job = getJob(plan, mro, conf, pigContext); 29 | jobMroMap.put(job, mro); 30 | jobCtrl.addJob(job); 31 | Index: src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java 32 | =================================================================== 33 | --- src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java (revision 1790033) 34 | +++ src/org/apache/pig/backend/hadoop/executionengine/tez/TezJobCompiler.java (working copy) 35 | @@ -30,6 +30,7 @@ 36 | import org.apache.hadoop.fs.Path; 37 | import org.apache.hadoop.yarn.api.records.LocalResource; 38 | import org.apache.hadoop.yarn.exceptions.YarnException; 39 | +import org.apache.pig.PigConfiguration; 40 | import org.apache.pig.PigException; 41 | import org.apache.pig.backend.hadoop.PigATSClient; 42 | import org.apache.pig.backend.hadoop.executionengine.JobCreationException; 43 | @@ -110,6 +111,12 @@ 44 | for (Map.Entry entry : localResources.entrySet()) { 45 | log.info("Local resource: " + entry.getKey()); 46 | } 47 | + 48 | + // Print Tez plan before launching if needed 49 | + if (conf.getBoolean(PigConfiguration.PIG_PRINT_EXEC_PLAN, false)) { 50 | + 
log.info(tezPlanNode.getTezOperPlan()); 51 | + } 52 | + 53 | DAG tezDag = buildDAG(tezPlanNode, localResources); 54 | tezDag.setDAGInfo(createDagInfo(TezScriptState.get().getScript())); 55 | // set Tez caller context 56 | Index: test/org/apache/pig/test/TestEvalPipelineLocal.java 57 | =================================================================== 58 | --- test/org/apache/pig/test/TestEvalPipelineLocal.java (revision 1790033) 59 | +++ test/org/apache/pig/test/TestEvalPipelineLocal.java (working copy) 60 | @@ -17,6 +17,8 @@ 61 | */ 62 | package org.apache.pig.test; 63 | 64 | +import java.io.ByteArrayInputStream; 65 | +import java.io.ByteArrayOutputStream; 66 | import java.io.File; 67 | import java.io.FileInputStream; 68 | import java.io.FileOutputStream; 69 | @@ -25,6 +27,7 @@ 70 | import java.io.PrintWriter; 71 | import java.util.ArrayList; 72 | import java.util.Collection; 73 | +import java.util.Enumeration; 74 | import java.util.HashMap; 75 | import java.util.HashSet; 76 | import java.util.Iterator; 77 | @@ -37,6 +40,12 @@ 78 | import junit.framework.Assert; 79 | 80 | import org.apache.hadoop.mapreduce.Job; 81 | +import org.apache.log4j.Appender; 82 | +import org.apache.log4j.FileAppender; 83 | +import org.apache.log4j.Level; 84 | +import org.apache.log4j.Logger; 85 | +import org.apache.log4j.SimpleLayout; 86 | +import org.apache.log4j.WriterAppender; 87 | import org.apache.pig.ComparisonFunc; 88 | import org.apache.pig.EvalFunc; 89 | import org.apache.pig.FuncSpec; 90 | @@ -58,6 +67,7 @@ 91 | import org.apache.pig.impl.util.Pair; 92 | import org.apache.pig.impl.util.UDFContext; 93 | import org.apache.pig.impl.util.Utils; 94 | +import org.apache.pig.newplan.logical.rules.ColumnPruneVisitor; 95 | import org.apache.pig.test.utils.Identity; 96 | import org.junit.Assume; 97 | import org.junit.Before; 98 | @@ -1251,7 +1261,17 @@ 99 | @Test 100 | public void testBytesRawComparatorDesc() throws Exception{ 101 | File f1 = createFile(new String[]{"2", "1", "4", "3"}); 
102 | - 103 | + 104 | + ByteArrayOutputStream bos = new ByteArrayOutputStream(); 105 | + Logger logger = Logger.getRootLogger(); 106 | + 107 | + logger.setLevel(Level.INFO); 108 | + SimpleLayout layout = new SimpleLayout(); 109 | + Appender appender = new WriterAppender(layout, new PrintStream(bos)); 110 | + logger.addAppender(appender); 111 | + 112 | + // Also test PIG-5210 here in the same test 113 | + pigServer.getPigContext().getProperties().setProperty("pig.print.exec.plan", "true"); 114 | pigServer.registerQuery("a = load '" + Util.generateURI(f1.toString(), pigServer.getPigContext()) 115 | + "' as (value:long);"); 116 | pigServer.registerQuery("b = foreach a generate " + TOTUPLENOINNERSCHEMA.class.getName() + "(value);"); 117 | @@ -1264,5 +1284,9 @@ 118 | Assert.assertEquals(iter.next().toString(), "(2)"); 119 | Assert.assertEquals(iter.next().toString(), "(1)"); 120 | Assert.assertFalse(iter.hasNext()); 121 | + 122 | + logger.removeAppender(appender); 123 | + 124 | + Assert.assertTrue(bos.toString().contains("New For Each(false,false)[tuple]")); 125 | } 126 | } 127 | --------------------------------------------------------------------------------