Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Plain View
Pig, mail # user - Hadoop Job History Loader with PIG


+
Zebeljan, Nebojsa 2012-10-10, 12:23
+
Bill Graham 2012-10-11, 00:59
+
Cheolsoo Park 2012-10-11, 04:30
+
Zebeljan, Nebojsa 2012-10-11, 07:29
+
Zebeljan, Nebojsa 2012-10-11, 07:46
Copy link to this message
-
Re: Hadoop Job History Loader with PIG
Cheolsoo Park 2012-10-11, 19:06
Hi Nebojsa,

You're absolutely right. CDH4.x compiles everything against hadoop-2.0.x,
so HadoopJobHistoryLoader is excluded. Thank you very much for pointing
that out.

This is a packaging bug as I see it, and I am going to get it fixed in the next
release. In the meantime, could you apply the patch that I added at the end
and build piggybank.jar from the source tarball yourself?

1) wget http://archive.cloudera.com/cdh4/cdh/4/pig-0.9.2-cdh4.0.1.tar.gz
2) tar -xf pig-0.9.2-cdh4.0.1.tar.gz
3) cd pig-0.9.2-cdh4.0.1
4) patch -p0 -i <this patch>
5) ant clean compile-test jar-withouthadoop -Dhadoopversion=23
6) cd contrib/piggybank/java
7) ant clean jar -Dhadoopversion=20 -Dmr1.test=mr1

Now you will find piggybank.jar built in the current directory, and it
contains HadoopJobHistoryLoader as follows:

8) jar -tvf piggybank.jar | grep HadoopJobHistoryLoader
  1866 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$1.class
  1885 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryInputFormat.class
  5769 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryReader.class
   943 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobHistoryPathFilter.class
  3460 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobKeys.class
  2681 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobXMLHandler.class
   751 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$MRJobInfo.class
 16364 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader.class

You can also run the unit test as follows:

9) ant clean test -Dhadoopversion=20 -Dmr1.test=mr1 -Dtestcase=TestHadoopJobHistoryLoader

Please let me know if this works for you.

Thanks!
Cheolsoo

diff --git contrib/piggybank/java/build.xml contrib/piggybank/java/build.xml
index b162dbd..1616e38 100755
--- contrib/piggybank/java/build.xml
+++ contrib/piggybank/java/build.xml
@@ -15,7 +15,15 @@
    limitations under the License.
 -->

-<project basedir="." default="jar" name="pigudf">
+<project basedir="." default="jar" name="pigudf"
+         xmlns:artifact="urn:maven-artifact-ant"
+         xmlns:ivy="antlib:org.apache.ivy.ant">
+    <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+        <classpath>
+            <pathelement
location="../../../cloudera/maven-packaging/lib/ant-contrib-1.0b3.jar"/>
+        </classpath>
+    </taskdef>
+
     <!-- javac properties -->
     <property name="javac.debug" value="on" />
     <property name="javac.level" value="source,lines,vars"/>
@@ -39,6 +47,17 @@
     <property name="hsqldb.jar"
value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/>
     <property name="ivy.lib.dir" value="../../../build/ivy/lib/Pig"/>

+    <property name="src.shims.dir"
value="../../../shims/src/hadoop${hadoopversion}" />
+    <if>
+        <equals arg1="${mr1.test}" arg2="mr1"/>
+        <then>
+            <property name="src.shims.test.dir"
value="../../../shims/test/hadoop20" />
+        </then>
+        <else>
+            <property name="src.shims.test.dir"
value="../../../shims/test/hadoop${hadoopversion}" />
+        </else>
+    </if>
+
  <!-- JobHistoryLoader currently does not support 0.23 -->
     <condition property="build.classes.excludes"
value="**/HadoopJobHistoryLoader.java" else="">
         <equals arg1="${hadoopversion}" arg2="23"/>
@@ -59,14 +78,99 @@
     <property name="test.src.dir" value="src/test/java" />
     <property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/>

-    <path id="pigudf.classpath">
-        <pathelement location="${build.classes}"/>
-        <pathelement location="${pigjar-withouthadoop}"/>
-        <pathelement location="${pigtest}"/>
-        <fileset dir="../../../build/ivy/lib">
-            <include name="**/*.jar"/>
-        </fileset>
-    </path>
+    <property name="ivy.dir" location="../../../ivy" />
+    <property name="build.ivy.dir" location="${build.dir}/ivy" />
+    <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+    <property name="ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}"/>
+    <property name="build.ivy.report.dir"
location="${build.ivy.dir}/report" />
+    <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"
/>
+    <property name="build.ivy.maven.pom"
location="${build.ivy.maven.dir}/pig-${version}.pom" />
+    <property name="build.ivy.maven.jar"
location="${build.ivy.maven.dir}/pig-${version}-core.jar" />
+
+    <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+    <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"
/>
+    <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+    <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+    <property name="ivy_repo_url"
value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+
+    <target name="ivy-init-dirs">
+        <mkdir dir="${build.ivy.dir}" />
+        <mkdir dir="${build.ivy.lib.dir}" />
+        <mkdir dir="${build.ivy.report.dir}" />
+        <mkdir dir="${build.ivy.maven.dir}" />
+        <copy todir="${basedir}/" file="../../../ivy.xml" />
+    </target>
+
+    <target name="ivy-probe-antlib" >
+        <condition property="ivy.found">
+         <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+        </condition>
+    </target>
+
+    <target name="ivy-download" description="To download ivy"
unless="offline">
+        <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+    </target>
+
+    <!--
+    To avoid Ivy leaking things across big projects, always load Ivy in
the same classloader.
+    Also note how we skip loading Ivy if it is already there, just to make
sure all is well.
+    -->
+    <target name="ivy-init