Hi Nebojsa,
You're absolutely right. CDH4.x compiles everything against hadoop-2.0.x,
so HadoopJobHistoryLoader is excluded. Thank you very much for pointing
that out.
As I see it, this is a packaging bug, and I am going to get it fixed in the
next release. In the meantime, could you apply the patch that I added at the
end and build piggybank.jar from the source tarball yourself?
1) wget http://archive.cloudera.com/cdh4/cdh/4/pig-0.9.2-cdh4.0.1.tar.gz
2) tar -xf pig-0.9.2-cdh4.0.1.tar.gz
3) cd pig-0.9.2-cdh4.0.1
4) patch -p0 -i <this patch>
5) ant clean compile-test jar-withouthadoop -Dhadoopversion=23
6) cd contrib/piggybank/java
7) ant clean jar -Dhadoopversion=20 -Dmr1.test=mr1
Now you will find piggybank.jar built in the current directory, and it
contains HadoopJobHistoryLoader as follows:
8) jar -tvf piggybank.jar | grep HadoopJobHistoryLoader
1866 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$1.class
1885 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryInputFormat.class
5769 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryReader.class
943 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobHistoryPathFilter.class
3460 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobKeys.class
2681 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobXMLHandler.class
751 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$MRJobInfo.class
16364 Thu Oct 11 11:20:40 PDT 2012
org/apache/pig/piggybank/storage/HadoopJobHistoryLoader.class
You can also run the unit test as follows:
9) ant clean test -Dhadoopversion=20 -Dmr1.test=mr1 -Dtestcase=TestHadoopJobHistoryLoader
Please let me know if this works for you.
Thanks!
Cheolsoo
diff --git contrib/piggybank/java/build.xml contrib/piggybank/java/build.xml
index b162dbd..1616e38 100755
--- contrib/piggybank/java/build.xml
+++ contrib/piggybank/java/build.xml
@@ -15,7 +15,15 @@
limitations under the License.
-->
-<project basedir="." default="jar" name="pigudf">
+<project basedir="." default="jar" name="pigudf"
+ xmlns:artifact="urn:maven-artifact-ant"
+ xmlns:ivy="antlib:org.apache.ivy.ant">
+ <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+ <classpath>
+ <pathelement
location="../../../cloudera/maven-packaging/lib/ant-contrib-1.0b3.jar"/>
+ </classpath>
+ </taskdef>
+
<!-- javac properties -->
<property name="javac.debug" value="on" />
<property name="javac.level" value="source,lines,vars"/>
@@ -39,6 +47,17 @@
<property name="hsqldb.jar"
value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/>
<property name="ivy.lib.dir" value="../../../build/ivy/lib/Pig"/>
+ <property name="src.shims.dir"
value="../../../shims/src/hadoop${hadoopversion}" />
+ <if>
+ <equals arg1="${mr1.test}" arg2="mr1"/>
+ <then>
+ <property name="src.shims.test.dir"
value="../../../shims/test/hadoop20" />
+ </then>
+ <else>
+ <property name="src.shims.test.dir"
value="../../../shims/test/hadoop${hadoopversion}" />
+ </else>
+ </if>
+
<!-- JobHistoryLoader currently does not support 0.23 -->
<condition property="build.classes.excludes"
value="**/HadoopJobHistoryLoader.java" else="">
<equals arg1="${hadoopversion}" arg2="23"/>
@@ -59,14 +78,99 @@
<property name="test.src.dir" value="src/test/java" />
<property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/>
- <path id="pigudf.classpath">
- <pathelement location="${build.classes}"/>
- <pathelement location="${pigjar-withouthadoop}"/>
- <pathelement location="${pigtest}"/>
- <fileset dir="../../../build/ivy/lib">
- <include name="**/*.jar"/>
- </fileset>
- </path>
+ <property name="ivy.dir" location="../../../ivy" />
+ <property name="build.ivy.dir" location="${build.dir}/ivy" />
+ <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" />
+ <property name="ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}"/>
+ <property name="build.ivy.report.dir"
location="${build.ivy.dir}/report" />
+ <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven"
/>
+ <property name="build.ivy.maven.pom"
location="${build.ivy.maven.dir}/pig-${version}.pom" />
+ <property name="build.ivy.maven.jar"
location="${build.ivy.maven.dir}/pig-${version}-core.jar" />
+
+ <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
+ <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml"
/>
+ <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/>
+ <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
+ <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/>
+
+ <target name="ivy-init-dirs">
+ <mkdir dir="${build.ivy.dir}" />
+ <mkdir dir="${build.ivy.lib.dir}" />
+ <mkdir dir="${build.ivy.report.dir}" />
+ <mkdir dir="${build.ivy.maven.dir}" />
+ <copy todir="${basedir}/" file="../../../ivy.xml" />
+ </target>
+
+ <target name="ivy-probe-antlib" >
+ <condition property="ivy.found">
+ <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/>
+ </condition>
+ </target>
+
+ <target name="ivy-download" description="To download ivy"
unless="offline">
+ <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
+ </target>
+
+ <!--
+ To avoid Ivy leaking things across big projects, always load Ivy in
the same classloader.
+ Also note how we skip loading Ivy if it is already there, just to make
sure all is well.
+ -->
+ <target name="ivy-init