|
|
+
Zebeljan, Nebojsa 2012-10-10, 12:23
+
Bill Graham 2012-10-11, 00:59
+
Cheolsoo Park 2012-10-11, 04:30
+
Zebeljan, Nebojsa 2012-10-11, 07:29
+
Zebeljan, Nebojsa 2012-10-11, 07:46
-
Re: Hadoop Job History Loader with PIG - Cheolsoo Park 2012-10-11, 19:06
Hi Nebojsa,
You're absolutely right. CDH4.x compiles everything against hadoop-2.0.x, so HadoopJobHistoryLoader is excluded. Thank you very much for pointing that out. This is a packaging bug as I see it, and I am going to get it fixed in the next release. In the meantime, could you apply the patch that I added at the end and build piggybank.jar from the source tarball by yourself?
1) wget http://archive.cloudera.com/cdh4/cdh/4/pig-0.9.2-cdh4.0.1.tar.gz
2) tar -xf pig-0.9.2-cdh4.0.1.tar.gz
3) cd pig-0.9.2-cdh4.0.1
4) patch -p0 -i <this patch>
5) ant clean compile-test jar-withouthadoop -Dhadoopversion=23
6) cd contrib/piggybank/java
7) ant clean jar -Dhadoopversion=20 -Dmr1.test=mr1
Now you will find piggybank.jar built in the current directory, and it contains HadoopJobHistoryLoader as follows:
8) jar -tvf piggybank.jar | grep HadoopJobHistoryLoader
1866 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$1.class
1885 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryInputFormat.class
5769 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$HadoopJobHistoryReader.class
943 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobHistoryPathFilter.class
3460 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobKeys.class
2681 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$JobXMLHandler.class
751 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader$MRJobInfo.class
16364 Thu Oct 11 11:20:40 PDT 2012 org/apache/pig/piggybank/storage/HadoopJobHistoryLoader.class
You can also run the unit test as follows:
9) ant clean test -Dhadoopversion=20 -Dmr1.test=mr1 -Dtestcase=TestHadoopJobHistoryLoader
Please let me know if this works for you. Thanks!
Cheolsoo diff --git contrib/piggybank/java/build.xml contrib/piggybank/java/build.xml index b162dbd..1616e38 100755 --- contrib/piggybank/java/build.xml +++ contrib/piggybank/java/build.xml @@ -15,7 +15,15 @@ limitations under the License. --> -<project basedir="." default="jar" name="pigudf"> +<project basedir="." default="jar" name="pigudf" + xmlns:artifact="urn:maven-artifact-ant" + xmlns:ivy="antlib:org.apache.ivy.ant"> + <taskdef resource="net/sf/antcontrib/antcontrib.properties"> + <classpath> + <pathelement location="../../../cloudera/maven-packaging/lib/ant-contrib-1.0b3.jar"/> + </classpath> + </taskdef> + <!-- javac properties --> <property name="javac.debug" value="on" /> <property name="javac.level" value="source,lines,vars"/> @@ -39,6 +47,17 @@ <property name="hsqldb.jar" value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/> <property name="ivy.lib.dir" value="../../../build/ivy/lib/Pig"/> + <property name="src.shims.dir" value="../../../shims/src/hadoop${hadoopversion}" /> + <if> + <equals arg1="${mr1.test}" arg2="mr1"/> + <then> + <property name="src.shims.test.dir" value="../../../shims/test/hadoop20" /> + </then> + <else> + <property name="src.shims.test.dir" value="../../../shims/test/hadoop${hadoopversion}" /> + </else> + </if> + <!-- JobHistoryLoader currently does not support 0.23 --> <condition property="build.classes.excludes" value="**/HadoopJobHistoryLoader.java" else=""> <equals arg1="${hadoopversion}" arg2="23"/> @@ -59,14 +78,99 @@ <property name="test.src.dir" value="src/test/java" /> <property name="junit.hadoop.conf" value="${user.home}/pigtest/conf/"/> - <path id="pigudf.classpath"> - <pathelement location="${build.classes}"/> - <pathelement location="${pigjar-withouthadoop}"/> - <pathelement location="${pigtest}"/> - <fileset dir="../../../build/ivy/lib"> - <include name="**/*.jar"/> - </fileset> - </path> + <property name="ivy.dir" location="../../../ivy" /> + <property name="build.ivy.dir" location="${build.dir}/ivy" /> + 
+ <property name="build.ivy.lib.dir" location="${build.ivy.dir}/lib" /> + <property name="ivy.lib.dir" location="${build.ivy.lib.dir}/${ant.project.name}"/> + <property name="build.ivy.report.dir" location="${build.ivy.dir}/report" /> + <property name="build.ivy.maven.dir" location="${build.ivy.dir}/maven" /> + <property name="build.ivy.maven.pom" location="${build.ivy.maven.dir}/pig-${version}.pom" /> + <property name="build.ivy.maven.jar" location="${build.ivy.maven.dir}/pig-${version}-core.jar" /> + + <loadproperties srcfile="${ivy.dir}/libraries.properties"/> + <property name="ivysettings.xml" location="${ivy.dir}/ivysettings.xml" /> + <property name="ivy.jar" location="${ivy.dir}/ivy-${ivy.version}.jar"/> + <property name="mvnrepo" value="http://repo2.maven.org/maven2"/> + <property name="ivy_repo_url" value="${mvnrepo}/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar"/> + + <target name="ivy-init-dirs"> + <mkdir dir="${build.ivy.dir}" /> + <mkdir dir="${build.ivy.lib.dir}" /> + <mkdir dir="${build.ivy.report.dir}" /> + <mkdir dir="${build.ivy.maven.dir}" /> + <copy todir="${basedir}/" file="../../../ivy.xml" /> + </target> + + <target name="ivy-probe-antlib" > + <condition property="ivy.found"> + <typefound uri="antlib:org.apache.ivy.ant" name="cleancache"/> + </condition> + </target> + + <target name="ivy-download" description="To download ivy" unless="offline"> + <get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/> + </target> + + <!-- + To avoid Ivy leaking things across big projects, always load Ivy in the same classloader. + Also note how we skip loading Ivy if it is already there, just to make sure all is well. + --> + <target name="ivy-init |