Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hadoop, mail # user - dynamic mapper?

Copy link to this message
Re: dynamic mapper?
madhu phatak 2012-03-28, 08:49
 You can use the Java compiler APIs to compile custom Java code and create
jars. For example, look at this code from Sqoop:

/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.sqoop.orm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobConf;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.util.FileListing;

import com.cloudera.sqoop.util.Jars;

/**
 * Manages the compilation of a bunch of .java files into .class files
 * and eventually a jar.
 *
 * Also embeds this program's jar into the lib/ directory inside the
 * jar to ensure that the job runs correctly.
 */
public class CompilationManager {

  /** If we cannot infer a jar name from a table name, etc., use this. */
  public static final String DEFAULT_CODEGEN_JAR_NAME       "sqoop-codegen-created.jar";

  public static final Log LOG = LogFactory.getLog(

  private SqoopOptions options;
  private List<String> sources;

  public CompilationManager(final SqoopOptions opts) {
    options = opts;
    sources = new ArrayList<String>();

  public void addSourceFile(String sourceName) {

   * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home.
   * If that doesn't work, check our classpath.
   * @return the filename of the hadoop-*-core.jar file.
  private String findHadoopCoreJar() {
    String hadoopHome = options.getHadoopHome();

    if (null == hadoopHome) {
      LOG.info("$HADOOP_HOME is not set");
      return Jars.getJarPathForClass(JobConf.class);

    if (!hadoopHome.endsWith(File.separator)) {
      hadoopHome = hadoopHome + File.separator;

    File hadoopHomeFile = new File(hadoopHome);
    LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath());
    File [] entries = hadoopHomeFile.listFiles();

    if (null == entries) {
      LOG.warn("HADOOP_HOME appears empty or missing");
      return Jars.getJarPathForClass(JobConf.class);

    for (File f : entries) {
      if (f.getName().startsWith("hadoop-")
          && f.getName().endsWith("-core.jar")) {
        LOG.info("Found hadoop core jar at: " + f.getAbsolutePath());
        return f.getAbsolutePath();

    return Jars.getJarPathForClass(JobConf.class);

  /**
   * Compile the .java files into .class files via embedded javac call.
   * On success, move .java files to the code output dir.
   */
  public void compile() throws IOException {
    List<String> args = new ArrayList<String>();

    // ensure that the jar output dir exists.
    String jarOutDir = options.getJarOutputDir();
    File jarOutDirObj = new File(jarOutDir);
    if (!jarOutDirObj.exists()) {
      boolean mkdirSuccess = jarOutDirObj.mkdirs();
      if (!mkdirSuccess) {
        LOG.debug("Warning: Could not make directories for " + jarOutDir);
    } else if (LOG.isDebugEnabled()) {
      LOG.debug("Found existing " + jarOutDir);

    // Make sure jarOutDir ends with a '/'.
    if (!jarOutDir.endsWith(File.separator)) {
      jarOutDir = jarOutDir + File.separator;

    // find hadoop-*-core.jar for classpath.
    String coreJar = findHadoopCoreJar();
    if (null == coreJar) {
      // Couldn't find a core jar to insert into the CP for compilation.
      // however, we're running this from a unit test, then the path to the
      // .class files might be set via the hadoop.alt.classpath property
      // instead. Check there first.
      String coreClassesPath = System.getProperty("hadoop.alt.classpath");
      if (null == coreClassesPath) {
        // no -- we're out of options. Fail.
        throw new IOException("Could not find hadoop core jar!");
      } else {
        coreJar = coreClassesPath;

    // find sqoop jar for compilation classpath
    String sqoopJar = Jars.getSqoopJarPath();
    if (null != sqoopJar) {
      sqoopJar = File.pathSeparator + sqoopJar;
    } else {
      LOG.warn("Could not find sqoop jar; child compilation may fail");
      sqoopJar = "";

    String curClasspath = System.getProperty("java.class.path");



    args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar);

    JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
    if (null == compiler) {
      LOG.error("It seems as though you are running sqoop with a JRE.");
      LOG.error("Sqoop requires a JDK that can compile Java code.");
      LOG.error("Please install a JDK and set $JAVA_HOME to use it.");
      throw new IOException("Could not start Java compiler.");
    StandardJavaFileManager fileManager         compiler.getStandardFileManager(nul