Home | About | Sematext search-lucene.com search-hadoop.com
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, casssandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hadoop >> mail # user >> dynamic mapper?

Copy link to this message
Re: dynamic mapper?
 You can use java API's to compile custom java code and create jars. For
example , look at this code from Sqoop

 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * See the License for the specific language governing permissions and
 * limitations under the License.

package com.cloudera.sqoop.orm;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import javax.tools.JavaCompiler;
import javax.tools.JavaFileObject;
import javax.tools.StandardJavaFileManager;
import javax.tools.ToolProvider;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.JobConf;

import com.cloudera.sqoop.SqoopOptions;
import com.cloudera.sqoop.util.FileListing;

import com.cloudera.sqoop.util.Jars;

 * Manages the compilation of a bunch of .java files into .class files
 * and eventually a jar.
 * Also embeds this program's jar into the lib/ directory inside the
 * jar to ensure that the job runs correctly.
public class CompilationManager {

  /** If we cannot infer a jar name from a table name, etc., use this. */
  public static final String DEFAULT_CODEGEN_JAR_NAME       "sqoop-codegen-created.jar";

  public static final Log LOG = LogFactory.getLog(

  private SqoopOptions options;
  private List<String> sources;

  public CompilationManager(final SqoopOptions opts) {
    options = opts;
    sources = new ArrayList<String>();

  public void addSourceFile(String sourceName) {

   * locate the hadoop-*-core.jar in $HADOOP_HOME or --hadoop-home.
   * If that doesn't work, check our classpath.
   * @return the filename of the hadoop-*-core.jar file.
  private String findHadoopCoreJar() {
    String hadoopHome = options.getHadoopHome();

    if (null == hadoopHome) {
      LOG.info("$HADOOP_HOME is not set");
      return Jars.getJarPathForClass(JobConf.class);

    if (!hadoopHome.endsWith(File.separator)) {
      hadoopHome = hadoopHome + File.separator;

    File hadoopHomeFile = new File(hadoopHome);
    LOG.info("HADOOP_HOME is " + hadoopHomeFile.getAbsolutePath());
    File [] entries = hadoopHomeFile.listFiles();

    if (null == entries) {
      LOG.warn("HADOOP_HOME appears empty or missing");
      return Jars.getJarPathForClass(JobConf.class);

    for (File f : entries) {
      if (f.getName().startsWith("hadoop-")
          && f.getName().endsWith("-core.jar")) {
        LOG.info("Found hadoop core jar at: " + f.getAbsolutePath());
        return f.getAbsolutePath();

    return Jars.getJarPathForClass(JobConf.class);

   * Compile the .java files into .class files via embedded javac call.
   * On success, move .java files to the code output dir.
  public void compile() throws IOException {
    List<String> args = new ArrayList<String>();

    // ensure that the jar output dir exists.
    String jarOutDir = options.getJarOutputDir();
    File jarOutDirObj = new File(jarOutDir);
    if (!jarOutDirObj.exists()) {
      boolean mkdirSuccess = jarOutDirObj.mkdirs();
      if (!mkdirSuccess) {
        LOG.debug("Warning: Could not make directories for " + jarOutDir);
    } else if (LOG.isDebugEnabled()) {
      LOG.debug("Found existing " + jarOutDir);

    // Make sure jarOutDir ends with a '/'.
    if (!jarOutDir.endsWith(File.separator)) {
      jarOutDir = jarOutDir + File.separator;

    // find hadoop-*-core.jar for classpath.
    String coreJar = findHadoopCoreJar();
    if (null == coreJar) {
      // Couldn't find a core jar to insert into the CP for compilation.
      // however, we're running this from a unit test, then the path to the
      // .class files might be set via the hadoop.alt.classpath property
      // instead. Check there first.
      String coreClassesPath = System.getProperty("hadoop.alt.classpath");
      if (null == coreClassesPath) {
        // no -- we're out of options. Fail.
        throw new IOException("Could not find hadoop core jar!");
      } else {
        coreJar = coreClassesPath;

    // find sqoop jar for compilation classpath
    String sqoopJar = Jars.getSqoopJarPath();
    if (null != sqoopJar) {
      sqoopJar = File.pathSeparator + sqoopJar;
    } else {
      LOG.warn("Could not find sqoop jar; child compilation may fail");
      sqoopJar = "";

    String curClasspath = System.getProperty("java.class.path");



    args.add(curClasspath + File.pathSeparator + coreJar + sqoopJar);

    JavaCompiler compiler = ToolProvider.getSystemJavaCompiler();
    if (null == compiler) {
      LOG.error("It seems as though you are running sqoop with a JRE.");
      LOG.error("Sqoop requires a JDK that can compile Java code.");
      LOG.error("Please install a JDK and set $JAVA_HOME to use it.");
      throw new IOException("Could not start Java compiler.");
    StandardJavaFileManager fileManager         compiler.getStandardFileManager(nul
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, casssandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB