Home | About | Sematext search-lucene.com search-hadoop.com
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB
 Search Hadoop and all its subprojects:

Switch to Threaded View
MapReduce >> mail # user >> Profiling Hadoop Code


Copy link to this message
-
Re: Profiling Hadoop Code
On 05/19/2011 04:26 AM, Shuja Rehman wrote:
> Hi All,
>
> I was investigating the ways to profile the hadoop code. All I found
> is to use JobConf.setProfileEnabled(boolean)
> <http://hadoop.apache.org/common/docs/current/api/org/apache/hadoop/mapred/JobConf.html#setProfileEnabled%28boolean%29>
> but i believe this is not available in the new api. so can anybody let
> me know how i can profile my hadoop code to get details which part is
> taking what time to tune the application?
>
> Thanks
>
> --
> Regards
> Shuja-ur-Rehman Baig
>
>
Version 0.20.2
Location:/hadoop-0.20.2/src/mapred/org/apache/hadoop/mapred/JobConf.java
--------------------------------
/**
    * Get whether the task profiling is enabled.
    * @return true if some tasks will be profiled
    */
   public boolean getProfileEnabled() {
     return getBoolean("mapred.task.profile", false);
   }

   /**
    * Set whether the system should collect profiler information for
some of
    * the tasks in this job? The information is stored in the user log
    * directory.
    * @param newValue true means it should be gathered
    */
   public void setProfileEnabled(boolean newValue) {
     setBoolean("mapred.task.profile", newValue);
   }

   /**
    * Get the profiler configuration arguments.
    *
    * The default value for this property is
    *
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
    *
    * @return the parameters to pass to the task child to configure
profiling
    */
   public String getProfileParams() {
     return get("mapred.task.profile.params",
                "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y," +
                  "verbose=n,file=%s");
   }

   /**
    * Set the profiler configuration arguments. If the string contains a
'%s' it
    * will be replaced with the name of the profiling output file when
the task
    * runs.
    *
    * This value is passed to the task child JVM on the command line.
    *
    * @param value the configuration string
    */
   public void setProfileParams(String value) {
     set("mapred.task.profile.params", value);
   }

   /**
    * Get the range of maps or reduces to profile.
    * @param isMap is the task a map?
    * @return the task ranges
    */
   public IntegerRanges getProfileTaskRange(boolean isMap) {
     return getRange((isMap ? "mapred.task.profile.maps" :
                        "mapred.task.profile.reduces"), "0-2");
   }

   /**
    * Set the ranges of maps or reduces to profile. setProfileEnabled(true)
    * must also be called.
    * @param newValue a set of integer ranges of the map ids
    */
   public void setProfileTaskRange(boolean isMap, String newValue) {
     // parse the value to make sure it is legal
     new Configuration.IntegerRanges(newValue);
     set((isMap ? "mapred.task.profile.maps" :
"mapred.task.profile.reduces"),
         newValue);
   }
--------------------------------

--
Marcos Luís Ortíz Valmaseda
  Software Engineer (Large-Scaled Distributed Systems)
  University of Information Sciences,
  La Habana, Cuba
  Linux User # 418229
  http://about.me/marcosortiz

NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB