Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
MapReduce >> mail # user >> Change the output of Reduce function


Copy link to this message
-
Re: Change the output of Reduce function
I think you can use NullWritable as the key.
http://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/NullWritable.html
Regards,
Shahab
On Thu, Jul 25, 2013 at 2:58 PM, Felipe Gutierrez <
[EMAIL PROTECTED]> wrote:

> I wrote a MapReduce program that implements a Grep function. I know there
> is a Grep function in the Hadoop examples, but I want to write my own Grep
> MapReduce job so that I can explain it to others.
> My problem is that my output shows both the key and the value. I want to
> show only the value, since I saved the line number in the value. Example:
>
> 00048 [ line 6298 : Jul 25 15:18:14 felipe kernel: [ 2168.644689] wlan0:
> associated ]
>
> Here is my code. Thanks,
> Felipe
>
> package grep;
>
> import java.io.File;
> import java.io.FileReader;
> import java.io.LineNumberReader;
>
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.FileInputFormat;
> import org.apache.hadoop.mapred.FileOutputFormat;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.TextInputFormat;
> import org.apache.hadoop.mapred.TextOutputFormat;
>
> public class Main {
>
>  public static void main(String[] args) throws Exception {
>
> if (args == null || args.length != 3) {
>  System.err.println("Usage: Main <in> <out> <regex>");
> System.exit(-1);
>  }
>
> JobConf conf = new JobConf(Main.class);
> conf.setJobName("grep");
>
> String input = args[0];
> String output = args[1];
> String regex = args[2];
>
> File arquivoLeitura = new File(input);
> LineNumberReader linhaLeitura = new LineNumberReader(new FileReader(
>  arquivoLeitura));
> linhaLeitura.skip(arquivoLeitura.length());
> String lines = String.valueOf(linhaLeitura.getLineNumber() + 1);
>  conf.set("grep.regex", regex);
> conf.set("grep.lines", lines);
>
> conf.setOutputKeyClass(Text.class);
>  conf.setOutputValueClass(MyWritable.class);
>
> conf.setMapperClass(GrepMapper.class);
> conf.setCombinerClass(GrepReducer.class);
>  conf.setReducerClass(GrepReducer.class);
>
> conf.setInputFormat(TextInputFormat.class);
>  conf.setOutputFormat(TextOutputFormat.class);
>
> FileInputFormat.setInputPaths(conf, new Path(input));
>  FileOutputFormat.setOutputPath(conf, new Path(output));
>
> JobClient.runJob(conf);
> }
> }
>
> package grep;
>
> import java.io.IOException;
> import java.text.DecimalFormat;
>
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.MapReduceBase;
> import org.apache.hadoop.mapred.Mapper;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reporter;
>
> public class GrepMapper extends MapReduceBase implements
> Mapper<LongWritable, Text, Text, MyWritable> {
>
> private static long line = 1;
> private static long n = 0;
> private static long divisor = 1;
>  private static long qtdLines = 0;
> private Text k = new Text();
>
> public void map(LongWritable key, Text value,
>  OutputCollector<Text, MyWritable> output, Reporter reporter)
> throws IOException {
> String str = value.toString();
>  MyWritable text = new MyWritable("line " + line + " : " + str);
> if ((line % divisor) == 0) {
>  n++;
> }
> k.set(customFormat("00000", n));
>  output.collect(k, text);
> line++;
> }
>
>  @Override
> public void configure(JobConf job) {
> qtdLines = Long.parseLong(job.get("grep.lines"));
>  if (qtdLines <= 500) {
> divisor = 10;
> } else if (qtdLines <= 1000) {
>  divisor = 20;
> } else if (qtdLines <= 1500) {
> divisor = 30;
>  } else if (qtdLines <= 2000) {
> divisor = 40;
> } else if (qtdLines <= 2500) {
>  divisor = 50;
> } else if (qtdLines <= 3000) {
> divisor = 60;
>  } else if (qtdLines <= 3500) {
> divisor = 70;
> } else if (qtdLines <= 4000) {
>  divisor = 80;
> } else if (qtdLines <= 4500) {
> divisor = 90;
>  } else if (qtdLines <= 5000) {
> divisor = 100;
> } else if (qtdLines <= 5500) {
>  divisor = 110;
> } else if (qtdLines <= 6000) {