Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
MapReduce, mail # user - Change the output of Reduce function


Copy link to this message
-
Re: Change the output of Reduce function
Felipe Gutierrez 2013-07-25, 19:18
Sorry, I think I didn't understand.
Does NullWritable replace MyWritable? But that is my value. My key
is a Text.
Regards,
Felipe

On Thu, Jul 25, 2013 at 4:07 PM, Shahab Yunus <[EMAIL PROTECTED]> wrote:

> I think you can use NullWritable as the key.
>
> http://hadoop.apache.org/docs/current/api/org/apache/hadoop/io/NullWritable.html
>
>
> Regards,
> Shahab
>
>
> On Thu, Jul 25, 2013 at 2:58 PM, Felipe Gutierrez <
> [EMAIL PROTECTED]> wrote:
>
>> I wrote a MapReduce program to execute a Grep function. I know there is a
>> Grep function in the Hadoop examples, but I want to make my own Grep MapReduce to
>> explain it to others.
>> My problem is that my output shows the key/value. I want to show only
>> the value, since I saved the line number in this value. Example:
>>
>> 00048 [ line 6298 : Jul 25 15:18:14 felipe kernel: [ 2168.644689] wlan0:
>> associated ]
>>
>> Here is my code. Thanks,
>> Felipe
>>
>> package grep;
>>
>> import java.io.File;
>> import java.io.FileReader;
>> import java.io.LineNumberReader;
>>
>> import org.apache.hadoop.fs.Path;
>> import org.apache.hadoop.io.Text;
>> import org.apache.hadoop.mapred.FileInputFormat;
>> import org.apache.hadoop.mapred.FileOutputFormat;
>> import org.apache.hadoop.mapred.JobClient;
>> import org.apache.hadoop.mapred.JobConf;
>> import org.apache.hadoop.mapred.TextInputFormat;
>> import org.apache.hadoop.mapred.TextOutputFormat;
>>
>> public class Main {
>>
>>  public static void main(String[] args) throws Exception {
>>
>> if (args == null || args.length != 3) {
>>  System.err.println("Usage: Main <in> <out> <regex>");
>> System.exit(-1);
>>  }
>>
>> JobConf conf = new JobConf(Main.class);
>> conf.setJobName("grep");
>>
>> String input = args[0];
>> String output = args[1];
>> String regex = args[2];
>>
>> File arquivoLeitura = new File(input);
>> LineNumberReader linhaLeitura = new LineNumberReader(new FileReader(
>>  arquivoLeitura));
>> linhaLeitura.skip(arquivoLeitura.length());
>> String lines = String.valueOf(linhaLeitura.getLineNumber() + 1);
>>  conf.set("grep.regex", regex);
>> conf.set("grep.lines", lines);
>>
>> conf.setOutputKeyClass(Text.class);
>>  conf.setOutputValueClass(MyWritable.class);
>>
>> conf.setMapperClass(GrepMapper.class);
>> conf.setCombinerClass(GrepReducer.class);
>>  conf.setReducerClass(GrepReducer.class);
>>
>> conf.setInputFormat(TextInputFormat.class);
>>  conf.setOutputFormat(TextOutputFormat.class);
>>
>> FileInputFormat.setInputPaths(conf, new Path(input));
>>  FileOutputFormat.setOutputPath(conf, new Path(output));
>>
>> JobClient.runJob(conf);
>> }
>> }
>>
>> package grep;
>>
>> import java.io.IOException;
>> import java.text.DecimalFormat;
>>
>> import org.apache.hadoop.io.LongWritable;
>> import org.apache.hadoop.io.Text;
>> import org.apache.hadoop.mapred.JobConf;
>> import org.apache.hadoop.mapred.MapReduceBase;
>> import org.apache.hadoop.mapred.Mapper;
>> import org.apache.hadoop.mapred.OutputCollector;
>> import org.apache.hadoop.mapred.Reporter;
>>
>> public class GrepMapper extends MapReduceBase implements
>> Mapper<LongWritable, Text, Text, MyWritable> {
>>
>> private static long line = 1;
>> private static long n = 0;
>> private static long divisor = 1;
>>  private static long qtdLines = 0;
>> private Text k = new Text();
>>
>> public void map(LongWritable key, Text value,
>>  OutputCollector<Text, MyWritable> output, Reporter reporter)
>> throws IOException {
>> String str = value.toString();
>>  MyWritable text = new MyWritable("line " + line + " : " + str);
>> if ((line % divisor) == 0) {
>>  n++;
>> }
>> k.set(customFormat("00000", n));
>>  output.collect(k, text);
>> line++;
>> }
>>
>>  @Override
>> public void configure(JobConf job) {
>> qtdLines = Long.parseLong(job.get("grep.lines"));
>>  if (qtdLines <= 500) {
>> divisor = 10;
>> } else if (qtdLines <= 1000) {
>>  divisor = 20;
>> } else if (qtdLines <= 1500) {
>> divisor = 30;
>>  } else if (qtdLines <= 2000) {
>> divisor = 40;
>> } else if (qtdLines <= 2500) {
*--