Re: MapFile as an input to a MapReduce job
It may be that SequenceFileInputFormat is reading the value as
<Text, LongWritable> by default. So you can write something like:

// The value type must be BytesWritable (not ByteWritable) to match the file
SequenceFileInputFormat<Text, BytesWritable> sequenceInputFormat =
        new SequenceFileInputFormat<Text, BytesWritable>();
job.setInputFormat(sequenceInputFormat.getClass());
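
For context, here is a minimal sketch of how the whole driver could be wired
up with the old mapred API (the job name and output path below are
illustrative, not from your code):

JobConf job = new JobConf(getConf(), AddOne.class);
job.setJobName("addone");

// Read the MapFile's data file as a SequenceFile, so the mapper gets
// <Text, BytesWritable> pairs instead of the TextInputFormat default
// of <LongWritable, Text>
job.setInputFormat(SequenceFileInputFormat.class);

job.setMapperClass(MapClass.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormat(TextOutputFormat.class);

// Pointing the job at the MapFile directory should also work, since
// SequenceFileInputFormat treats a MapFile directory as its data file
FileInputFormat.setInputPaths(job, new Path("TestMap"));
FileOutputFormat.setOutputPath(job, new Path("output"));

JobClient.runJob(job);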
On Fri, Jan 21, 2011 at 2:25 AM, Luca <[EMAIL PROTECTED]> wrote:

> Hello,
>
> I recently started using Hadoop, and I have a problem using a MapFile
> as an input to a MapReduce job.
>
> The following working code writes a simple MapFile called "TestMap" in HDFS
> with three keys of type Text and three values of type BytesWritable:
>
> $ hadoop fs  -text /user/hadoop/TestMap/data
> 11/01/20 11:17:58 INFO util.NativeCodeLoader: Loaded the native-hadoop
> library
> 11/01/20 11:17:58 INFO zlib.ZlibFactory: Successfully loaded & initialized
> native-zlib library
> 11/01/20 11:17:58 INFO compress.CodecPool: Got brand-new decompressor
> A       01
> B       02
> C       03
>
>
>
> import java.io.IOException;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.io.MapFile;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.io.BytesWritable;
> import org.apache.hadoop.io.SequenceFile;
> import org.apache.hadoop.io.IOUtils;
>
> public class CreateMap {
>
>    public static void main(String[] args) throws IOException{
>
>        Configuration conf = new Configuration();
>        FileSystem hdfs  = FileSystem.get(conf);
>
>        Text key = new Text();
>        BytesWritable value = new BytesWritable();
>        byte[] data = {1, 2, 3};
>        String[] strs = {"A", "B", "C"};
>        MapFile.Writer writer = null;
>
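>        // MapFile.Writer creates the "TestMap" directory in HDFS, holding
>        // a sorted data file plus an index file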
>        writer = new MapFile.Writer(conf, hdfs, "TestMap", key.getClass(),
> value.getClass());
>        try {
>            for (int i = 0; i < 3; i++) {
>                key.set(strs[i]);
>                value.set(data, i, 1);
>                writer.append(key, value);
>                System.out.println(strs[i] + ":" + data[i] + " added.");
>            }
>        }
>        catch (IOException e) {
>            e.printStackTrace();
>        }
>        finally {
>             IOUtils.closeStream(writer);
>        }
>    }
> }
>
> The simple MapReduce job that follows tries to increment the values of
> the MapFile by one:
>
> import java.io.IOException;
> import java.util.Iterator;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.conf.Configured;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapred.FileInputFormat;
> import org.apache.hadoop.mapred.FileOutputFormat;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.SequenceFileInputFormat;
> import org.apache.hadoop.mapred.MapReduceBase;
> import org.apache.hadoop.mapred.Mapper;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reducer;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.mapred.TextOutputFormat;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
> import org.apache.hadoop.io.BytesWritable;
>
>
> public class AddOne extends Configured implements Tool {
>
>    public static class MapClass extends MapReduceBase
>        implements Mapper<Text, BytesWritable, Text, Text> {
>
>        public void map(Text key, BytesWritable value,
>                        OutputCollector<Text, Text> output,
>                        Reporter reporter) throws IOException {
>
>
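>            // getBytes() returns the raw backing buffer; each stored value
>            // here is a single byte, so only data[0] is meaningful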
>            byte[] data = value.getBytes();
>            data[0] += 1;
>            value.set(data, 0, 1);
>            output.collect(key, new Text(value.toString()));
>        }
>    }
>
>    public static class Reduce extends MapReduceBase
>        implements Reducer<Text, Text, Text, Text> {
>
>        public void reduce(Text key, Iterator<Text> values,
>                           OutputCollector<Text, Text> output,