Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hadoop >> mail # user >> Re: XmlInputFormat Hadoop -Mapreduce


Copy link to this message
-
Re: XmlInputFormat Hadoop -Mapreduce
Hi Ranjini,
I have modified the code and it is working perfectly fine for
me. Please mail me at [EMAIL PROTECTED] and I will send you the
zipped code.

In the code you have written, I really don't understand why the
mapper class emits NullWritable as the key; that doesn't make
sense.

If you are making use of a reducer after this, then there are two possibilities:

(1) Grouping on null will happen at the reducer and you will see a null
pointer exception.
(2) Grouping on null will give you something like this: null,{Val1,val2......valn}

My suggestion: don't ever use null as an output key from the mapper.

Regards,
Som Shekhar Sharma
+91-8197243810
On Tue, Dec 17, 2013 at 5:42 PM, Ranjini Rathinam
<[EMAIL PROTECTED]> wrote:
> Hi,
>
> I have attached the code. Please verify.
>
> Please suggest. I am using Hadoop version 0.20.
>
>
> import java.io.IOException;
> import java.util.logging.Level;
> import java.util.logging.Logger;
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.NullWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapreduce.Job;
> import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
> import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
> import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
> //import org.apache.hadoop.mapreduce.lib.input.XmlInputFormat;
>
> public class ParserDriverMain {
>
> public static void main(String[] args) {
> try {
> runJob(args[0], args[1]);
>
> } catch (IOException ex) {
> Logger.getLogger(ParserDriverMain.class.getName()).log(Level.SEVERE, null,
> ex);
> }
>
> }
>
> //The code is mostly self explanatory. You need to define the starting and
> ending tag of to split a record from the xml file and it can be defined in
> the following lines
>
> //conf.set("xmlinput.start", "<startingTag>");
> //conf.set("xmlinput.end", "</endingTag>");
>
>
> public static void runJob(String input,String output ) throws IOException {
>
> Configuration conf = new Configuration();
>
> conf.set("xmlinput.start", "<Employee>");
> conf.set("xmlinput.end", "</Employee>");
> conf.set("io.serializations","org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
>
> Job job = new Job(conf, "jobName");
>
> input="/user/hduser/Ran/";
> output="/user/task/Sales/";
> FileInputFormat.setInputPaths(job, input);
> job.setJarByClass(ParserDriverMain.class);
> job.setMapperClass(MyParserMapper.class);
> job.setNumReduceTasks(1);
> job.setInputFormatClass(XmlInputFormatNew.class);
> job.setOutputKeyClass(NullWritable.class);
> job.setOutputValueClass(Text.class);
> Path outPath = new Path(output);
> FileOutputFormat.setOutputPath(job, outPath);
> FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
> if (dfs.exists(outPath)) {
> dfs.delete(outPath, true);
> }
>
>
> try {
>
> job.waitForCompletion(true);
>
> } catch (InterruptedException ex) {
> Logger.getLogger(ParserDriverMain.class.getName()).log(Level.SEVERE, null,
> ex);
> } catch (ClassNotFoundException ex) {
> Logger.getLogger(ParserDriverMain.class.getName()).log(Level.SEVERE, null,
> ex);
> }
>
> }
>
> }
>
>
>
>
>
> import java.io.IOException;
> import java.util.logging.Level;
> import java.util.logging.Logger;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.NullWritable;
> import org.apache.hadoop.io.Text;
> import org.apache.hadoop.mapreduce.Mapper;
> import org.jdom.Document;
> import org.jdom.Element;
> import org.jdom.JDOMException;
> import org.jdom.input.SAXBuilder;
> import java.io.Reader;
> import java.io.StringReader;
>
> /**
>  *
>  * @author root
>  */
> public class MyParserMapper extends Mapper<LongWritable, Text, NullWritable,
> Text> {
>
>     @Override
>     public void map(LongWritable key, Text value1,Context context)throws
> IOException, InterruptedException {
>
>                     String xmlString = value1.toString();