Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Pig, mail # user - Understanding evalfunc schema


Copy link to this message
-
Understanding evalfunc schema
Mohit Anchlia 2012-09-08, 23:14
I am trying to understand how to define a schema so that I get 2 or more
tuples in a bag. Is this the right way of doing it? For some reason the
second tuple, tuple_of_tokens1, always comes out null, and both tuples that I add
show up in "tuple_of_tokens" itself.
 @Override
 public DataBag exec(Tuple input) throws IOException {
  DataBag output = mBagFactory.newDefaultBag();
  try {
   Object o = input.get(1);
   if (!(o instanceof String)) {
    throw new IOException(
      "Expected document input to be chararray, but  got "
        + o.getClass().getName());
   }
   Object o1 = input.get(0);
   if (!(o1 instanceof Long)) {
    throw new IOException("Expected input to be long, but  got "
      + o.getClass().getName());
   }
   String json = (String) o;
   WebEvent we = ModelParser.convertJsonStringToObj(json);

   output.add(mTupleFactory.newTuple("1," + we.getAccountOwner() + ","
     + we.getTrackingCodeVersion()));

   output.add(mTupleFactory.newTuple("2," + we.getAccountOwner() + ","
     + we.getTrackingCodeVersion()));
  } catch (ExecException ee) {
   log.error("Failed to Process ", ee);
   throw ee;
  }
  return output;
 }
 @Override
 public Schema outputSchema(Schema input) {
  try {
   Schema.FieldSchema tokenFs = new Schema.FieldSchema("token",
     DataType.CHARARRAY);
   Schema tupleSchema = new Schema(tokenFs);
   Schema.FieldSchema tupleFs;
   tupleFs = new Schema.FieldSchema("tuple_of_tokens", tupleSchema,
     DataType.TUPLE);
   Schema.FieldSchema tokenFs1 = new Schema.FieldSchema("token1",
     DataType.CHARARRAY);
   Schema tupleSchema1 = new Schema(tokenFs1);
   Schema.FieldSchema tupleFs1;
   tupleFs1 = new Schema.FieldSchema("tuple_of_tokens1", tupleSchema1,
     DataType.TUPLE);
   List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
   fields.add(tupleFs);
   fields.add(tupleFs1);
   Schema bagSchema = new Schema(fields);
   bagSchema.setTwoLevelAccessRequired(true);
   Schema.FieldSchema bagFs = new Schema.FieldSchema(
     "bag_of_tokenTuples", bagSchema, DataType.BAG);
   return new Schema(bagFs);
  } catch (Exception e) {
   return null;
  }
 }