|
|
+
pablomar 2012-11-16, 20:48
+
Dmitriy Ryaboy 2012-11-16, 22:15
+
Bill Graham 2012-11-19, 17:16
+
pablomar 2012-11-19, 17:24
+
pablomar 2012-11-19, 21:17
+
Jonathan Coveney 2012-11-19, 23:32
-
Re: PigStorage - pablomar 2012-11-20, 00:38
done. PIG-3057 <https://issues.apache.org/jira/browse/PIG-3057>
On Mon, Nov 19, 2012 at 6:32 PM, Jonathan Coveney <[EMAIL PROTECTED]>wrote: > Make a JIRA and attach the patch, please. > > > 2012/11/19 pablomar <[EMAIL PROTECTED]> > > > hi all, > > > > I did it as simple as I could. What about this changes ? > > > > > > PigStorage.java > > original: > > private void readField(byte[] buf, int start, int end) { > > if (start == end) { > > // NULL value > > mProtoTuple.add(null); > > } else { > > mProtoTuple.add(new DataByteArray(buf, start, end)); > > } > > } > > > > > > new: > > protected void addToCurrentTuple(DataByteArray data) { > > mProtoTuple.add(data); > > } > > > > protected void readField(byte[] buf, int start, int end) { > > if (start == end) { > > // NULL value > > addToCurrentTuple(null); > > } else { > > addToCurrentTuple(new DataByteArray(buf, start, end)); > > } > > } > > > > > > > > so, what's the advantage ? > > with the new version, if I want to extend PigStorage just to override > > readField, I do something like: > > > > import org.apache.pig.builtin.PigStorage; > > import org.apache.pig.data.DataByteArray; > > > > public class MyLoader extends PigStorage { > > public MyLoader() { > > this("\t"); > > } > > > > public MyLoader(String delimiter) { > > super(delimiter); > > } > > > > @Override > > protected void readField(byte[] buf, int start, int end) { > > // remove the field name (example: min=, max=) > > for (int i=start; i<=end;i++) { > > if (buf[i] == '=') { > > start = i + 1; > > break; > > } > > } > > > > if (start == end) { > > // NULL value > > addToCurrentTuple(null); > > } else { > > addToCurrentTuple(new DataByteArray(buf, start, end)); > > } > > } > > } > > > > currently, just to override readField, I also need to copy/paste a lot > from > > PigStorage: > > > > import java.io.IOException; > > import java.util.ArrayList; > > import java.util.Properties; > > import org.apache.hadoop.io.Text; > > import org.apache.pig.backend.executionengine.ExecException; > > import org.apache.pig.builtin.PigStorage; 
> > import org.apache.pig.PigException; > > import org.apache.pig.data.DataByteArray; > > import org.apache.pig.data.Tuple; > > import org.apache.pig.data.TupleFactory; > > import org.apache.pig.impl.util.ObjectSerializer; > > import org.apache.pig.impl.util.StorageUtil; > > import org.apache.pig.impl.util.UDFContext; > > > > public class MyLoader extends PigStorage { > > private byte fieldDel = '\t'; > > private ArrayList<Object> mProtoTuple = null; > > private TupleFactory mTupleFactory = TupleFactory.getInstance(); > > private boolean mRequiredColumnsInitialized = false; > > > > public MyLoader() { > > this("\t"); > > } > > > > public MyLoader(String delimiter) { > > super(delimiter); > > fieldDel = StorageUtil.parseFieldDel(delimiter); > > } > > > > // exactly the same as in PigStorage, but I needed to modify > readField > > which is private, so I have to copy all of it > > @Override > > public Tuple getNext() throws IOException { > > mProtoTuple = new ArrayList<Object>(); > > if (!mRequiredColumnsInitialized) { > > if (signature!=null) { > > Properties p = > > UDFContext.getUDFContext().getUDFProperties(this.getClass()); > > mRequiredColumns = > > (boolean[])ObjectSerializer.deserialize(p.getProperty(signature)); > > } > > mRequiredColumnsInitialized = true; > > } > > try { > > boolean notDone = in.nextKeyValue(); > > if (!notDone) { > > return null; > > > > } > > > > Text value = (Text) in.getCurrentValue(); > > byte[] buf = value.getBytes(); |