Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Pig, mail # user - HBase 0.20.0 support patch


Copy link to this message
-
Re: HBase 0.20.0 support patch
Vincent BARAT 2009-09-19, 16:57
done

https://issues.apache.org/jira/browse/PIG-970
Alan Gates a écrit :
> Would it be possible to attach this to the JIRA so we can commit it to
> the code?  We'd be happy to get it integrated with Pig 0.5, which will
> run on Hadoop 0.20.
>
> Alan.
>
> On Sep 18, 2009, at 6:33 AM, Vincent BARAT wrote:
>
>> Hello,
>>
>> For those who need this, I have attached to this email a small PIG
>> patch to support HBase 0.20.0.
>>
>> It can be applied on the trunk as of today.
>>
>> It is a minimal patch that only modifies the source code. You still
>> need to import the HBase 0.20.0 .jar by yourselves.
>>
>> If nobody works on this, I also plan to add the storage of PIG results
>> in HBase tables, but I don't know when.
>>
>> Index: src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java
>> ===================================================================
>> --- src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java    (revision 816619)
>> +++ src/org/apache/pig/backend/hadoop/hbase/HBaseSlice.java    (working copy)
>> @@ -24,9 +24,9 @@
>> import org.apache.hadoop.hbase.HBaseConfiguration;
>> import org.apache.hadoop.hbase.UnknownScannerException;
>> import org.apache.hadoop.hbase.client.HTable;
>> -import org.apache.hadoop.hbase.client.Scanner;
>> -import org.apache.hadoop.hbase.io.Cell;
>> -import org.apache.hadoop.hbase.io.RowResult;
>> +import org.apache.hadoop.hbase.client.Result;
>> +import org.apache.hadoop.hbase.client.ResultScanner;
>> +import org.apache.hadoop.hbase.client.Scan;
>> import org.apache.hadoop.hbase.util.Bytes;
>> import org.apache.hadoop.util.StringUtils;
>> import org.apache.pig.Slice;
>> @@ -61,7 +61,7 @@
>>     /** The connection to the table in Hbase **/
>>     private transient HTable m_table;
>>     /** The scanner over the table **/
>> -    private transient Scanner m_scanner;
>> +    private transient ResultScanner m_scanner;
>>
>>     private transient ArrayList<Object> mProtoTuple;
>>
>> @@ -178,17 +178,18 @@
>>      * @throws IOException
>>      */
>>     private void restart(byte[] startRow) throws IOException {
>> +    Scan scan;
>>         if ((m_endRow != null) && (m_endRow.length > 0)) {
>> -            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow,
>> -                    m_endRow);
>> +        scan = new Scan(startRow, m_endRow);
>>         } else {
>> -            this.m_scanner = this.m_table.getScanner(m_inputColumns, startRow);
>> +        scan = new Scan(startRow);
>>         }
>> +    this.m_scanner = this.m_table.getScanner(scan);
>>     }
>>
>>     @Override
>>     public boolean next(Tuple value) throws IOException {
>> -        RowResult result;
>> +        Result result;
>>         try {
>>             result = this.m_scanner.next();
>>         } catch (UnknownScannerException e) {
>> @@ -215,15 +216,14 @@
>>      * @param tuple
>>      *            tuple
>>      */
>> -    private void convertResultToTuple(RowResult result, Tuple tuple) {
>> +    private void convertResultToTuple(Result result, Tuple tuple) {
>>         if (mProtoTuple == null)
>>             mProtoTuple = new ArrayList<Object>();
>>
>> -        Cell cell = null;
>>         byte[] value = null;
>>         for (byte[] column : m_inputColumns) {
>> -            cell = result.get(column);
>> -            if (cell == null || (value = cell.getValue()) == null) {
>> +            value = result.getValue(column);
>> +            if (value == null) {
>>                 mProtoTuple.add(null);
>>             } else {
>>                 mProtoTuple.add(new DataByteArray(value));
>
>
>