Home | About | Sematext search-lucene.com search-hadoop.com
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB
 Search Hadoop and all its subprojects:

Switch to Threaded View
HBase >> mail # user >> using date as key


Copy link to this message
-
Re: using date as key
Hi,
See below the code + example.
It assumes that dates are lexicographically ordered (e.g. YYYYMMDD
20100403).
The following use cases are covered:
1. create empty regions in new table.
2. add empty regions to existing table (existing regions were created using
the same code).
3. insert empty regions in the middle (i.e. regions for 20100403 and 20100405
were created and we want to catch up on 20100404).

package com.infolinks.hbase.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.util.*;

/**
 * User: Lior Schachter
 * Email: [EMAIL PROTECTED]
 */
public class HBaseOperations {

    private static final Logger logger Logger.getLogger(com.infolinks.hadoop.commons.hbase.HBaseOperations.class);

    private Configuration conf;
    private HTablePool pool = null;

    public HBaseOperations(Configuration conf) {
        this.conf = conf;
        pool = new HTablePool(conf, 20);
    }

    private Put createPut(byte[] row, HRegionInfo hri, Result originalRow)
throws IOException {
        Put put = new Put(row);
        if (originalRow != null) {
            for (Map.Entry<byte[], NavigableMap<byte[], byte[]>> cf_en :
originalRow.getNoVersionMap().entrySet()) {
                byte[] cf = cf_en.getKey();
                for (Map.Entry<byte[], byte[]> c_en :
cf_en.getValue().entrySet()) {
                    put.add(cf, c_en.getKey(), c_en.getValue());
                }
            }
        }
        put.add(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
Writables.getBytes(hri));
        return put;
    }

    private HTable getMetaTable() throws IOException {
        return new HTable(conf, HConstants.META_TABLE_NAME);
    }

    private Result getMetaTableRowByDate(byte[] tableName, String date,
HTable metaTable) throws IOException {
        Result row = null;
        ResultScanner s = null;
        try {
            s = metaTable.getScanner(new Scan());
            for (Result result : s) {
                byte[] infoBytes result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
                if (infoBytes != null) {
                    HRegionInfo info Writables.getHRegionInfo(result.getValue(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER));
                    HTableDescriptor desc = info.getTableDesc();
                    if (Bytes.compareTo(desc.getName(), tableName) == 0) {
                        if
(Bytes.toString(info.getStartKey()).startsWith(date) ||
Bytes.toString(info.getEndKey()).startsWith(date)) {
                            logger.info("Date " + date + ", already exists
for table " + Bytes.toString(tableName));
                            return null;
                        }
                        if (Bytes.compareTo(info.getEndKey(),
Bytes.toBytes(date)) > 0) {
                            return result;
                        }
                        row = result;
                    }
                }
            }
        } finally {
            if (s != null) {
                s.close();
            }
        }
        return row;
    }

    public int createEmptyRegions(String date, String delimiter, final
HTable table, String[] startKeys) throws IOException {
        Arrays.sort(startKeys);
        HTable meta = getMetaTable();
        List<HRegionInfo> newRegions = new
ArrayList<HRegionInfo>(startKeys.length);
        Result lastRow = getMetaTableRowByDate(table.getTableName(), date,
meta);
        int count = 0;
        byte[] endKey = HConstants.EMPTY_BYTE_ARRAY;
        if (lastRow != null) {
            HBaseAdmin admin = new HBaseAdmin(conf);
            HRegionInfo info = (HRegionInfo)
Writables.getWritable(lastRow.getColumnLatest(HConstants.CATALOG_FAMILY,
HConstants.REGIONINFO_QUALIFIER).getValue(), new HRegionInfo());
            HRegionInfo hri;
            if (Bytes.compareTo(info.getStartKey(), info.getEndKey()) == 0
&& Bytes.compareTo(info.getStartKey(), HConstants.EMPTY_BYTE_ARRAY) == 0) {
                //new table
                hri = new HRegionInfo(table.getTableDescriptor(),
HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes(date + delimiter + startKeys[0]),
false, info.getRegionId());
                Put put = createPut(hri.getRegionName(), hri, lastRow);
                meta.put(put);
            } else if (Bytes.compareTo(HConstants.EMPTY_BYTE_ARRAY,
info.getEndKey()) != 0) {
                //insert in the middle
                endKey = info.getEndKey();
                hri = new HRegionInfo(table.getTableDescriptor(),
info.getStartKey(), Bytes.toBytes(date), false, info.getRegionId());
                Put put = createPut(hri.getRegionName(), hri, lastRow);
                meta.put(put);
                hri = new HRegionInfo(table.getTableDescriptor(),
                        Bytes.toBytes(date), Bytes.toBytes(date + delimiter
+ startKeys[0]));
                put = createPut(hri.getRegionName(), hri, null);
                meta.put(put);
                newRegions.add(hri);
            } else {
                //new day
                hri = new HRegionInfo(table.getTableDescriptor(),
info.getStartKey(), Bytes.toBytes(date), false, info.getRegionId());
                Put put = createPut(hri.getRegionName(), hri, lastRow);
                meta.put(put);
                hri = new HRegionInfo(table.getTableDescriptor(),
Bytes.toBytes(date), Bytes.toBytes(date + delimiter + startKeys[0]));
                put = createPut(hri.getRegionName(), hri, null);
                meta.put(put);
                newRegions.add(hri);
            }
            for (int i = 0; i < startKeys.length; i++) {
                if
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB