Home | About | Sematext search-lucene.com search-hadoop.com
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hive >> mail # user >> A GenericUDF Function to Extract a Field From an Array of Structs


Copy link to this message
-
RE: A GenericUDF Function to Extract a Field From an Array of Structs
Hi Navis류승우,
Thank you very much.  Your code works; now I can run the function against an external table.  Thank you so much.
However, can you or someone else point me toward a way of testing this function?  I am completely stuck on the testing part.
Thanks, Peter
The code for this function is below.
======================================import org.apache.hadoop.hive.ql.exec.Description;import org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.StructField;import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
@Description(name = "extract_product_category",        value = "_FUNC_( array< struct<productCategory:string> > ) - Collect all productCategory field values inside an array of struct(s), and return the results in an array<string>",        extended = "Example:\n SELECT _FUNC_(array_of_product_category_structs)")public class GenericUDFExtractProductCategory        extends GenericUDF{    private ArrayList ret;
    private ListObjectInspector listOI;    private StructObjectInspector structOI;    private ObjectInspector prodCatOI;
    @Override    public ObjectInspector initialize(ObjectInspector[] args)            throws UDFArgumentException    {        if (args.length != 1) {            throw new UDFArgumentLengthException("The function extract_product_category() requires exactly one argument.");        }
        if (args[0].getCategory() != Category.LIST) {            throw new UDFArgumentTypeException(0, "Type array<struct> is expected to be the argument for extract_product_category but " + args[0].getTypeName() + " is found instead");        }
        listOI = ((ListObjectInspector) args[0]);        structOI = ((StructObjectInspector) listOI.getListElementObjectInspector());
        if (structOI.getAllStructFieldRefs().size() != 1) {            throw new UDFArgumentTypeException(0, "Incorrect number of fields in the struct, should be one");        }
        StructField productCategoryField = structOI.getStructFieldRef("productCategory");        if (productCategoryField == null) {            throw new UDFArgumentTypeException(0, "NO \"productCategory\" field in input structure");        }
        prodCatOI = productCategoryField.getFieldObjectInspector();        if (prodCatOI.getCategory() != Category.PRIMITIVE) {            throw new UDFArgumentTypeException(0, "productCategory field must be of string type");        }
        if (((PrimitiveObjectInspector)prodCatOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {            throw new UDFArgumentTypeException(0, "productCategory field must be of string type");        }
        ret = new ArrayList();
        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);    }
    @Override    public ArrayList evaluate(DeferredObject[] arguments)            throws HiveException    {        ret.clear();
        if (arguments.length != 1) {            ArrayList<String> emptyList = new ArrayList<String>();            return emptyList;        }
        if (arguments[0].get() == null) {            ArrayList<String> emptyList = new ArrayList<String>();            return emptyList;        }
        int numElements = listOI.getListLength(arguments[0].get());
        for (int i = 0; i< numElements; i++){            Object element = listOI.getListElement(arguments[0].get(), i);            Object prodCatValue = structOI.getStructFieldData(element, structOI.getStructFieldRef("productCategory"));            ret.add(((PrimitiveObjectInspector)prodCatOI).getPrimitiveWritableObject(prodCatValue));        }        return ret;    }
    @Override    public String getDisplayString(String[] strings)    {        assert (strings.length > 0);        StringBuilder sb = new StringBuilder();        sb.append("extract_product_category(");        sb.append(strings[0]);        sb.append(")");        return sb.toString();    }}
====================================================From: [EMAIL PROTECTED]
To: [EMAIL PROTECTED]
Subject: RE: A GenericUDF Function to Extract a Field From an Array of Structs
Date: Fri, 5 Apr 2013 11:39:55 -0700
Hi Navis류승우,
Thank you very much.  Your code works; now I can run the function against an external table.  Thank you so much.
However, can you or someone else point me toward a way of testing this function?  I am completely stuck on the testing part.
Peter
The code for this function is below.
======================================import org.apache.hadoop.hive.ql.exec.Description;import org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.
NEW: Monitor These Apps!
elasticsearch, apache solr, apache hbase, hadoop, redis, cassandra, amazon cloudwatch, mysql, memcached, apache kafka, apache zookeeper, apache storm, ubuntu, centOS, red hat, debian, puppet labs, java, senseiDB