Home | About | Sematext search-lucene.com search-hadoop.com
 Search Hadoop and all its subprojects:

Switch to Threaded View
Hive >> mail # user >> A GenericUDF Function to Extract a Field From an Array of Structs


Copy link to this message
-
RE: A GenericUDF Function to Extract a Field From an Array of Structs
Hi Navis류승우,
Thank you very much.  Your code works, now I can run the function against external table.  Thank you so much.
However, could you or someone else point me toward how to test this function?  I am completely stuck on the testing part.
Thanks, Peter
The code for this function is below.
======================================import org.apache.hadoop.hive.ql.exec.Description;import org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.StructField;import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
@Description(name = "extract_product_category",        value = "_FUNC_( array< struct<productCategory:string> > ) - Collect all productCategory field values inside an array of struct(s), and return the results in an array<string>",        extended = "Example:\n SELECT _FUNC_(array_of_product_category_structs)")public class GenericUDFExtractProductCategory        extends GenericUDF{    private ArrayList ret;
    private ListObjectInspector listOI;    private StructObjectInspector structOI;    private ObjectInspector prodCatOI;
    @Override    public ObjectInspector initialize(ObjectInspector[] args)            throws UDFArgumentException    {        if (args.length != 1) {            throw new UDFArgumentLengthException("The function extract_product_category() requires exactly one argument.");        }
        if (args[0].getCategory() != Category.LIST) {            throw new UDFArgumentTypeException(0, "Type array<struct> is expected to be the argument for extract_product_category but " + args[0].getTypeName() + " is found instead");        }
        listOI = ((ListObjectInspector) args[0]);        structOI = ((StructObjectInspector) listOI.getListElementObjectInspector());
        if (structOI.getAllStructFieldRefs().size() != 1) {            throw new UDFArgumentTypeException(0, "Incorrect number of fields in the struct, should be one");        }
        StructField productCategoryField = structOI.getStructFieldRef("productCategory");        if (productCategoryField == null) {            throw new UDFArgumentTypeException(0, "NO \"productCategory\" field in input structure");        }
        prodCatOI = productCategoryField.getFieldObjectInspector();        if (prodCatOI.getCategory() != Category.PRIMITIVE) {            throw new UDFArgumentTypeException(0, "productCategory field must be of string type");        }
        if (((PrimitiveObjectInspector)prodCatOI).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {            throw new UDFArgumentTypeException(0, "productCategory field must be of string type");        }
        ret = new ArrayList();
        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableStringObjectInspector);    }
    @Override    public ArrayList evaluate(DeferredObject[] arguments)            throws HiveException    {        ret.clear();
        if (arguments.length != 1) {            ArrayList<String> emptyList = new ArrayList<String>();            return emptyList;        }
        if (arguments[0].get() == null) {            ArrayList<String> emptyList = new ArrayList<String>();            return emptyList;        }
        int numElements = listOI.getListLength(arguments[0].get());
        for (int i = 0; i< numElements; i++){            Object element = listOI.getListElement(arguments[0].get(), i);            Object prodCatValue = structOI.getStructFieldData(element, structOI.getStructFieldRef("productCategory"));            ret.add(((PrimitiveObjectInspector)prodCatOI).getPrimitiveWritableObject(prodCatValue));        }        return ret;    }
    @Override    public String getDisplayString(String[] strings)    {        assert (strings.length > 0);        StringBuilder sb = new StringBuilder();        sb.append("extract_product_category(");        sb.append(strings[0]);        sb.append(")");        return sb.toString();    }}
====================================================From: [EMAIL PROTECTED]
To: [EMAIL PROTECTED]
Subject: RE: A GenericUDF Function to Extract a Field From an Array of Structs
Date: Fri, 5 Apr 2013 11:39:55 -0700
Hi Navis류승우,
Thank you very much.  Your code works, now I can run the function against external table.  Thank you so much.
However, do you or can someone point me into testing this function?  I am completely stuck in the testing part.
Peter
The code for this function below.
======================================import org.apache.hadoop.hive.ql.exec.Description;import org.apache.hadoop.hive.ql.exec.UDFArgumentException;import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;import org.apache.hadoop.hive.serde2.