添加链接
link之家
链接快照平台
  • 输入网页链接,自动生成快照
  • 标签化管理网页链接
DESCRIBE struct_demo;
+-------------------+-------------------------------+
| name              | type                          |
+-------------------+-------------------------------+
| lr_id             | string                        |
| segment_info      | ARRAY<struct<                 |
|                   |   idlpSegmentName:string,     |
|                   |   idlpSegmentValue:string >   |
|                   |      >                        |
|                   |                               |
+-------------------+-------------------------------+

我在 Redshift(或任意 SQL 数据库)中创建了一张表,其中每一行的格式与上述 Hive 数据类型类似,但该列是以字符串形式存储的。

在把数据从 Redshift 插入 Hive 的过程中,如何进行类型转换(cast)?更具体地说,怎样才能把字符串转换为结构体数组(array<struct>)?

我的SQL表。

lr_id    |          segment_info
---------|------------------------------------------------------------
1        |      [{"idlpsegmentname":"axciom","idlpsegmentvalue":"200"},{"idlpsegmentname":"people","idlpsegmentvalue":"z"}]

到目前为止,还没有找到任何符合要求的 UDF。

hadoop
hive
amazon-redshift
Neil Nandi
Neil Nandi
发布于 2017-11-15
1 个回答
Neil Nandi
Neil Nandi
发布于 2017-11-16
已采纳
0 人赞同

总之,找到了解决方案。

package hive;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;
public class UAStructUDF extends GenericUDF {
private Object[] result;
@Override
public String getDisplayString(String[] arg0) {
    return "My display string";
public static void main(String... args) {
    UAStructUDF ua = new UAStructUDF();
    ua.parseUAString("");
@Override
public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {
    // Define the field names for the struct<> and their types
    ArrayList<String> structFieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
    // fill struct field names
    // segmentname
    structFieldNames.add("idlpsegmentname");
    structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    // segmentvalue
    structFieldNames.add("idlpsegmentvalue");
    structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    StructObjectInspector si = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames,
            structFieldObjectInspectors);
    return ObjectInspectorFactory.getStandardListObjectInspector(si);
    // return si;
@Override
public Object evaluate(DeferredObject[] args) throws HiveException {
    if (args == null || args.length < 1) {
        throw new HiveException("args is empty");
    if (args[0].get() == null) {
        throw new HiveException("args contains null instead of object");
    Object argObj = args[0].get();
    // get argument
    String argument = null;
    if (argObj instanceof Text) {
        argument = ((Text) argObj).toString();
    } else if (argObj instanceof String) {
        argument = (String) argObj;
    } else {
        throw new HiveException(
                "Argument is neither a Text nor String, it is a " + argObj.getClass().getCanonicalName());
    // parse UA string and return struct, which is just an array of objects:
    // Object[]
    return parseUAString(argument);
private Object parseUAString(String argument) {
    String test = "acxiom_cluster,03|aff_celeb_ent,Y";
    List<Object[]> ret = new ArrayList<Object[]>();
    for (String s : test.split("\\|")) {
        String arr[] = s.split(",");
        Object[] o = new Object[2];
        o[0] = new Text(arr[0]);
        o[1] = new Text(arr[1]);
        ret.add(o);