0
点赞
收藏
分享

微信扫一扫

Hive实现返回MAP的UDF


如果只是返回String，那么直接继承UDF即可；如果想要返回MAP/LIST/STRUCT，则需要继承GenericUDF。

如下代码示例将URL中的参数解析成一个MAP并返回：

Java



import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; public class UrlParamsToMap extends GenericUDF { private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>(); private StringObjectInspector urlOI; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentException("UrlParamsToMap param must be 1 argu."); } urlOI = (StringObjectInspector) arguments[0]; return ObjectInspectorFactory.getStandardMapObjectInspector( PrimitiveObjectInspectorFactory.writableStringObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector); } @Override public Object evaluate(DeferredObject[] deferredObjects) throws HiveException { Object urlObj = deferredObjects[0].get(); Text url = (Text) urlOI.getPrimitiveWritableObject(urlObj); getParamsMap(url.toString(), sortMap); return sortMap; } public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) { Map<Text, Text> defaultMap = new LinkedHashMap<Text, Text>(); if (StringUtils.isBlank(url)) { return defaultMap; } String[] urlSplits = url.split("\\?"); if (null == urlSplits || urlSplits.length != 2) { return defaultMap; } String urlParamStr = urlSplits[1]; if (StringUtils.isBlank(urlParamStr)) { return defaultMap; } String[] paramSplits = urlParamStr.split("&"); if (null == paramSplits || paramSplits.length == 0) { return 
defaultMap; } for (String kvStr : paramSplits) { if (StringUtils.isBlank(kvStr)) { continue; } String[] kvs = kvStr.split("="); if (null != kvs && kvs.length == 2) { if (StringUtils.isNotBlank(kvs[0]) && StringUtils.isNotBlank(kvs[1])) { sortMap.put(new Text(kvs[0]), new Text(kvs[1])); } } } return sortMap; } @Override public String getDisplayString(String[] strings) { return "map(" + strings[0] + ")"; } }


import java . util . LinkedHashMap ;
import java . util . Map ;

import org . apache . commons . lang . StringUtils ;
import org . apache . hadoop . hive . ql . exec . UDFArgumentException ;
import org . apache . hadoop . hive . ql . metadata . HiveException ;
import org . apache . hadoop . hive . ql . udf . generic . GenericUDF ;
import org . apache . hadoop . hive . serde2 . objectinspector . ObjectInspector ;
import org . apache . hadoop . hive . serde2 . objectinspector . ObjectInspectorFactory ;
import org . apache . hadoop . hive . serde2 . objectinspector . primitive . PrimitiveObjectInspectorFactory ;
import org . apache . hadoop . hive . serde2 . objectinspector . primitive . StringObjectInspector ;
import org . apache . hadoop . io . Text ;

public class UrlParamsToMap extends GenericUDF {
private final Map < Text , Text > sortMap = new LinkedHashMap < Text , Text > ( ) ;
private StringObjectInspector urlOI ;

/**
 * Validates the argument list and declares the return type.
 *
 * @param arguments inspectors of the call arguments; exactly one string inspector is required
 * @return a standard map inspector whose keys and values are writable strings
 * @throws UDFArgumentException if the argument count is not 1 or the argument is not a string
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
        throw new UDFArgumentException("UrlParamsToMap param must be 1 argu.");
    }

    // Reject non-string arguments explicitly instead of failing with a ClassCastException.
    if (!(arguments[0] instanceof StringObjectInspector)) {
        throw new UDFArgumentException("UrlParamsToMap param must be a string.");
    }
    urlOI = (StringObjectInspector) arguments[0];

    return ObjectInspectorFactory.getStandardMapObjectInspector(
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector);
}

/**
 * Parses the URL held by the first argument into the reusable {@code sortMap} field.
 *
 * @param deferredObjects call arguments; only index 0 is read
 * @return the map of parsed parameters (empty when nothing could be parsed),
 *         or {@code null} when the input value is null
 * @throws HiveException if the deferred argument cannot be obtained
 */
@Override
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
    // sortMap is a field shared between calls; without this, entries parsed for
    // an earlier input would still be present in the result for this one.
    sortMap.clear();

    Object urlObj = deferredObjects[0].get();
    Text url = urlObj == null ? null : urlOI.getPrimitiveWritableObject(urlObj);
    if (url == null) {
        // Null in, null out, rather than a NullPointerException on url.toString().
        return null;
    }

    getParamsMap(url.toString(), sortMap);
    return sortMap;
}

/**
 * Adds every {@code key=value} pair found after the first {@code '?'} of {@code url}
 * to {@code sortMap}. Pairs with a blank key or a blank value are skipped; a value may
 * itself contain {@code '='}.
 *
 * @param url     the URL to parse
 * @param sortMap the map that receives the parsed pairs
 * @return {@code sortMap} once the parameter list has been walked, or a new empty map
 *         when the URL is blank or has no usable query string
 */
public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) {
    Map<Text, Text> defaultMap = new LinkedHashMap<Text, Text>();
    if (StringUtils.isBlank(url)) {
        return defaultMap;
    }

    // Everything after the first '?' is the query; a later '?' belongs to the query itself,
    // so it must not cause the whole URL to be rejected.
    int queryStart = url.indexOf('?');
    if (queryStart < 0) {
        return defaultMap;
    }

    String urlParamStr = url.substring(queryStart + 1);
    if (StringUtils.isBlank(urlParamStr)) {
        return defaultMap;
    }

    String[] paramSplits = urlParamStr.split("&");
    if (paramSplits.length == 0) {
        // Happens when the query consists only of '&' separators.
        return defaultMap;
    }

    for (String kvStr : paramSplits) {
        if (StringUtils.isBlank(kvStr)) {
            continue;
        }

        // Limit 2: split only on the first '=' so a value containing '=' is kept
        // instead of the whole pair being dropped.
        String[] kvs = kvStr.split("=", 2);
        if (kvs.length == 2 && StringUtils.isNotBlank(kvs[0]) && StringUtils.isNotBlank(kvs[1])) {
            sortMap.put(new Text(kvs[0]), new Text(kvs[1]));
        }
    }
    return sortMap;
}

/**
 * Returns the display string for a call: the first child expression wrapped in {@code map(...)}.
 *
 * @param strings display strings of the call arguments; index 0 is used
 * @return {@code "map(" + strings[0] + ")"}
 */
@Override
public String getDisplayString(String[] strings) {
    final String firstChild = strings[0];
    StringBuilder display = new StringBuilder("map(");
    display.append(firstChild);
    display.append(")");
    return display.toString();
}



 


举报

相关推荐

0 条评论