hive-ip地理信息查询

HIVE UDF IP查询

利用 hive-udf 自定义 IP 查询函数

借助 ipip 提供的 ipdb

https://www.ipip.net/product/ip.html#ipv4city
将下载得到的 .ipdb 文件放在项目的 resources 目录下(下面的代码按文件名 ipipfree.ipdb 从 classpath 根路径加载,文件名需与之保持一致)

代码如下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import net.ipip.ipdb.City;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

import java.io.IOException;
import java.io.InputStream;

/**
 * Hive generic UDF that looks up an IP address in a bundled ipip.net IPDB
 * database and returns one field of the result (the class is named for the
 * city lookup).
 *
 * <p>The database is read from the classpath resource {@code /ipipfree.ipdb}.
 * It is loaded at most once per JVM and shared by all instances of this UDF.
 *
 * @author xxzuo
 * @email 1293378490@qq.com
 */
public class IpLocationCity extends GenericUDF {
    /** Classpath location of the IPDB file shipped with the UDF jar. */
    private static final String IPDB_RESOURCE = "/ipipfree.ipdb";

    /** Language key passed to the IPDB lookup. */
    private static final String LANGUAGE = "CN";

    /**
     * Position of the returned field inside the lookup result.
     * NOTE(review): presumably the city name for this database layout; confirm
     * against the field list of the IPDB file actually bundled.
     */
    private static final int CITY_FIELD_INDEX = 2;

    /** JVM-wide database handle; guarded by the class lock in {@link #loadDatabase()}. */
    private static City sharedDb;

    /** Database handle used by this instance; assigned in {@link #initialize}. */
    private transient City db;

    /** Inspector used to read the single string argument. */
    private transient StringObjectInspector ipInspector;

    /**
     * Returns the shared database, reading it from the classpath on first use.
     *
     * @return the loaded database, never {@code null}
     * @throws IOException if the resource is missing or cannot be parsed
     */
    private static synchronized City loadDatabase() throws IOException {
        if (sharedDb == null) {
            try (InputStream in = IpLocationCity.class.getResourceAsStream(IPDB_RESOURCE)) {
                if (in == null) {
                    throw new IOException("IPDB resource not found on classpath: " + IPDB_RESOURCE);
                }
                sharedDb = new City(in);
            }
        }
        return sharedDb;
    }

    /**
     * Validates the argument list and loads the IP database.
     *
     * @param arguments the ObjectInspectors for the arguments; exactly one string
     *                  inspector is expected
     * @return the ObjectInspector for the return value (a Java {@code String})
     * @throws UDFArgumentException if the argument count or type is wrong, or if the
     *                              IP database cannot be loaded
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        // Check the length before touching arguments[0]; a zero-argument call must
        // produce a Hive argument error, not an ArrayIndexOutOfBoundsException.
        if (arguments == null || arguments.length != 1) {
            throw new UDFArgumentLengthException(
                    "The function 'IpLocationCity' accepts exactly one argument.");
        }
        ObjectInspector arg = arguments[0];
        if (!(arg instanceof StringObjectInspector)) {
            throw new UDFArgumentException(
                    "The function 'IpLocationCity' expects a string argument but got "
                            + arg.getTypeName());
        }
        try {
            this.db = loadDatabase();
        } catch (IOException e) {
            // Fail fast: without the database every lookup would silently yield "".
            UDFArgumentException failure = new UDFArgumentException(
                    "Failed to load IP database " + IPDB_RESOURCE + ": " + e.getMessage());
            failure.initCause(e);
            throw failure;
        }
        this.ipInspector = (StringObjectInspector) arg;
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    /**
     * Looks up the IP address held in the single argument.
     *
     * @param arguments the deferred argument values; {@code arguments[0]} holds the IP string
     * @return the looked-up field; {@code null} when the input is {@code null}; an empty
     *         string when the lookup fails or the result has too few fields
     * @throws HiveException if the argument value cannot be retrieved
     */
    @Override
    public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
        String ip = ipInspector.getPrimitiveJavaObject(arguments[0].get());
        if (ip == null) {
            return null;
        }
        try {
            String[] fields = db.find(ip, LANGUAGE);
            if (fields != null && fields.length > CITY_FIELD_INDEX) {
                return fields[CITY_FIELD_INDEX];
            }
        } catch (Exception ignored) {
            // Deliberate best-effort: a malformed or unknown address in one row must
            // not abort the whole query, so it maps to the empty string instead.
        }
        return "";
    }

    /**
     * Builds the string shown for this expression in EXPLAIN output.
     *
     * @param children the display strings of the argument expressions
     * @return the function name followed by its arguments in parentheses
     */
    @Override
    public String getDisplayString(String[] children) {
        String args = (children == null) ? "" : String.join(", ", children);
        return "IpLocationCity(" + args + ")";
    }
}

本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!