Commit 55cf9a15 55cf9a15232f013d3f3af763431d626454b602ce by zhanghao

commit

0 parents
1 #
2 .idea/*
3 logs/*
4 input/*
5 target/*
6 BrBigData.iml
7 src/test/*
8 *.png
9 ~*.xlsm
10 ajcore*.txt
11 dependency-reduced-pom.xml
12 README.md
...\ No newline at end of file ...\ No newline at end of file
1 <?xml version="1.0" encoding="UTF-8"?>
2
3 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 <modelVersion>4.0.0</modelVersion>
6
7 <groupId>com.bigdata.test</groupId>
8 <artifactId>BrBigDataTest</artifactId>
9 <version>1.0.0</version>
10 <packaging>jar</packaging>
11 <name>BrBigDataTest</name>
12 <url>http://maven.apache.org</url>
13
14 <properties>
15 <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16 <maven.compiler.source>1.8</maven.compiler.source>
17 <maven.compiler.target>1.8</maven.compiler.target>
18 <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
19 </properties>
20
21 <repositories>
22 <repository>
23 <id>cloudera</id>
24 <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
25 </repository>
26 </repositories>
27
28 <dependencies>
29 <dependency>
30 <groupId>junit</groupId>
31 <artifactId>junit</artifactId>
32 <version>4.11</version>
33 <scope>test</scope>
34 </dependency>
35 <dependency>
36 <groupId>org.apache.hadoop</groupId>
37 <artifactId>hadoop-client</artifactId>
38 <version>${hadoop.version}</version>
39 </dependency>
40 <dependency>
41 <groupId>com.alibaba</groupId>
42 <artifactId>fastjson</artifactId>
43 <version>1.2.76</version>
44 </dependency>
45 </dependencies>
46
47 <build>
48 <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
49 <plugins>
50 <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
51 <plugin>
52 <artifactId>maven-clean-plugin</artifactId>
53 <version>3.1.0</version>
54 </plugin>
55 <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
56 <plugin>
57 <artifactId>maven-resources-plugin</artifactId>
58 <version>3.0.2</version>
59 </plugin>
60 <plugin>
61 <artifactId>maven-compiler-plugin</artifactId>
62 <version>3.8.0</version>
63 </plugin>
64 <plugin>
65 <artifactId>maven-surefire-plugin</artifactId>
66 <version>2.22.1</version>
67 </plugin>
68 <plugin>
69 <artifactId>maven-jar-plugin</artifactId>
70 <version>3.0.2</version>
71 </plugin>
72 <plugin>
73 <artifactId>maven-install-plugin</artifactId>
74 <version>2.5.2</version>
75 </plugin>
76 <plugin>
77 <artifactId>maven-deploy-plugin</artifactId>
78 <version>2.8.2</version>
79 </plugin>
80 <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
81 <plugin>
82 <artifactId>maven-site-plugin</artifactId>
83 <version>3.7.1</version>
84 </plugin>
85 <plugin>
86 <artifactId>maven-project-info-reports-plugin</artifactId>
87 <version>3.0.0</version>
88 </plugin>
89 </plugins>
90 </pluginManagement>
91 </build>
92 </project>
-- Creates the testdb database and the qa_log table that receives the ETL job's
-- tab-separated output. The column order must match the order in which
-- ETLApp writes fields to each output row.
CREATE DATABASE IF NOT EXISTS testdb;
use testdb;
-- IF NOT EXISTS keeps the script re-runnable, consistent with the database statement above.
CREATE EXTERNAL TABLE IF NOT EXISTS qa_log(
    rest string,
    requestUri string,
    request string,
    response string,
    status string,
    method string,
    uid string,
    biz_data string,
    code string,
    trace string,
    -- timestamp is a Hive keyword; quoting it keeps the DDL valid on versions that reserve it.
    `timestamp` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
...\ No newline at end of file ...\ No newline at end of file
-- Loads the ETL job output under /input/etl on HDFS into qa_log.
-- OVERWRITE replaces whatever the table held before this load.
1 use testdb;
2 LOAD DATA INPATH 'hdfs://localhost:8020/input/etl' OVERWRITE INTO TABLE qa_log;
...\ No newline at end of file ...\ No newline at end of file
#!/bin/bash
# Runs the ETL MapReduce job, then loads its output into Hive.
#
# Stop at the first failing command (or unset variable) so that the Hive load
# never runs when the MapReduce step did not complete.
set -euo pipefail

# NOTE(review): pom.xml declares artifactId BrBigDataTest, so a plain Maven build
# would produce BrBigDataTest-1.0.0.jar -- confirm this jar name is the one deployed.
hadoop jar BrBigData-1.0.0.jar com.bigdata.test.ETLApp /input/data/log-2021-04-13.03.log /input/etl/
sleep 3
hive -f load_data.hql
...\ No newline at end of file ...\ No newline at end of file
-- Sanity-check counts over qa_log; each pair of queries below splits the rows
-- into those matching a condition and those not matching it.
1 use testdb;
-- Total number of rows.
2 select count(1) from qa_log;
-- Rows where all three of rest, status and code carry their success value, then the rest.
3 select count(1) from qa_log where rest='ok' and status='200' and code='0';
4 select count(1) from qa_log where rest!='ok' or status!='200' or code!='0';
5
6
-- Split by status column only.
7 select count(1) from qa_log where status='200';
8 select count(1) from qa_log where status!='200';
9
10
11
-- Split by code column only.
12 select count(1) from qa_log where code='0';
13 select count(1) from qa_log where code!='0';
...\ No newline at end of file ...\ No newline at end of file
1 package com.bigdata.test;
2
3 import com.alibaba.fastjson.JSONObject;
4 import org.apache.hadoop.conf.Configuration;
5 import org.apache.hadoop.fs.FileSystem;
6 import org.apache.hadoop.fs.Path;
7 import org.apache.hadoop.io.LongWritable;
8 import org.apache.hadoop.io.NullWritable;
9 import org.apache.hadoop.io.Text;
10 import org.apache.hadoop.mapreduce.Job;
11 import org.apache.hadoop.mapreduce.Mapper;
12 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
13 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
14
15 import java.io.IOException;
16
17 public class ETLApp {
18
19 public static void main(String[] args) throws Exception{
20 System.setProperty("HADOOP_USER_NAME","zhanghao");
21 Configuration configuration = new Configuration();
22 configuration.set("fs.defaultFS","hdfs://localhost:8020");
23
24 FileSystem fileSystem = FileSystem.get(configuration);
25 Path outputPath = new Path(args[1]);
26 if(fileSystem.exists(outputPath)) {
27 fileSystem.delete(outputPath,true);
28 }
29
30 Job job = Job.getInstance(configuration);
31 job.setJarByClass(ETLApp.class);
32 job.addArchiveToClassPath(new Path("/jar/fastjson-1.2.76.jar"));
33
34 job.setMapperClass(MyMapper.class);
35
36 job.setMapOutputKeyClass(NullWritable.class);
37 job.setMapOutputValueClass(Text.class);
38
39 FileInputFormat.setInputPaths(job, new Path(args[0]));
40 FileOutputFormat.setOutputPath(job, new Path(args[1]));
41
42 job.waitForCompletion(true);
43 }
44
45 static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
46
47 @Override
48 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
49 String timestamp = StringUtil.isNull;
50 String rest = StringUtil.isNull;
51 String trace = StringUtil.isNull;
52 String requestUri = StringUtil.isNull;
53 String status = StringUtil.isNull;
54 String request = StringUtil.isNull;
55 String response = StringUtil.isNull;
56 String method = StringUtil.isNull;
57 String uid = StringUtil.isNull;
58 String biz_data = StringUtil.isNull;
59 String code = StringUtil.isNull;
60
61 String log = value.toString();
62 if(StringUtil.isMessyCode(log)) {
63 rest = "messy";
64 } else {
65 JSONObject obj = JSONObject.parseObject(log);
66
67 timestamp = obj.getString("timestamp");
68 timestamp = StringUtil.format(timestamp);
69 rest = obj.getString("rest");
70 rest = StringUtil.format(rest);
71 trace = obj.getString("trace");
72 trace = StringUtil.format(trace);
73
74 String[] split_rest = rest.split("] \\[");
75 if(split_rest != null && split_rest.length == 11) {
76 rest = "ok";
77 requestUri = split_rest[2] != null ? split_rest[2].replace("requestUri:","") : StringUtil.isNull;
78 requestUri = StringUtil.format(requestUri);
79 status = split_rest[5] != null ? split_rest[5].replace("status:","") : StringUtil.isNull;
80 status = StringUtil.format(status);
81 request = split_rest[8] != null ? split_rest[8].replace("request:","") : StringUtil.isNull;
82 request = StringUtil.format(request);
83 if(StringUtil.isMessyCode(request)) {
84 request = "messy";
85 status = "messy";
86 }
87 response = split_rest[9] != null ? split_rest[9].replace("response:","") : StringUtil.isNull;
88 response = StringUtil.format(response);
89 if(request != null && !"".equals(request)) {
90 String[] requests = request.split("&");
91 if(requests != null && requests.length == 10) {
92 method = requests[1] != null ? requests[1].replace("method=","") : StringUtil.isNull;
93 method = StringUtil.format(method);
94 uid = requests[2] != null ? requests[2].replace("uid=","") : StringUtil.isNull;
95 uid = StringUtil.format(uid);
96 biz_data = requests[6] != null ? requests[6].replace("biz_data=","") : StringUtil.isNull;
97 biz_data = StringUtil.format(biz_data);
98 }
99 }
100 if(response != null && !"".equals(response) && JsonUtil.isJson(response)) {
101 code = StringUtil.format(JSONObject.parseObject(response).getString("code"));
102 }
103 }
104 }
105
106 StringBuilder builder = new StringBuilder();
107 builder.append(rest).append("\t");
108 builder.append(requestUri).append("\t");
109 builder.append(request).append("\t");
110 builder.append(response).append("\t");
111 builder.append(status).append("\t");
112 builder.append(method).append("\t");
113 builder.append(uid).append("\t");
114 builder.append(biz_data).append("\t");
115 builder.append(code).append("\t");
116 builder.append(trace).append("\t");
117 builder.append(timestamp);
118
119 context.write(NullWritable.get(), new Text(builder.toString()));
120 }
121 }
122 }
123
124
1 package com.bigdata.test;
2
3 import com.alibaba.fastjson.JSONObject;
4 import com.alibaba.fastjson.TypeReference;
5 import com.alibaba.fastjson.serializer.SerializerFeature;
6 import org.apache.commons.lang.StringUtils;
7
8 import java.util.List;
9
10 public class JsonUtil {
11
12 /**
13 * 将对象序列化为Json字符串
14 * 注意:obj String类型的值Null将会被转化成""
15 *
16 * @param obj 对象
17 * @return String Json字符串
18 */
19 public static final String toJson(Object obj) {
20 return JSONObject.toJSONString(obj, SerializerFeature.WriteNullStringAsEmpty);
21 }
22
23
24 /**
25 * 将对象序列化为Json字符串
26 *
27 * @param obj 对象
28 * @return String Json字符串
29 */
30 public static final String toJson(Object obj, SerializerFeature... features) {
31 return JSONObject.toJSONString(obj, features);
32 }
33
34
35 /**
36 * 将Json字符串反序列化为对象
37 *
38 * @param jsonString Json字符串
39 * @param clazz 需要转换到的对象类
40 * @return <T>对象
41 */
42 public static final <T> T toObj(String jsonString, Class<T> clazz) {
43 return JSONObject.parseObject(jsonString, clazz);
44 }
45
46 public static final <T> T toObj(String jsonString, TypeReference<T> typeReference) {
47 return JSONObject.parseObject(jsonString, typeReference);
48 }
49
50 /**
51 * 将Json字符串反序列化为数组对象
52 *
53 * @param jsonString Json字符串
54 * @param clazz 需要转换到的对象类(数组内的对象实体)
55 * @return List<T>数组对象
56 */
57 public static final <T> List<T> toList(String jsonString, Class<T> clazz) {
58 return JSONObject.parseArray(jsonString, clazz);
59 }
60
61 /**
62 * 将Map转化为对象
63 *
64 * @param map
65 * @param clazz 需要转换到的对象类
66 * @return <T>对象
67 */
68 public static final <T> T toObj(Object map, Class<T> clazz) {
69 return JSONObject.parseObject(JSONObject.toJSONString(map, SerializerFeature.WriteMapNullValue), clazz);
70 }
71
72 public static boolean isJson(String content) {
73 if(StringUtils.isEmpty(content)){
74 return false;
75 }
76 boolean isJsonObject = true;
77 boolean isJsonArray = true;
78 try {
79 JSONObject.parseObject(content);
80 } catch (Exception e) {
81 isJsonObject = false;
82 }
83 try {
84 JSONObject.parseArray(content);
85 } catch (Exception e) {
86 isJsonArray = false;
87 }
88 if(!isJsonObject && !isJsonArray){
89 return false;
90 }
91 return true;
92 }
93
94 }
1 package com.bigdata.test;
2
3 import java.util.regex.Matcher;
4 import java.util.regex.Pattern;
5
/**
 * String helpers for the log ETL: placeholder substitution for missing values
 * and a heuristic that detects garbled ("messy") text.
 */
public class StringUtil {
    /**
     * Former scratch variable of {@link #format(String)}. It is no longer read or
     * written by this class and is kept only so existing references still compile.
     */
    @Deprecated
    public static String temp = "";

    /** Placeholder used in place of a missing or empty value. */
    public static final String isNull = "N/A";

    /** Matches runs of whitespace, including tab, carriage return and line feed. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Matches a single Unicode punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /** Share of unrecognized characters above which text is considered garbled. */
    private static final double MESSY_THRESHOLD = 0.4;

    /**
     * Substitutes the placeholder for missing values.
     *
     * @param str value to normalize, may be {@code null}
     * @return {@code str} unchanged, or {@link #isNull} when it is {@code null} or empty
     */
    public static String format(String str) {
        // Uses no shared state, so concurrent callers cannot see each other's values.
        return (str == null || str.isEmpty()) ? isNull : str;
    }

    /**
     * Reports whether a character belongs to one of the CJK ideograph, CJK
     * punctuation, general punctuation or half/full-width form Unicode blocks.
     *
     * @param c character to classify
     * @return {@code true} if the character's block is one of those listed
     */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristically detects garbled text.
     *
     * <p>Whitespace and punctuation are removed first; the text is reported as messy
     * when more than 40% of the remaining characters are neither letters/digits nor
     * characters accepted by {@link #isChinese(char)}.
     *
     * @param strName text to inspect, may be {@code null}
     * @return {@code true} if the text looks garbled; {@code false} otherwise, including
     *         for {@code null} and for text with nothing left after stripping
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }
        // Only whitespace is stripped here; letters such as 't', 'r' and 'n' are kept.
        String withoutWhitespace = WHITESPACE.matcher(strName).replaceAll("");
        String stripped = PUNCTUATION.matcher(withoutWhitespace).replaceAll("");
        char[] ch = stripped.trim().toCharArray();
        if (ch.length == 0) {
            // Nothing to judge; avoids a 0/0 ratio.
            return false;
        }

        int unrecognized = 0;
        for (char c : ch) {
            if (!Character.isLetterOrDigit(c) && !isChinese(c)) {
                unrecognized++;
            }
        }

        // Kept as a float ratio compared against the double threshold so that
        // boundary results match the previous implementation.
        float ratio = (float) unrecognized / (float) ch.length;
        return ratio > MESSY_THRESHOLD;
    }
}
1 log4j.rootLogger=debug,rootFile,console
2 log4j.logger.org.apache.http=OFF
3 # Log file appender
4 log4j.appender.rootFile=org.apache.log4j.RollingFileAppender
5 log4j.appender.rootFile.File=${user.dir}/logs/debug.log
6 log4j.appender.rootFile.Encoding=UTF8
7 log4j.appender.rootFile.MaxFileSize=50000KB
8 log4j.appender.rootFile.MaxBackupIndex=1000
9 log4j.appender.rootFile.Threshold=TRACE
10 log4j.appender.rootFile.layout=org.apache.log4j.PatternLayout
11 log4j.appender.rootFile.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n
12 # Console appender
13 log4j.appender.console=org.apache.log4j.ConsoleAppender
14 log4j.appender.console.Threshold=TRACE
15 log4j.appender.console.ImmediateFlush=true
16 log4j.appender.console.Target=System.out
17 log4j.appender.console.layout=org.apache.log4j.PatternLayout
18 log4j.appender.console.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n
...\ No newline at end of file ...\ No newline at end of file