Commit 55cf9a15232f013d3f3af763431d626454b602ce by zhanghao

#
.idea/*
logs/*
input/*
target/*
BrBigData.iml
src/test/*
*.png
~*.xlsm
ajcore*.txt
dependency-reduced-pom.xml
README.md
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.bigdata.test</groupId>
  <artifactId>BrBigDataTest</artifactId>
  <version>1.0.0</version>
  <packaging>jar</packaging>
  <name>BrBigDataTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
  </properties>

  <!-- CDH artifacts such as hadoop-client 2.6.0-cdh5.15.1 are only published to Cloudera's repository -->
  <repositories>
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.76</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugin versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
CREATE DATABASE IF NOT EXISTS testdb;
use testdb;
-- Column order matches the tab-separated record emitted by ETLApp's mapper.
-- `timestamp` is backquoted to avoid clashing with Hive's TIMESTAMP keyword.
CREATE EXTERNAL TABLE qa_log(
  rest string,
  requestUri string,
  request string,
  response string,
  status string,
  method string,
  uid string,
  biz_data string,
  code string,
  trace string,
  `timestamp` string
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
-- load_data.hql (invoked by the shell script below via `hive -f load_data.hql`)
use testdb;
-- LOAD DATA INPATH moves (rather than copies) the ETL output from /input/etl into qa_log's warehouse directory.
LOAD DATA INPATH 'hdfs://localhost:8020/input/etl' OVERWRITE INTO TABLE qa_log;
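-- Quick sanity check after the load (illustrative):
-- SELECT * FROM qa_log LIMIT 10;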
#!/bin/bash
# Run the MapReduce ETL job, then load the cleaned output into Hive.
# The jar name follows the pom's artifactId/version (BrBigDataTest-1.0.0).
hadoop jar BrBigDataTest-1.0.0.jar com.bigdata.test.ETLApp /input/data/log-2021-04-13.03.log /input/etl/
sleep 3
hive -f load_data.hql
use testdb;
-- Total rows loaded
select count(1) from qa_log;
-- Fully clean records: parsed ok, HTTP 200, business code 0
select count(1) from qa_log where rest='ok' and status='200' and code='0';
-- Records failing any of the three checks
select count(1) from qa_log where rest!='ok' or status!='200' or code!='0';
-- Breakdown by HTTP status
select count(1) from qa_log where status='200';
select count(1) from qa_log where status!='200';
-- Breakdown by business code
select count(1) from qa_log where code='0';
select count(1) from qa_log where code!='0';
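-- One-pass version of the same breakdown (sketch; equivalent to the counts above):
select count(1) as total,
       sum(case when rest='ok' and status='200' and code='0' then 1 else 0 end) as clean_cnt,
       sum(case when rest!='ok' or status!='200' or code!='0' then 1 else 0 end) as dirty_cnt
from qa_log;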
package com.bigdata.test;

import com.alibaba.fastjson.JSONObject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * ETL driver: reads raw QA log lines from args[0], cleans them in MyMapper and writes
 * tab-separated records to args[1], ready to be loaded into the Hive table qa_log.
 */
public class ETLApp {

    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "zhanghao");
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://localhost:8020");

        // The job fails on submit if the output directory already exists, so clear it first.
        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(ETLApp.class);
        // fastjson must already be on HDFS (e.g. hdfs dfs -put fastjson-1.2.76.jar /jar/)
        // so the task JVMs can load it.
        job.addArchiveToClassPath(new Path("/jar/fastjson-1.2.76.jar"));
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
        // The default single (identity) reducer funnels all records into one output file.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Each input line is expected to be a JSON object with "timestamp", "rest" and "trace" fields.
     * "rest" carries 11 "] ["-separated segments (requestUri at index 2, status at 5, request at 8,
     * response at 9), and "request" carries 10 "&"-separated parameters (method, uid, biz_data).
     * Fields that cannot be extracted default to "N/A"; garbled text is flagged as "messy".
     */
    static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String timestamp = StringUtil.isNull;
            String rest = StringUtil.isNull;
            String trace = StringUtil.isNull;
            String requestUri = StringUtil.isNull;
            String status = StringUtil.isNull;
            String request = StringUtil.isNull;
            String response = StringUtil.isNull;
            String method = StringUtil.isNull;
            String uid = StringUtil.isNull;
            String biz_data = StringUtil.isNull;
            String code = StringUtil.isNull;

            String log = value.toString();
            if (StringUtil.isMessyCode(log)) {
                rest = "messy";
            } else if (!JsonUtil.isJson(log)) {
                // Flag unparsable lines instead of letting parseObject throw and fail the task.
                rest = "invalid";
            } else {
                JSONObject obj = JSONObject.parseObject(log);
                timestamp = StringUtil.format(obj.getString("timestamp"));
                rest = StringUtil.format(obj.getString("rest"));
                trace = StringUtil.format(obj.getString("trace"));

                String[] split_rest = rest.split("] \\[");
                if (split_rest.length == 11) {
                    rest = "ok";
                    requestUri = StringUtil.format(split_rest[2].replace("requestUri:", ""));
                    status = StringUtil.format(split_rest[5].replace("status:", ""));
                    request = StringUtil.format(split_rest[8].replace("request:", ""));
                    if (StringUtil.isMessyCode(request)) {
                        request = "messy";
                        status = "messy";
                    }
                    response = StringUtil.format(split_rest[9].replace("response:", ""));

                    String[] requests = request.split("&");
                    if (requests.length == 10) {
                        method = StringUtil.format(requests[1].replace("method=", ""));
                        uid = StringUtil.format(requests[2].replace("uid=", ""));
                        biz_data = StringUtil.format(requests[6].replace("biz_data=", ""));
                    }
                    if (JsonUtil.isJson(response)) {
                        code = StringUtil.format(JSONObject.parseObject(response).getString("code"));
                    }
                }
            }

            // Field order must match the column order of the Hive table qa_log.
            String record = String.join("\t", rest, requestUri, request, response, status,
                    method, uid, biz_data, code, trace, timestamp);
            context.write(NullWritable.get(), new Text(record));
        }
    }
}
package com.bigdata.test;

import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.TypeReference;
import com.alibaba.fastjson.serializer.SerializerFeature;
import org.apache.commons.lang.StringUtils;

import java.util.List;

public class JsonUtil {

    /**
     * Serializes an object to a JSON string.
     * Note: String fields that are null are written as "".
     *
     * @param obj the object to serialize
     * @return the JSON string
     */
    public static final String toJson(Object obj) {
        return JSONObject.toJSONString(obj, SerializerFeature.WriteNullStringAsEmpty);
    }

    /**
     * Serializes an object to a JSON string with the given serializer features.
     *
     * @param obj      the object to serialize
     * @param features fastjson serializer features to apply
     * @return the JSON string
     */
    public static final String toJson(Object obj, SerializerFeature... features) {
        return JSONObject.toJSONString(obj, features);
    }

    /**
     * Deserializes a JSON string into an object.
     *
     * @param jsonString the JSON string
     * @param clazz      the target class
     * @return the deserialized <T> instance
     */
    public static final <T> T toObj(String jsonString, Class<T> clazz) {
        return JSONObject.parseObject(jsonString, clazz);
    }

    public static final <T> T toObj(String jsonString, TypeReference<T> typeReference) {
        return JSONObject.parseObject(jsonString, typeReference);
    }

    /**
     * Deserializes a JSON string into a list.
     *
     * @param jsonString the JSON string
     * @param clazz      the element class of the list
     * @return the deserialized List<T>
     */
    public static final <T> List<T> toList(String jsonString, Class<T> clazz) {
        return JSONObject.parseArray(jsonString, clazz);
    }

    /**
     * Converts a Map to an object.
     *
     * @param map   the source map
     * @param clazz the target class
     * @return the converted <T> instance
     */
    public static final <T> T toObj(Object map, Class<T> clazz) {
        return JSONObject.parseObject(JSONObject.toJSONString(map, SerializerFeature.WriteMapNullValue), clazz);
    }

    /**
     * Returns true if the content parses as either a JSON object or a JSON array.
     */
    public static boolean isJson(String content) {
        if (StringUtils.isEmpty(content)) {
            return false;
        }
        boolean isJsonObject = true;
        boolean isJsonArray = true;
        try {
            JSONObject.parseObject(content);
        } catch (Exception e) {
            isJsonObject = false;
        }
        try {
            JSONObject.parseArray(content);
        } catch (Exception e) {
            isJsonArray = false;
        }
        return isJsonObject || isJsonArray;
    }
}
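// Illustrative usage (behavior as implemented above, with fastjson 1.2.76):
//   JsonUtil.isJson("{\"code\":\"0\"}")  -> true
//   JsonUtil.isJson("not json")          -> false
//   JsonUtil.isJson("")                  -> false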
package com.bigdata.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StringUtil {

    public static final String isNull = "N/A";

    /** Maps null or empty strings to the "N/A" placeholder. */
    public static String format(String str) {
        // A local variable (rather than shared static state) keeps this thread-safe.
        String temp = str != null ? str : isNull;
        return !"".equals(temp) ? temp : isNull;
    }

    /** Returns true for CJK characters and common CJK punctuation. */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristic check for garbled (mojibake) text: after stripping whitespace and
     * punctuation, a string is considered messy if more than 40% of the remaining
     * characters are neither letters/digits nor CJK characters.
     */
    public static boolean isMessyCode(String strName) {
        Pattern p = Pattern.compile("\\s*|\t*|\r*|\n*");
        Matcher m = p.matcher(strName);
        String after = m.replaceAll("");
        String temp = after.replaceAll("\\p{P}", "");
        char[] ch = temp.trim().toCharArray();
        float chLength = ch.length;
        float count = 0;
        for (char c : ch) {
            if (!Character.isLetterOrDigit(c) && !isChinese(c)) {
                count = count + 1;
            }
        }
        // Note: if nothing remains, count / chLength is NaN and the comparison is false.
        return count / chLength > 0.4;
    }
}
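// Illustrative behavior:
//   StringUtil.format(null)               -> "N/A"
//   StringUtil.format("")                 -> "N/A"
//   StringUtil.format("hello")            -> "hello"
//   StringUtil.isMessyCode("正常 text 123") -> false (letters, digits and CJK only)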
log4j.rootLogger=debug,rootFile,console
log4j.logger.org.apache.http=OFF
# Log file appender
log4j.appender.rootFile=org.apache.log4j.RollingFileAppender
log4j.appender.rootFile.File=${user.dir}/logs/debug.log
log4j.appender.rootFile.Encoding=UTF8
log4j.appender.rootFile.MaxFileSize=50000KB
log4j.appender.rootFile.MaxBackupIndex=1000
log4j.appender.rootFile.Threshold=TRACE
log4j.appender.rootFile.layout=org.apache.log4j.PatternLayout
log4j.appender.rootFile.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n
# Console appender
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.Threshold=TRACE
log4j.appender.console.ImmediateFlush=true
log4j.appender.console.Target=System.out
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n