commit
0 parents
Showing
10 changed files
with
425 additions
and
0 deletions
.gitignore
0 → 100644
pom.xml
0 → 100644
<?xml version="1.0" encoding="UTF-8"?>

<!-- Build descriptor for the BrBigDataTest log ETL job (Hadoop MapReduce, CDH 5.15.1). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.bigdata.test</groupId>
  <artifactId>BrBigDataTest</artifactId>
  <version>1.0.0</version>
  <packaging>jar</packaging>
  <name>BrBigDataTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
  </properties>

  <repositories>
    <!-- CDH-flavoured Hadoop artifacts are published here, not on Maven Central. -->
    <repository>
      <id>cloudera</id>
      <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    </repository>
  </repositories>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
    <!-- JsonUtil imports org.apache.commons.lang.StringUtils directly, so the library is
         declared here instead of relying on it arriving transitively via hadoop-client.
         NOTE(review): 2.6 is assumed to match the version hadoop-client already resolves. -->
    <dependency>
      <groupId>commons-lang</groupId>
      <artifactId>commons-lang</artifactId>
      <version>2.6</version>
    </dependency>
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>fastjson</artifactId>
      <version>1.2.76</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
</project>
shell/create_table.hql
0 → 100755
-- Creates the database and the tab-delimited table that receives the ETL output.
-- Column order mirrors the field order written by ETLApp.MyMapper:
-- rest, requestUri, request, response, status, method, uid, biz_data, code, trace, timestamp.
-- Both statements are guarded with IF NOT EXISTS so the script can be re-run safely.
CREATE DATABASE IF NOT EXISTS testdb;
USE testdb;

-- `timestamp` is also a Hive type keyword, so the column name is backtick-quoted.
-- Every column is STRING because the mapper writes plain text (with 'N/A' for missing values).
CREATE EXTERNAL TABLE IF NOT EXISTS qa_log (
    rest        STRING,
    requestUri  STRING,
    request     STRING,
    response    STRING,
    status      STRING,
    method      STRING,
    uid         STRING,
    biz_data    STRING,
    code        STRING,
    trace       STRING,
    `timestamp` STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
| ... | \ No newline at end of file | ... | \ No newline at end of file |
shell/load_data.hql
0 → 100755
shell/run.sh
0 → 100755
shell/select.hql
0 → 100755
-- Sanity-check counts over the loaded qa_log rows.
-- Each statement returns a single count; statement order is significant for readers of the output.
USE testdb;

-- 1. All rows.
SELECT COUNT(*)
FROM qa_log;

-- 2. Rows that parsed cleanly, returned HTTP 200 and carry business code 0.
SELECT COUNT(*)
FROM qa_log
WHERE rest = 'ok'
  AND status = '200'
  AND code = '0';

-- 3. Rows failing at least one of the three checks above.
SELECT COUNT(*)
FROM qa_log
WHERE rest <> 'ok'
   OR status <> '200'
   OR code <> '0';

-- 4. Rows with status 200.
SELECT COUNT(*)
FROM qa_log
WHERE status = '200';

-- 5. Rows with any other status.
SELECT COUNT(*)
FROM qa_log
WHERE status <> '200';

-- 6. Rows with business code 0.
SELECT COUNT(*)
FROM qa_log
WHERE code = '0';

-- 7. Rows with any other business code.
SELECT COUNT(*)
FROM qa_log
WHERE code <> '0';
| ... | \ No newline at end of file | ... | \ No newline at end of file |
src/main/java/com/bigdata/test/ETLApp.java
0 → 100755
| 1 | package com.bigdata.test; | ||
| 2 | |||
| 3 | import com.alibaba.fastjson.JSONObject; | ||
| 4 | import org.apache.hadoop.conf.Configuration; | ||
| 5 | import org.apache.hadoop.fs.FileSystem; | ||
| 6 | import org.apache.hadoop.fs.Path; | ||
| 7 | import org.apache.hadoop.io.LongWritable; | ||
| 8 | import org.apache.hadoop.io.NullWritable; | ||
| 9 | import org.apache.hadoop.io.Text; | ||
| 10 | import org.apache.hadoop.mapreduce.Job; | ||
| 11 | import org.apache.hadoop.mapreduce.Mapper; | ||
| 12 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | ||
| 13 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | ||
| 14 | |||
| 15 | import java.io.IOException; | ||
| 16 | |||
| 17 | public class ETLApp { | ||
| 18 | |||
| 19 | public static void main(String[] args) throws Exception{ | ||
| 20 | System.setProperty("HADOOP_USER_NAME","zhanghao"); | ||
| 21 | Configuration configuration = new Configuration(); | ||
| 22 | configuration.set("fs.defaultFS","hdfs://localhost:8020"); | ||
| 23 | |||
| 24 | FileSystem fileSystem = FileSystem.get(configuration); | ||
| 25 | Path outputPath = new Path(args[1]); | ||
| 26 | if(fileSystem.exists(outputPath)) { | ||
| 27 | fileSystem.delete(outputPath,true); | ||
| 28 | } | ||
| 29 | |||
| 30 | Job job = Job.getInstance(configuration); | ||
| 31 | job.setJarByClass(ETLApp.class); | ||
| 32 | job.addArchiveToClassPath(new Path("/jar/fastjson-1.2.76.jar")); | ||
| 33 | |||
| 34 | job.setMapperClass(MyMapper.class); | ||
| 35 | |||
| 36 | job.setMapOutputKeyClass(NullWritable.class); | ||
| 37 | job.setMapOutputValueClass(Text.class); | ||
| 38 | |||
| 39 | FileInputFormat.setInputPaths(job, new Path(args[0])); | ||
| 40 | FileOutputFormat.setOutputPath(job, new Path(args[1])); | ||
| 41 | |||
| 42 | job.waitForCompletion(true); | ||
| 43 | } | ||
| 44 | |||
| 45 | static class MyMapper extends Mapper<LongWritable, Text, NullWritable, Text> { | ||
| 46 | |||
| 47 | @Override | ||
| 48 | protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { | ||
| 49 | String timestamp = StringUtil.isNull; | ||
| 50 | String rest = StringUtil.isNull; | ||
| 51 | String trace = StringUtil.isNull; | ||
| 52 | String requestUri = StringUtil.isNull; | ||
| 53 | String status = StringUtil.isNull; | ||
| 54 | String request = StringUtil.isNull; | ||
| 55 | String response = StringUtil.isNull; | ||
| 56 | String method = StringUtil.isNull; | ||
| 57 | String uid = StringUtil.isNull; | ||
| 58 | String biz_data = StringUtil.isNull; | ||
| 59 | String code = StringUtil.isNull; | ||
| 60 | |||
| 61 | String log = value.toString(); | ||
| 62 | if(StringUtil.isMessyCode(log)) { | ||
| 63 | rest = "messy"; | ||
| 64 | } else { | ||
| 65 | JSONObject obj = JSONObject.parseObject(log); | ||
| 66 | |||
| 67 | timestamp = obj.getString("timestamp"); | ||
| 68 | timestamp = StringUtil.format(timestamp); | ||
| 69 | rest = obj.getString("rest"); | ||
| 70 | rest = StringUtil.format(rest); | ||
| 71 | trace = obj.getString("trace"); | ||
| 72 | trace = StringUtil.format(trace); | ||
| 73 | |||
| 74 | String[] split_rest = rest.split("] \\["); | ||
| 75 | if(split_rest != null && split_rest.length == 11) { | ||
| 76 | rest = "ok"; | ||
| 77 | requestUri = split_rest[2] != null ? split_rest[2].replace("requestUri:","") : StringUtil.isNull; | ||
| 78 | requestUri = StringUtil.format(requestUri); | ||
| 79 | status = split_rest[5] != null ? split_rest[5].replace("status:","") : StringUtil.isNull; | ||
| 80 | status = StringUtil.format(status); | ||
| 81 | request = split_rest[8] != null ? split_rest[8].replace("request:","") : StringUtil.isNull; | ||
| 82 | request = StringUtil.format(request); | ||
| 83 | if(StringUtil.isMessyCode(request)) { | ||
| 84 | request = "messy"; | ||
| 85 | status = "messy"; | ||
| 86 | } | ||
| 87 | response = split_rest[9] != null ? split_rest[9].replace("response:","") : StringUtil.isNull; | ||
| 88 | response = StringUtil.format(response); | ||
| 89 | if(request != null && !"".equals(request)) { | ||
| 90 | String[] requests = request.split("&"); | ||
| 91 | if(requests != null && requests.length == 10) { | ||
| 92 | method = requests[1] != null ? requests[1].replace("method=","") : StringUtil.isNull; | ||
| 93 | method = StringUtil.format(method); | ||
| 94 | uid = requests[2] != null ? requests[2].replace("uid=","") : StringUtil.isNull; | ||
| 95 | uid = StringUtil.format(uid); | ||
| 96 | biz_data = requests[6] != null ? requests[6].replace("biz_data=","") : StringUtil.isNull; | ||
| 97 | biz_data = StringUtil.format(biz_data); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | if(response != null && !"".equals(response) && JsonUtil.isJson(response)) { | ||
| 101 | code = StringUtil.format(JSONObject.parseObject(response).getString("code")); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | } | ||
| 105 | |||
| 106 | StringBuilder builder = new StringBuilder(); | ||
| 107 | builder.append(rest).append("\t"); | ||
| 108 | builder.append(requestUri).append("\t"); | ||
| 109 | builder.append(request).append("\t"); | ||
| 110 | builder.append(response).append("\t"); | ||
| 111 | builder.append(status).append("\t"); | ||
| 112 | builder.append(method).append("\t"); | ||
| 113 | builder.append(uid).append("\t"); | ||
| 114 | builder.append(biz_data).append("\t"); | ||
| 115 | builder.append(code).append("\t"); | ||
| 116 | builder.append(trace).append("\t"); | ||
| 117 | builder.append(timestamp); | ||
| 118 | |||
| 119 | context.write(NullWritable.get(), new Text(builder.toString())); | ||
| 120 | } | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 |
src/main/java/com/bigdata/test/JsonUtil.java
0 → 100644
| 1 | package com.bigdata.test; | ||
| 2 | |||
| 3 | import com.alibaba.fastjson.JSONObject; | ||
| 4 | import com.alibaba.fastjson.TypeReference; | ||
| 5 | import com.alibaba.fastjson.serializer.SerializerFeature; | ||
| 6 | import org.apache.commons.lang.StringUtils; | ||
| 7 | |||
| 8 | import java.util.List; | ||
| 9 | |||
| 10 | public class JsonUtil { | ||
| 11 | |||
| 12 | /** | ||
| 13 | * 将对象序列化为Json字符串 | ||
| 14 | * 注意:obj String类型的值Null将会被转化成"" | ||
| 15 | * | ||
| 16 | * @param obj 对象 | ||
| 17 | * @return String Json字符串 | ||
| 18 | */ | ||
| 19 | public static final String toJson(Object obj) { | ||
| 20 | return JSONObject.toJSONString(obj, SerializerFeature.WriteNullStringAsEmpty); | ||
| 21 | } | ||
| 22 | |||
| 23 | |||
| 24 | /** | ||
| 25 | * 将对象序列化为Json字符串 | ||
| 26 | * | ||
| 27 | * @param obj 对象 | ||
| 28 | * @return String Json字符串 | ||
| 29 | */ | ||
| 30 | public static final String toJson(Object obj, SerializerFeature... features) { | ||
| 31 | return JSONObject.toJSONString(obj, features); | ||
| 32 | } | ||
| 33 | |||
| 34 | |||
| 35 | /** | ||
| 36 | * 将Json字符串反序列化为对象 | ||
| 37 | * | ||
| 38 | * @param jsonString Json字符串 | ||
| 39 | * @param clazz 需要转换到的对象类 | ||
| 40 | * @return <T>对象 | ||
| 41 | */ | ||
| 42 | public static final <T> T toObj(String jsonString, Class<T> clazz) { | ||
| 43 | return JSONObject.parseObject(jsonString, clazz); | ||
| 44 | } | ||
| 45 | |||
| 46 | public static final <T> T toObj(String jsonString, TypeReference<T> typeReference) { | ||
| 47 | return JSONObject.parseObject(jsonString, typeReference); | ||
| 48 | } | ||
| 49 | |||
| 50 | /** | ||
| 51 | * 将Json字符串反序列化为数组对象 | ||
| 52 | * | ||
| 53 | * @param jsonString Json字符串 | ||
| 54 | * @param clazz 需要转换到的对象类(数组内的对象实体) | ||
| 55 | * @return List<T>数组对象 | ||
| 56 | */ | ||
| 57 | public static final <T> List<T> toList(String jsonString, Class<T> clazz) { | ||
| 58 | return JSONObject.parseArray(jsonString, clazz); | ||
| 59 | } | ||
| 60 | |||
| 61 | /** | ||
| 62 | * 将Map转化为对象 | ||
| 63 | * | ||
| 64 | * @param map | ||
| 65 | * @param clazz 需要转换到的对象类 | ||
| 66 | * @return <T>对象 | ||
| 67 | */ | ||
| 68 | public static final <T> T toObj(Object map, Class<T> clazz) { | ||
| 69 | return JSONObject.parseObject(JSONObject.toJSONString(map, SerializerFeature.WriteMapNullValue), clazz); | ||
| 70 | } | ||
| 71 | |||
| 72 | public static boolean isJson(String content) { | ||
| 73 | if(StringUtils.isEmpty(content)){ | ||
| 74 | return false; | ||
| 75 | } | ||
| 76 | boolean isJsonObject = true; | ||
| 77 | boolean isJsonArray = true; | ||
| 78 | try { | ||
| 79 | JSONObject.parseObject(content); | ||
| 80 | } catch (Exception e) { | ||
| 81 | isJsonObject = false; | ||
| 82 | } | ||
| 83 | try { | ||
| 84 | JSONObject.parseArray(content); | ||
| 85 | } catch (Exception e) { | ||
| 86 | isJsonArray = false; | ||
| 87 | } | ||
| 88 | if(!isJsonObject && !isJsonArray){ | ||
| 89 | return false; | ||
| 90 | } | ||
| 91 | return true; | ||
| 92 | } | ||
| 93 | |||
| 94 | } |
| 1 | package com.bigdata.test; | ||
| 2 | |||
| 3 | import java.util.regex.Matcher; | ||
| 4 | import java.util.regex.Pattern; | ||
| 5 | |||
/**
 * String helpers used by the ETL mapper: placeholder substitution and garbled-text detection.
 */
public class StringUtil {
    /**
     * Legacy scratch field. {@link #format(String)} still stores its intermediate value here
     * for backward compatibility, but no longer reads it back, so the returned value cannot
     * be affected by concurrent callers.
     */
    public static String temp = "";
    /** Placeholder written for missing or empty values. */
    public static final String isNull = "N/A";

    /** Runs of whitespace (space, tab, CR, LF, ...). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
    /** Any Unicode punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /**
     * Replaces a null or empty string with the {@link #isNull} placeholder.
     *
     * @param str the value to normalize; may be null
     * @return {@code str} unchanged when it is non-null and non-empty, otherwise "N/A"
     */
    public static String format(String str) {
        String value = str != null ? str : isNull;
        temp = value; // legacy side effect kept for existing readers of the field
        return value.isEmpty() ? isNull : value;
    }

    /**
     * Tells whether a character belongs to one of the CJK ideograph blocks or to the
     * general / CJK / full-width punctuation blocks.
     *
     * @param c the character to classify
     * @return true if the character is in one of those Unicode blocks
     */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristically detects garbled text.
     *
     * <p>Whitespace and punctuation are removed first; the text is considered garbled when
     * more than 40% of the remaining characters are neither letters/digits nor characters
     * accepted by {@link #isChinese(char)}.
     *
     * @param strName the text to inspect; may be null
     * @return true if the text looks garbled; false for null input or when nothing remains
     *         after stripping
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }
        // Strip whitespace only. The previous pattern "\\s*|t*|r*|n*" also deleted the
        // literal letters t, r and n, which inflated the ratio for ordinary text.
        String withoutSpace = WHITESPACE.matcher(strName).replaceAll("");
        String stripped = PUNCTUATION.matcher(withoutSpace).replaceAll("").trim();

        int length = stripped.length();
        if (length == 0) {
            return false; // nothing left to judge; also avoids a 0/0 division
        }

        int suspicious = 0;
        for (int i = 0; i < length; i++) {
            char c = stripped.charAt(i);
            if (!Character.isLetterOrDigit(c) && !isChinese(c)) {
                suspicious++;
            }
        }

        // Float division is kept deliberately so the threshold behaves exactly as before.
        float ratio = (float) suspicious / length;
        return ratio > 0.4;
    }
}
src/main/resources/log4j.properties
0 → 100644
| 1 | log4j.rootLogger=debug,rootFile,console | ||
| 2 | log4j.logger.org.apache.http=OFF | ||
| 3 | #日志文件 | ||
| 4 | log4j.appender.rootFile=org.apache.log4j.RollingFileAppender | ||
| 5 | log4j.appender.rootFile.File=${user.dir}/logs/debug.log | ||
| 6 | log4j.appender.rootFile.Encoding=UTF8 | ||
| 7 | log4j.appender.rootFile.MaxFileSize=50000KB | ||
| 8 | log4j.appender.rootFile.MaxBackupIndex=1000 | ||
| 9 | log4j.appender.rootFile.Threshold=TRACE | ||
| 10 | log4j.appender.rootFile.layout=org.apache.log4j.PatternLayout | ||
| 11 | log4j.appender.rootFile.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n | ||
| 12 | #控制台 | ||
| 13 | log4j.appender.console=org.apache.log4j.ConsoleAppender | ||
| 14 | log4j.appender.console.Threshold=TRACE | ||
| 15 | log4j.appender.console.ImmediateFlush=true | ||
| 16 | log4j.appender.console.Target=System.out | ||
| 17 | log4j.appender.console.layout=org.apache.log4j.PatternLayout | ||
| 18 | log4j.appender.console.layout.ConversionPattern= %-d{yyyy-MM-dd HH:mm:ss.SSS} [%-5p] %c - %m%n | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or sign in to post a comment