Commit 23a41958 (23a41958491839ca6a4049a315475cea8c8ec641) by zhanghao

1 parent db9ebff4
pom.xml

@@ -12,9 +12,12 @@
     <url>http://maven.apache.org</url>

     <properties>
-        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <encoding>UTF-8</encoding>
         <maven.compiler.source>1.8</maven.compiler.source>
         <maven.compiler.target>1.8</maven.compiler.target>
+        <scala.tools.version>2.11</scala.tools.version>
+        <scala.version>2.11.8</scala.version>
+        <spark.version>2.4.3</spark.version>
         <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
     </properties>

@@ -42,51 +45,90 @@
             <artifactId>fastjson</artifactId>
             <version>1.2.76</version>
         </dependency>
+        <dependency>
+            <groupId>org.scala-lang</groupId>
+            <artifactId>scala-library</artifactId>
+            <version>${scala.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_2.11</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-hive_2.11</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-hive-thriftserver_2.11</artifactId>
+            <version>${spark.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <version>5.1.47</version>
+        </dependency>
+        <dependency>
+            <groupId>com.typesafe</groupId>
+            <artifactId>config</artifactId>
+            <version>1.3.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hive</groupId>
+            <artifactId>hive-jdbc</artifactId>
+            <version>1.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>kudu-client</artifactId>
+            <version>1.7.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.kudu</groupId>
+            <artifactId>kudu-spark2_2.11</artifactId>
+            <version>1.7.0</version>
+        </dependency>
     </dependencies>

     <build>
-        <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
-            <plugins>
-                <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
-                <plugin>
-                    <artifactId>maven-clean-plugin</artifactId>
-                    <version>3.1.0</version>
-                </plugin>
-                <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
-                <plugin>
-                    <artifactId>maven-resources-plugin</artifactId>
-                    <version>3.0.2</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-compiler-plugin</artifactId>
-                    <version>3.8.0</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-surefire-plugin</artifactId>
-                    <version>2.22.1</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-jar-plugin</artifactId>
-                    <version>3.0.2</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-install-plugin</artifactId>
-                    <version>2.5.2</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-deploy-plugin</artifactId>
-                    <version>2.8.2</version>
-                </plugin>
-                <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
-                <plugin>
-                    <artifactId>maven-site-plugin</artifactId>
-                    <version>3.7.1</version>
-                </plugin>
-                <plugin>
-                    <artifactId>maven-project-info-reports-plugin</artifactId>
-                    <version>3.0.0</version>
-                </plugin>
-            </plugins>
-        </pluginManagement>
+        <plugins>
+            <plugin>
+                <!-- see http://davidb.github.com/scala-maven-plugin -->
+                <groupId>net.alchim31.maven</groupId>
+                <artifactId>scala-maven-plugin</artifactId>
+                <version>3.1.3</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>compile</goal>
+                            <goal>testCompile</goal>
+                        </goals>
+                        <configuration>
+                            <args>
+                                <arg>-dependencyfile</arg>
+                                <arg>${project.build.directory}/.scala_dependencies</arg>
+                            </args>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>2.13</version>
+                <configuration>
+                    <useFile>false</useFile>
+                    <disableXmlReport>true</disableXmlReport>
+                    <!-- If you have classpath issue like NoDefClassError,... -->
+                    <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
+                    <includes>
+                        <include>**/*Test.*</include>
+                        <include>**/*Suite.*</include>
+                    </includes>
+                </configuration>
+            </plugin>
+        </plugins>
     </build>
 </project>
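The new dependencies wire Spark 2.4.3 (Scala 2.11) against Hive, MySQL, Kudu, and Typesafe Config. A minimal sketch of how the Hive and Config pieces might be exercised together; the object name HiveSmokeTest, the config key warehouse.dir, and the fallback path are hypothetical illustrations, not taken from this repository:

package com.bigdata.stest

import com.typesafe.config.ConfigFactory
import org.apache.spark.sql.SparkSession

object HiveSmokeTest {
  def main(args: Array[String]): Unit = {
    // Typesafe Config 1.3.3: loads application.conf from the classpath.
    val config = ConfigFactory.load()
    // Hypothetical key; fall back when application.conf does not define it.
    val warehouseDir =
      if (config.hasPath("warehouse.dir")) config.getString("warehouse.dir")
      else "/tmp/spark-warehouse"

    // spark-sql_2.11 plus spark-hive_2.11 make enableHiveSupport() available.
    val spark = SparkSession.builder()
      .master("local")
      .appName("HiveSmokeTest")
      .config("spark.sql.warehouse.dir", warehouseDir)
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("SHOW DATABASES").show()
    spark.stop()
  }
}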
ETLApp.java

@@ -23,6 +23,7 @@ public class ETLApp {
 
         FileSystem fileSystem = FileSystem.get(configuration);
         Path outputPath = new Path(args[1]);
+        // Path outputPath = new Path("./input/etl");
         if(fileSystem.exists(outputPath)) {
             fileSystem.delete(outputPath,true);
         }
@@ -38,6 +39,8 @@ public class ETLApp {
 
         FileInputFormat.setInputPaths(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));
+        // FileInputFormat.setInputPaths(job, new Path("./input/data/log-2021-04-13.03.log"));
+        // FileOutputFormat.setOutputPath(job, new Path("./input/etl"));
 
         job.waitForCompletion(true);
     }
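ETLApp deletes the output directory before each run, which is why the local path ./input/etl in the commented-out lines can be reused across runs (FileOutputFormat refuses to write into an existing directory). A standalone Scala sketch of the same guard, using the Hadoop FileSystem API that is already on the classpath; the object name CleanOutputDir is hypothetical and only the hard-coded path comes from the lines above:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object CleanOutputDir {
  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new Configuration())
    // Mirrors the commented-out local output path above.
    val outputPath = new Path("./input/etl")
    // Remove the directory recursively so the next job submission
    // does not fail with "Output directory ... already exists".
    if (fs.exists(outputPath)) {
      fs.delete(outputPath, true)
    }
  }
}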
SparkWordCountApp.scala (new file)

@@ -0,0 +1,16 @@
+package com.bigdata.stest
+
+import org.apache.spark.{SparkConf, SparkContext}
+
+object SparkWordCountApp {
+  def main(args: Array[String]): Unit = {
+    val sparkConf = new SparkConf()
+    sparkConf.setMaster("local")
+    sparkConf.setAppName("SparkWordCountApp")
+    val sc = new SparkContext(sparkConf)
+    // val rdd = sc.textFile("./input/data/input.txt")
+    // rdd.collect().foreach(println)
+    sc.stop()
+  }
+
+}
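As committed, SparkWordCountApp builds a context and stops without counting anything (the RDD lines are commented out). A sketch of what the filled-in body might look like, assuming a whitespace-delimited text file at the commented-out path ./input/data/input.txt:

package com.bigdata.stest

import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCountApp {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .setMaster("local")
      .setAppName("SparkWordCountApp")
    val sc = new SparkContext(sparkConf)

    // Classic RDD word count: split each line on whitespace,
    // map each word to a count of 1, then sum counts per word.
    val counts = sc.textFile("./input/data/input.txt")
      .flatMap(_.split("\\s+"))
      .filter(_.nonEmpty)
      .map(word => (word, 1))
      .reduceByKey(_ + _)

    counts.collect().foreach(println)
    sc.stop()
  }
}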