commit
Showing 3 changed files with 105 additions and 44 deletions
| ... | @@ -12,9 +12,12 @@ | ... | @@ -12,9 +12,12 @@ |
| 12 | <url>http://maven.apache.org</url> | 12 | <url>http://maven.apache.org</url> |
| 13 | 13 | ||
| 14 | <properties> | 14 | <properties> |
| 15 | <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | 15 | <encoding>UTF-8</encoding> |
| 16 | <maven.compiler.source>1.8</maven.compiler.source> | 16 | <maven.compiler.source>1.8</maven.compiler.source> |
| 17 | <maven.compiler.target>1.8</maven.compiler.target> | 17 | <maven.compiler.target>1.8</maven.compiler.target> |
| 18 | <scala.tools.version>2.11</scala.tools.version> | ||
| 19 | <scala.version>2.11.8</scala.version> | ||
| 20 | <spark.version>2.4.3</spark.version> | ||
| 18 | <hadoop.version>2.6.0-cdh5.15.1</hadoop.version> | 21 | <hadoop.version>2.6.0-cdh5.15.1</hadoop.version> |
| 19 | </properties> | 22 | </properties> |
| 20 | 23 | ||
| ... | @@ -42,51 +45,90 @@ | ... | @@ -42,51 +45,90 @@ |
| 42 | <artifactId>fastjson</artifactId> | 45 | <artifactId>fastjson</artifactId> |
| 43 | <version>1.2.76</version> | 46 | <version>1.2.76</version> |
| 44 | </dependency> | 47 | </dependency> |
| 48 | <dependency> | ||
| 49 | <groupId>org.scala-lang</groupId> | ||
| 50 | <artifactId>scala-library</artifactId> | ||
| 51 | <version>${scala.version}</version> | ||
| 52 | </dependency> | ||
| 53 | <dependency> | ||
| 54 | <groupId>org.apache.spark</groupId> | ||
| 55 | <artifactId>spark-sql_2.11</artifactId> | ||
| 56 | <version>${spark.version}</version> | ||
| 57 | </dependency> | ||
| 58 | <dependency> | ||
| 59 | <groupId>org.apache.spark</groupId> | ||
| 60 | <artifactId>spark-hive_2.11</artifactId> | ||
| 61 | <version>${spark.version}</version> | ||
| 62 | </dependency> | ||
| 63 | <dependency> | ||
| 64 | <groupId>org.apache.spark</groupId> | ||
| 65 | <artifactId>spark-hive-thriftserver_2.11</artifactId> | ||
| 66 | <version>${spark.version}</version> | ||
| 67 | </dependency> | ||
| 68 | <dependency> | ||
| 69 | <groupId>mysql</groupId> | ||
| 70 | <artifactId>mysql-connector-java</artifactId> | ||
| 71 | <version>5.1.47</version> | ||
| 72 | </dependency> | ||
| 73 | <dependency> | ||
| 74 | <groupId>com.typesafe</groupId> | ||
| 75 | <artifactId>config</artifactId> | ||
| 76 | <version>1.3.3</version> | ||
| 77 | </dependency> | ||
| 78 | <dependency> | ||
| 79 | <groupId>org.apache.hive</groupId> | ||
| 80 | <artifactId>hive-jdbc</artifactId> | ||
| 81 | <version>1.2.1</version> | ||
| 82 | </dependency> | ||
| 83 | <dependency> | ||
| 84 | <groupId>org.apache.kudu</groupId> | ||
| 85 | <artifactId>kudu-client</artifactId> | ||
| 86 | <version>1.7.0</version> | ||
| 87 | </dependency> | ||
| 88 | <dependency> | ||
| 89 | <groupId>org.apache.kudu</groupId> | ||
| 90 | <artifactId>kudu-spark2_2.11</artifactId> | ||
| 91 | <version>1.7.0</version> | ||
| 92 | </dependency> | ||
| 45 | </dependencies> | 93 | </dependencies> |
| 46 | 94 | ||
| 47 | <build> | 95 | <build> |
| 48 | <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) --> | 96 | <plugins> |
| 49 | <plugins> | 97 | <plugin> |
| 50 | <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle --> | 98 | <!-- see http://davidb.github.com/scala-maven-plugin --> |
| 51 | <plugin> | 99 | <groupId>net.alchim31.maven</groupId> |
| 52 | <artifactId>maven-clean-plugin</artifactId> | 100 | <artifactId>scala-maven-plugin</artifactId> |
| 53 | <version>3.1.0</version> | 101 | <version>3.1.3</version> |
| 54 | </plugin> | 102 | <executions> |
| 55 | <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging --> | 103 | <execution> |
| 56 | <plugin> | 104 | <goals> |
| 57 | <artifactId>maven-resources-plugin</artifactId> | 105 | <goal>compile</goal> |
| 58 | <version>3.0.2</version> | 106 | <goal>testCompile</goal> |
| 59 | </plugin> | 107 | </goals> |
| 60 | <plugin> | 108 | <configuration> |
| 61 | <artifactId>maven-compiler-plugin</artifactId> | 109 | <args> |
| 62 | <version>3.8.0</version> | 110 | <arg>-dependencyfile</arg> |
| 63 | </plugin> | 111 | <arg>${project.build.directory}/.scala_dependencies</arg> |
| 64 | <plugin> | 112 | </args> |
| 65 | <artifactId>maven-surefire-plugin</artifactId> | 113 | </configuration> |
| 66 | <version>2.22.1</version> | 114 | </execution> |
| 67 | </plugin> | 115 | </executions> |
| 68 | <plugin> | 116 | </plugin> |
| 69 | <artifactId>maven-jar-plugin</artifactId> | 117 | <plugin> |
| 70 | <version>3.0.2</version> | 118 | <groupId>org.apache.maven.plugins</groupId> |
| 71 | </plugin> | 119 | <artifactId>maven-surefire-plugin</artifactId> |
| 72 | <plugin> | 120 | <version>2.13</version> |
| 73 | <artifactId>maven-install-plugin</artifactId> | 121 | <configuration> |
| 74 | <version>2.5.2</version> | 122 | <useFile>false</useFile> |
| 75 | </plugin> | 123 | <disableXmlReport>true</disableXmlReport> |
| 76 | <plugin> | 124 | <!-- If you have classpath issue like NoDefClassError,... --> |
| 77 | <artifactId>maven-deploy-plugin</artifactId> | 125 | <!-- useManifestOnlyJar>false</useManifestOnlyJar --> |
| 78 | <version>2.8.2</version> | 126 | <includes> |
| 79 | </plugin> | 127 | <include>**/*Test.*</include> |
| 80 | <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle --> | 128 | <include>**/*Suite.*</include> |
| 81 | <plugin> | 129 | </includes> |
| 82 | <artifactId>maven-site-plugin</artifactId> | 130 | </configuration> |
| 83 | <version>3.7.1</version> | 131 | </plugin> |
| 84 | </plugin> | 132 | </plugins> |
| 85 | <plugin> | ||
| 86 | <artifactId>maven-project-info-reports-plugin</artifactId> | ||
| 87 | <version>3.0.0</version> | ||
| 88 | </plugin> | ||
| 89 | </plugins> | ||
| 90 | </pluginManagement> | ||
| 91 | </build> | 133 | </build> |
| 92 | </project> | 134 | </project> | ... | ... |
| ... | @@ -23,6 +23,7 @@ public class ETLApp { | ... | @@ -23,6 +23,7 @@ public class ETLApp { |
| 23 | 23 | ||
| 24 | FileSystem fileSystem = FileSystem.get(configuration); | 24 | FileSystem fileSystem = FileSystem.get(configuration); |
| 25 | Path outputPath = new Path(args[1]); | 25 | Path outputPath = new Path(args[1]); |
| 26 | // Path outputPath = new Path("./input/etl"); | ||
| 26 | if(fileSystem.exists(outputPath)) { | 27 | if(fileSystem.exists(outputPath)) { |
| 27 | fileSystem.delete(outputPath,true); | 28 | fileSystem.delete(outputPath,true); |
| 28 | } | 29 | } |
| ... | @@ -38,6 +39,8 @@ public class ETLApp { | ... | @@ -38,6 +39,8 @@ public class ETLApp { |
| 38 | 39 | ||
| 39 | FileInputFormat.setInputPaths(job, new Path(args[0])); | 40 | FileInputFormat.setInputPaths(job, new Path(args[0])); |
| 40 | FileOutputFormat.setOutputPath(job, new Path(args[1])); | 41 | FileOutputFormat.setOutputPath(job, new Path(args[1])); |
| 42 | // FileInputFormat.setInputPaths(job, new Path("./input/data/log-2021-04-13.03.log")); | ||
| 43 | // FileOutputFormat.setOutputPath(job, new Path("./input/etl")); | ||
| 41 | 44 | ||
| 42 | job.waitForCompletion(true); | 45 | job.waitForCompletion(true); |
| 43 | } | 46 | } | ... | ... |
| 1 | package com.bigdata.stest | ||
| 2 | |||
| 3 | import org.apache.spark.{SparkConf, SparkContext} | ||
| 4 | |||
| 5 | object SparkWordCountApp { | ||
| 6 | def main(args: Array[String]): Unit = { | ||
| 7 | val sparkConf = new SparkConf() | ||
| 8 | sparkConf.setMaster("local") | ||
| 9 | sparkConf.setAppName("SparkWordCountApp") | ||
| 10 | val sc = new SparkContext(sparkConf) | ||
| 11 | // val rdd = sc.textFile("./input/data/input.txt") | ||
| 12 | // rdd.collect().foreach(println) | ||
| 13 | sc.stop() | ||
| 14 | } | ||
| 15 | |||
| 16 | } |
-
Please register or sign in to post a comment