张浩 / BrBigDataTest
Commit 23a41958491839ca6a4049a315475cea8c8ec641
authored 2021-05-10 10:07:15 +0800 by zhanghao
1 parent db9ebff4
Showing 3 changed files with 105 additions and 44 deletions
pom.xml
src/main/java/com/bigdata/test/ETLApp.java
src/main/scala/com/bigdata/stest/SparkWordCountApp.scala
pom.xml
...
...
@@ -12,9 +12,12 @@
  <url>http://maven.apache.org</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <encoding>UTF-8</encoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <scala.tools.version>2.11</scala.tools.version>
    <scala.version>2.11.8</scala.version>
    <spark.version>2.4.3</spark.version>
    <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
  </properties>
...
...
@@ -42,51 +45,90 @@
      <artifactId>fastjson</artifactId>
      <version>1.2.76</version>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive-thriftserver_2.11</artifactId>
      <version>${spark.version}</version>
    </dependency>
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.47</version>
    </dependency>
    <dependency>
      <groupId>com.typesafe</groupId>
      <artifactId>config</artifactId>
      <version>1.3.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-jdbc</artifactId>
      <version>1.2.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kudu</groupId>
      <artifactId>kudu-client</artifactId>
      <version>1.7.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kudu</groupId>
      <artifactId>kudu-spark2_2.11</artifactId>
      <version>1.7.0</version>
    </dependency>
  </dependencies>

  <build>
    <pluginManagement>
      <!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
      <plugins>
        <!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
        <plugin>
          <artifactId>maven-clean-plugin</artifactId>
          <version>3.1.0</version>
        </plugin>
        <!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
        <plugin>
          <artifactId>maven-resources-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>3.8.0</version>
        </plugin>
        <plugin>
          <artifactId>maven-surefire-plugin</artifactId>
          <version>2.22.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-jar-plugin</artifactId>
          <version>3.0.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-install-plugin</artifactId>
          <version>2.5.2</version>
        </plugin>
        <plugin>
          <artifactId>maven-deploy-plugin</artifactId>
          <version>2.8.2</version>
        </plugin>
        <!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
        <plugin>
          <artifactId>maven-site-plugin</artifactId>
          <version>3.7.1</version>
        </plugin>
        <plugin>
          <artifactId>maven-project-info-reports-plugin</artifactId>
          <version>3.0.0</version>
        </plugin>
      </plugins>
    </pluginManagement>
    <plugins>
      <plugin>
        <!-- see http://davidb.github.com/scala-maven-plugin -->
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
        <version>3.1.3</version>
        <executions>
          <execution>
            <goals>
              <goal>compile</goal>
              <goal>testCompile</goal>
            </goals>
            <configuration>
              <args>
                <arg>-dependencyfile</arg>
                <arg>${project.build.directory}/.scala_dependencies</arg>
              </args>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.13</version>
        <configuration>
          <useFile>false</useFile>
          <disableXmlReport>true</disableXmlReport>
          <!-- If you have classpath issue like NoDefClassError,... -->
          <!-- useManifestOnlyJar>false</useManifestOnlyJar -->
          <includes>
            <include>**/*Test.*</include>
            <include>**/*Suite.*</include>
          </includes>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
...
...
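The dependencies added here (spark-sql_2.11, spark-hive_2.11, hive-jdbc, kudu-client and kudu-spark2_2.11) point at reading Kudu tables from Spark SQL. Below is a minimal sketch of that usage; the object name, Kudu master address and table name are assumptions and do not appear anywhere in this commit.

// Sketch only: master address and table name are placeholders, not from this commit.
import org.apache.spark.sql.SparkSession

object KuduReadSketch {
  def main(args: Array[String]): Unit = {
    // Local SparkSession with Hive support, matching the spark-sql_2.11 / spark-hive_2.11 dependencies.
    val spark = SparkSession.builder()
      .appName("KuduReadSketch")
      .master("local[2]")
      .enableHiveSupport()
      .getOrCreate()

    // Read a Kudu table into a DataFrame through the kudu-spark2_2.11 connector.
    val df = spark.read
      .options(Map(
        "kudu.master" -> "kudu-master:7051",      // placeholder master address
        "kudu.table"  -> "impala::default.demo")) // placeholder table name
      .format("org.apache.kudu.spark.kudu")
      .load()

    df.show(10)
    spark.stop()
  }
}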
src/main/java/com/bigdata/test/ETLApp.java
...
...
@@ -23,6 +23,7 @@ public class ETLApp {
        FileSystem fileSystem = FileSystem.get(configuration);
        Path outputPath = new Path(args[1]);
        // Path outputPath = new Path("./input/etl");
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
...
...
@@ -38,6 +39,8 @@ public class ETLApp {
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // FileInputFormat.setInputPaths(job, new Path("./input/data/log-2021-04-13.03.log"));
        // FileOutputFormat.setOutputPath(job, new Path("./input/etl"));
        job.waitForCompletion(true);
    }
...
...
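With the hard-coded paths commented out, ETLApp now takes its input and output locations as the first and second command-line arguments, so a launch would look roughly like the following (jar name and paths are placeholders, not from this commit): hadoop jar BrBigDataTest.jar com.bigdata.test.ETLApp /path/to/input /path/to/output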
src/main/scala/com/bigdata/stest/SparkWordCountApp.scala
0 → 100644
package com.bigdata.stest

import org.apache.spark.{SparkConf, SparkContext}

object SparkWordCountApp {

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
    sparkConf.setMaster("local")
    sparkConf.setAppName("SparkWordCountApp")
    val sc = new SparkContext(sparkConf)

    // val rdd = sc.textFile("./input/data/input.txt")
    // rdd.collect().foreach(println)

    sc.stop()
  }
}
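As committed, SparkWordCountApp only builds a SparkContext and stops it; the counting step suggested by the class name is not present. A possible completion is sketched below, assuming the ./input/data/input.txt path taken from the commented-out line; the object name and the counting logic are assumptions, not part of this commit.

package com.bigdata.stest

import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical word-count version; only the commented-out textFile/collect lines appear in the commit.
object SparkWordCountSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local").setAppName("SparkWordCountSketch"))
    val counts = sc.textFile("./input/data/input.txt") // path from the commented-out line
      .flatMap(_.split("\\s+"))   // split each line into words
      .map(word => (word, 1))     // pair every word with a count of 1
      .reduceByKey(_ + _)         // sum the counts per word
    counts.collect().foreach(println)
    sc.stop()
  }
}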