
Commit 0fd7f4f

Add sbt project generated from core/Simple Spark.snb

7 files changed: +458 −0 lines

simple-spark-sources/build.sbt

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
organization := "generated"

name := "simple-spark"

version := "0.0.1-SNAPSHOT"

// append scala version to artifact name(s)
crossPaths := true

scalaVersion := "2.10.6"

maintainer := "" //Docker

resolvers ++= Seq(
  "Maven2 Local" at "file:/Users/vidma-vinted/.m2/repository/",
  "public" at "https://repo1.maven.org/maven2/",
  "spark-packages" at "http://dl.bintray.com/spark-packages/maven/",
  new sbt.URLRepository("typesafe-ivy-releases", new sbt.Patterns(
    List("https://repo.typesafe.com/typesafe/ivy-releases/[organisation]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]"),
    List("https://repo.typesafe.com/typesafe/ivy-releases/[organisation]/[module]/(scala_[scalaVersion]/)(sbt_[sbtVersion]/)[revision]/[type]s/[artifact](-[classifier]).[ext]"),
    false
  )),
  "jcenter" at "https://jcenter.bintray.com/",
  "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases")

net.virtualvoid.sbt.graph.Plugin.graphSettings

enablePlugins(UniversalPlugin)

enablePlugins(DockerPlugin)

enablePlugins(JavaAppPackaging)

import com.typesafe.sbt.SbtNativePackager.autoImport.NativePackagerHelper._

import com.typesafe.sbt.packager.docker._

dockerBaseImage := "data-fellas-docker-public.bintray.io/base-adst:0.0.1"

dockerExposedPorts := Seq(9000, 9443)

daemonUser in Docker := "root"

packageName in Docker := "generated.simplespark"

mappings in Docker ++= directory("spark-lib")

mappings in Universal ++= directory("spark-lib")

resolvers += Resolver.mavenLocal

resolvers += Resolver.typesafeRepo("releases")

resolvers += "cloudera" at "https://repository.cloudera.com/artifactory/cloudera-repos"

credentials += Credentials(Path.userHome / ".bintray" / ".credentials")

resolvers += Resolver.url("bintray-data-fellas-maven", url("http://dl.bintray.com/data-fellas/maven"))(Resolver.ivyStylePatterns)

dockerCommands ++= Seq(Cmd("ENV", "SPARK_HOME \"\""))

dockerRepository := Some("") //Docker

enablePlugins(DebianPlugin)

name in Debian := "simple-spark"

maintainer in Debian := "Data Fellas"

packageSummary in Debian := "Data Fellas Generated Job"

packageDescription := "Generated Job by Spark-notebook"

debianPackageDependencies in Debian += "java8-runtime-headless"

serverLoading in Debian := com.typesafe.sbt.packager.archetypes.ServerLoader.Upstart

daemonUser in Linux := "root"

daemonGroup in Linux := "root"

bashScriptExtraDefines += "export SPARK_HOME=\"\""

val sparkVersion = sys.env.get("SPARK_VERSION").orElse(sys.props.get("spark.version")).getOrElse("2.0.1")

val hadoopVersion = sys.env.get("HADOOP_VERSION").orElse(sys.props.get("hadoop.version")).getOrElse("2.2.0")

// TODO: needed only if you use some of the spark-notebook code
// (most likely you don't want this; otherwise you'd need to publishLocal the SN libs)
// libraryDependencies += "io.kensu" %% "common" % (sparkVersion + "_0.8.0-SNAPSHOT") excludeAll(
//   ExclusionRule("org.apache.hadoop"),
//   ExclusionRule("org.apache.spark")
// )

libraryDependencies += "com.typesafe" % "config" % "1.3.1"

// you might not need all of the Spark jars below
libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion excludeAll(
  ExclusionRule("org.apache.hadoop"),
  ExclusionRule("org.apache.ivy", "ivy")
)

libraryDependencies += "org.apache.spark" %% "spark-mllib" % sparkVersion excludeAll(
  ExclusionRule("org.apache.hadoop"),
  ExclusionRule("org.apache.ivy", "ivy")
)

libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion excludeAll(
  ExclusionRule("org.apache.hadoop")
)

libraryDependencies += "org.apache.spark" %% "spark-yarn" % sparkVersion excludeAll(
  ExclusionRule("org.apache.hadoop"),
  ExclusionRule("org.apache.ivy", "ivy")
)

libraryDependencies += "org.apache.spark" %% "spark-hive" % sparkVersion excludeAll(
  ExclusionRule("org.apache.hadoop"),
  ExclusionRule("org.apache.ivy", "ivy"),
  ExclusionRule("javax.servlet", "servlet-api"),
  ExclusionRule("org.mortbay.jetty", "servlet-api")
)

libraryDependencies += "org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(
  ExclusionRule("org.apache.commons", "commons-exec"),
  ExclusionRule("commons-codec", "commons-codec"),
  ExclusionRule("com.google.guava", "guava"),
  ExclusionRule("javax.servlet")
)

libraryDependencies += "org.apache.hadoop" % "hadoop-yarn-server-web-proxy" % hadoopVersion excludeAll(
  ExclusionRule("org.apache.commons", "commons-exec"),
  ExclusionRule("commons-codec", "commons-codec"),
  ExclusionRule("com.google.guava", "guava"),
  ExclusionRule("javax.servlet")
)

libraryDependencies += "net.java.dev.jets3t" % "jets3t" % "0.9.0" force()

libraryDependencies += "com.google.guava" % "guava" % "16.0.1" force()

// assembly
// skip tests during assembly
test in assembly := {}

// main class
mainClass in assembly := Some("generated.Main")

artifact in (Compile, assembly) ~= { art =>
  art.copy(`classifier` = Some("assembly"))
}

// merging files... especially application.conf!
assemblyMergeStrategy in assembly := {
  case PathList("javax", "servlet", xs @ _*) => MergeStrategy.first
  case PathList("org", "apache", xs @ _*) => MergeStrategy.first
  case PathList("org", "fusesource", xs @ _*) => MergeStrategy.first
  case PathList("org", "slf4j", xs @ _*) => MergeStrategy.first
  case PathList("com", "google", xs @ _*) => MergeStrategy.first
  case PathList("play", "core", xs @ _*) => MergeStrategy.first
  case PathList("javax", "xml", xs @ _*) => MergeStrategy.first
  case PathList("com", "esotericsoftware", xs @ _*) => MergeStrategy.first
  case PathList("xsbt", xs @ _*) => MergeStrategy.first
  case PathList("META-INF", "MANIFEST.MF") => MergeStrategy.discard
  case PathList("META-INF", xs @ _*) => MergeStrategy.first
  case "application.conf" => MergeStrategy.concat
  case "module.properties" => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith ".html" => MergeStrategy.discard
  case PathList(ps @ _*) if ps.last endsWith ".thrift" => MergeStrategy.first
  case PathList(ps @ _*) if ps.last endsWith ".xml" => MergeStrategy.first
  case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
}

aggregate in update := false

updateOptions := updateOptions.value.withCachedResolution(true)
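The build resolves its Spark and Hadoop versions from an environment variable first, then a JVM system property, then a hard-coded default. A minimal standalone sketch of that precedence (the object name is illustrative only, not part of the commit):

object VersionResolution extends App {
  // same lookup order as build.sbt: env var, then system property, then default
  def resolve(envKey: String, propKey: String, default: String): String =
    sys.env.get(envKey).orElse(sys.props.get(propKey)).getOrElse(default)

  println(resolve("SPARK_VERSION", "spark.version", "2.0.1"))   // Spark version used by the build
  println(resolve("HADOOP_VERSION", "hadoop.version", "2.2.0")) // Hadoop version used by the build
}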
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
sbt.version=0.13.9
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.7.5")

addSbtPlugin("com.typesafe.sbt" % "sbt-native-packager" % "1.1.6")

addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.3")
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
#-- End Config --

simple-spark-sources/src/main/resources/notebook.snb

Lines changed: 152 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
package generated.simplespark

object Main {

  def main(args: Array[String]): Unit = {
    // spark context
    import org.apache.spark.{SparkContext, SparkConf}
    import org.apache.spark.SparkContext._
    import org.apache.spark.rdd._
    import org.apache.spark.sql._
    import org.apache.spark.sql.functions._
    import com.typesafe.config._
    import scala.collection.JavaConverters._
    import scala.util.Try

    // Spark notebook widgets (can be removed if you do not use them)
    // Dummy implementations of the most common ones (to avoid shipping 80+ MB of spark-notebook jars)
    def display[C](originalData: C, fields: Option[(String, String)] = None, maxPoints: Int = 0) = {}
    def pairs[C](originalData: C, maxPoints: Int = 0) = {}
    def ul(capacity: Int = 10, initData: Seq[String] = Nil, prefill: Option[String] = None) = {}
    def ol(capacity: Int = 10, initData: Seq[String] = Nil, prefill: Option[String] = None) = {}
    def img(tpe: String = "png", width: String = "", height: String = "") = {}
    def text(value: String) = {}

    // Create the Spark configuration holder
    val sparkConf = new SparkConf()

    // Set configuration
    val config = ConfigFactory.load()
    val sparkConfig = Try(config.getConfig("spark"))
      .getOrElse(com.typesafe.config.ConfigFactory.empty)
      .atPath("spark").entrySet.asScala.map(e => e.getKey -> config.getString(e.getKey))

    sparkConf.setAll(sparkConfig)

    sparkConf.setMaster(sparkConf.get("spark.master", "local[*]"))
    sparkConf.set("spark.app.name", sparkConf.get("spark.app.name", "simple-spark"))

    // Distribute the jars to the executors, so this does not require a separate Spark installation.
    // This is needed only if not using spark-submit (comment it out otherwise).
    def setExecutorJars() = {
      val currentProjectJars = Array("lib/generated.simple-spark-0.0.1-SNAPSHOT.jar", "target/scala-2.10/generated.simple-spark_2.10-0.0.1-SNAPSHOT.jar").map{j => new java.io.File(j)}.filter(_.exists()).map(_.getAbsolutePath)
      val sparkLibDir = new java.io.File("spark-lib")
      val fromProjectJars = Array[String]().map{j => new java.io.File(sparkLibDir, j).getAbsolutePath}
      val jarsArray = (sparkConf.get("spark.jars", "").split(",").toArray ++ currentProjectJars ++ fromProjectJars).distinct.filter(!_.isEmpty)
      println("Add Jars: \n" + jarsArray.mkString("\n"))
      sparkConf.setJars(jarsArray)
    }

    setExecutorJars()

    // Create the Spark Session / Spark Context
    val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate
    val sparkContext = sparkSession.sparkContext
    println("SparkConf used: " + sparkContext.getConf.toDebugString)

    // aliases
    val sc = sparkContext
    val ss = sparkSession
    import ss.implicits._

    // main code
    // no custom variables
    /* -- Code Cell: Some(2A2F6AA618AC48018D01E7D2F4183B76) -- */

    sparkContext.getConf.toDebugString
    /****************/


    /* -- Code Cell: Some(9088B578DE2F4BA48DF323F11895488A) -- */

    def transform(i: Int) = (i, i + 1)
    /****************/


    /* -- Code Cell: Some(BF434E47187740E78B7A7A521D2D87DD) -- */

    val dataset = sparkSession.createDataset(1 to 1000).map(transform)
    /****************/


    /* -- Code Cell: Some(EE80B60DB2C645D58B30EB2B793A5BEC) -- */

    display(dataset.toDF)
    /****************/


    /* -- Code Cell: Some(7797C8DBEB7643D788F4D14F6C8E2B40) -- */

    val sum = dataset.map(_._2).reduce(_+_)

    println(sum)
    /****************/


    /* -- Code Cell: Some(73F60C7C9F2945E38AC0C252F2C3AC1E) -- */

    sparkContext.stop
  }
}
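As a quick sanity check on what the generated job prints: transform maps each i to (i, i + 1), and the final cells sum the second element over 1 to 1000, i.e. 2 + 3 + ... + 1001 = 501500. A plain-Scala check of that value (illustrative only, no Spark needed):

println((1 to 1000).map(_ + 1).sum) // 501500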
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
package generated.simplespark

//---//

0 commit comments
