-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSpark_Standalone.R
22 lines (18 loc) · 1.23 KB
/
Spark_Standalone.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
## this connection to standalone on your machine for Spark
## spark.sql options list:
properties <- SparkR::sql("SET -v")
SparkR::showDF(properties, numRows = 200, truncate = FALSE)
# Testing Spark ----- not needed to run on bigRED
library(SparkR, lib.loc = c(file.path(Sys.getenv("SPARK_HOME"), "R", "lib")))
library(rJava) # didn't need this to get below to work.
# note: did not run Start up file to get this to work...start_up changes the Java directory, don't forget!!!!
sparkR.session(enableHiveSupport = TRUE ,
master = "local[*]", # master = "yarn", mode = "cluster",
sparkHome = Sys.getenv("SPARK_HOME") , # this was the missing link!!
sparkConfig = list(spark.driver.memory = "10g", spark.executor.memory = "22g", spark.driver.cores = "250",
spark.rpc.message.maxSize = "1024", spark.sql.crossJoin.enabled=TRUE,
spark.sql.shuffle.partitions = "2000",
spark.serializer = "org.apache.spark.serializer.KryoSerializer",
spark.rdd.compress = "true"
spark.sql.warehouse.dir="C:\\Apps\\winutils\\winutils-master\\hadoop-2.7.1")
)