forked from paypay/DataEngineerChallenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
35 lines (29 loc) · 1.52 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Base image: JDK 8 on Alpine. The `java` repository on Docker Hub was
# deprecated in favor of `openjdk`, which publishes the same 8-jdk-alpine tag.
FROM openjdk:8-jdk-alpine

# Versions and paths consumed by the installation steps later in this file.
# Because they are ENV (not ARG) they are also present in the running container.
# DAEMON_RUN is not referenced anywhere else in this file — presumably it is
# read by scripts at runtime; TODO confirm.
ENV DAEMON_RUN=true \
    SPARK_VERSION=3.0.1 \
    HADOOP_VERSION=2.7 \
    SCALA_VERSION=2.12.12 \
    SCALA_HOME=/usr/share/scala
# Install command-line utilities and Scala ${SCALA_VERSION} under ${SCALA_HOME}.
# - wget and ca-certificates are grouped as the virtual package
#   .build-dependencies so they can be removed again at the end of this layer.
# - `apk add --no-cache` fetches the package index on demand, so no separate
#   `apk update` is needed and no index cache is left behind in the layer.
# - Windows .bat launchers are deleted before bin/ and lib/ are moved into
#   ${SCALA_HOME}; every remaining launcher is then symlinked into /usr/bin.
# - /tmp is emptied in the same layer so the archive does not stay in the image.
RUN apk add --no-cache --virtual=.build-dependencies ca-certificates wget && \
    apk add --no-cache bash coreutils curl jq openssh procps rsync && \
    wget --no-verbose -O "/tmp/scala-${SCALA_VERSION}.tgz" "https://downloads.typesafe.com/scala/${SCALA_VERSION}/scala-${SCALA_VERSION}.tgz" && \
    tar xzf "/tmp/scala-${SCALA_VERSION}.tgz" -C /tmp && \
    mkdir -p "${SCALA_HOME}" && \
    rm "/tmp/scala-${SCALA_VERSION}/bin/"*.bat && \
    mv "/tmp/scala-${SCALA_VERSION}/bin" "/tmp/scala-${SCALA_VERSION}/lib" "${SCALA_HOME}" && \
    ln -s "${SCALA_HOME}/bin/"* "/usr/bin/" && \
    apk del .build-dependencies && \
    rm -rf "/tmp/"*
# sbt installation: unpack sbt 1.3.13 into /usr/local/sbt and start it once.
# - The archive is fetched from the sbt project's GitHub releases with TLS
#   certificate verification enabled (no --no-check-certificate).
# - It is saved to a file before extraction so that a failed download fails
#   the build instead of being hidden behind a pipe.
# - wget and tar stay installed; a later RUN in this file calls wget.
# - `sbt sbtVersion` launches sbt during the build, so a broken installation
#   fails the build here rather than when the container runs.
# NOTE(review): PATH is extended only inside this RUN, so `sbt` is not on PATH
# in later layers or at runtime — confirm spark-submit.sh calls it by full path.
RUN export PATH="/usr/local/sbt/bin:$PATH" && \
    apk add --no-cache ca-certificates tar wget && \
    mkdir -p "/usr/local/sbt" && \
    wget --no-verbose -O /tmp/sbt.tgz "https://github.com/sbt/sbt/releases/download/v1.3.13/sbt-1.3.13.tgz" && \
    tar xzf /tmp/sbt.tgz -C /usr/local/sbt --strip-components=1 && \
    rm /tmp/sbt.tgz && \
    sbt sbtVersion
# Add the python3 package via apk; --no-cache keeps the package index out of the layer.
RUN apk add --no-cache \
        python3
# Download the Spark ${SPARK_VERSION} binary distribution built for Hadoop
# ${HADOOP_VERSION} and unpack it as ./spark, relative to the current working
# directory (this file sets no WORKDIR).
# - The Apache archive keeps every release permanently and is served over
#   HTTPS; download mirrors only carry current releases.
# - The tarball is removed in the same layer so it does not stay in the image.
# TODO(review): verify the tarball against the .sha512 file published next to it.
RUN wget --no-verbose "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    tar -xzf "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz" && \
    mv "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" spark && \
    rm "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz"
# Place build.sbt and spark-submit.sh in the image root.
COPY build.sbt spark-submit.sh /

# Copy the src and data directories from the build context to /src and /data.
COPY src /src
COPY data /data

# Default command: run /spark-submit.sh with bash (exec form, no shell wrapper).
CMD ["/bin/bash", "/spark-submit.sh"]