@@ -61,6 +61,17 @@ COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
 COPY --chown=${STACKABLE_USER_UID}:0 spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR} /stackable/src/spark-k8s/hbase-connectors/stackable/patches/${HBASE_CONNECTOR}
 
 RUN <<EOF
+
+# IMPORTANT: HBase connectors don't support Spark 4 yet, so we skip the build.
+# Watch this PR for updates: https://github.com/apache/hbase-connectors/pull/130
+if [[ "${PRODUCT}" == 4* ]]; then
+    # Create this empty directory so that the following COPY layers succeed.
+    mkdir -p /stackable/spark/jars
+    # Create a dummy tarball to satisfy the COPY layers that expect the Spark 3 source archive.
+    touch hbase-connector-${HBASE_CONNECTOR}-stackable${RELEASE}-src.tar.gz
+    exit 0
+fi
+
 cd "$(/stackable/patchable --images-repo-root=src checkout spark-k8s/hbase-connectors ${HBASE_CONNECTOR})/spark"
 
 NEW_VERSION="${HBASE_CONNECTOR}-stackable${RELEASE}"
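The early `exit 0` works because the heredoc body runs as one shell script inside a single `RUN` layer, so nothing after the `fi` executes for Spark 4. A minimal standalone sketch of the gate, with a hypothetical `PRODUCT` value and scratch paths standing in for the real ones:

    #!/usr/bin/env bash
    # Sketch only; PRODUCT and the /tmp paths are illustrative.
    PRODUCT="4.0.0"
    if [[ "${PRODUCT}" == 4* ]]; then     # bash glob match: any 4.x version
        mkdir -p /tmp/demo/jars           # placeholder so later COPY-like steps succeed
        touch /tmp/demo/dummy-src.tar.gz  # placeholder artifact
        exit 0                            # skip the rest of the script
    fi
    echo "Building the HBase connector for Spark ${PRODUCT}"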
@@ -80,18 +91,10 @@ export JDK_JAVA_OPTIONS="\
     --add-opens java.base/java.util=ALL-UNNAMED"
 
 # Get the Scala version used by Spark
-SCALA_VERSION=$( \
-    mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.version)' )
+SCALA_VERSION=$(grep "scala.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
 
 # Get the Scala binary version used by Spark
-SCALA_BINARY_VERSION=$( \
-    mvn --quiet --non-recursive --file /stackable/spark/pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.binary.version)' )
+SCALA_BINARY_VERSION=$(grep "scala.binary.version" /stackable/spark/pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
 
 # Build the Spark HBase connector
 # Skip the tests because the MiniHBaseCluster does not get ready for
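Swapping `mvn help:evaluate` for `grep`/`awk` avoids starting Maven (and resolving the help plugin) just to read a property. It assumes the property sits on a single line and that its definition is the first matching line in the pom. The mechanics, shown on a hypothetical pom line:

    # Hypothetical input; the real line comes from /stackable/spark/pom.xml.
    line='    <scala.version>2.12.18</scala.version>'
    # awk splits on '<' and '>': $2 is the tag name, $3 the element text.
    echo "${line}" | head -n1 | awk -F '[<>]' '{print $3}'
    # prints: 2.12.18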
@@ -108,6 +111,7 @@ mvn \
     --define hadoop-three.version="${HADOOP_VERSION}" \
     --define hbase.version="${HBASE}" \
     --define skipTests \
+    --define maven.test.skip=true \
     clean package
 
 mkdir -p /stackable/spark/jars
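The two flags are not redundant: `skipTests` only skips test execution, while `maven.test.skip=true` also skips compiling the test sources, which is where much of the time goes. Side by side:

    # Compiles test sources but does not run them:
    mvn --define skipTests clean package
    # Neither compiles nor runs test sources (faster):
    mvn --define maven.test.skip=true clean package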
@@ -157,22 +161,36 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=spark-source-builder \
 COPY --from=hadoop-builder --chown=${STACKABLE_USER_UID}:0 /stackable/patched-libs /stackable/patched-libs
 
 # >>> Build spark
-# Compiling the tests takes a lot of time, so we skip them
-# -Dmaven.test.skip=true skips both the compilation and execution of tests
-# -DskipTests skips only the execution
 RUN <<EOF
 # Make Maven aware of custom Stackable libraries
 mv /stackable/patched-libs/maven /root/.m2/repository
 
 ORIGINAL_VERSION="${PRODUCT}"
 NEW_VERSION="${PRODUCT}-stackable${RELEASE}"
 
+MAVEN_BIN="/usr/bin/mvn"
 export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g"
 
-./dev/make-distribution.sh \
-    -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
-    -DskipTests \
-    -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
+case "${PRODUCT}" in
+    4*)
+        # The Spark 4 script has a --connect option which is not available in Spark 3.
+        # This option is required to build Spark Connect.
+        # It also breaks the Spark 3 build, so we make sure it is only passed here.
+        ./dev/make-distribution.sh \
+            --mvn "${MAVEN_BIN}" \
+            --connect \
+            -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
+            -DskipTests \
+            -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
+        ;;
+    *)
+        ./dev/make-distribution.sh \
+            --mvn "${MAVEN_BIN}" \
+            -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
+            -DskipTests \
+            -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver
+        ;;
+esac
 
 sed -i "s/${NEW_VERSION}/${ORIGINAL_VERSION}/g" assembly/target/bom.json
 EOF
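The two branches differ only in the `--connect` flag. A sketch of an equivalent formulation that collects the Spark-4-only flag in an array instead of duplicating the command (an alternative, not what the patch does; `EXTRA_ARGS` is a name introduced for illustration):

    # Sketch only.
    EXTRA_ARGS=()
    case "${PRODUCT}" in
        4*) EXTRA_ARGS+=(--connect) ;;  # Spark 4 only; breaks the Spark 3 build
    esac
    # Note: expanding an empty array under 'set -u' needs bash >= 4.4.
    ./dev/make-distribution.sh \
        --mvn "${MAVEN_BIN}" \
        "${EXTRA_ARGS[@]}" \
        -Dhadoop.version="${HADOOP_VERSION}-stackable${RELEASE}" \
        -DskipTests \
        -P'hadoop-3' -Pkubernetes -Phive -Phive-thriftserver

The explicit `case` in the patch trades a few duplicated lines for a build step that can be read top to bottom without tracing array contents.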
@@ -183,22 +201,30 @@ EOF
 # we create a new dist/connect folder, and copy them here.
 RUN <<EOF
 
-# Get the Scala binary version
-SCALA_BINARY_VERSION=$( \
-    mvn --quiet --non-recursive --file pom.xml \
-    org.apache.maven.plugins:maven-help-plugin:3.5.0:evaluate \
-    -DforceStdout \
-    -Dexpression='project.properties(scala.binary.version)' )
+SCALA_BINARY_VERSION=$(grep "scala.binary.version" pom.xml | head -n1 | awk -F '[<>]' '{print $3}')
 
 mkdir -p dist/connect
 cd dist/connect
 
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
-cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
-
-# The Spark operator expects a file named spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar without the -stackable${RELEASE} suffix.
+case "${PRODUCT}" in
+    4*)
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/sql/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        ;;
+    *)
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/server/target/spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/common/target/spark-connect-common_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        cp "/stackable/spark-${PRODUCT}-stackable${RELEASE}/connector/connect/client/jvm/target/spark-connect-client-jvm_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" .
+        ;;
+esac
+
+# This link is needed by the operator and is kept for backwards compatibility.
+# TODO: remove it at some point in the future.
 ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}.jar"
+# Link to the spark-connect jar without the Stackable suffix and Scala version.
+# This link supersedes the previous one.
+ln -s "spark-connect_${SCALA_BINARY_VERSION}-${PRODUCT}-stackable${RELEASE}.jar" "spark-connect-${PRODUCT}.jar"
 EOF
 
 # <<< Build spark
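With both links in place, `dist/connect` exposes the Stackable-suffixed jar under three names. A hypothetical listing (version numbers are illustrative; the common and client-jvm jars are omitted):

    $ ls dist/connect
    spark-connect-4.0.0.jar
    spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar
    spark-connect_2.13-4.0.0.jar
    $ readlink dist/connect/spark-connect-4.0.0.jar
    spark-connect_2.13-4.0.0-stackable0.0.0-dev.jar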