4545import java .util .concurrent .Executors ;
4646import java .util .concurrent .TimeUnit ;
4747import java .util .function .BiConsumer ;
48- import java .util .regex .Matcher ;
49- import java .util .regex .Pattern ;
5048import javax .annotation .Nullable ;
5149
5250/**
@@ -57,8 +55,7 @@ public abstract class AbstractSparkSubmitter implements SparkSubmitter {
5755 private static final Logger LOG = LoggerFactory .getLogger (AbstractSparkSubmitter .class );
5856
5957 // Transforms LocalizeResource to URI string
60- private static final Function <LocalizeResource , String > RESOURCE_TO_PATH = input ->
61- input .getURI ().toString ().split ("#" )[0 ];
58+ private static final Function <LocalizeResource , String > RESOURCE_TO_PATH = input -> input .getURI ().toString ();
6259
6360 @ Override
6461 public final <V > SparkJobFuture <V > submit (SparkRuntimeContext runtimeContext ,
@@ -187,6 +184,10 @@ protected boolean waitForFinish() throws Exception {
187184 return true ;
188185 }
189186
187+ protected Function <LocalizeResource , String > getLocalizeResourceToURIFunc () {
188+ return RESOURCE_TO_PATH ;
189+ }
190+
190191 /**
191192 * Submits the Spark job using {@link SparkSubmit}.
192193 *
@@ -208,21 +209,7 @@ private void submit(SparkRuntimeContext runtimeContext, String[] args) {
208209 ClassLoaders .setContextClassLoader (oldClassLoader );
209210 }
210211 }
211- private static final Pattern LOCAL_MASTER_PATTERN = Pattern .compile ("local\\ [([0-9]+|\\ *)\\ ]" );
212- protected void addMasterPOC (Map <String , String > configs , ImmutableList .Builder <String > argBuilder ) {
213- // Use at least two threads for Spark Streaming
214- String masterArg = "local[2]" ;
215-
216- String master = configs .get ("spark.master" );
217- if (master != null ) {
218- Matcher matcher = LOCAL_MASTER_PATTERN .matcher (master );
219- if (matcher .matches ()) {
220- masterArg = "local[" + matcher .group (1 ) + "]" ;
221- }
222- }
223212
224- argBuilder .add ("--master" ).add (masterArg );
225- }
226213 /**
227214 * Creates the list of arguments that will be used for calling {@link SparkSubmit#main(String[])}.
228215 *
@@ -241,22 +228,16 @@ private List<String> createSubmitArguments(SparkRuntimeContext runtimeContext, M
241228 Iterable <LocalizeResource > archivesIterable = getArchives (resources );
242229 Iterable <LocalizeResource > filesIterable = getFiles (resources );
243230
244- // addMaster(configs, builder);
245- addMasterPOC (configs , builder );
231+ addMaster (configs , builder );
246232 builder .add ("--conf" ).add ("spark.app.name=" + spec .getName ());
247233
248234 configs .putAll (generateSubmitConf (configs ));
249- // TODO : Error : for distributed spark : $destFile exists and does not match contents
250- configs .put ("spark.files" ,"" );
251- configs .put ("spark.jars" ,"" );
252- configs .put ("spark.repl.local.jars" ,"" );
253- // TODO : Error : DataprocMetricsListener is not a subclass of org.apache.spark.scheduler.SparkListenerInterface
254- configs .put ("spark.dataproc.listeners" ,"" );
255235 BiConsumer <String , String > confAdder = (k , v ) -> builder .add ("--conf" ).add (k + "=" + v );
256236 configs .forEach (confAdder );
257237
258- String archives = Joiner .on (',' ).join (Iterables .transform (archivesIterable , RESOURCE_TO_PATH ));
259- String files = Joiner .on (',' ).join (Iterables .transform (filesIterable , RESOURCE_TO_PATH ));
238+ String archives = Joiner .on (',' ).join (Iterables .transform (archivesIterable ,
239+ getLocalizeResourceToURIFunc ()));
240+ String files = Joiner .on (',' ).join (Iterables .transform (filesIterable , getLocalizeResourceToURIFunc ()));
260241
261242 if (!Strings .isNullOrEmpty (archives )) {
262243 builder .add ("--archives" ).add (archives );
0 commit comments