mahmoudparsian
diff --git a/‎.gitignore
+1-2 b/‎.gitignore
+1-2
diff --git a/‎code/chap01/scala/.gitignore
-4 b/‎code/chap01/scala/.gitignore
-4
diff --git a/‎code/chap02/scala/.gitignore
-4 b/‎code/chap02/scala/.gitignore
-4
diff --git a/‎code/chap03/scala/.gitignore
-3 b/‎code/chap03/scala/.gitignore
-3
diff --git a/‎code/chap04/scala/.gitignore
-3 b/‎code/chap04/scala/.gitignore
-3
diff --git a/‎code/chap06/scala/.gitignore
-3 b/‎code/chap06/scala/.gitignore
-3
diff --git a/‎code/chap07/scala/.gitignore
-3 b/‎code/chap07/scala/.gitignore
-3
diff --git a/‎code/chap08/scala/.gitignore
-3 b/‎code/chap08/scala/.gitignore
-3
diff --git a/‎code/chap10/scala/.gitignore
+1 b/‎code/chap10/scala/.gitignore
+1
diff --git a/‎code/chap10/scala/README.md
+63-1 b/‎code/chap10/scala/README.md
+63-1
diff --git a/‎code/chap10/scala/build.gradle
+24 b/‎code/chap10/scala/build.gradle
+24
diff --git a/‎code/chap10/scala/data/sample_dna_seq.txt
+11 b/‎code/chap10/scala/data/sample_dna_seq.txt
+11
diff --git a/‎code/chap10/scala/data/sample_input.txt
+12 b/‎code/chap10/scala/data/sample_input.txt
+12
diff --git a/‎code/chap10/scala/data/sample_numbers.txt
+11 b/‎code/chap10/scala/data/sample_numbers.txt
+11
diff --git a/‎code/chap10/scala/gradle/wrapper/gradle-wrapper.jar
57.8 KB b/‎code/chap10/scala/gradle/wrapper/gradle-wrapper.jar
57.8 KB
diff --git a/‎code/chap10/scala/gradle/wrapper/gradle-wrapper.properties
+5 b/‎code/chap10/scala/gradle/wrapper/gradle-wrapper.properties
+5
diff --git a/‎code/chap10/scala/gradlew
+185 b/‎code/chap10/scala/gradlew
+185
@@ -3,5 +3,4 @@
 build
 .gradle
 .idea
-!gradle-wrapper.jar
-scala
+!gradle-wrapper.jar
@@ -0,0 +1 @@
+data/*.gz
@@ -1 +1,63 @@
-Scala Solutions
+# Chapter 10
+
+This chapter's programs cover the following algorithms:
+* ### Average Monoid Use Aggregate By Key
+    * `org.data.algorithms.spark.ch10.AverageMonoidUseAggregateByKey` (Spark Program)
+    * `./run_spark_applications_scripts/average_monoid_use_aggregate_by_key.sh` (Shell Script to call the spark application)
+* ### Average Monoid Use Combine By Key
+    * `org.data.algorithms.spark.ch10.AverageMonoidUseCombineByKey` (Spark Program)
+    * `./run_spark_applications_scripts/average_monoid_use_combine_by_key.sh` (Shell Script to call the spark application)
+* ### Average Monoid Use Group By Key
+    * `org.data.algorithms.spark.ch10.AverageMonoidUseGroupByKey` (Spark Program)
+    * `./run_spark_applications_scripts/average_monoid_use_group_by_key.sh` (Shell Script to call the spark application)
+* ### Average Monoid Use Reduce By Key
+    * `org.data.algorithms.spark.ch10.AverageMonoidUseReduceByKey` (Spark Program)
+    * `./run_spark_applications_scripts/average_monoid_use_reduce_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic In-Mapper Combiner Using Combine By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicInMapperCombinerUsingCombineByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_in_mapper_combiner_using_combine_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic In-Mapper Combiner Using Group By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicInMapperCombinerUsingGroupByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_in_mapper_combiner_using_group_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic In-Mapper Combiner Using Reduce By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicInMapperCombinerUsingReduceByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_in_mapper_combiner_using_reduce_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic Using Combine By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicUsingCombineByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_using_combine_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic Using Group By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicUsingGroupByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_using_group_by_key.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic Using MapPartitions
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicUsingMappartitions` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_using_mappartitions.sh` (Shell Script to call the spark application)
+* ### DNA Base Count Basic Using Reduce By Key
+    * `org.data.algorithms.spark.ch10.DNABaseCountBasicUsingReduceByKey` (Spark Program)
+    * `./run_spark_applications_scripts/dna_base_count_basic_using_reduce_by_key.sh` (Shell Script to call the spark application)
+* ### In-Mapper Combiner Use MapPartitions
+    * `org.data.algorithms.spark.ch10.InMapperCombinerUseMappartitions` (Spark Program)
+    * `./run_spark_applications_scripts/in_mapper_combiner_use_mappartitions.sh` (Shell Script to call the spark application)
+* ### In-Mapper Combiner Using Local Aggregation
+    * `org.data.algorithms.spark.ch10.InMapperCombinerUsingLocalAggregation` (Spark Program)
+    * `./run_spark_applications_scripts/in_mapper_combiner_using_local_aggregation.sh` (Shell Script to call the spark application)
+* ### In-Mapper Combiner Using Map Reduce
+    * `org.data.algorithms.spark.ch10.InMapperCombinerUsingMapReduce` (Spark Program)
+    * `./run_spark_applications_scripts/in_mapper_combiner_using_map_reduce.sh` (Shell Script to call the spark application)
+* ### Min Max Force Empty Partitions
+    * `org.data.algorithms.spark.ch10.MinMaxForceEmptyPartitions` (Spark Program)
+    * `./run_spark_applications_scripts/min_max_force_empty_partitions.sh` (Shell Script to call the spark application)
+* ### Min Max Use MapPartitions
+    * `org.data.algorithms.spark.ch10.MinMaxUseMappartitions` (Spark Program)
+    * `./run_spark_applications_scripts/min_max_use_mappartitions.sh` (Shell Script to call the spark application)
+* ### Structured To Hierarchical To XML DataFrame
+    * `org.data.algorithms.spark.ch10.StructuredToHierarchicalToXmlDataframe` (Spark Program)
+    * `./run_spark_applications_scripts/structured_to_hierarchical_to_xml_dataframe.sh` (Shell Script to call the spark application)
+* ### Structured To Hierarchical To XML RDD
+    * `org.data.algorithms.spark.ch10.StructuredToHierarchicalToXmlRDD` (Spark Program)
+    * `./run_spark_applications_scripts/structured_to_hierarchical_to_xml_rdd.sh` (Shell Script to call the spark application)
+* ### Top N Use Map Partitions
+    * `org.data.algorithms.spark.ch10.TopNUseMapPartitions` (Spark Program)
+    * `./run_spark_applications_scripts/top_n_use_map_partitions.sh` (Shell Script to call the spark application)
+* ### Top N Use Take Ordered
+    * `org.data.algorithms.spark.ch10.TopNUseTakeOrdered` (Spark Program)
+    * `./run_spark_applications_scripts/top_n_use_take_ordered.sh` (Shell Script to call the spark application)
@@ -0,0 +1,24 @@
+// Gradle build for the Chapter 10 Scala/Spark examples.
+// 'scala' compiles the Scala sources; 'application' adds the ability to launch a main class.
+apply plugin: 'scala'
+apply plugin: 'application'
+
+// scalaClassifier is the suffix appended to the Spark artifact names below
+// (spark-core_2.13, spark-sql_2.13); scalaVersion and sparkVersion pin the
+// exact library versions.
+ext.scalaClassifier = '2.13'
+ext.scalaVersion = '2.13.7'
+ext.sparkVersion = '3.2.0'
+
+group 'org.data.algorithms.spark.ch10'
+version '1.0-SNAPSHOT'
+
+// Resolve dependencies from the local Maven repository first, then Maven Central.
+repositories {
+    mavenLocal()
+    mavenCentral()
+}
+
+dependencies {
+    implementation "org.scala-lang:scala-library:$scalaVersion"
+    implementation "org.apache.spark:spark-core_$scalaClassifier:$sparkVersion"
+    implementation "org.apache.spark:spark-sql_$scalaClassifier:$sparkVersion"
+}
+
+application {
+    // The entry point comes from the "mainClass" project property when it is
+    // defined (e.g. -PmainClass=<fully.qualified.Class>); otherwise the
+    // placeholder string "NULL" is used.
+    // NOTE(review): "NULL" does not name a real class, so launching without
+    // the property presumably fails at start-up -- confirm this is intended.
+    mainClass = project.hasProperty("mainClass") ? project.getProperty("mainClass") : "NULL"
+}
@@ -0,0 +1,11 @@
+ATCGGGATCCGGG
+ATTCCGGGATTCCCC
+ATGGCCCCCGGGATCGGG
+CGGTATCCGGGGAAAAA
+aaattCCGGAACCGGGGGTTT
+CCTTTTATCGGGCAAATTTTCCCGG
+attttcccccggaaaAAATTTCCGGG
+ACTGACTAGCTAGCTAACTG
+GCATCGTAGCTAGCTACGAT
+AATTCCCGCATCGATCGTACGTACGTAG
+ATCGATCGATCGTACGATCG
@@ -0,0 +1,12 @@
+a,2
+a,3
+a,4
+a,5
+a,7
+b,4
+b,5
+b,6
+c,3
+c,4
+c,5
+c,6
@@ -0,0 +1,11 @@
+23,24,22,44,66,77,44,44,555,666
+12,4,555,66,67,68,57,55,56,45,45,45,66,77
+34,35,36,97300,78,79
+120,44,444,445,345,345,555
+11,33,34,35,36,37,47,7777,8888,6666,44,55
+10,11,44,66,77,78,79,80,90,98,99,100,102,103,104,105
+6,7,8,9,10
+8,9,10,12,12
+7777
+222,333,444,555,666,111,112,5,113,114
+5555,4444,24
@@ -0,0 +1,5 @@
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-6.8-bin.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
@@ -0,0 +1,185 @@
+#!/usr/bin/env sh
+
+#
+# Copyright 2015 the original author or authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+##############################################################################
+##
+##  Gradle start up script for UN*X
+##
+##############################################################################
+
+# Determine APP_HOME, the physical directory that contains this script.
+# $0 may itself be a symbolic link, so follow the chain until PRG is not a link.
+PRG="$0"
+# Each pass extracts the link target from `ls -ld` output (the text after "-> ").
+# An absolute target replaces PRG outright; a relative target is resolved
+# against the directory of the link that pointed to it.
+while [ -h "$PRG" ] ; do
+    ls=`ls -ld "$PRG"`
+    link=`expr "$ls" : '.*-> \(.*\)$'`
+    if expr "$link" : '/.*' > /dev/null; then
+        PRG="$link"
+    else
+        PRG=`dirname "$PRG"`"/$link"
+    fi
+done
+# Temporarily cd into the script's directory so `pwd -P` reports its physical
+# path, then return to the caller's working directory.
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+# APP_NAME is used for the dock name on Darwin; APP_BASE_NAME (the name this
+# script was invoked as) is passed to the JVM as -Dorg.gradle.appname.
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+# The inner double quotes are kept literally here and are only interpreted by
+# the `eval set --` near the end of the script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# File-descriptor limit to request. "maximum" (or "max") means use the hard
+# limit reported by `ulimit -H -n`; any other value is passed to `ulimit -n` as is.
+MAX_FD="maximum"
+
+# Print all arguments, joined into one message, and continue.
+# NOTE(review): the message is written to stdout, not stderr.
+warn () {
+    echo "$*"
+}
+
+# Print all arguments as one message, framed by blank lines, then exit with
+# status 1.
+# NOTE(review): the message is written to stdout, not stderr.
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+# Exactly one flag is switched to true when the `uname` output starts with a
+# known prefix. The flags are tested further down to skip the file-descriptor
+# bump, add dock options, or convert paths to Windows form.
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+  CYGWIN* )
+    cygwin=true
+    ;;
+  Darwin* )
+    darwin=true
+    ;;
+  MSYS* | MINGW* )
+    msys=true
+    ;;
+  NONSTOP* )
+    nonstop=true
+    ;;
+esac
+
+# The wrapper jar under APP_HOME is the only classpath entry.
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+# With JAVA_HOME set, use the java binary inside it and abort if that path is
+# not executable. Without JAVA_HOME, fall back to `java` on PATH and abort if
+# `which` cannot find it.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+        JAVACMD="$JAVA_HOME/bin/java"
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD="java"
+    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+# Skipped on Cygwin, Darwin and NonStop. Failures here only produce a warning;
+# the script keeps going.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+    MAX_FD_LIMIT=`ulimit -H -n`
+    # $? is the exit status of the `ulimit -H -n` query in the assignment above.
+    if [ $? -eq 0 ] ; then
+        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+            MAX_FD="$MAX_FD_LIMIT"
+        fi
+        ulimit -n $MAX_FD
+        if [ $? -ne 0 ] ; then
+            warn "Could not set maximum file descriptor limit: $MAX_FD"
+        fi
+    else
+        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+    fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+# The options are appended to GRADLE_OPTS with escaped quotes so that the
+# `eval set --` at the end of the script treats each one as a single argument.
+if $darwin; then
+    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
+    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+
+    JAVACMD=`cygpath --unix "$JAVACMD"`
+
+    # We build the pattern for arguments to be converted via cygpath
+    # ROOTDIRS becomes a "|"-separated list of the directories directly under /,
+    # so the pattern matches arguments that start with one of them.
+    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+    SEP=""
+    for dir in $ROOTDIRSRAW ; do
+        ROOTDIRS="$ROOTDIRS$SEP$dir"
+        SEP="|"
+    done
+    OURCYGPATTERN="(^($ROOTDIRS))"
+    # Add a user-defined pattern to the cygpath arguments
+    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+    fi
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    # Each argument is stored in a numbered variable (args0, args1, ...) via
+    # eval, because plain sh has no arrays.
+    i=0
+    for arg in "$@" ; do
+        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Non-zero when the argument is an option (starts with '-')
+
+        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Convert only path-like arguments that are not options
+            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+        else
+            eval `echo args$i`="\"$arg\""
+        fi
+        i=`expr $i + 1`
+    done
+    # Rebuild the positional parameters from the numbered variables.
+    # NOTE(review): only 0-9 arguments are handled; with 10 or more, no arm
+    # matches and the original (unconverted) arguments are left in place.
+    case $i in
+        0) set -- ;;
+        1) set -- "$args0" ;;
+        2) set -- "$args0" "$args1" ;;
+        3) set -- "$args0" "$args1" "$args2" ;;
+        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+    esac
+fi
+
+# Escape application args
+# Prints each argument wrapped in single quotes so it survives a later `eval`:
+# every embedded ' becomes '\'' , a ' is prepended to the argument's first line,
+# and "' \" (closing quote plus line continuation) is appended to its last line.
+# A final line holding a single space ends the list.
+save () {
+    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
+    echo " "
+}
+APP_ARGS=`save "$@"`
+
+# Collect all arguments for the java command, following the shell quoting and substitution rules
+eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+
+exec "$JAVACMD" "$@"
-Original file line number
+Diff line change
 +a,2
 +a,3
 +a,4
 +a,5
 +a,7
 +b,4
 +b,5
 +b,6
 +c,3
 +c,4
 +c,5
 +c,6