Skip to content

Commit e05feac

Browse files
committed
kotlin 2.0.0-RC1, enabled jupyter module. Added basic Sparkify conversion for jupyter. Disabled html renderers in favor of just outputting them as text. Notebooks can render them however they like. RDDs are converted to Datasets before rendering
1 parent ab4c455 commit e05feac

File tree

9 files changed

+203
-85
lines changed

9 files changed

+203
-85
lines changed

buildSrc/src/main/kotlin/Versions.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ object Versions : Dsl<Versions> {
22
const val project = "2.0.0-SNAPSHOT"
33
const val kotlinSparkApiGradlePlugin = "2.0.0-SNAPSHOT"
44
const val groupID = "org.jetbrains.kotlinx.spark"
5-
const val kotlin = "2.0.0-Beta5"
5+
const val kotlin = "2.0.0-RC1"
66
const val jvmTarget = "8"
77
const val jupyterJvmTarget = "8"
88
inline val spark get() = System.getProperty("spark") as String

compiler-plugin/src/main/kotlin/org/jetbrains/kotlinx/spark/api/compilerPlugin/fir/DataClassSparkifySuperTypeGenerator.kt

+2-3
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ class DataClassSparkifySuperTypeGenerator(
3333
}
3434
}
3535

36-
context(TypeResolveServiceContainer)
3736
override fun computeAdditionalSupertypes(
3837
classLikeDeclaration: FirClassLikeDeclaration,
39-
resolvedSupertypes: List<FirResolvedTypeRef>
38+
resolvedSupertypes: List<FirResolvedTypeRef>,
39+
typeResolver: TypeResolveService,
4040
): List<FirResolvedTypeRef> = listOf(
4141
buildResolvedTypeRef {
4242
val scalaProduct = productFqNames.first().let {
@@ -48,7 +48,6 @@ class DataClassSparkifySuperTypeGenerator(
4848
isNullable = false,
4949
)
5050
}
51-
5251
)
5352

5453
override fun needTransformSupertypes(declaration: FirClassLikeDeclaration): Boolean =

examples/build.gradle.kts

+6
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@ import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
33
plugins {
44
// Needs to be installed in the local maven repository or have the bootstrap jar on the classpath
55
id("org.jetbrains.kotlinx.spark.api")
6+
java
67
kotlin("jvm")
8+
kotlin("plugin.noarg") version Versions.kotlin
9+
}
10+
11+
noArg {
12+
annotation("org.jetbrains.kotlinx.spark.examples.NoArg")
713
}
814

915
kotlinSparkApi {

gradle/bootstraps/compiler-plugin.jar

-15 Bytes
Binary file not shown.

gradle/bootstraps/gradle-plugin.jar

2 Bytes
Binary file not shown.

jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/Integration.kt

+133-39
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,43 @@
1919
*/
2020
package org.jetbrains.kotlinx.spark.api.jupyter
2121

22-
import kotlinx.serialization.Serializable
23-
import kotlinx.serialization.json.*
22+
import org.apache.spark.api.java.JavaDoubleRDD
23+
import org.apache.spark.api.java.JavaPairRDD
24+
import org.apache.spark.api.java.JavaRDD
2425
import org.apache.spark.api.java.JavaRDDLike
2526
import org.apache.spark.rdd.RDD
2627
import org.apache.spark.sql.Dataset
2728
import org.intellij.lang.annotations.Language
28-
import org.jetbrains.kotlinx.jupyter.api.*
29+
import org.jetbrains.kotlinx.jupyter.api.Code
30+
import org.jetbrains.kotlinx.jupyter.api.FieldValue
31+
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
32+
import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
33+
import org.jetbrains.kotlinx.jupyter.api.Notebook
34+
import org.jetbrains.kotlinx.jupyter.api.VariableDeclaration
35+
import org.jetbrains.kotlinx.jupyter.api.createRendererByCompileTimeType
36+
import org.jetbrains.kotlinx.jupyter.api.declare
2937
import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
38+
import org.jetbrains.kotlinx.jupyter.api.textResult
39+
import org.jetbrains.kotlinx.spark.api.SparkSession
3040
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayLimitName
3141
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayTruncateName
3242
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.scalaName
3343
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkName
3444
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkPropertiesName
3545
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.versionName
36-
import kotlin.reflect.KProperty1
46+
import org.jetbrains.kotlinx.spark.api.kotlinEncoderFor
47+
import org.jetbrains.kotlinx.spark.api.plugin.annotations.ColumnName
48+
import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify
49+
import scala.Tuple2
50+
import kotlin.reflect.KClass
51+
import kotlin.reflect.KMutableProperty
52+
import kotlin.reflect.full.createType
53+
import kotlin.reflect.full.findAnnotation
54+
import kotlin.reflect.full.isSubtypeOf
55+
import kotlin.reflect.full.memberFunctions
56+
import kotlin.reflect.full.memberProperties
57+
import kotlin.reflect.full.primaryConstructor
58+
import kotlin.reflect.full.valueParameters
3759
import kotlin.reflect.typeOf
3860

3961

@@ -46,9 +68,6 @@ abstract class Integration(private val notebook: Notebook, private val options:
4668
protected val sparkVersion = /*$"\""+spark+"\""$*/ /*-*/ ""
4769
protected val version = /*$"\""+version+"\""$*/ /*-*/ ""
4870

49-
protected val displayLimitOld = "DISPLAY_LIMIT"
50-
protected val displayTruncateOld = "DISPLAY_TRUNCATE"
51-
5271
protected val properties: Properties
5372
get() = notebook
5473
.variablesState[sparkPropertiesName]!!
@@ -101,6 +120,7 @@ abstract class Integration(private val notebook: Notebook, private val options:
101120
)
102121

103122
open val imports: Array<String> = arrayOf(
123+
"org.jetbrains.kotlinx.spark.api.plugin.annotations.*",
104124
"org.jetbrains.kotlinx.spark.api.*",
105125
"org.jetbrains.kotlinx.spark.api.tuples.*",
106126
*(1..22).map { "scala.Tuple$it" }.toTypedArray(),
@@ -116,6 +136,9 @@ abstract class Integration(private val notebook: Notebook, private val options:
116136
"org.apache.spark.streaming.*",
117137
)
118138

139+
// Needs to be set by integration
140+
var spark: SparkSession? = null
141+
119142
override fun Builder.onLoaded() {
120143
dependencies(*dependencies)
121144
import(*imports)
@@ -135,27 +158,6 @@ abstract class Integration(private val notebook: Notebook, private val options:
135158
)
136159
)
137160

138-
@Language("kts")
139-
val _0 = execute(
140-
"""
141-
@Deprecated("Use ${displayLimitName}=${properties.displayLimit} in %use magic or ${sparkPropertiesName}.${displayLimitName} = ${properties.displayLimit} instead", ReplaceWith("${sparkPropertiesName}.${displayLimitName}"))
142-
var $displayLimitOld: Int
143-
get() = ${sparkPropertiesName}.${displayLimitName}
144-
set(value) {
145-
println("$displayLimitOld is deprecated: Use ${sparkPropertiesName}.${displayLimitName} instead")
146-
${sparkPropertiesName}.${displayLimitName} = value
147-
}
148-
149-
@Deprecated("Use ${displayTruncateName}=${properties.displayTruncate} in %use magic or ${sparkPropertiesName}.${displayTruncateName} = ${properties.displayTruncate} instead", ReplaceWith("${sparkPropertiesName}.${displayTruncateName}"))
150-
var $displayTruncateOld: Int
151-
get() = ${sparkPropertiesName}.${displayTruncateName}
152-
set(value) {
153-
println("$displayTruncateOld is deprecated: Use ${sparkPropertiesName}.${displayTruncateName} instead")
154-
${sparkPropertiesName}.${displayTruncateName} = value
155-
}
156-
""".trimIndent()
157-
)
158-
159161
onLoaded()
160162
}
161163

@@ -180,27 +182,119 @@ abstract class Integration(private val notebook: Notebook, private val options:
180182
onShutdown()
181183
}
182184

185+
onClassAnnotation<Sparkify> {
186+
for (klass in it) {
187+
if (klass.isData) {
188+
execute(generateSparkifyClass(klass))
189+
}
190+
}
191+
}
183192

184193
// Render Dataset
185194
render<Dataset<*>> {
186-
with(properties) {
187-
HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate))
188-
}
195+
renderDataset(it)
189196
}
190197

191-
render<RDD<*>> {
192-
with(properties) {
193-
HTML(it.toJavaRDD().toHtml(limit = displayLimit, truncate = displayTruncate))
198+
// using compile time KType, convert this JavaRDDLike to Dataset and render it
199+
notebook.renderersProcessor.registerWithoutOptimizing(
200+
createRendererByCompileTimeType<JavaRDDLike<*, *>> {
201+
if (spark == null) return@createRendererByCompileTimeType it.value.toString()
202+
203+
val rdd = (it.value as JavaRDDLike<*, *>).rdd()
204+
val type = when {
205+
it.type.isSubtypeOf(typeOf<JavaDoubleRDD>()) ->
206+
typeOf<Double>()
207+
208+
it.type.isSubtypeOf(typeOf<JavaPairRDD<*, *>>()) ->
209+
Tuple2::class.createType(
210+
listOf(
211+
it.type.arguments.first(),
212+
it.type.arguments.last(),
213+
)
214+
)
215+
216+
it.type.isSubtypeOf(typeOf<JavaRDD<*>>()) ->
217+
it.type.arguments.first().type!!
218+
219+
else -> it.type.arguments.first().type!!
220+
}
221+
val ds = spark!!.createDataset(rdd, kotlinEncoderFor(type))
222+
renderDataset(ds)
194223
}
195-
}
224+
)
225+
226+
// using compile time KType, convert this RDD to Dataset and render it
227+
notebook.renderersProcessor.registerWithoutOptimizing(
228+
createRendererByCompileTimeType<RDD<*>> {
229+
if (spark == null) return@createRendererByCompileTimeType it.value.toString()
196230

197-
render<JavaRDDLike<*, *>> {
198-
with(properties) {
199-
HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate))
231+
val rdd = it.value as RDD<*>
232+
val type = it.type.arguments.first().type!!
233+
val ds = spark!!.createDataset(rdd, kotlinEncoderFor(type))
234+
renderDataset(ds)
200235
}
236+
)
237+
238+
onLoadedAlsoDo()
239+
}
201240

241+
private fun renderDataset(it: Dataset<*>): MimeTypedResult =
242+
with(properties) {
243+
val showFunction = Dataset::class
244+
.memberFunctions
245+
.firstOrNull { it.name == "showString" && it.valueParameters.size == 3 }
246+
247+
textResult(
248+
if (showFunction != null) {
249+
showFunction.call(it, displayLimit, displayTruncate, false) as String
250+
} else {
251+
// if the function cannot be called, make sure it will call println instead
252+
it.show(displayLimit, displayTruncate)
253+
""
254+
}
255+
)
202256
}
203257

204-
onLoadedAlsoDo()
258+
259+
// TODO wip
260+
private fun generateSparkifyClass(klass: KClass<*>): Code {
261+
// val name = "`${klass.simpleName!!}${'$'}Generated`"
262+
val name = klass.simpleName
263+
val constructorArgs = klass.primaryConstructor!!.parameters
264+
val visibility = klass.visibility?.name?.lowercase() ?: ""
265+
val memberProperties = klass.memberProperties
266+
267+
val properties = constructorArgs.associateWith {
268+
memberProperties.first { it.name == it.name }
269+
}
270+
271+
val constructorParamsCode = properties.entries.joinToString("\n") { (param, prop) ->
272+
// TODO check override
273+
if (param.isOptional) TODO()
274+
val modifier = if (prop is KMutableProperty<*>) "var" else "val"
275+
val paramVisiblity = prop.visibility?.name?.lowercase() ?: ""
276+
val columnName = param.findAnnotation<ColumnName>()?.name ?: param.name!!
277+
278+
"| @get:kotlin.jvm.JvmName(\"$columnName\") $paramVisiblity $modifier ${param.name}: ${param.type},"
279+
}
280+
281+
val productElementWhenParamsCode = properties.entries.joinToString("\n") { (param, _) ->
282+
"| ${param.index} -> this.${param.name}"
283+
}
284+
285+
@Language("kotlin")
286+
val code = """
287+
|$visibility data class $name(
288+
$constructorParamsCode
289+
|): scala.Product, java.io.Serializable {
290+
| override fun canEqual(that: Any?): Boolean = that is $name
291+
| override fun productArity(): Int = ${constructorArgs.size}
292+
| override fun productElement(n: Int): Any = when (n) {
293+
$productElementWhenParamsCode
294+
| else -> throw IndexOutOfBoundsException()
295+
| }
296+
|}
297+
""".trimMargin()
298+
return code
205299
}
206300
}

jupyter/src/main/kotlin/org/jetbrains/kotlinx/spark/api/jupyter/SparkIntegration.kt

+4-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ package org.jetbrains.kotlinx.spark.api.jupyter
2525
import org.intellij.lang.annotations.Language
2626
import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
2727
import org.jetbrains.kotlinx.jupyter.api.Notebook
28+
import org.jetbrains.kotlinx.spark.api.SparkSession
2829
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.appNameName
2930
import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkMasterName
3031

@@ -86,7 +87,7 @@ class SparkIntegration(notebook: Notebook, options: MutableMap<String, String?>)
8687
"""
8788
inline fun <reified T> dfOf(vararg arg: T): Dataset<Row> = spark.dfOf(*arg)""".trimIndent(),
8889
"""
89-
inline fun <reified T> emptyDataset(): Dataset<T> = spark.emptyDataset(encoder<T>())""".trimIndent(),
90+
inline fun <reified T> emptyDataset(): Dataset<T> = spark.emptyDataset(kotlinEncoderFor<T>())""".trimIndent(),
9091
"""
9192
inline fun <reified T> dfOf(colNames: Array<String>, vararg arg: T): Dataset<Row> = spark.dfOf(colNames, *arg)""".trimIndent(),
9293
"""
@@ -108,6 +109,8 @@ class SparkIntegration(notebook: Notebook, options: MutableMap<String, String?>)
108109
"""
109110
inline fun <RETURN, reified NAMED_UDF : NamedUserDefinedFunction<RETURN, *>> UserDefinedFunction<RETURN, NAMED_UDF>.register(name: String): NAMED_UDF = spark.udf().register(name = name, udf = this)""".trimIndent(),
110111
).map(::execute)
112+
113+
spark = execute("spark").value as SparkSession
111114
}
112115

113116
override fun KotlinKernelHost.onShutdown() {

0 commit comments

Comments
 (0)