@@ -19,21 +19,43 @@
  */
 package org.jetbrains.kotlinx.spark.api.jupyter
 
-import kotlinx.serialization.Serializable
-import kotlinx.serialization.json.*
+import org.apache.spark.api.java.JavaDoubleRDD
+import org.apache.spark.api.java.JavaPairRDD
+import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.java.JavaRDDLike
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Dataset
 import org.intellij.lang.annotations.Language
-import org.jetbrains.kotlinx.jupyter.api.*
+import org.jetbrains.kotlinx.jupyter.api.Code
+import org.jetbrains.kotlinx.jupyter.api.FieldValue
+import org.jetbrains.kotlinx.jupyter.api.KotlinKernelHost
+import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult
+import org.jetbrains.kotlinx.jupyter.api.Notebook
+import org.jetbrains.kotlinx.jupyter.api.VariableDeclaration
+import org.jetbrains.kotlinx.jupyter.api.createRendererByCompileTimeType
+import org.jetbrains.kotlinx.jupyter.api.declare
 import org.jetbrains.kotlinx.jupyter.api.libraries.JupyterIntegration
+import org.jetbrains.kotlinx.jupyter.api.textResult
+import org.jetbrains.kotlinx.spark.api.SparkSession
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayLimitName
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.displayTruncateName
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.scalaName
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkName
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.sparkPropertiesName
 import org.jetbrains.kotlinx.spark.api.jupyter.Properties.Companion.versionName
-import kotlin.reflect.KProperty1
+import org.jetbrains.kotlinx.spark.api.kotlinEncoderFor
+import org.jetbrains.kotlinx.spark.api.plugin.annotations.ColumnName
+import org.jetbrains.kotlinx.spark.api.plugin.annotations.Sparkify
+import scala.Tuple2
+import kotlin.reflect.KClass
+import kotlin.reflect.KMutableProperty
+import kotlin.reflect.full.createType
+import kotlin.reflect.full.findAnnotation
+import kotlin.reflect.full.isSubtypeOf
+import kotlin.reflect.full.memberFunctions
+import kotlin.reflect.full.memberProperties
+import kotlin.reflect.full.primaryConstructor
+import kotlin.reflect.full.valueParameters
 import kotlin.reflect.typeOf
 
 
@@ -46,9 +68,6 @@ abstract class Integration(private val notebook: Notebook, private val options:
     protected val sparkVersion = /*$"\""+spark+"\""$*/ /*-*/ ""
     protected val version = /*$"\""+version+"\""$*/ /*-*/ ""
 
-    protected val displayLimitOld = "DISPLAY_LIMIT"
-    protected val displayTruncateOld = "DISPLAY_TRUNCATE"
-
     protected val properties: Properties
         get() = notebook
             .variablesState[sparkPropertiesName]!!
@@ -101,6 +120,7 @@ abstract class Integration(private val notebook: Notebook, private val options:
     )
 
     open val imports: Array<String> = arrayOf(
+        "org.jetbrains.kotlinx.spark.api.plugin.annotations.*",
        "org.jetbrains.kotlinx.spark.api.*",
        "org.jetbrains.kotlinx.spark.api.tuples.*",
        *(1..22).map { "scala.Tuple$it" }.toTypedArray(),
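
With the plugin annotations package now in the notebook's default imports, users can mark data classes for Spark directly in a cell. A minimal sketch of such a cell (the Person class and its fields are invented for illustration; @Sparkify and @ColumnName come from the newly imported package):

@Sparkify
data class Person(
    val name: String,
    @ColumnName("birth_year") val birthYear: Int,
)

The onClassAnnotation<Sparkify> hook added further down in this diff picks such classes up and regenerates them as scala.Product implementations.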
@@ -116,6 +136,9 @@ abstract class Integration(private val notebook: Notebook, private val options:
         "org.apache.spark.streaming.*",
     )
 
+    // Needs to be set by the integration
+    var spark: SparkSession? = null
+
     override fun Builder.onLoaded() {
         dependencies(*dependencies)
         import(*imports)
@@ -135,27 +158,6 @@ abstract class Integration(private val notebook: Notebook, private val options:
             )
         )
 
-        @Language("kts")
-        val _0 = execute(
-            """
-                @Deprecated("Use ${displayLimitName}=${properties.displayLimit} in %use magic or ${sparkPropertiesName}.${displayLimitName} = ${properties.displayLimit} instead", ReplaceWith("${sparkPropertiesName}.${displayLimitName}"))
-                var $displayLimitOld: Int
-                    get() = ${sparkPropertiesName}.${displayLimitName}
-                    set(value) {
-                        println("$displayLimitOld is deprecated: Use ${sparkPropertiesName}.${displayLimitName} instead")
-                        ${sparkPropertiesName}.${displayLimitName} = value
-                    }
-
-                @Deprecated("Use ${displayTruncateName}=${properties.displayTruncate} in %use magic or ${sparkPropertiesName}.${displayTruncateName} = ${properties.displayTruncate} instead", ReplaceWith("${sparkPropertiesName}.${displayTruncateName}"))
-                var $displayTruncateOld: Int
-                    get() = ${sparkPropertiesName}.${displayTruncateName}
-                    set(value) {
-                        println("$displayTruncateOld is deprecated: Use ${sparkPropertiesName}.${displayTruncateName} instead")
-                        ${sparkPropertiesName}.${displayTruncateName} = value
-                    }
-            """.trimIndent()
-        )
-
         onLoaded()
     }
 
@@ -180,27 +182,119 @@ abstract class Integration(private val notebook: Notebook, private val options:
             onShutdown()
         }
 
+        onClassAnnotation<Sparkify> {
+            for (klass in it) {
+                if (klass.isData) {
+                    execute(generateSparkifyClass(klass))
+                }
+            }
+        }
 
         // Render Dataset
         render<Dataset<*>> {
-            with(properties) {
-                HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate))
-            }
+            renderDataset(it)
         }
 
-        render<RDD<*>> {
-            with(properties) {
-                HTML(it.toJavaRDD().toHtml(limit = displayLimit, truncate = displayTruncate))
+        // using the compile-time KType, convert this JavaRDDLike to a Dataset and render it
+        notebook.renderersProcessor.registerWithoutOptimizing(
+            createRendererByCompileTimeType<JavaRDDLike<*, *>> {
+                if (spark == null) return@createRendererByCompileTimeType it.value.toString()
+
+                val rdd = (it.value as JavaRDDLike<*, *>).rdd()
+                val type = when {
+                    it.type.isSubtypeOf(typeOf<JavaDoubleRDD>()) ->
+                        typeOf<Double>()
+
+                    it.type.isSubtypeOf(typeOf<JavaPairRDD<*, *>>()) ->
+                        Tuple2::class.createType(
+                            listOf(
+                                it.type.arguments.first(),
+                                it.type.arguments.last(),
+                            )
+                        )
+
+                    it.type.isSubtypeOf(typeOf<JavaRDD<*>>()) ->
+                        it.type.arguments.first().type!!
+
+                    else -> it.type.arguments.first().type!!
+                }
+                val ds = spark!!.createDataset(rdd, kotlinEncoderFor(type))
+                renderDataset(ds)
             }
-        }
+        )
+
+        // using the compile-time KType, convert this RDD to a Dataset and render it
+        notebook.renderersProcessor.registerWithoutOptimizing(
+            createRendererByCompileTimeType<RDD<*>> {
+                if (spark == null) return@createRendererByCompileTimeType it.value.toString()
 
-        render<JavaRDDLike<*, *>> {
-            with(properties) {
-                HTML(it.toHtml(limit = displayLimit, truncate = displayTruncate))
+                val rdd = it.value as RDD<*>
+                val type = it.type.arguments.first().type!!
+                val ds = spark!!.createDataset(rdd, kotlinEncoderFor(type))
+                renderDataset(ds)
             }
+        )
+
+        onLoadedAlsoDo()
+    }
 
+    private fun renderDataset(it: Dataset<*>): MimeTypedResult =
+        with(properties) {
+            val showFunction = Dataset::class
+                .memberFunctions
+                .firstOrNull { it.name == "showString" && it.valueParameters.size == 3 }
+
+            textResult(
+                if (showFunction != null) {
+                    showFunction.call(it, displayLimit, displayTruncate, false) as String
+                } else {
+                    // if the function cannot be called, make sure it will call println instead
+                    it.show(displayLimit, displayTruncate)
+                    ""
+                }
+            )
         }
 
-        onLoadedAlsoDo()
+
+    // TODO wip
+    private fun generateSparkifyClass(klass: KClass<*>): Code {
+        // val name = "`${klass.simpleName!!}${'$'}Generated`"
+        val name = klass.simpleName
+        val constructorArgs = klass.primaryConstructor!!.parameters
+        val visibility = klass.visibility?.name?.lowercase() ?: ""
+        val memberProperties = klass.memberProperties
+
+        val properties = constructorArgs.associateWith { param ->
+            memberProperties.first { it.name == param.name }
+        }
+
+        val constructorParamsCode = properties.entries.joinToString("\n") { (param, prop) ->
+            // TODO check override
+            if (param.isOptional) TODO()
+            val modifier = if (prop is KMutableProperty<*>) "var" else "val"
+            val paramVisibility = prop.visibility?.name?.lowercase() ?: ""
+            val columnName = param.findAnnotation<ColumnName>()?.name ?: param.name!!
+
+            "|    @get:kotlin.jvm.JvmName(\"$columnName\") $paramVisibility $modifier ${param.name}: ${param.type},"
+        }
+
+        val productElementWhenParamsCode = properties.entries.joinToString("\n") { (param, _) ->
+            "|        ${param.index} -> this.${param.name}"
+        }
+
+        @Language("kotlin")
+        val code = """
+            |$visibility data class $name(
+            $constructorParamsCode
+            |): scala.Product, java.io.Serializable {
+            |    override fun canEqual(that: Any?): Boolean = that is $name
+            |    override fun productArity(): Int = ${constructorArgs.size}
+            |    override fun productElement(n: Int): Any = when (n) {
+            $productElementWhenParamsCode
+            |        else -> throw IndexOutOfBoundsException()
+            |    }
+            |}
+        """.trimMargin()
+        return code
     }
 }
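
To make the generateSparkifyClass template concrete: assuming a notebook cell declared the hypothetical Person class from the sketch above, the function would emit roughly the following code (modulo whitespace from trimMargin):

public data class Person(
    @get:kotlin.jvm.JvmName("name") public val name: kotlin.String,
    @get:kotlin.jvm.JvmName("birth_year") public val birthYear: kotlin.Int,
): scala.Product, java.io.Serializable {
    override fun canEqual(that: Any?): Boolean = that is Person
    override fun productArity(): Int = 2
    override fun productElement(n: Int): Any = when (n) {
        0 -> this.name
        1 -> this.birthYear
        else -> throw IndexOutOfBoundsException()
    }
}

The @get:JvmName annotation pins each getter's JVM name to the (possibly @ColumnName-overridden) column name, so Spark resolves columns on the generated class much as it would on a Scala case class.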
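
Finally, a standalone sketch of the KType reconstruction used in the JavaPairRDD branch of the renderer above; this is illustrative only and assumes kotlin-reflect and scala-library are on the classpath:

import scala.Tuple2
import kotlin.reflect.KTypeProjection
import kotlin.reflect.full.createType
import kotlin.reflect.typeOf

fun main() {
    // A JavaPairRDD<String, Int> erases its element type at runtime, but the renderer
    // still sees the compile-time type arguments. Rebuilding the element type as
    // scala.Tuple2<String, Int> is what lets kotlinEncoderFor(type) produce an
    // encoder matching the pair elements.
    val key = KTypeProjection.invariant(typeOf<String>())
    val value = KTypeProjection.invariant(typeOf<Int>())
    val elementType = Tuple2::class.createType(listOf(key, value))
    println(elementType) // scala.Tuple2<kotlin.String, kotlin.Int>
}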