Skip to content

Commit 00b712b

Browse files
authored
Merge pull request #1147 from Kotlin/json-extraction
Json module extraction
2 parents 9e462a6 + 780b61f commit 00b712b

File tree

65 files changed

+848
-3339
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+848
-3339
lines changed

build.gradle.kts

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ dependencies {
5454
api(projects.dataframeExcel)
5555
api(projects.dataframeJdbc)
5656
api(projects.dataframeCsv)
57+
api(projects.dataframeJson)
5758

5859
// experimental, so not included by default:
5960
// api(projects.dataframeOpenapi)
@@ -64,6 +65,7 @@ dependencies {
6465
kover(projects.dataframeOpenapi)
6566
kover(projects.dataframeJdbc)
6667
kover(projects.dataframeCsv)
68+
kover(projects.dataframeJson)
6769
kover(projects.plugins.kotlinDataframe)
6870
kover(projects.dataframeJupyter)
6971
}

core/api/core.api

+19-101
Large diffs are not rendered by default.

core/build.gradle.kts

+3-2
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,6 @@ dependencies {
6666
api(libs.commonsCsv)
6767

6868
implementation(libs.commonsIo)
69-
implementation(libs.serialization.core)
70-
implementation(libs.serialization.json)
7169
implementation(libs.fastDoubleParser)
7270

7371
api(libs.kotlin.datetimeJvm)
@@ -82,6 +80,9 @@ dependencies {
8280
testImplementation(libs.kotlin.scriptingJvm)
8381
testImplementation(libs.jsoup)
8482
testImplementation(libs.sl4jsimple)
83+
testImplementation(projects.dataframeJson)
84+
testImplementation(libs.serialization.core)
85+
testImplementation(libs.serialization.json)
8586

8687
// for checking results
8788
testImplementation(libs.commonsStatisticsDescriptive)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/annotations/ImportDataSchema.kt

+11-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import org.jetbrains.kotlinx.dataframe.api.KeyValueProperty
55
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
66
import org.jetbrains.kotlinx.dataframe.columns.FrameColumn
77
import org.jetbrains.kotlinx.dataframe.documentation.UnifyingNumbers
8-
import org.jetbrains.kotlinx.dataframe.io.JSON
98

109
/**
1110
* Annotation preprocessing will generate a DataSchema interface from the data at `path`.
@@ -73,8 +72,11 @@ public annotation class JdbcOptions(
7372
)
7473

7574
public annotation class JsonOptions(
76-
/** Allows the choice of how to handle type clashes when reading a JSON file. */
77-
public val typeClashTactic: JSON.TypeClashTactic = JSON.TypeClashTactic.ARRAY_AND_VALUE_COLUMNS,
75+
/**
76+
* Allows the choice of how to handle type clashes when reading a JSON file.
77+
* Must be either [JsonOptions.TypeClashTactics.ARRAY_AND_VALUE_COLUMNS] or [JsonOptions.TypeClashTactics.ANY_COLUMNS]
78+
* */
79+
public val typeClashTactic: String = TypeClashTactics.ARRAY_AND_VALUE_COLUMNS,
7880
/**
7981
* List of [JsonPath]s where instead of a [ColumnGroup], a [FrameColumn]<[KeyValueProperty]>
8082
* will be created.
@@ -85,4 +87,9 @@ public annotation class JsonOptions(
8587
public val keyValuePaths: Array<String> = [],
8688
/** Whether to [unify the numbers that are read][UnifyingNumbers]. `true` by default. */
8789
public val unifyNumbers: Boolean = true,
88-
)
90+
) {
91+
public object TypeClashTactics {
92+
public const val ARRAY_AND_VALUE_COLUMNS: String = "ARRAY_AND_VALUE_COLUMNS"
93+
public const val ANY_COLUMNS: String = "ANY_COLUMNS"
94+
}
95+
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt

+8-4
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,14 @@ import org.jetbrains.kotlinx.dataframe.type
1515
* Creates a [FrameColumn] from [this] by splitting the dataframe into
1616
* smaller ones, with their number of rows at most [size].
1717
*/
18-
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> {
19-
val startIndices = (0 until nrow step size)
20-
return this.chunkedImpl(startIndices, name)
21-
}
18+
public fun <T> DataFrame<T>.chunked(size: Int, name: String = "groups"): FrameColumn<T> =
19+
chunked(
20+
startIndices = 0 until nrow step size,
21+
name = name,
22+
)
23+
24+
public fun <T> DataFrame<T>.chunked(startIndices: Iterable<Int>, name: String = "groups"): FrameColumn<T> =
25+
chunkedImpl(startIndices, name)
2226

2327
public fun <T> DataColumn<T>.chunked(size: Int): ValueColumn<List<T>> {
2428
val values = toList().chunked(size)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/DefaultReadDfMethods.kt

-8
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ private const val CAST = "cast"
2323
private const val VERIFY = "verify" // cast(true) is obscure; I think it's better to use a named argument here
2424
private const val READ_CSV = "readCSV"
2525
private const val READ_TSV = "readTSV"
26-
private const val READ_JSON = "readJson"
2726
private const val READ_JDBC = "readJdbc"
2827

2928
public abstract class AbstractDefaultReadMethod(
@@ -82,13 +81,6 @@ public abstract class AbstractDefaultReadMethod(
8281
override val additionalImports: List<String> = listOf("import org.jetbrains.kotlinx.dataframe.io.$methodName")
8382
}
8483

85-
internal class DefaultReadJsonMethod(path: String?, arguments: MethodArguments) :
86-
AbstractDefaultReadMethod(
87-
path = path,
88-
arguments = arguments,
89-
methodName = READ_JSON,
90-
)
91-
9284
internal class DefaultReadCsvMethod(path: String?, arguments: MethodArguments) :
9385
AbstractDefaultReadMethod(path, arguments, READ_CSV)
9486

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/UnifyingNumbers.kt

+1-25
Original file line numberDiff line numberDiff line change
@@ -42,28 +42,4 @@ import org.jetbrains.kotlinx.dataframe.impl.UnifiedNumberTypeOptions
4242
*
4343
* At the bottom of the graph is [Nothing]. This can be interpreted as `null`.
4444
*/
45-
internal interface UnifyingNumbers {
46-
47-
/**
48-
* ```
49-
* (BigDecimal)
50-
* / \
51-
* (BigInteger) \
52-
* / \ \
53-
* <~ ULong Long ~> Double ..
54-
* .. | / | / | \..
55-
* \ | / | / |
56-
* UInt Int Float
57-
* .. | / | / \..
58-
* \ | / | /
59-
* UShort Short
60-
* | / |
61-
* | / |
62-
* UByte Byte
63-
* \ /
64-
* \ /
65-
* Nothing?
66-
* ```
67-
*/
68-
interface Graph
69-
}
45+
public interface UnifyingNumbers

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/ColumnDataCollector.kt

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import kotlin.reflect.full.isSubclassOf
1414
import kotlin.reflect.full.withNullability
1515
import kotlin.reflect.jvm.jvmErasure
1616

17-
internal interface DataCollector<T> {
17+
public interface DataCollector<T> {
1818

1919
public val data: List<T?>
2020
public val hasNulls: Boolean

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/Utils.kt

+8
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ internal fun <T> catchSilent(body: () -> T): T? =
150150
internal fun Iterable<KClass<*>>.commonType(nullable: Boolean, upperBound: KType? = null) =
151151
commonParents(this).createType(nullable, upperBound)
152152

153+
// helper overload for friend modules
154+
@JvmName("commonTypeOverload")
155+
internal fun commonType(types: Iterable<KType?>, useStar: Boolean = true) = types.commonType(useStar)
156+
153157
/**
154158
* Returns the common supertype of the given types.
155159
*
@@ -276,6 +280,10 @@ internal fun <T> DataFrame<T>.splitByIndices(startIndices: Sequence<Int>): Seque
276280
}
277281
}
278282

283+
// helper overload for friend modules
284+
@JvmName("splitByIndicesOverload")
285+
internal fun <T> splitByIndices(list: List<T>, startIndices: Sequence<Int>) = list.splitByIndices(startIndices)
286+
279287
internal fun <T> List<T>.splitByIndices(startIndices: Sequence<Int>): Sequence<List<T>> =
280288
(startIndices + size).zipWithNext { start, endExclusive ->
281289
subList(start, endExclusive)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt

+108-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.jetbrains.kotlinx.dataframe.impl.api
22

3+
import io.github.oshai.kotlinlogging.KotlinLogging
34
import kotlinx.datetime.Instant
45
import kotlinx.datetime.LocalDate
56
import kotlinx.datetime.LocalDateTime
@@ -25,19 +26,18 @@ import org.jetbrains.kotlinx.dataframe.api.isColumnGroup
2526
import org.jetbrains.kotlinx.dataframe.api.isFrameColumn
2627
import org.jetbrains.kotlinx.dataframe.api.isSubtypeOf
2728
import org.jetbrains.kotlinx.dataframe.api.map
28-
import org.jetbrains.kotlinx.dataframe.api.parser
2929
import org.jetbrains.kotlinx.dataframe.api.to
3030
import org.jetbrains.kotlinx.dataframe.columns.TypeSuggestion
3131
import org.jetbrains.kotlinx.dataframe.columns.size
3232
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
3333
import org.jetbrains.kotlinx.dataframe.hasNulls
34+
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers.resetToDefault
3435
import org.jetbrains.kotlinx.dataframe.impl.canParse
3536
import org.jetbrains.kotlinx.dataframe.impl.catchSilent
3637
import org.jetbrains.kotlinx.dataframe.impl.createStarProjectedType
3738
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
3839
import org.jetbrains.kotlinx.dataframe.impl.javaDurationCanParse
3940
import org.jetbrains.kotlinx.dataframe.io.isUrl
40-
import org.jetbrains.kotlinx.dataframe.io.readJsonStr
4141
import org.jetbrains.kotlinx.dataframe.values
4242
import java.math.BigDecimal
4343
import java.math.BigInteger
@@ -61,6 +61,8 @@ import java.time.LocalDate as JavaLocalDate
6161
import java.time.LocalDateTime as JavaLocalDateTime
6262
import java.time.LocalTime as JavaLocalTime
6363

64+
private val logger = KotlinLogging.logger { }
65+
6466
internal interface StringParser<T> {
6567
fun toConverter(options: ParserOptions?): TypeConverter
6668

@@ -335,6 +337,94 @@ internal object Parsers : GlobalParserOptions {
335337
parser
336338
}
337339

340+
// TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962
341+
// null when dataframe-json is not present
342+
private val readJsonStrAnyFrame: ((text: String) -> AnyFrame)? by lazy {
343+
try {
344+
val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt")
345+
val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic")
346+
val readJsonStr = klass.getMethod(
347+
"readJsonStr",
348+
// this =
349+
DataFrame.Companion::class.java,
350+
// text =
351+
String::class.java,
352+
// header =
353+
List::class.java,
354+
// keyValuePaths =
355+
List::class.java,
356+
// typeClashTactic =
357+
typeClashTactic,
358+
// unifyNumbers =
359+
Boolean::class.java,
360+
)
361+
362+
return@lazy { text: String ->
363+
readJsonStr.invoke(
364+
null,
365+
// this =
366+
DataFrame.Companion,
367+
// text =
368+
text,
369+
// header =
370+
emptyList<Any>(),
371+
// keyValuePaths =
372+
emptyList<Any>(),
373+
// typeClashTactic =
374+
typeClashTactic.enumConstants[0],
375+
// unifyNumbers =
376+
true,
377+
) as AnyFrame
378+
}
379+
} catch (_: ClassNotFoundException) {
380+
return@lazy null
381+
}
382+
}
383+
384+
// TODO rewrite using parser service later https://github.com/Kotlin/dataframe/issues/962
385+
// null when dataframe-json is not present
386+
private val readJsonStrAnyRow: ((text: String) -> AnyRow)? by lazy {
387+
try {
388+
val klass = Class.forName("org.jetbrains.kotlinx.dataframe.io.JsonKt")
389+
val typeClashTactic = Class.forName("org.jetbrains.kotlinx.dataframe.io.JSON\$TypeClashTactic")
390+
val readJsonStr = klass.getMethod(
391+
"readJsonStr",
392+
// this =
393+
DataRow.Companion::class.java,
394+
// text =
395+
String::class.java,
396+
// header =
397+
List::class.java,
398+
// keyValuePaths =
399+
List::class.java,
400+
// typeClashTactic =
401+
typeClashTactic,
402+
// unifyNumbers =
403+
Boolean::class.java,
404+
)
405+
406+
return@lazy { text: String ->
407+
readJsonStr.invoke(
408+
null,
409+
// this =
410+
DataRow.Companion,
411+
// text =
412+
text,
413+
// header =
414+
emptyList<Any>(),
415+
// keyValuePaths =
416+
emptyList<Any>(),
417+
// typeClashTactic =
418+
typeClashTactic.enumConstants[0],
419+
// unifyNumbers =
420+
true,
421+
) as AnyRow
422+
}
423+
} catch (_: ClassNotFoundException) {
424+
return@lazy null
425+
}
426+
}
427+
338428
internal val parsersOrder = listOf(
339429
// Int
340430
stringParser<Int> { it.toIntOrNull() },
@@ -408,7 +498,14 @@ internal object Parsers : GlobalParserOptions {
408498
stringParser<AnyFrame>(catch = true) {
409499
val trimmed = it.trim()
410500
if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
411-
DataFrame.readJsonStr(it)
501+
if (readJsonStrAnyFrame == null) {
502+
logger.warn {
503+
"parse() encountered a string that looks like a JSON array, but the dataframe-json dependency was not detected. Skipping for now."
504+
}
505+
null
506+
} else {
507+
readJsonStrAnyFrame!!(trimmed)
508+
}
412509
} else {
413510
null
414511
}
@@ -417,7 +514,14 @@ internal object Parsers : GlobalParserOptions {
417514
stringParser<AnyRow>(catch = true) {
418515
val trimmed = it.trim()
419516
if (trimmed.startsWith("{") && trimmed.endsWith("}")) {
420-
DataRow.readJsonStr(it)
517+
if (readJsonStrAnyRow == null) {
518+
logger.warn {
519+
"parse() encountered a string that looks like a JSON object, but the dataframe-json dependency was not detected. Skipping for now."
520+
}
521+
null
522+
} else {
523+
readJsonStrAnyRow!!(trimmed)
524+
}
421525
} else {
422526
null
423527
}

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/io/image.kt

+23
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,26 @@ internal fun BufferedImage.toByteArray(format: String = DEFAULT_IMG_FORMAT): Byt
5858
ImageIO.write(this, format, bos)
5959
bos.toByteArray()
6060
}
61+
62+
// helper overload for friend modules
63+
@JvmName("resizeKeepingAspectRatioOverload")
64+
internal fun resizeKeepingAspectRatio(
65+
image: BufferedImage,
66+
maxSize: Int,
67+
resultImageType: Int = BufferedImage.TYPE_INT_ARGB,
68+
interpolation: Any = RenderingHints.VALUE_INTERPOLATION_NEAREST_NEIGHBOR,
69+
renderingQuality: Any = RenderingHints.VALUE_RENDER_QUALITY,
70+
antialiasing: Any = RenderingHints.VALUE_ANTIALIAS_ON,
71+
observer: ImageObserver? = null,
72+
) = image.resizeKeepingAspectRatio(
73+
maxSize = maxSize,
74+
resultImageType = resultImageType,
75+
interpolation = interpolation,
76+
renderingQuality = renderingQuality,
77+
antialiasing = antialiasing,
78+
observer = observer,
79+
)
80+
81+
// helper overload for friend modules
82+
@JvmName("toByteArrayOverload")
83+
internal fun toByteArray(image: BufferedImage, format: String = DEFAULT_IMG_FORMAT) = image.toByteArray(format)

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/schema/Utils.kt

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ import kotlin.reflect.typeOf
2929
internal fun AnyFrame.extractSchema(): DataFrameSchema =
3030
DataFrameSchemaImpl(columns().filter { it.name().isNotEmpty() }.associate { it.name() to it.extractSchema() })
3131

32+
// helper overload for friend modules
33+
@JvmName("intersectSchemasOverload")
34+
internal fun intersectSchemas(schemas: Iterable<DataFrameSchema>): DataFrameSchema = schemas.intersectSchemas()
35+
3236
internal fun Iterable<DataFrameSchema>.intersectSchemas(): DataFrameSchema {
3337
val collectedTypes = mutableMapOf<String, MutableSet<ColumnSchema>>()
3438
var first = true

core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import java.net.URL
1717
* Opens a stream to [url] to create a [DataFrame] from it.
1818
* If the URL is a file URL, the file is read directly.
1919
* If the URL is an HTTP URL, it's also read directly, but if the server returns an error code,
20-
* the error response is read as JSON and parsed as [DataFrame] too.
20+
* the error response is read and parsed as a [DataFrame] too.
2121
*
2222
* Public so it may be used in other modules.
2323
*/
@@ -32,8 +32,8 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFram
3232
if (code != 200) {
3333
val response = connection.responseMessage
3434
try {
35-
// attempt to read error response as JSON
36-
return DataFrame.readJson(connection.errorStream)
35+
// attempt to read error response as dataframe
36+
return DataFrame.read(connection.errorStream).df
3737
} catch (_: Exception) {
3838
throw RuntimeException("Server returned HTTP response code: $code. Response: $response")
3939
}

0 commit comments

Comments
 (0)