Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -3549,8 +3549,12 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParseKt {
public static synthetic fun parse$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame;[Lorg/jetbrains/kotlinx/dataframe/columns/ColumnReference;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun parseAnyFrameNullable (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseAnyFrameNullable$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun parseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun parseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParse (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParse$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static final fun tryParseChar (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
public static synthetic fun tryParseChar$default (Lorg/jetbrains/kotlinx/dataframe/DataColumn;Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataColumn;
}

public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import java.time.format.DateTimeFormatter
import java.util.Locale
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.typeOf
import kotlin.uuid.ExperimentalUuidApi
import kotlin.uuid.Uuid

Expand Down Expand Up @@ -312,6 +313,28 @@ public class ParserOptions(
* @return a new column with parsed values */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)

/**
 * Attempts to parse this column of chars into a column of another type.
 *
 * Every char is first turned into a `String`, after which the parsers in [Parsers] are tried in order.
 * The first parser that manages to parse all values in the column is used; as soon as a parser
 * fails on any value, the next one is tried. If all the others fail, the final parser
 * returns strings.
 *
 * The `Char` parser is always skipped, on top of the skip types supplied via [options]
 * (or the skip types of `DataFrame.parser` when [options] provides none).
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
@JvmName("tryParseChar")
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // Start from the caller's skip types, falling back to the globally configured ones.
    val inheritedSkipTypes = options?.skipTypes ?: DataFrame.parser.skipTypes

    // We are parsing away from Char, so the Char parser must not be a candidate.
    val baseOptions = options ?: ParserOptions()
    val charFreeOptions = baseOptions.copy(skipTypes = inheritedSkipTypes + typeOf<Char>())

    val asStrings = map { it?.toString() }
    return asStrings.tryParse(charFreeOptions)
}

public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
parse(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
Expand All @@ -335,6 +358,23 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = tryParse(options)
    // A result that is still a String column means no more specific parser matched.
    if (parsed.typeClass == String::class) error("Can't guess column type")
    return parsed
}

/**
 * Parses this column of chars, treated as strings, into a column of another type.
 *
 * The parsers in [Parsers] are tried in order until one is able to parse all values
 * in the column successfully. Whenever a parser fails to parse any value, the next parser is tried.
 *
 * If all of them fail, the column is returned as `String`; this can never fail.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @return a new column with parsed values
 */
@JvmName("parseChar")
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> {
    // no need to throw an exception, as Char can always be parsed as String
    return tryParse(options)
}

/**
 * Parses every non-null frame in this column with the given [options];
 * `null` entries are kept as `null`.
 *
 * @param options options for parsing, passed on unchanged to each frame's `parse` call
 * @return a new column holding the parsed frames
 */
@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
    map { frame -> frame?.parse(options) }
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,13 @@ internal fun createConverter(from: KType, to: KType, options: ParserOptions? = n

Char::class -> when (toClass) {
Int::class -> convert<Char> { it.code }
else -> null

else -> // convert char to string and then to target type
getConverter(typeOf<String>(), to, options)?.let { stringConverter ->
convert<Char> {
stringConverter(it.toString())
}
}
}

Int::class -> when (toClass) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -716,29 +716,24 @@ internal fun <T> DataFrame<T>.parseImpl(options: ParserOptions?, columns: Column
when {
// when a frame column is requested to be parsed,
// parse each value/frame column at any depth inside each DataFrame in the frame column
col.isFrameColumn() -> {
col.isFrameColumn() ->
col.map {
it.parseImpl(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
}
}
}

// when a column group is requested to be parsed,
// parse each column in the group
col.isColumnGroup() -> {
col.isColumnGroup() ->
col.parseImpl(options) { all() }
.asColumnGroup(col.name())
.asDataColumn()
}

// Base case, parse the column if it's a `String?` column
col.isSubtypeOf<String?>() -> {
col.isSubtypeOf<String?>() ->
col.cast<String?>().tryParseImpl(options)
}

else -> {
col
}
else -> col
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package org.jetbrains.kotlinx.dataframe.api
import io.kotest.assertions.throwables.shouldNotThrow
import io.kotest.assertions.throwables.shouldThrow
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import kotlinx.datetime.Clock
import kotlinx.datetime.Instant
import kotlinx.datetime.LocalTime
Expand Down Expand Up @@ -69,6 +70,20 @@ class ConvertTests {
@Test
fun `convert string to enum`() {
    // converting the column directly
    val directlyConverted = columnOf("A", "B").convertTo<EnumClass>()
    directlyConverted shouldBe columnOf(EnumClass.A, EnumClass.B)

    // converting the same values through the DataFrame convert DSL
    val df = dataFrameOf(columnOf("A", "B") named "colA")
    val convertedInFrame = df.convert("colA").to<EnumClass>().getColumn("colA")
    convertedInFrame shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
}

@Test
fun `convert char to enum`() {
    // Char -> String -> Enum, converting the column directly
    val directlyConverted = columnOf('A', 'B').convertTo<EnumClass>()
    directlyConverted shouldBe columnOf(EnumClass.A, EnumClass.B)

    // the same conversion through the DataFrame convert DSL
    val df = dataFrameOf(columnOf('A', 'B') named "colA")
    val convertedInFrame = df.convert("colA").to<EnumClass>().getColumn("colA")
    convertedInFrame shouldBe columnOf(EnumClass.A, EnumClass.B).named("colA")
}

@JvmInline
Expand Down Expand Up @@ -199,6 +214,15 @@ class ConvertTests {
val col = columnOf(65, 66)
col.convertTo<Char>() shouldBe columnOf('A', 'B')
col.convertTo<Char>().convertTo<Int>() shouldBe col

// this means
columnOf('1', '2').convertToInt() shouldNotBe columnOf(1, 2)
columnOf('1', '2').convertToInt() shouldBe columnOf(49, 50)

// but
columnOf('1', '2').convertToString().convertToInt() shouldBe columnOf(1, 2)
// or
columnOf('1', '2').parse() shouldBe columnOf(1, 2)
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,22 @@ import kotlin.time.Instant as StdlibInstant
import kotlinx.datetime.Instant as DeprecatedInstant

class ParseTests {

@Test
fun `parse to chars`() {
    val letters = columnOf('a', 'b', 'c')

    // each route below is expected to yield a column equal to the original chars
    val viaParse = letters.parse()
    viaParse shouldBe letters

    val viaTryParse = letters.tryParse()
    viaTryParse shouldBe letters

    val viaString = letters.convertToString().parse()
    viaString shouldBe letters
}

@Test
fun `parse chars to int`() {
    val digits = columnOf('1', '2', '3')

    // digit chars are parsed by the value they spell out
    val viaParse = digits.parse()
    viaParse shouldBe columnOf(1, 2, 3)

    val viaTryParse = digits.tryParse()
    viaTryParse shouldBe columnOf(1, 2, 3)
}

@Test
fun parseDate() {
val currentLocale = Locale.getDefault()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ class ParserTests {
DataFrame.parser.resetToDefault()
}

@Test
fun `parse to Char`() {
    // single-character strings are expected to be parsed into a Char column
    val col by columnOf("a", "b")
    val parsedType = col.parse().type()
    parsedType shouldBe typeOf<Char>()
}

@Test(expected = IllegalStateException::class)
fun `parse should throw`() {
val col by columnOf("a", "bc")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ internal interface ConvertDocs {
* * [LocalDateTime], [LocalDate], [LocalTime],
* `Instant` ([kotlinx.datetime][DeprecatedInstant], [kotlin.time][StdlibInstant], and [java.time]),
* * [URL], [IMG], [IFRAME].
*
* __NOTE__: Conversion between [Int] and [Char] uses the UTF-16 code of the character, [Char.code].
* To convert [Char]->[Int] by the digit that is written (e.g. `'1'` -> `1`), use [parse()][parse] instead, or,
* in either case, use [String] as an intermediary type.
*/
interface SupportedTypes

Expand Down Expand Up @@ -362,7 +366,7 @@ public class Convert<T, out C>(
* preserving their original names and positions within the [DataFrame].
*
* The target type is provided as a reified type argument.
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
*
* For more information: {@include [DocumentationUrls.Convert]}
*
Expand Down Expand Up @@ -390,7 +394,7 @@ public class Convert<T, out C>(
* preserving their original names and positions within the [DataFrame].
*
* The target type is provided as a [KType].
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
*
* For more information: {@include [DocumentationUrls.Convert]}
*
Expand Down Expand Up @@ -554,7 +558,7 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(
*
* The target type is provided as a reified type argument.
*
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
*
* @param [C] The target type to convert values to.
* @return A new [DataColumn] with the values converted to type [C].
Expand All @@ -564,7 +568,7 @@ public inline fun <reified C> AnyCol.convertTo(): DataColumn<C> = convertTo(type
/**
* Converts values in this column to the specified [type].
*
* For the full list of supported types, see [ConvertDocs.SupportedTypes].
* For the full list of supported types, see [SupportedTypes][ConvertDocs.SupportedTypes].
*
* @param type The target type, provided as a [KType], to convert values to.
* @return A new [DataColumn] with the values converted to [type].
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public class ConverterScope(public val fromType: KType, public val toSchema: Col
* df.convertTo<SomeSchema> {
* // defines how to convert Int? -> String
* convert<Int?>().with { it?.toString() ?: "No input given" }
* // defines how to convert String -> SomeType
* // defines how to convert String/Char -> SomeType
* parser { SomeType(it) }
* // fill missing column `sum` with expression `a+b`
* fill { sum }.with { a + b }
Expand Down Expand Up @@ -102,6 +102,10 @@ public fun <T, C> ConvertToFill<T, C>.with(expr: RowExpression<T, C>) {

/**
 * Registers [parser] as the way to convert `String` values into the given type [C].
 *
 * This method is a shortcut for `convert<String>().with { }`.
 *
 * If no converter is defined for `Char` values, this converter will be used for them as well.
 *
 * @param parser function that turns a `String` value into a [C]
 */
public inline fun <reified C> ConvertSchemaDsl<*>.parser(noinline parser: (String) -> C): Unit {
    convert<String>().with(parser)
}
Expand Down
40 changes: 38 additions & 2 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.impl.io.FastDoubleParser
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
Expand Down Expand Up @@ -302,6 +301,23 @@ public class ParserOptions(
/** @include [tryParseImpl] */
public fun DataColumn<String?>.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options)

/**
 * Attempts to parse this column of chars into a column of another type.
 *
 * Every char is first turned into a `String`, after which the parsers in [Parsers] are tried in order.
 * The first parser that manages to parse all values in the column is used; as soon as a parser
 * fails on any value, the next one is tried. If all the others fail, the final parser
 * returns strings.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @throws IllegalStateException if no valid parser is found (unlikely, unless the `String` parser is disabled)
 * @return a new column with parsed values
 */
@JvmName("tryParseChar")
public fun DataColumn<Char?>.tryParse(options: ParserOptions? = null): DataColumn<*> {
    // parsing operates on strings, so convert each char (keeping nulls) first
    val asStrings = map { it?.toString() }
    return asStrings.tryParseImpl(options)
}

public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T> =
parse(options) {
colsAtAnyDepth().filter { !it.isColumnGroup() }
Expand All @@ -323,7 +339,27 @@ public fun <T> DataFrame<T>.parse(options: ParserOptions? = null): DataFrame<T>
* @return a new column with parsed values
*/
public fun DataColumn<String?>.parse(options: ParserOptions? = null): DataColumn<*> =
tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") }
tryParse(options).also { if (it.isSubtypeOf<String?>()) error("Can't guess column type") }

/**
 * Parses this column of chars, treated as strings, into a column of another type.
 *
 * The parsers in [Parsers] are tried in order until one is able to parse all values
 * in the column successfully. Whenever a parser fails to parse any value, the next parser is tried.
 *
 * If all of them fail, an [IllegalStateException] is thrown. If you don't want this exception
 * to be thrown, use [tryParse] instead.
 *
 * Parsers that are [covered by][StringParser.coveredBy] other parsers are skipped.
 *
 * @param options options for parsing, like providing a locale or a custom date-time formatter
 * @return a new column with parsed values
 */
@JvmName("parseChar")
public fun DataColumn<Char?>.parse(options: ParserOptions? = null): DataColumn<*> {
    val parsed = map { it?.toString() }.tryParse(options)
    // Ending up with Char or String again means no more specific type could be found.
    if (parsed.isSubtypeOf<Char?>() || parsed.isSubtypeOf<String?>()) error("Can't guess column type")
    return parsed
}

@JvmName("parseAnyFrameNullable")
public fun DataColumn<AnyFrame?>.parse(options: ParserOptions? = null): DataColumn<AnyFrame?> =
Expand Down
Loading