diff --git a/core/api/core.api b/core/api/core.api index 9e106234b3..7c4f4c9fd8 100644 --- a/core/api/core.api +++ b/core/api/core.api @@ -2419,10 +2419,12 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/GlobalParser public abstract fun addSkipType (Lkotlin/reflect/KType;)V public abstract fun getLocale ()Ljava/util/Locale; public abstract fun getNulls ()Ljava/util/Set; + public abstract fun getParseExperimentalUuid ()Z public abstract fun getSkipTypes ()Ljava/util/Set; public abstract fun getUseFastDoubleParser ()Z public abstract fun resetToDefault ()V public abstract fun setLocale (Ljava/util/Locale;)V + public abstract fun setParseExperimentalUuid (Z)V public abstract fun setUseFastDoubleParser (Z)V } @@ -3522,17 +3524,22 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions { public fun ()V public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)V public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILkotlin/jvm/internal/DefaultConstructorMarker;)V - public fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V + public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)V + public synthetic fun (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V public final synthetic fun 
copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; - public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final synthetic fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions; public fun equals (Ljava/lang/Object;)Z public final fun getDateTimeFormatter ()Ljava/time/format/DateTimeFormatter; public final fun getDateTimePattern ()Ljava/lang/String; public final fun getLocale ()Ljava/util/Locale; public final fun getNullStrings ()Ljava/util/Set; + public final fun getParseExperimentalUuid ()Ljava/lang/Boolean; public final fun getSkipTypes 
()Ljava/util/Set; public final fun getUseFastDoubleParser ()Ljava/lang/Boolean; public fun hashCode ()I diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index e0055c71af..121e86ad3b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -73,6 +73,12 @@ public interface GlobalParserOptions { public val nulls: Set public val skipTypes: Set + + /** + * Whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type. + * By default, this is false and UUIDs are not recognized. + */ + public var parseExperimentalUuid: Boolean } /** @@ -101,6 +107,8 @@ public interface GlobalParserOptions { * @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types. * By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead. * @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter. + * @param parseExperimentalUuid whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type. + * By default, this is false and UUIDs are not recognized. */ public class ParserOptions( public val locale: Locale? = null, @@ -110,8 +118,31 @@ public class ParserOptions( public val nullStrings: Set? = null, public val skipTypes: Set? = null, public val useFastDoubleParser: Boolean? = null, + public val parseExperimentalUuid: Boolean? = null, ) { + /** For binary compatibility. */ + @Deprecated( + message = PARSER_OPTIONS, + level = DeprecationLevel.HIDDEN, + ) + public constructor( + locale: Locale? = null, + dateTimeFormatter: DateTimeFormatter? = null, + dateTimePattern: String? = null, + nullStrings: Set? = null, + skipTypes: Set? = null, + useFastDoubleParser: Boolean? 
= null, + ) : this( + locale = locale, + dateTimeFormatter = dateTimeFormatter, + dateTimePattern = dateTimePattern, + nullStrings = nullStrings, + skipTypes = skipTypes, + useFastDoubleParser = useFastDoubleParser, + parseExperimentalUuid = null, + ) + /** For binary compatibility. */ @Deprecated( message = PARSER_OPTIONS, @@ -129,7 +160,31 @@ public class ParserOptions( nullStrings = nullStrings, skipTypes = null, useFastDoubleParser = null, + parseExperimentalUuid = null, + ) + + /** For binary compatibility; keeps the receiver's [parseExperimentalUuid]. */ + @Deprecated( + message = PARSER_OPTIONS_COPY, + level = DeprecationLevel.HIDDEN, ) + public fun copy( + locale: Locale? = this.locale, + dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter, + dateTimePattern: String? = this.dateTimePattern, + nullStrings: Set? = this.nullStrings, + skipTypes: Set? = this.skipTypes, + useFastDoubleParser: Boolean? = this.useFastDoubleParser, + ): ParserOptions = + ParserOptions( + locale = locale, + dateTimeFormatter = dateTimeFormatter, + dateTimePattern = dateTimePattern, + nullStrings = nullStrings, + skipTypes = skipTypes, + useFastDoubleParser = useFastDoubleParser, + parseExperimentalUuid = this.parseExperimentalUuid, + ) /** For binary compatibility. */ @Deprecated( @@ -149,6 +204,7 @@ public class ParserOptions( nullStrings = nullStrings, skipTypes = skipTypes, useFastDoubleParser = useFastDoubleParser, + parseExperimentalUuid = this.parseExperimentalUuid, ) internal fun getDateTimeFormatter(): DateTimeFormatter? = @@ -166,6 +222,7 @@ public class ParserOptions( nullStrings: Set? = this.nullStrings, skipTypes: Set? = this.skipTypes, useFastDoubleParser: Boolean? = this.useFastDoubleParser, + parseExperimentalUuid: Boolean?
= this.parseExperimentalUuid, ): ParserOptions = ParserOptions( locale = locale, @@ -174,6 +231,7 @@ public class ParserOptions( nullStrings = nullStrings, skipTypes = skipTypes, useFastDoubleParser = useFastDoubleParser, + parseExperimentalUuid = parseExperimentalUuid, ) override fun equals(other: Any?): Boolean { @@ -188,6 +246,7 @@ public class ParserOptions( if (dateTimePattern != other.dateTimePattern) return false if (nullStrings != other.nullStrings) return false if (skipTypes != other.skipTypes) return false + if (parseExperimentalUuid != other.parseExperimentalUuid) return false return true } @@ -199,11 +258,12 @@ public class ParserOptions( result = 31 * result + (dateTimePattern?.hashCode() ?: 0) result = 31 * result + (nullStrings?.hashCode() ?: 0) result = 31 * result + (skipTypes?.hashCode() ?: 0) + result = 31 * result + (parseExperimentalUuid?.hashCode() ?: 0) return result } override fun toString(): String = - "ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser)" + "ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser, parseExperimentalUuid=$parseExperimentalUuid)" } /** @include [tryParseImpl] */ diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt index 0999f76337..827df998fd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/parse.kt @@ -142,6 +142,8 @@ internal object Parsers : GlobalParserOptions { override val skipTypes: Set get() = skipTypesSet + override var parseExperimentalUuid: Boolean = false + override fun addDateTimePattern(pattern: String) { 
formatters.add(DateTimeFormatter.ofPattern(pattern)) } @@ -180,6 +182,7 @@ internal object Parsers : GlobalParserOptions { .let { formatters.add(it) } useFastDoubleParser = true + parseExperimentalUuid = false _locale = null nullStrings.addAll(listOf("null", "NULL", "NA", "N/A")) } @@ -428,6 +431,8 @@ internal object Parsers : GlobalParserOptions { } } + private val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}") + @OptIn(ExperimentalUuidApi::class) internal val parsersOrder = listOf( // Int @@ -494,20 +499,25 @@ internal object Parsers : GlobalParserOptions { posixParserToDoubleWithOptions, // Boolean stringParser { it.toBooleanOrNull() }, - // UUID - stringParser { str -> - - val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}") + // Uuid + stringParserWithOptions { options -> + val parser = { str: String -> + val parseExperimentalUuid = options?.parseExperimentalUuid ?: this.parseExperimentalUuid + when { + !parseExperimentalUuid -> null + + uuidRegex.matches(str) -> { + try { + Uuid.parse(str) + } catch (_: IllegalArgumentException) { + null + } + } - if (uuidRegex.matches(str)) { - try { - Uuid.parse(str) - } catch (e: IllegalArgumentException) { - null + else -> null } - } else { - null } + parser }, // BigInteger stringParser { it.toBigIntegerOrNull() }, diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index dc2d2bc495..0350b8ab71 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -489,7 +489,20 @@ class ParseTests { fun `parse valid Uuid`() { val validUUID = "550e8400-e29b-41d4-a716-446655440000" val column by columnOf(validUUID) + val parsed = column.parse(ParserOptions(parseExperimentalUuid = true)) + + parsed.type() shouldBe typeOf() + (parsed[0] as 
Uuid).toString() shouldBe validUUID // Change UUID to Uuid + } + + @OptIn(ExperimentalUuidApi::class) + @Test + fun `parse valid Uuid with GlobalParserOptions`() { + val validUUID = "550e8400-e29b-41d4-a716-446655440000" + val column by columnOf(validUUID) + DataFrame.parser.parseExperimentalUuid = true val parsed = column.parse() + DataFrame.parser.resetToDefault() parsed.type() shouldBe typeOf() (parsed[0] as Uuid).toString() shouldBe validUUID // Change UUID to Uuid @@ -500,6 +513,20 @@ class ParseTests { fun `parse invalid Uuid`() { val invalidUUID = "this is not a UUID" val column = columnOf(invalidUUID) + // Use tryParse, as the string is not formatted as a UUID. + val parsed = column.tryParse( + ParserOptions(parseExperimentalUuid = true), + ) + + parsed.type() shouldNotBe typeOf() + parsed.type() shouldBe typeOf() + } + + @OptIn(ExperimentalUuidApi::class) + @Test + fun `do not parse Uuid by default`() { + val validUUID = "550e8400-e29b-41d4-a716-446655440000" + val column = columnOf(validUUID) val parsed = column.tryParse() // tryParse as string is not formatted.
parsed.type() shouldNotBe typeOf() diff --git a/docs/StardustDocs/topics/parse.md b/docs/StardustDocs/topics/parse.md index f76a7c2961..1c427f721d 100644 --- a/docs/StardustDocs/topics/parse.md +++ b/docs/StardustDocs/topics/parse.md @@ -42,6 +42,7 @@ df.parse { age and weight } * `URL` (`java.net`) * [`Double` (with optional locale settings)](#parsing-doubles) * `Boolean` +* `Uuid` ([`kotlin.uuid.Uuid`](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/)) (requires `parseExperimentalUuid = true`) * `BigDecimal` * `JSON` (arrays and objects) (requires the `org.jetbrains.kotlinx:dataframe-json` dependency) @@ -69,6 +70,8 @@ Available parser options: * Empty set by global default; parsing can result in any supported type * `useFastDoubleParser: Boolean` is used to enable or disable the [new fast double parser](#parsing-doubles) * Enabled by global default +* `parseExperimentalUuid: Boolean` is used to enable or disable parsing to the experimental [`kotlin.uuid.Uuid` class](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/). + * Disabled by global default