Skip to content

parseExperimentalUuid in ParserOptions #1306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions core/api/core.api
Original file line number Diff line number Diff line change
Expand Up @@ -2419,10 +2419,12 @@ public abstract interface class org/jetbrains/kotlinx/dataframe/api/GlobalParser
public abstract fun addSkipType (Lkotlin/reflect/KType;)V
public abstract fun getLocale ()Ljava/util/Locale;
public abstract fun getNulls ()Ljava/util/Set;
public abstract fun getParseExperimentalUuid ()Z
public abstract fun getSkipTypes ()Ljava/util/Set;
public abstract fun getUseFastDoubleParser ()Z
public abstract fun resetToDefault ()V
public abstract fun setLocale (Ljava/util/Locale;)V
public abstract fun setParseExperimentalUuid (Z)V
public abstract fun setUseFastDoubleParser (Z)V
}

Expand Down Expand Up @@ -3522,17 +3524,22 @@ public final class org/jetbrains/kotlinx/dataframe/api/ParserOptions {
public fun <init> ()V
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)V
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)V
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)V
public synthetic fun <init> (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public final synthetic fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public final synthetic fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public final fun copy (Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;Ljava/util/Locale;Ljava/time/format/DateTimeFormatter;Ljava/lang/String;Ljava/util/Set;Ljava/util/Set;Ljava/lang/Boolean;Ljava/lang/Boolean;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/api/ParserOptions;
public fun equals (Ljava/lang/Object;)Z
public final fun getDateTimeFormatter ()Ljava/time/format/DateTimeFormatter;
public final fun getDateTimePattern ()Ljava/lang/String;
public final fun getLocale ()Ljava/util/Locale;
public final fun getNullStrings ()Ljava/util/Set;
public final fun getParseExperimentalUuid ()Ljava/lang/Boolean;
public final fun getSkipTypes ()Ljava/util/Set;
public final fun getUseFastDoubleParser ()Ljava/lang/Boolean;
public fun hashCode ()I
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ public interface GlobalParserOptions {
public val nulls: Set<String>

public val skipTypes: Set<KType>

/**
* Whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type.
* By default, this is false and UUIDs are not recognized.
*/
public var parseExperimentalUuid: Boolean
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Btw, do you plan to rename this property later? It will be an incompatible change. Or drop it completely? Should we call it "parseUuid" and provide a note about its experimental status in the KDocs?

}

/**
Expand Down Expand Up @@ -101,6 +107,8 @@ public interface GlobalParserOptions {
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
* @param useFastDoubleParser whether to use [FastDoubleParser], defaults to `true`. Please report any issues you encounter.
* @param parseExperimentalUuid whether to allow parsing UUIDs to the experimental [kotlin.uuid.Uuid] type.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And we now know that in notebooks dataframe properties with such type require additional opt in!

* By default, this is false and UUIDs are not recognized.
*/
public class ParserOptions(
public val locale: Locale? = null,
Expand All @@ -110,8 +118,31 @@ public class ParserOptions(
public val nullStrings: Set<String>? = null,
public val skipTypes: Set<KType>? = null,
public val useFastDoubleParser: Boolean? = null,
public val parseExperimentalUuid: Boolean? = null,
) {

/**
 * Secondary constructor without the `parseExperimentalUuid` parameter, kept for binary compatibility.
 *
 * It is hidden from source code via [DeprecationLevel.HIDDEN] and delegates to the primary
 * constructor, passing `null` for `parseExperimentalUuid`.
 */
@Deprecated(
message = PARSER_OPTIONS,
level = DeprecationLevel.HIDDEN,
)
public constructor(
locale: Locale? = null,
dateTimeFormatter: DateTimeFormatter? = null,
dateTimePattern: String? = null,
nullStrings: Set<String>? = null,
skipTypes: Set<KType>? = null,
useFastDoubleParser: Boolean? = null,
) : this(
locale = locale,
dateTimeFormatter = dateTimeFormatter,
dateTimePattern = dateTimePattern,
nullStrings = nullStrings,
skipTypes = skipTypes,
useFastDoubleParser = useFastDoubleParser,
// This signature has no way to specify the option, so it is left unset.
parseExperimentalUuid = null,
)

/** For binary compatibility. */
@Deprecated(
message = PARSER_OPTIONS,
Expand All @@ -129,7 +160,31 @@ public class ParserOptions(
nullStrings = nullStrings,
skipTypes = null,
useFastDoubleParser = null,
parseExperimentalUuid = null,
)

/**
 * Overload of `copy` without the `parseExperimentalUuid` parameter, kept for binary compatibility.
 *
 * It is hidden from source code via [DeprecationLevel.HIDDEN]. Callers of this overload cannot
 * pass `parseExperimentalUuid`, so the value of this instance is carried over to the copy;
 * resetting it to `null` would silently drop the setting.
 *
 * @return a new [ParserOptions] with the given values and this instance's `parseExperimentalUuid`.
 */
@Deprecated(
    message = PARSER_OPTIONS_COPY,
    level = DeprecationLevel.HIDDEN,
)
public fun copy(
    locale: Locale? = this.locale,
    dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter,
    dateTimePattern: String? = this.dateTimePattern,
    nullStrings: Set<String>? = this.nullStrings,
    skipTypes: Set<KType>? = this.skipTypes,
    useFastDoubleParser: Boolean? = this.useFastDoubleParser,
): ParserOptions =
    ParserOptions(
        locale = locale,
        dateTimeFormatter = dateTimeFormatter,
        dateTimePattern = dateTimePattern,
        nullStrings = nullStrings,
        skipTypes = skipTypes,
        useFastDoubleParser = useFastDoubleParser,
        // Preserve the receiver's setting: a copy must not lose fields the caller did not mention.
        parseExperimentalUuid = this.parseExperimentalUuid,
    )

/** For binary compatibility. */
@Deprecated(
Expand All @@ -149,6 +204,7 @@ public class ParserOptions(
nullStrings = nullStrings,
skipTypes = skipTypes,
useFastDoubleParser = useFastDoubleParser,
parseExperimentalUuid = null,
)

internal fun getDateTimeFormatter(): DateTimeFormatter? =
Expand All @@ -166,6 +222,7 @@ public class ParserOptions(
nullStrings: Set<String>? = this.nullStrings,
skipTypes: Set<KType>? = this.skipTypes,
useFastDoubleParser: Boolean? = this.useFastDoubleParser,
parseExperimentalUuid: Boolean? = this.parseExperimentalUuid,
): ParserOptions =
ParserOptions(
locale = locale,
Expand All @@ -174,6 +231,7 @@ public class ParserOptions(
nullStrings = nullStrings,
skipTypes = skipTypes,
useFastDoubleParser = useFastDoubleParser,
parseExperimentalUuid = parseExperimentalUuid,
)

override fun equals(other: Any?): Boolean {
Expand All @@ -188,6 +246,7 @@ public class ParserOptions(
if (dateTimePattern != other.dateTimePattern) return false
if (nullStrings != other.nullStrings) return false
if (skipTypes != other.skipTypes) return false
if (parseExperimentalUuid != other.parseExperimentalUuid) return false

return true
}
Expand All @@ -199,11 +258,12 @@ public class ParserOptions(
result = 31 * result + (dateTimePattern?.hashCode() ?: 0)
result = 31 * result + (nullStrings?.hashCode() ?: 0)
result = 31 * result + (skipTypes?.hashCode() ?: 0)
result = 31 * result + (parseExperimentalUuid?.hashCode() ?: 0)
return result
}

override fun toString(): String =
"ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser)"
"ParserOptions(locale=$locale, dateTimeFormatter=$dateTimeFormatter, dateTimePattern=$dateTimePattern, nullStrings=$nullStrings, skipTypes=$skipTypes, useFastDoubleParser=$useFastDoubleParser, parseExperimentalUuid=$parseExperimentalUuid)"
}

/** @include [tryParseImpl] */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ internal object Parsers : GlobalParserOptions {
override val skipTypes: Set<KType>
get() = skipTypesSet

override var parseExperimentalUuid: Boolean = false

/** Builds a [DateTimeFormatter] from [pattern] and appends it to the registered formatters. */
override fun addDateTimePattern(pattern: String) {
    val formatter = DateTimeFormatter.ofPattern(pattern)
    formatters.add(formatter)
}
Expand Down Expand Up @@ -180,6 +182,7 @@ internal object Parsers : GlobalParserOptions {
.let { formatters.add(it) }

useFastDoubleParser = true
parseExperimentalUuid = false
_locale = null
nullStrings.addAll(listOf("null", "NULL", "NA", "N/A"))
}
Expand Down Expand Up @@ -428,6 +431,8 @@ internal object Parsers : GlobalParserOptions {
}
}

private val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")

@OptIn(ExperimentalUuidApi::class)
internal val parsersOrder = listOf(
// Int
Expand Down Expand Up @@ -494,20 +499,25 @@ internal object Parsers : GlobalParserOptions {
posixParserToDoubleWithOptions,
// Boolean
stringParser<Boolean> { it.toBooleanOrNull() },
// UUID
stringParser<Uuid> { str ->

val uuidRegex = Regex("[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}")
// Uuid
stringParserWithOptions<Uuid> { options ->
val parser = { str: String ->
val parseExperimentalUuid = options?.parseExperimentalUuid ?: this.parseExperimentalUuid
when {
!parseExperimentalUuid -> null

uuidRegex.matches(str) -> {
try {
Uuid.parse(str)
} catch (_: IllegalArgumentException) {
null
}
}

if (uuidRegex.matches(str)) {
try {
Uuid.parse(str)
} catch (e: IllegalArgumentException) {
null
else -> null
}
} else {
null
}
parser
},
// BigInteger
stringParser<BigInteger> { it.toBigIntegerOrNull() },
Expand Down
27 changes: 27 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,20 @@ class ParseTests {
fun `parse valid Uuid`() {
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
val column by columnOf(validUUID)
// Uuid parsing is opt-in, so it is enabled explicitly through ParserOptions.
val parsed = column.parse(ParserOptions(parseExperimentalUuid = true))

// The parsed column must be typed as Uuid and its value must round-trip to the input string.
parsed.type() shouldBe typeOf<Uuid>()
(parsed[0] as Uuid).toString() shouldBe validUUID
}

@OptIn(ExperimentalUuidApi::class)
@Test
fun `parse valid Uuid with GlobalParserOptions`() {
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
val column by columnOf(validUUID)
DataFrame.parser.parseExperimentalUuid = true
val parsed = column.parse()
DataFrame.parser.resetToDefault()

parsed.type() shouldBe typeOf<Uuid>()
(parsed[0] as Uuid).toString() shouldBe validUUID // Change UUID to Uuid
Expand All @@ -500,6 +513,20 @@ class ParseTests {
fun `parse invalid Uuid`() {
val invalidUUID = "this is not a UUID"
val column = columnOf(invalidUUID)
// tryParse is used because the input is not a well-formed UUID string.
// NOTE(review): presumably parse() would fail on a column that cannot be parsed — confirm.
val parsed = column.tryParse(
ParserOptions(parseExperimentalUuid = true),
)

// Even with Uuid parsing enabled, the column must keep its String type.
parsed.type() shouldNotBe typeOf<Uuid>()
parsed.type() shouldBe typeOf<String>()
}

@OptIn(ExperimentalUuidApi::class)
@Test
fun `do not parse Uuid by default`() {
val validUUID = "550e8400-e29b-41d4-a716-446655440000"
val column = columnOf(validUUID)
val parsed = column.tryParse() // tryParse as string is not formatted.

parsed.type() shouldNotBe typeOf<Uuid>()
Expand Down
3 changes: 3 additions & 0 deletions docs/StardustDocs/topics/parse.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ df.parse { age and weight }
* `URL` (`java.net`)
* [`Double` (with optional locale settings)](#parsing-doubles)
* `Boolean`
* `Uuid` ([`kotlin.uuid.Uuid`](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/)) (requires `parseExperimentalUuid = true`)
* `BigDecimal`
* `JSON` (arrays and objects) (requires the `org.jetbrains.kotlinx:dataframe-json` dependency)

Expand Down Expand Up @@ -69,6 +70,8 @@ Available parser options:
* Empty set by global default; parsing can result in any supported type
* `useFastDoubleParser: Boolean` is used to enable or disable the [new fast double parser](#parsing-doubles)
* Enabled by global default
* `parseExperimentalUuid: Boolean` is used to enable or disable parsing to the experimental [`kotlin.uuid.Uuid` class](https://kotlinlang.org/api/core/kotlin-stdlib/kotlin.uuid/-uuid/).
* Disabled by global default

<!---FUN parseWithOptions-->

Expand Down