Skip to content

Commit

Permalink
Automated commit of generated code
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] committed Nov 25, 2024
1 parent 2f3f364 commit 114c570
Show file tree
Hide file tree
Showing 16 changed files with 546 additions and 239 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ public enum class DataSchemaVisibility {
EXPLICIT_PUBLIC,
}

// TODO add more options
/**
 * Annotation that carries CSV-related options.
 *
 * @property delimiter the delimiter character. Presumably the column separator used when the
 * CSV is read; confirm against the code that consumes this annotation.
 */
public annotation class CsvOptions(public val delimiter: Char)

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.dataTypes.IFRAME
import org.jetbrains.kotlinx.dataframe.dataTypes.IMG
import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException
import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
import org.jetbrains.kotlinx.dataframe.impl.api.convertRowColumnImpl
import org.jetbrains.kotlinx.dataframe.impl.api.convertToDoubleImpl
import org.jetbrains.kotlinx.dataframe.impl.api.convertToTypeImpl
import org.jetbrains.kotlinx.dataframe.impl.api.defaultTimeZone
import org.jetbrains.kotlinx.dataframe.impl.api.toLocalDate
Expand All @@ -36,14 +35,12 @@ import org.jetbrains.kotlinx.dataframe.impl.api.toLocalTime
import org.jetbrains.kotlinx.dataframe.impl.api.withRowCellImpl
import org.jetbrains.kotlinx.dataframe.impl.headPlusArray
import org.jetbrains.kotlinx.dataframe.io.toDataFrame
import org.jetbrains.kotlinx.dataframe.path
import java.math.BigDecimal
import java.net.URL
import java.util.Locale
import kotlin.reflect.KProperty
import kotlin.reflect.KType
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.full.withNullability
import kotlin.reflect.typeOf

@Interpretable("Convert0")
Expand Down Expand Up @@ -129,15 +126,29 @@ public inline fun <T, C, reified R> Convert<T, C>.perRowCol(

/**
 * Converts this column to a column of type [C].
 *
 * The reified type argument is turned into a [KType] and passed to the
 * [KType]-based `convertTo` overload; the result is then cast to `DataColumn<C>`.
 */
public inline fun <reified C> AnyCol.convertTo(): DataColumn<C> {
    val targetType = typeOf<C>()
    return convertTo(targetType) as DataColumn<C>
}

public fun AnyCol.convertTo(newType: KType): AnyCol {
val isTypesAreCorrect = this.type().withNullability(true).isSubtypeOf(typeOf<String?>()) &&
newType.withNullability(true) == typeOf<Double?>()
@Suppress("UNCHECKED_CAST")
public fun AnyCol.convertTo(newType: KType): AnyCol =
when {
type().isSubtypeOf(typeOf<String?>()) ->
(this as DataColumn<String?>).convertTo(newType)

if (isTypesAreCorrect) {
return (this as DataColumn<String?>).convertToDouble().setNullable(newType.isMarkedNullable)
else -> convertToTypeImpl(newType, null)
}

/**
 * Converts this `String?` column to a column of type [C].
 *
 * The reified type argument is turned into a [KType] and passed, together with
 * [parserOptions], to the [KType]-based overload; the result is cast to `DataColumn<C>`.
 *
 * @param parserOptions optional parser options forwarded unchanged; `null` by default.
 */
public inline fun <reified C> DataColumn<String?>.convertTo(parserOptions: ParserOptions? = null): DataColumn<C> {
    val targetType = typeOf<C>()
    return convertTo(targetType, parserOptions) as DataColumn<C>
}

/**
 * Converts this `String?` column to [newType].
 *
 * When [newType] is a subtype of `Double?`, the conversion goes through `convertToDoubleImpl`
 * using the locale, null strings and fast-double-parser flag taken from [parserOptions]
 * (each `null` when [parserOptions] is `null`), and the nullability of the result is set to
 * match [newType]. Every other target type is handled by `convertToTypeImpl`.
 *
 * @param newType the type to convert to.
 * @param parserOptions optional parser options; `null` by default.
 */
public fun DataColumn<String?>.convertTo(newType: KType, parserOptions: ParserOptions? = null): AnyCol {
    // Anything that is not a Double target takes the generic conversion path.
    if (!newType.isSubtypeOf(typeOf<Double?>())) {
        return convertToTypeImpl(newType, parserOptions)
    }

    val doubles = convertToDoubleImpl(
        locale = parserOptions?.locale,
        nullStrings = parserOptions?.nullStrings,
        useFastDoubleParser = parserOptions?.useFastDoubleParser,
    )
    return doubles.setNullable(newType.isMarkedNullable)
}
return convertToTypeImpl(newType)
}

/** Converts this non-nullable column to a `LocalDateTime` column by delegating to `convertTo`. */
@JvmName("convertToLocalDateTimeFromT")
public fun <T : Any> DataColumn<T>.convertToLocalDateTime(): DataColumn<LocalDateTime> {
    return convertTo<LocalDateTime>()
}
Expand Down Expand Up @@ -185,78 +196,72 @@ public fun <T : Any> DataColumn<T>.convertToDouble(): DataColumn<Double> = conve
public fun <T : Any> DataColumn<T?>.convertToDouble(): DataColumn<Double?> = convertTo()

/** Parses a String column to Double considering locale (number format).
* If [locale] parameter is defined, it's number format is used for parsing.
* If [locale] parameter is null, the current system locale is used.
* If the column cannot be parsed, then the POSIX format is used. */
*
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
*
* @param locale If defined, its number format is used for parsing.
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
* If the column cannot be parsed, the POSIX format is used. */
@JvmName("convertToDoubleFromString")
public fun DataColumn<String>.convertToDouble(locale: Locale? = null): DataColumn<Double> =
convertToDouble(locale = locale, useFastDoubleParser = false)
convertToDouble(locale = locale, nullStrings = null, useFastDoubleParser = null)

/**
* Parses a String column to Double considering locale (number format).
* If [locale] parameter is defined, it's number format is used for parsing.
* If [locale] parameter is null, the current system locale is used.
* If the column cannot be parsed, then the POSIX format is used.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
*
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
*
* @param locale If defined, its number format is used for parsing.
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
* If the column cannot be parsed, the POSIX format is used.
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is ["null", "NULL", "NA", "N/A"].
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
*/
@JvmName("convertToDoubleFromString")
public fun DataColumn<String>.convertToDouble(
locale: Locale? = null,
useFastDoubleParser: Boolean,
): DataColumn<Double> = this.castToNullable().convertToDouble(locale, useFastDoubleParser).castToNotNullable()
nullStrings: Set<String>?,
useFastDoubleParser: Boolean?,
): DataColumn<Double> =
this.castToNullable().convertToDouble(locale, nullStrings, useFastDoubleParser).castToNotNullable()

/** Parses a String column to Double considering locale (number format).
* If [locale] parameter is defined, it's number format is used for parsing.
* If [locale] parameter is null, the current system locale is used.
* If the column cannot be parsed, then the POSIX format is used. */
*
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
*
* @param locale If defined, its number format is used for parsing.
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
* If the column cannot be parsed, the POSIX format is used. */
@JvmName("convertToDoubleFromStringNullable")
public fun DataColumn<String?>.convertToDouble(locale: Locale? = null): DataColumn<Double?> =
convertToDouble(locale = locale, useFastDoubleParser = false)
convertToDouble(locale = locale, nullStrings = null, useFastDoubleParser = null)

/**
* Parses a String column to Double considering locale (number format).
* If [locale] parameter is defined, it's number format is used for parsing.
* If [locale] parameter is null, the current system locale is used.
* If the column cannot be parsed, then the POSIX format is used.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
*
* If any of the parameters is `null`, the global default (in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser]) is used.
*
* @param locale If defined, its number format is used for parsing.
* The default in [DataFrame.parser][org.jetbrains.kotlinx.dataframe.DataFrame.Companion.parser] is the system locale.
* If the column cannot be parsed, the POSIX format is used.
* @param nullStrings a set of strings that should be treated as `null` values.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is ["null", "NULL", "NA", "N/A"].
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser.
* The default in [DataFrame.parser][DataFrame.Companion.parser] is `false` for now.
*/
@JvmName("convertToDoubleFromStringNullable")
public fun DataColumn<String?>.convertToDouble(
locale: Locale? = null,
useFastDoubleParser: Boolean,
): DataColumn<Double?> {
fun applyParser(parser: (String) -> Double?): DataColumn<Double?> {
var currentRow = 0
try {
return mapIndexed { row, value ->
currentRow = row
value?.let {
parser(value.trim()) ?: throw TypeConversionException(
value = value,
from = typeOf<String>(),
to = typeOf<Double>(),
column = path,
)
}
}
} catch (e: TypeConversionException) {
throw CellConversionException(e.value, e.from, e.to, path, currentRow, e)
}
}

return if (locale != null) {
val explicitParser = Parsers.getDoubleParser(locale, useFastDoubleParser)
applyParser(explicitParser)
} else {
try {
val defaultParser = Parsers.getDoubleParser(useFastDoubleParser = useFastDoubleParser)
applyParser(defaultParser)
} catch (e: TypeConversionException) {
val posixParser = Parsers.getDoubleParser(Locale.forLanguageTag("C.UTF-8"), useFastDoubleParser)
applyParser(posixParser)
}
}
}
nullStrings: Set<String>?,
useFastDoubleParser: Boolean?,
): DataColumn<Double?> =
convertToDoubleImpl(
locale = locale,
nullStrings = nullStrings,
useFastDoubleParser = useFastDoubleParser,
)

/** Converts this non-nullable column to a `Float` column by delegating to `convertTo`. */
@JvmName("convertToFloatFromT")
public fun <T : Any> DataColumn<T>.convertToFloat(): DataColumn<Float> {
    return convertTo<Float>()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,25 @@ import org.jetbrains.kotlinx.dataframe.impl.api.Parsers
import org.jetbrains.kotlinx.dataframe.impl.api.StringParser
import org.jetbrains.kotlinx.dataframe.impl.api.parseImpl
import org.jetbrains.kotlinx.dataframe.impl.api.tryParseImpl
import org.jetbrains.kotlinx.dataframe.io.readCSV
import org.jetbrains.kotlinx.dataframe.typeClass
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS
import org.jetbrains.kotlinx.dataframe.util.PARSER_OPTIONS_COPY
import java.time.format.DateTimeFormatter
import java.util.Locale
import kotlin.reflect.KProperty
import kotlin.reflect.KType

public val DataFrame.Companion.parser: GlobalParserOptions get() = Parsers
/**
* ### Global Parser Options
*
* These options are used to configure how [DataColumns][DataColumn] of type [String] or [String?][String]
* should be parsed.
* You can always pass a [ParserOptions] object to functions that perform parsing, like [tryParse], [parse],
* or even [DataFrame.readCSV][DataFrame.Companion.readCSV] to override these options.
*/
public val DataFrame.Companion.parser: GlobalParserOptions
get() = Parsers

/**
 * Parses the selected [columns] of this [DataFrame] by delegating to `parseImpl`.
 *
 * @param options optional [ParserOptions] forwarded to `parseImpl`; `null` by default.
 * @param columns selector for the columns to parse.
 * @return the [DataFrame] produced by `parseImpl`.
 */
public fun <T> DataFrame<T>.parse(options: ParserOptions? = null, columns: ColumnsSelector<T, Any?>): DataFrame<T> {
    return parseImpl(options, columns)
}
Expand All @@ -37,14 +48,33 @@ public interface GlobalParserOptions {

/**
 * Registers [str] with the global parser configuration.
 * NOTE(review): presumably adds it to the strings treated as `null` (see [nulls]) — confirm in the implementation.
 */
public fun addNullString(str: String)

/** This function can be called to skip some types. Parsing will be attempted for all other types. */
public fun addSkipType(type: KType)

/** Whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now. */
public var useFastDoubleParser: Boolean

/**
 * Resets the global parser configuration.
 * NOTE(review): the exact default values are defined by the implementation, not visible here.
 */
public fun resetToDefault()

/** The globally configured [Locale]; presumably used for parsing dates and numbers — confirm in the implementation. */
public var locale: Locale

/** Read-only view of the globally configured null strings; presumably the values extended via [addNullString]. */
public val nulls: Set<String>

/** Read-only view of the globally configured skipped types; presumably the values extended via [addSkipType]. */
public val skipTypes: Set<KType>
}

/**
* ### Options for parsing [String]`?` columns
*
* These options are used to configure how [DataColumn]s of type [String] or [String?][String] should be parsed.
* They can be passed to [tryParse] and [parse] functions.
*
* You can also use the [DataFrame.parser][DataFrame.Companion.parser] property to access and modify
* the global parser configuration.
*
* If any of the arguments in [ParserOptions] are `null` (or [ParserOptions] itself is `null`),
* the global configuration will be queried.
*
* @param locale locale to use for parsing dates and numbers, defaults to the System default locale.
* If specified instead of [dateTimeFormatter], it will be used in combination with [dateTimePattern]
* to create a [DateTimeFormatter]. Just providing [locale] will not allow you to parse
Expand All @@ -55,16 +85,19 @@ public interface GlobalParserOptions {
* @param dateTimePattern a pattern to use for parsing dates. If specified instead of [dateTimeFormatter],
* it will be used to create a [DateTimeFormatter].
* @param nullStrings a set of strings that should be treated as `null` values. By default, it's
* ["null", "NULL", "NA", "N/A"].
* `["null", "NULL", "NA", "N/A"]`.
* @param skipTypes a set of types that should be skipped during parsing. Parsing will be attempted for all other types.
* By default, it's an empty set. To skip all types except a specified one, use [convertTo] instead.
* @param useFastDoubleParser whether to use the new _experimental_ FastDoubleParser, defaults to `false` for now.
*/
public data class ParserOptions(
val locale: Locale? = null,
public class ParserOptions(
public val locale: Locale? = null,
// TODO, migrate to kotlinx.datetime.format.DateTimeFormat? https://github.com/Kotlin/dataframe/issues/876
val dateTimeFormatter: DateTimeFormatter? = null,
val dateTimePattern: String? = null,
val nullStrings: Set<String>? = null,
val useFastDoubleParser: Boolean = false,
public val dateTimeFormatter: DateTimeFormatter? = null,
public val dateTimePattern: String? = null,
public val nullStrings: Set<String>? = null,
public val skipTypes: Set<KType>? = null,
public val useFastDoubleParser: Boolean? = null,
) {

/** For binary compatibility. */
Expand All @@ -82,7 +115,8 @@ public data class ParserOptions(
dateTimeFormatter = dateTimeFormatter,
dateTimePattern = dateTimePattern,
nullStrings = nullStrings,
useFastDoubleParser = false,
skipTypes = null,
useFastDoubleParser = null,
)

/** For binary compatibility. */
Expand All @@ -101,6 +135,7 @@ public data class ParserOptions(
dateTimeFormatter = dateTimeFormatter,
dateTimePattern = dateTimePattern,
nullStrings = nullStrings,
skipTypes = skipTypes,
useFastDoubleParser = useFastDoubleParser,
)

Expand All @@ -111,6 +146,52 @@ public data class ParserOptions(
dateTimePattern != null -> DateTimeFormatter.ofPattern(dateTimePattern)
else -> null
}

/**
 * Creates a new [ParserOptions] in which every argument that is not supplied
 * keeps the value held by this instance.
 */
public fun copy(
    locale: Locale? = this.locale,
    dateTimeFormatter: DateTimeFormatter? = this.dateTimeFormatter,
    dateTimePattern: String? = this.dateTimePattern,
    nullStrings: Set<String>? = this.nullStrings,
    skipTypes: Set<KType>? = this.skipTypes,
    useFastDoubleParser: Boolean? = this.useFastDoubleParser,
): ParserOptions {
    return ParserOptions(
        locale = locale,
        dateTimeFormatter = dateTimeFormatter,
        dateTimePattern = dateTimePattern,
        nullStrings = nullStrings,
        skipTypes = skipTypes,
        useFastDoubleParser = useFastDoubleParser,
    )
}

/**
 * Two [ParserOptions] are equal when they are of the same class and all six
 * properties compare equal.
 */
override fun equals(other: Any?): Boolean {
    if (this === other) return true
    // Rejects both `null` and instances of any other class.
    if (other == null || javaClass != other.javaClass) return false

    other as ParserOptions

    return useFastDoubleParser == other.useFastDoubleParser &&
        locale == other.locale &&
        dateTimeFormatter == other.dateTimeFormatter &&
        dateTimePattern == other.dateTimePattern &&
        nullStrings == other.nullStrings &&
        skipTypes == other.skipTypes
}

/**
 * Hash code combining the same six properties compared in [equals]:
 * starts from the hash of [useFastDoubleParser] and folds the remaining
 * properties in with the multiplier 31; `null` properties contribute 0.
 */
override fun hashCode(): Int {
    val seed = useFastDoubleParser?.hashCode() ?: 0
    // Order matters: it must stay locale, formatter, pattern, nullStrings, skipTypes.
    return listOf(locale, dateTimeFormatter, dateTimePattern, nullStrings, skipTypes)
        .fold(seed) { acc, field -> 31 * acc + (field?.hashCode() ?: 0) }
}

/** Renders all six properties in the form `ParserOptions(name=value, ...)`. */
override fun toString(): String {
    return "ParserOptions(" +
        "locale=$locale, " +
        "dateTimeFormatter=$dateTimeFormatter, " +
        "dateTimePattern=$dateTimePattern, " +
        "nullStrings=$nullStrings, " +
        "skipTypes=$skipTypes, " +
        "useFastDoubleParser=$useFastDoubleParser" +
        ")"
}
}

/** Tries to parse a column of strings into a column of a different type.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ package org.jetbrains.kotlinx.dataframe.impl

import org.jetbrains.kotlinx.dataframe.AnyFrame

internal class ColumnNameGenerator(columnNames: List<String> = emptyList()) {
public class ColumnNameGenerator(columnNames: List<String> = emptyList()) {

private val usedNames = columnNames.toMutableSet()

private val colNames = columnNames.toMutableList()

fun addUnique(preferredName: String): String {
public fun addUnique(preferredName: String): String {
var name = preferredName
var k = 1
while (usedNames.contains(name)) {
Expand All @@ -19,17 +19,17 @@ internal class ColumnNameGenerator(columnNames: List<String> = emptyList()) {
return name
}

fun addIfAbsent(name: String) {
public fun addIfAbsent(name: String) {
if (!usedNames.contains(name)) {
usedNames.add(name)
colNames.add(name)
}
}

val names: List<String>
public val names: List<String>
get() = colNames

fun contains(name: String) = usedNames.contains(name)
public operator fun contains(name: String): Boolean = usedNames.contains(name)
}

/** Creates a [ColumnNameGenerator] initialised with this frame's column names. */
internal fun AnyFrame.nameGenerator(): ColumnNameGenerator {
    val existingNames = columnNames()
    return ColumnNameGenerator(existingNames)
}
Expand Down
Loading

0 comments on commit 114c570

Please sign in to comment.