From 1c67663c59da0939aca43d78d820acca74ba09f5 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 18 Sep 2025 14:46:08 +0200 Subject: [PATCH 1/7] made :samples depend on core .jar --- samples/build.gradle.kts | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/samples/build.gradle.kts b/samples/build.gradle.kts index 138d87988..27b392160 100644 --- a/samples/build.gradle.kts +++ b/samples/build.gradle.kts @@ -24,9 +24,9 @@ plugins { alias(korro) alias(ktlint) // Compiler plugin doesn't work properly for now: https://github.com/Kotlin/dataframe/issues/1432 -// alias(dataframePlugin) + alias(dataframe.compiler.plugin) // using deprecated gradle plugin instead - alias(dataframe) +// alias(dataframe) // alias(kover) alias(ksp) } @@ -37,8 +37,22 @@ repositories { mavenLocal() // for local development } +tasks.compileKotlin { + dependsOn(projects.core.path + ":jar") +} + +val coreJar = project(projects.core.path).configurations + .getByName("instrumentedJars") + .artifacts.single() + .file.absolutePath + .replace(File.separatorChar, '/') + dependencies { - implementation(projects.dataframe) + implementation(projects.dataframe) { + // exclude(group, "dataframe-core") + } + implementation(files(coreJar)) + testImplementation(libs.junit) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") From de9e1f0717c510cba751ec160624999fd9c4c01d Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 18 Sep 2025 14:46:26 +0200 Subject: [PATCH 2/7] trying to get samples to compiler with compiler plugin --- .../kotlinx/dataframe/samples/api/Generate.kt | 292 +++++--------- .../kotlinx/dataframe/samples/api/Modify.kt | 162 ++++---- .../kotlinx/dataframe/samples/api/TestBase.kt | 10 +- .../collectionsInterop/AssociateBySamples.kt | 2 +- .../collectionsInterop/AssociateSamples.kt | 2 +- .../kotlinx/dataframe/samples/api/tmp.kt | 13 + .../dataframe/samples/guides/quickstart.kt | 376 +++++++++--------- 7 files changed, 390 insertions(+), 467 deletions(-) create mode 100644 samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt index cde85c0a7..d5509285a 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt @@ -1,183 +1,109 @@ -@file:Suppress("UNUSED_VARIABLE", "unused", "UNCHECKED_CAST", "ktlint", "ClassName") - -package org.jetbrains.kotlinx.dataframe.samples.api - -import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.all -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.generateCode -import org.jetbrains.kotlinx.dataframe.api.generateDataClasses -import org.jetbrains.kotlinx.dataframe.api.generateInterfaces -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.rename -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.toList -import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper -import org.junit.Test - -class Generate : DataFrameSampleHelper("generate_docs", "api") { - val ordersAlice = dataFrameOf( - "orderId" to listOf(101, 102), - "amount" to listOf(50.0, 75.5), - ) - - val ordersBob = dataFrameOf( - "orderId" to listOf(103, 104, 105), - "amount" to listOf(20.0, 30.0, 25.0), - ) - - val df = dataFrameOf( - "user" to listOf("Alice", "Bob"), - "orders" to listOf(ordersAlice, ordersBob), - ) - - @DataSchema(isOpen = false) - interface _DataFrameType11 { - val amount: kotlin.Double - val orderId: kotlin.Int - } - - val org.jetbrains.kotlinx.dataframe.ColumnsContainer<_DataFrameType11>.amount: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "_DataFrameType11_amount", - ) - get() = this["amount"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow<_DataFrameType11>.amount: kotlin.Double - @JvmName("_DataFrameType11_amount") - get() = this["amount"] as kotlin.Double - val org.jetbrains.kotlinx.dataframe.ColumnsContainer<_DataFrameType11>.orderId: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "_DataFrameType11_orderId", - ) - get() = this["orderId"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow<_DataFrameType11>.orderId: kotlin.Int - @JvmName("_DataFrameType11_orderId") - get() = this["orderId"] as kotlin.Int - - @DataSchema - interface _DataFrameType1 { - val orders: List<_DataFrameType11> - val user: kotlin.String - } - - val org.jetbrains.kotlinx.dataframe.ColumnsContainer<_DataFrameType1>.orders: org.jetbrains.kotlinx.dataframe.DataColumn> - @JvmName( - "_DataFrameType1_orders", - ) - get() = this["orders"] as org.jetbrains.kotlinx.dataframe.DataColumn> - val org.jetbrains.kotlinx.dataframe.DataRow<_DataFrameType1>.orders: org.jetbrains.kotlinx.dataframe.DataFrame<_DataFrameType11> - @JvmName( - "_DataFrameType1_orders", - ) - get() = this["orders"] as org.jetbrains.kotlinx.dataframe.DataFrame<_DataFrameType11> - val org.jetbrains.kotlinx.dataframe.ColumnsContainer<_DataFrameType1>.user: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "_DataFrameType1_user", - ) - get() = this["user"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow<_DataFrameType1>.user: kotlin.String - @JvmName("_DataFrameType1_user") - get() = this["user"] as kotlin.String - - @DataSchema - data class Customer1(val amount: Double, val orderId: Int) - - @DataSchema - data class Customer(val orders: List, val user: String) - - val org.jetbrains.kotlinx.dataframe.ColumnsContainer.amount: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "Customer1_amount", - ) - get() = this["amount"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow.amount: kotlin.Double - @JvmName("Customer1_amount") - get() = this["amount"] as kotlin.Double - val org.jetbrains.kotlinx.dataframe.ColumnsContainer.orderId: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "Customer1_orderId", - ) - get() = this["orderId"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow.orderId: kotlin.Int - @JvmName("Customer1_orderId") - get() = this["orderId"] as kotlin.Int - - val org.jetbrains.kotlinx.dataframe.ColumnsContainer.orders: org.jetbrains.kotlinx.dataframe.DataColumn> - @JvmName( - "Customer_orders", - ) - get() = this["orders"] as org.jetbrains.kotlinx.dataframe.DataColumn> - val org.jetbrains.kotlinx.dataframe.DataRow.orders: org.jetbrains.kotlinx.dataframe.DataFrame - @JvmName( - "Customer_orders", - ) - get() = this["orders"] as org.jetbrains.kotlinx.dataframe.DataFrame - val org.jetbrains.kotlinx.dataframe.ColumnsContainer.user: org.jetbrains.kotlinx.dataframe.DataColumn - @JvmName( - "Customer_user", - ) - get() = this["user"] as org.jetbrains.kotlinx.dataframe.DataColumn - val org.jetbrains.kotlinx.dataframe.DataRow.user: kotlin.String - @JvmName("Customer_user") - get() = this["user"] as kotlin.String - - private val customers: List = df.cast().toList() - - @Test - fun notebook_test_generate_docs_1() { - // SampleStart - df - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_generate_docs_2() { - // SampleStart - df.generateInterfaces() - // SampleEnd - } - - @Test - fun notebook_test_generate_docs_3() { - // SampleStart - df.cast<_DataFrameType1>().filter { orders.all { orderId >= 102 } } - // SampleEnd - // .saveDfHtmlSample() - } - - @Test - fun notebook_test_generate_docs_4() { - // SampleStart - df.generateDataClasses("Customer") - // SampleEnd - } - - @Test - fun notebook_test_generate_docs_5() { - // SampleStart - val customers: List = df.cast().toList() - // SampleEnd - } - - @Test - fun notebook_test_generate_docs_6() { - // SampleStart - df.generateCode("Customer") - // SampleEnd - } - - @Test - fun notebook_test_generate_docs_7() { - // SampleStart - df.cast() - .add("ordersTotal") { orders.sumOf { it.amount } } - .filter { user.startsWith("A") } - .rename { user }.into("customer") - // SampleEnd - // .saveDfHtmlSample() - } -} +//@file:Suppress("UNUSED_VARIABLE", "unused", "UNCHECKED_CAST", "ktlint", "ClassName") +// +//package org.jetbrains.kotlinx.dataframe.samples.api +// +//import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +//import org.jetbrains.kotlinx.dataframe.api.add +//import org.jetbrains.kotlinx.dataframe.api.all +//import org.jetbrains.kotlinx.dataframe.api.cast +//import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +//import org.jetbrains.kotlinx.dataframe.api.filter +//import org.jetbrains.kotlinx.dataframe.api.generateCode +//import org.jetbrains.kotlinx.dataframe.api.generateDataClasses +//import org.jetbrains.kotlinx.dataframe.api.generateInterfaces +//import org.jetbrains.kotlinx.dataframe.api.into +//import org.jetbrains.kotlinx.dataframe.api.rename +//import org.jetbrains.kotlinx.dataframe.api.sumOf +//import org.jetbrains.kotlinx.dataframe.api.toList +//import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper +//import org.junit.Test +// +//class Generate : DataFrameSampleHelper("generate_docs", "api") { +// private val ordersAlice = dataFrameOf( +// "orderId" to listOf(101, 102), +// "amount" to listOf(50.0, 75.5), +// ) +// +// private val ordersBob = dataFrameOf( +// "orderId" to listOf(103, 104, 105), +// "amount" to listOf(20.0, 30.0, 25.0), +// ) +// +// private val df = dataFrameOf( +// "user" to listOf("Alice", "Bob"), +// "orders" to listOf(ordersAlice, ordersBob), +// ) +// +// @DataSchema(isOpen = false) +// interface _DataFrameType11 { +// val amount: kotlin.Double +// val orderId: kotlin.Int +// } +// +// @DataSchema +// interface _DataFrameType1 { +// val orders: List<_DataFrameType11> +// val user: kotlin.String +// } +// +// @DataSchema +// data class Customer1(val amount: Double, val orderId: Int) +// +// @DataSchema +// data class Customer(val orders: List, val user: String) +// +// @Test +// fun notebook_test_generate_docs_1() { +// // SampleStart +// df +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_generate_docs_2() { +// // SampleStart +// df.generateInterfaces() +// // SampleEnd +// } +// +// @Test +// fun notebook_test_generate_docs_3() { +// // SampleStart +// df.filter { orders.all { orderId >= 102 } } +// // SampleEnd +// // .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_generate_docs_4() { +// // SampleStart +// df.generateDataClasses("Customer") +// // SampleEnd +// } +// +// @Test +// fun notebook_test_generate_docs_5() { +// // SampleStart +// val customers: List = df.cast().toList() +// // SampleEnd +// } +// +// @Test +// fun notebook_test_generate_docs_6() { +// // SampleStart +// df.generateCode("Customer") +// // SampleEnd +// } +// +// @Test +// fun notebook_test_generate_docs_7() { +// // SampleStart +// df.cast() +// .add("ordersTotal") { orders.sumOf { it.amount } } +// .filter { user.startsWith("A") } +// .rename { user }.into("customer") +// // SampleEnd +// // .saveDfHtmlSample() +// } +//} diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index e91347c42..eacc03d37 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -18,93 +18,87 @@ import org.jetbrains.kotlinx.dataframe.api.with import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper import org.junit.Test -@Suppress("ktlint:standard:argument-list-wrapping") -class Modify : DataFrameSampleHelper("operations", "modify") { - - val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( - "Alice", "Cooper", 15, "London", 54, true, - "Bob", "Dylan", 45, "Dubai", 87, true, - "Charlie", "Daniels", 20, "Moscow", null, false, - "Charlie", "Chaplin", 40, "Milan", null, true, - "Bob", "Marley", 30, "Tokyo", 68, true, - "Alice", "Wolf", 20, null, 55, false, - "Charlie", "Byrd", 30, "Moscow", 90, true, - ).group("firstName", "lastName").into("name").cast() - val df2 = dataFrameOf( - "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10", - )( - 45, 12, 78, 34, 90, 23, 67, 89, 56, 43, - 87, 34, 56, 78, 12, 45, 90, 23, 67, 89, - 23, 67, 89, 45, 78, 90, 12, 56, 34, 78, - 90, 45, 23, 67, 34, 78, 89, 12, 56, 23, - 12, 89, 45, 90, 56, 34, 78, 67, 23, 90, - 78, 56, 12, 23, 89, 67, 34, 90, 45, 12, - 34, 90, 67, 12, 45, 23, 56, 78, 89, 67, - 56, 23, 34, 89, 67, 12, 45, 34, 78, 90, - 89, 78, 90, 56, 23, 89, 67, 45, 12, 34, - 67, 45, 78, 12, 90, 56, 23, 89, 34, 78, - ) - @Suppress("UNCHECKED_CAST") - @Test - fun formatExample_strings() { - // SampleStart - val ageMin = df.min { "age"() } - val ageMax = df.max { "age"() } - - df - .format().with { bold and textColor(black) and background(white) } - .format("name").with { underline } - .format { "name"["lastName"] }.with { italic } - .format("isHappy").with { - background(if (it as Boolean) green else red) - } - .format("weight").notNull().with { linearBg(it as Int, 50 to blue, 90 to red) } - .format("age").perRowCol { row, col -> - col as DataColumn - textColor( - linear(value = col[row], from = ageMin to blue, to = ageMax to green), - ) - } - // SampleEnd - .saveDfHtmlSample() - } +@Suppress("ktlint:standard:argument-list-wrapping") +class Modify : DataFrameSampleHelper("operations", "modify") { - @Test - fun formatExample_properties() { - // SampleStart - val ageMin = df.age.min() - val ageMax = df.age.max() + //.cast() - df - .format().with { bold and textColor(black) and background(white) } - .format { name }.with { underline } - .format { name.lastName }.with { italic } - .format { isHappy }.with { background(if (it) green else red) } - .format { weight }.notNull().linearBg(50 to FormattingDsl.blue, 90 to FormattingDsl.red) - .format { age }.perRowCol { row, col -> - textColor( - linear(value = col[row], from = ageMin to blue, to = ageMax to green), - ) - } - // SampleEnd - .saveDfHtmlSample() - } +// private val df2 = dataFrameOf( +// "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10", +// )( +// 45, 12, 78, 34, 90, 23, 67, 89, 56, 43, +// 87, 34, 56, 78, 12, 45, 90, 23, 67, 89, +// 23, 67, 89, 45, 78, 90, 12, 56, 34, 78, +// 90, 45, 23, 67, 34, 78, 89, 12, 56, 23, +// 12, 89, 45, 90, 56, 34, 78, 67, 23, 90, +// 78, 56, 12, 23, 89, 67, 34, 90, 45, 12, +// 34, 90, 67, 12, 45, 23, 56, 78, 89, 67, +// 56, 23, 34, 89, 67, 12, 45, 34, 78, 90, +// 89, 78, 90, 56, 23, 89, 67, 45, 12, 34, +// 67, 45, 78, 12, 90, 56, 23, 89, 34, 78, +// ) - @Test - fun formatExampleNumbers() { - // SampleStart - df2.format().perRowCol { row, col -> - val rowIndex = row.index() - val colIndex = row.df().getColumnIndex(col) - if ((rowIndex - colIndex) % 3 == 0) { - background(darkGray) and textColor(white) - } else { - background(white) and textColor(black) - } - } - // SampleEnd - .saveDfHtmlSample() - } +// @Suppress("UNCHECKED_CAST") +// @Test +// fun formatExample_strings() { +// // SampleStart +// val ageMin = df.min { "age"() } +// val ageMax = df.max { "age"() } +// +// df +// .format().with { bold and textColor(black) and background(white) } +// .format("name").with { underline } +// .format { "name"["lastName"] }.with { italic } +// .format("isHappy").with { +// background(if (it as Boolean) green else red) +// } +// .format("weight").notNull().with { linearBg(it as Int, 50 to blue, 90 to red) } +// .format("age").perRowCol { row, col -> +// col as DataColumn +// textColor( +// linear(value = col[row], from = ageMin to blue, to = ageMax to green), +// ) +// } +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun formatExample_properties() { +// // SampleStart +// val ageMin = df.age.min() +// val ageMax = df.age.max() +// +// df +// .format().with { bold and textColor(black) and background(white) } +// .format { name }.with { underline } +// .format { name.lastName }.with { italic } +// .format { isHappy }.with { background(if (it) green else red) } +// .format { weight }.notNull().linearBg(50 to FormattingDsl.blue, 90 to FormattingDsl.red) +// .format { age }.perRowCol { row, col -> +// textColor( +// linear(value = col[row], from = ageMin to blue, to = ageMax to green), +// ) +// } +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun formatExampleNumbers() { +// // SampleStart +// df2.format().perRowCol { row, col -> +// val rowIndex = row.index() +// val colIndex = row.df().getColumnIndex(col) +// if ((rowIndex - colIndex) % 3 == 0) { +// background(darkGray) and textColor(white) +// } else { +// background(white) and textColor(black) +// } +// } +// // SampleEnd +// .saveDfHtmlSample() +// } } diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt index 6bdd81854..71a130f1b 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt @@ -13,7 +13,7 @@ import org.jetbrains.kotlinx.dataframe.api.into @Suppress("ktlint:standard:argument-list-wrapping") public open class TestBase { - val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( + private val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( "Alice", "Cooper", 15, "London", 54, true, "Bob", "Dylan", 45, "Dubai", 87, true, "Charlie", "Daniels", 20, "Moscow", null, false, @@ -21,19 +21,19 @@ public open class TestBase { "Bob", "Marley", 30, "Tokyo", 68, true, "Alice", "Wolf", 20, null, 55, false, "Charlie", "Byrd", 30, "Moscow", 90, true, - ).group("firstName", "lastName").into("name").cast() + ).group { firstName and lastName }.into("name")//.cast() @DataSchema interface Name { - val firstName: String - val lastName: String + val firstNameA: String + val lastNameA: String } @DataSchema interface Person { val age: Int val city: String? - val name: DataRow + val name: Name val weight: Int? val isHappy: Boolean } diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt index 2e8dc27e5..005da8c63 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt @@ -34,7 +34,7 @@ class AssociateBySamples : DataFrameSampleHelper("associateBy", "api/collections "city" to listOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"), "weight" to listOf(54, 87, null, null, 68, 55, 90), "isHappy" to listOf(true, true, false, true, true, false, true), - ).group("firstName", "lastName").into("name").cast() + ).group { firstName and lastName }.into("name").cast() @Test fun notebook_test_associateBy_1() { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateSamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateSamples.kt index 2b8191a09..9afabd623 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateSamples.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateSamples.kt @@ -36,7 +36,7 @@ class AssociateSamples : DataFrameSampleHelper("associate", "api/collectionsInte "city" to listOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"), "weight" to listOf(54, 87, null, null, 68, 55, 90), "isHappy" to listOf(true, true, false, true, true, false, true), - ).group("firstName", "lastName").into("name").cast() + ).group { firstName and lastName }.into("name").cast() @Test fun notebook_test_associate_1() { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt new file mode 100644 index 000000000..101a20164 --- /dev/null +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt @@ -0,0 +1,13 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.api.* + +private val a = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( + "Alice", "Cooper", 15, "London", 54, true, + "Bob", "Dylan", 45, "Dubai", 87, true, + "Charlie", "Daniels", 20, "Moscow", null, false, + "Charlie", "Chaplin", 40, "Milan", null, true, + "Bob", "Marley", 30, "Tokyo", 68, true, + "Alice", "Wolf", 20, null, 55, false, + "Charlie", "Byrd", 30, "Moscow", 90, true, +).group { firstName and lastName }.into("name") diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt index 570b4cfd6..28aa8811f 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt @@ -2,13 +2,11 @@ package org.jetbrains.kotlinx.dataframe.samples.guides -import org.jetbrains.kotlinx.dataframe.ColumnsContainer -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.api.add import org.jetbrains.kotlinx.dataframe.api.aggregate -import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.count import org.jetbrains.kotlinx.dataframe.api.describe @@ -33,198 +31,190 @@ import org.junit.Ignore import org.junit.Test class QuickStartGuide : DataFrameSampleHelper("quickstart", "guides") { - private val df = DataFrame.readCsv( - "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", - ) - - private val full_name by column() - private val name by column() - private val stargazers_count by column() - private val starsCount by column() - private val topics by column() - - interface DFUpdatedSchema - - // TODO remove, use plugin - val DataRow.name: String - get() = get("name") as String - val ColumnsContainer.topics: DataColumn - get() = get("topics") as DataColumn - val DataRow.topics: List - get() = get("topics") as List - val ColumnsContainer.isIntellij: DataColumn - get() = get("isIntellij") as DataColumn - val DataRow.starsCount: Int - get() = get("starsCount") as Int - - private val dfSelected = df.select { full_name and stargazers_count and topics } - private val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } - private val dfRenamed = dfFiltered.rename { full_name }.into("name") - // And "stargazers_count" into "starsCount" - .rename { stargazers_count }.into("starsCount") - private val dfUpdated = dfRenamed - // Update "name" values with only its second part (after '/') - .update { name }.with { it.split("/")[1] } - // Convert "topics" `String` values into `List` by splitting: - .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } as DataFrame - private val dfWithIsIntellij = dfUpdated.add("isIntellij") { - name.contains("intellij") || "intellij" in topics - } - private val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } - private val dfTop10 = dfWithIsIntellij - // Sort by "starsCount" value descending - .sortByDesc { starsCount }.take(10) - - @Test - fun notebook_test_quickstart_2() { - // SampleStart - val df = DataFrame.readCsv( - "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", - ) - // SampleEnd - } - @Test - fun notebook_test_quickstart_3() { - // SampleStart - df - // SampleEnd - .saveDfHtmlSample() + @DataSchema + interface Repositories { + val full_name: String + val html_url: java.net.URL + val stargazers_count: Int + val topics: String + val watchers: Int } - @Test - fun notebook_test_quickstart_4() { - // SampleStart - df.describe() - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_5() { - // SampleStart - // Select "full_name", "stargazers_count" and "topics" columns - val dfSelected = df.select { full_name and stargazers_count and topics } - dfSelected - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_6() { - // SampleStart - // Keep only rows where "stargazers_count" value is more than 1000 - val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } - dfFiltered - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_7() { - // SampleStart - // Rename "full_name" column into "name" - val dfRenamed = dfFiltered.rename { full_name }.into("name") - // And "stargazers_count" into "starsCount" - .rename { stargazers_count }.into("starsCount") - dfRenamed - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_8() { - // SampleStart - val dfUpdated = dfRenamed - // Update "name" values with only its second part (after '/') - .update { name }.with { it.split("/")[1] } - // Convert "topics" `String` values into `List` by splitting: - .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } - dfUpdated - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_9() { - // SampleStart - dfUpdated.topics.type() - // SampleEnd - } - - @Test - fun notebook_test_quickstart_10() { - // SampleStart - // Add a `Boolean` column indicating whether the `name` contains the "intellij" substring - // or the topics include "intellij". - val dfWithIsIntellij = dfUpdated.add("isIntellij") { - name.contains("intellij") || "intellij" in topics - } - dfWithIsIntellij - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_11() { - // SampleStart - val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } - groupedByIsIntellij - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_12() { - // SampleStart - groupedByIsIntellij.count() - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_13() { - // SampleStart - groupedByIsIntellij.aggregate { - // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns - sumOf { starsCount } into "sumStars" - maxOf { starsCount } into "maxStars" - } - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_14() { - // SampleStart - val dfTop10 = dfWithIsIntellij - // Sort by "starsCount" value descending - .sortByDesc { starsCount }.take(10) - dfTop10 - // SampleEnd - .saveDfHtmlSample() - } - - @Test - fun notebook_test_quickstart_16() { - // SampleStart - dfTop10.plot { - bars { - x(name) - y(starsCount) - } + private val df = DataFrame.readCsv( + "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", + ).cast() - layout.title = "Top 10 JetBrains repositories by stars count" - } - // SampleEnd - .savePlotSVGSample() - } + interface DFUpdatedSchema - @Ignore - @Test - fun notebook_test_quickstart_17() { - // SampleStart - dfWithIsIntellij.writeExcel("jb_repos.xlsx") - // SampleEnd - } +// private val dfSelected = df.select { full_name and stargazers_count and topics } +// private val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } +// private val dfRenamed = dfFiltered.rename { full_name }.into("name") +// // And "stargazers_count" into "starsCount" +// .rename { stargazers_count }.into("starsCount") +// private val dfUpdated = dfRenamed +// // Update "name" values with only its second part (after '/') +// .update { name }.with { it.split("/")[1] } +// // Convert "topics" `String` values into `List` by splitting: +// .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } as DataFrame +// private val dfWithIsIntellij = dfUpdated.add("isIntellij") { +// name.contains("intellij") || "intellij" in topics +// } +// private val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } +// private val dfTop10 = dfWithIsIntellij +// // Sort by "starsCount" value descending +// .sortByDesc { starsCount }.take(10) + +// @Test +// fun notebook_test_quickstart_2() { +// // SampleStart +// val df = DataFrame.readCsv( +// "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", +// ) +// // SampleEnd +// } +// +// @Test +// fun notebook_test_quickstart_3() { +// // SampleStart +// df +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_4() { +// // SampleStart +// df.describe() +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_5() { +// // SampleStart +// // Select "full_name", "stargazers_count" and "topics" columns +// val dfSelected = df.select { full_name and stargazers_count and topics } +// dfSelected +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_6() { +// // SampleStart +// // Keep only rows where "stargazers_count" value is more than 1000 +// val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } +// dfFiltered +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_7() { +// // SampleStart +// // Rename "full_name" column into "name" +// val dfRenamed = dfFiltered.rename { full_name }.into("name") +// // And "stargazers_count" into "starsCount" +// .rename { stargazers_count }.into("starsCount") +// dfRenamed +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_8() { +// // SampleStart +// val dfUpdated = dfRenamed +// // Update "name" values with only its second part (after '/') +// .update { name }.with { it.split("/")[1] } +// // Convert "topics" `String` values into `List` by splitting: +// .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } +// dfUpdated +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_9() { +// // SampleStart +// dfUpdated.topics.type() +// // SampleEnd +// } +// +// @Test +// fun notebook_test_quickstart_10() { +// // SampleStart +// // Add a `Boolean` column indicating whether the `name` contains the "intellij" substring +// // or the topics include "intellij". +// val dfWithIsIntellij = dfUpdated.add("isIntellij") { +// name.contains("intellij") || "intellij" in topics +// } +// dfWithIsIntellij +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_11() { +// // SampleStart +// val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } +// groupedByIsIntellij +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_12() { +// // SampleStart +// groupedByIsIntellij.count() +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_13() { +// // SampleStart +// groupedByIsIntellij.aggregate { +// // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns +// sumOf { starsCount } into "sumStars" +// maxOf { starsCount } into "maxStars" +// } +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_14() { +// // SampleStart +// val dfTop10 = dfWithIsIntellij +// // Sort by "starsCount" value descending +// .sortByDesc { starsCount }.take(10) +// dfTop10 +// // SampleEnd +// .saveDfHtmlSample() +// } +// +// @Test +// fun notebook_test_quickstart_16() { +// // SampleStart +// dfTop10.plot { +// bars { +// x(name) +// y(starsCount) +// } +// +// layout.title = "Top 10 JetBrains repositories by stars count" +// } +// // SampleEnd +// .savePlotSVGSample() +// } +// +// @Ignore +// @Test +// fun notebook_test_quickstart_17() { +// // SampleStart +// dfWithIsIntellij.writeExcel("jb_repos.xlsx") +// // SampleEnd +// } } From 315045c8271cae778be58944aaf0e76c4a4c72f3 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 18 Sep 2025 15:21:02 +0200 Subject: [PATCH 3/7] moved instrumentedJars creation to top-level. Made :samples depend on all relevant jars --- build.gradle.kts | 14 ++++++++++++++ core/build.gradle.kts | 11 ----------- dataframe-csv/build.gradle.kts | 11 ----------- dataframe-json/build.gradle.kts | 11 ----------- samples/build.gradle.kts | 33 +++++++++++++++++++++++---------- 5 files changed, 37 insertions(+), 43 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 969af82b8..ecadd5264 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -223,6 +223,20 @@ allprojects { } catch (_: UnknownDomainObjectException) { logger.warn("Could not set buildConfig on :${this.name}") } + + try { + val instrumentedJars: Configuration by configurations.creating { + isCanBeConsumed = true + isCanBeResolved = false + } + artifacts { + add("instrumentedJars", tasks.jar.get().archiveFile) { + builtBy(tasks.jar) + } + } + } catch (_: Exception) { + logger.warn("Could not set instrumentedJars on :${this.name}") + } } } diff --git a/core/build.gradle.kts b/core/build.gradle.kts index f47415e43..12d07e8d3 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -386,17 +386,6 @@ kotlinPublications { } } -val instrumentedJars: Configuration by configurations.creating { - isCanBeConsumed = true - isCanBeResolved = false -} - -artifacts { - add("instrumentedJars", tasks.jar.get().archiveFile) { - builtBy(tasks.jar) - } -} - // Disable and enable if updating plugin breaks the build dataframes { schema { diff --git a/dataframe-csv/build.gradle.kts b/dataframe-csv/build.gradle.kts index 1f55fb6eb..6e2fa0cef 100644 --- a/dataframe-csv/build.gradle.kts +++ b/dataframe-csv/build.gradle.kts @@ -160,14 +160,3 @@ kotlinPublications { kotlin { explicitApi() } - -val instrumentedJars: Configuration by configurations.creating { - isCanBeConsumed = true - isCanBeResolved = false -} - -artifacts { - add("instrumentedJars", tasks.jar.get().archiveFile) { - builtBy(tasks.jar) - } -} diff --git a/dataframe-json/build.gradle.kts b/dataframe-json/build.gradle.kts index 56e5aeb8d..614151ce5 100644 --- a/dataframe-json/build.gradle.kts +++ b/dataframe-json/build.gradle.kts @@ -59,17 +59,6 @@ sourceSets { } } -val instrumentedJars: Configuration by configurations.creating { - isCanBeConsumed = true - isCanBeResolved = false -} - -artifacts { - add("instrumentedJars", tasks.jar.get().archiveFile) { - builtBy(tasks.jar) - } -} - kotlinPublications { publication { publicationName = "dataframeJson" diff --git a/samples/build.gradle.kts b/samples/build.gradle.kts index 27b392160..51f8ef6a1 100644 --- a/samples/build.gradle.kts +++ b/samples/build.gradle.kts @@ -37,21 +37,34 @@ repositories { mavenLocal() // for local development } +val dependentProjectPaths = with(projects) { + listOf( + core, + dataframeArrow, + dataframeExcel, + dataframeJdbc, + dataframeCsv, + dataframeJson, + ) +}.map { it.path } + tasks.compileKotlin { - dependsOn(projects.core.path + ":jar") + dependentProjectPaths.forEach { + dependsOn("$it:jar") + } } -val coreJar = project(projects.core.path).configurations - .getByName("instrumentedJars") - .artifacts.single() - .file.absolutePath - .replace(File.separatorChar, '/') +val jarPaths = dependentProjectPaths.map { + project(it).configurations + .getByName("instrumentedJars") + .artifacts.single() + .file.absolutePath + .replace(File.separatorChar, '/') +} dependencies { - implementation(projects.dataframe) { - // exclude(group, "dataframe-core") - } - implementation(files(coreJar)) +// implementation(projects.dataframe) Must depend on jars for the compiler plugin to work! + implementation(files(jarPaths)) testImplementation(libs.junit) testImplementation(libs.kotestAssertions) { From 198624804b52ca39ea1ad57cbad7c55728c6151d Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 18 Sep 2025 20:30:22 +0200 Subject: [PATCH 4/7] enabled the compiler plugin completely for :samples --- samples/build.gradle.kts | 28 +- .../samples/DataFrameSampleHelper.kt | 3 +- .../kotlinx/dataframe/samples/api/Generate.kt | 212 +++++----- .../kotlinx/dataframe/samples/api/Modify.kt | 152 +++---- .../kotlinx/dataframe/samples/api/Read.kt | 3 +- .../kotlinx/dataframe/samples/api/TestBase.kt | 25 +- .../kotlinx/dataframe/samples/api/Write.kt | 4 +- .../collectionsInterop/AssociateBySamples.kt | 33 +- .../dataframe/samples/api/info/TailSamples.kt | 10 +- .../kotlinx/dataframe/samples/api/tmp.kt | 13 - .../samples/api/utils/ChunkedSamples.kt | 2 + .../dataframe/samples/guides/quickstart.kt | 378 ++++++++++-------- 12 files changed, 426 insertions(+), 437 deletions(-) delete mode 100644 samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt diff --git a/samples/build.gradle.kts b/samples/build.gradle.kts index 51f8ef6a1..48be80fc8 100644 --- a/samples/build.gradle.kts +++ b/samples/build.gradle.kts @@ -23,10 +23,7 @@ plugins { alias(kotlin.jvm) alias(korro) alias(ktlint) - // Compiler plugin doesn't work properly for now: https://github.com/Kotlin/dataframe/issues/1432 alias(dataframe.compiler.plugin) - // using deprecated gradle plugin instead -// alias(dataframe) // alias(kover) alias(ksp) } @@ -37,7 +34,7 @@ repositories { mavenLocal() // for local development } -val dependentProjectPaths = with(projects) { +val dependentProjects = with(projects) { listOf( core, dataframeArrow, @@ -46,16 +43,16 @@ val dependentProjectPaths = with(projects) { dataframeCsv, dataframeJson, ) -}.map { it.path } +}.map { project(it.path) } tasks.compileKotlin { - dependentProjectPaths.forEach { - dependsOn("$it:jar") + dependentProjects.forEach { + dependsOn("${it.path}:jar") } } -val jarPaths = dependentProjectPaths.map { - project(it).configurations +val dependentProjectJarPaths = dependentProjects.map { + it.configurations .getByName("instrumentedJars") .artifacts.single() .file.absolutePath @@ -63,8 +60,17 @@ val jarPaths = dependentProjectPaths.map { } dependencies { -// implementation(projects.dataframe) Must depend on jars for the compiler plugin to work! - implementation(files(jarPaths)) + // implementation(projects.dataframe) // Must depend on jars for the compiler plugin to work! + implementation(files(dependentProjectJarPaths)) + + // include api() dependencies from dependent projects, as they are not included in the jars + dependentProjects.forEach { + it.configurations.getByName("api").dependencies.forEach { dep -> + if (dep is ExternalModuleDependency) { + implementation("${dep.group}:${dep.name}:${dep.version ?: "+"}") + } + } + } testImplementation(libs.junit) testImplementation(libs.kotestAssertions) { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/DataFrameSampleHelper.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/DataFrameSampleHelper.kt index 44ff46b62..5d817d5b3 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/DataFrameSampleHelper.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/DataFrameSampleHelper.kt @@ -2,6 +2,7 @@ package org.jetbrains.kotlinx.dataframe.samples import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.samples.api.TestBase import org.jetbrains.kotlinx.kandy.letsplot.samples.SampleHelper abstract class DataFrameSampleHelper(sampleName: String, subFolder: String = "samples") : @@ -10,7 +11,7 @@ abstract class DataFrameSampleHelper(sampleName: String, subFolder: String = "sa subFolder, "../docs/StardustDocs/images", "../docs/StardustDocs/resources", - ) { + ), TestBase { fun DataColumn<*>.saveDfHtmlSample() { toDataFrame().saveDfHtmlSample() diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt index d5509285a..9ed6d6f36 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Generate.kt @@ -1,109 +1,103 @@ -//@file:Suppress("UNUSED_VARIABLE", "unused", "UNCHECKED_CAST", "ktlint", "ClassName") -// -//package org.jetbrains.kotlinx.dataframe.samples.api -// -//import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -//import org.jetbrains.kotlinx.dataframe.api.add -//import org.jetbrains.kotlinx.dataframe.api.all -//import org.jetbrains.kotlinx.dataframe.api.cast -//import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -//import org.jetbrains.kotlinx.dataframe.api.filter -//import org.jetbrains.kotlinx.dataframe.api.generateCode -//import org.jetbrains.kotlinx.dataframe.api.generateDataClasses -//import org.jetbrains.kotlinx.dataframe.api.generateInterfaces -//import org.jetbrains.kotlinx.dataframe.api.into -//import org.jetbrains.kotlinx.dataframe.api.rename -//import org.jetbrains.kotlinx.dataframe.api.sumOf -//import org.jetbrains.kotlinx.dataframe.api.toList -//import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper -//import org.junit.Test -// -//class Generate : DataFrameSampleHelper("generate_docs", "api") { -// private val ordersAlice = dataFrameOf( -// "orderId" to listOf(101, 102), -// "amount" to listOf(50.0, 75.5), -// ) -// -// private val ordersBob = dataFrameOf( -// "orderId" to listOf(103, 104, 105), -// "amount" to listOf(20.0, 30.0, 25.0), -// ) -// -// private val df = dataFrameOf( -// "user" to listOf("Alice", "Bob"), -// "orders" to listOf(ordersAlice, ordersBob), -// ) -// -// @DataSchema(isOpen = false) -// interface _DataFrameType11 { -// val amount: kotlin.Double -// val orderId: kotlin.Int -// } -// -// @DataSchema -// interface _DataFrameType1 { -// val orders: List<_DataFrameType11> -// val user: kotlin.String -// } -// -// @DataSchema -// data class Customer1(val amount: Double, val orderId: Int) -// -// @DataSchema -// data class Customer(val orders: List, val user: String) -// -// @Test -// fun notebook_test_generate_docs_1() { -// // SampleStart -// df -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_generate_docs_2() { -// // SampleStart -// df.generateInterfaces() -// // SampleEnd -// } -// -// @Test -// fun notebook_test_generate_docs_3() { -// // SampleStart -// df.filter { orders.all { orderId >= 102 } } -// // SampleEnd -// // .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_generate_docs_4() { -// // SampleStart -// df.generateDataClasses("Customer") -// // SampleEnd -// } -// -// @Test -// fun notebook_test_generate_docs_5() { -// // SampleStart -// val customers: List = df.cast().toList() -// // SampleEnd -// } -// -// @Test -// fun notebook_test_generate_docs_6() { -// // SampleStart -// df.generateCode("Customer") -// // SampleEnd -// } -// -// @Test -// fun notebook_test_generate_docs_7() { -// // SampleStart -// df.cast() -// .add("ordersTotal") { orders.sumOf { it.amount } } -// .filter { user.startsWith("A") } -// .rename { user }.into("customer") -// // SampleEnd -// // .saveDfHtmlSample() -// } -//} +@file:Suppress("UNUSED_VARIABLE", "unused", "UNCHECKED_CAST", "ktlint", "ClassName") + +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.all +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.generateDataClasses +import org.jetbrains.kotlinx.dataframe.api.generateInterfaces +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.toList +import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper +import org.junit.Test + +class Generate : DataFrameSampleHelper("generate_docs", "api") { + + @DataSchema + interface Orders { + val orderId: Int + val amount: Double + } + + private val ordersAlice = dataFrameOf( + "orderId" to listOf(101, 102), + "amount" to listOf(50.0, 75.5), + ).cast() + + private val ordersBob = dataFrameOf( + "orderId" to listOf(103, 104, 105), + "amount" to listOf(20.0, 30.0, 25.0), + ).cast() + + @DataSchema + interface Customer { + val user: String + val orders: List + } + + private val df = dataFrameOf( + "user" to listOf("Alice", "Bob"), + "orders" to listOf(ordersAlice, ordersBob), + ).cast() + + @Test + fun notebook_test_generate_docs_1() { + // SampleStart + df + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_generate_docs_2() { + // SampleStart + df.generateInterfaces() + // SampleEnd + } + + @Test + fun notebook_test_generate_docs_3() { + // SampleStart + df.filter { orders.all { orderId >= 102 } } + // SampleEnd + // .saveDfHtmlSample() + } + + @Test + fun notebook_test_generate_docs_4() { + // SampleStart + df.generateDataClasses("Customer") + // SampleEnd + } + + @Test + fun notebook_test_generate_docs_5() { + // SampleStart + val customers: List = df.toList() + // SampleEnd + } + + @Test + fun notebook_test_generate_docs_6() { + // SampleStart + df.generateInterfaces(markerName = "Customer") + // SampleEnd + } + + @Test + fun notebook_test_generate_docs_7() { + // SampleStart + df + .add("ordersTotal") { orders.sumOf { it.amount } } + .filter { user.startsWith("A") } + .rename { user }.into("customer") + // SampleEnd + // .saveDfHtmlSample() + } +} diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index eacc03d37..ae8394245 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -23,82 +23,82 @@ import org.junit.Test @Suppress("ktlint:standard:argument-list-wrapping") class Modify : DataFrameSampleHelper("operations", "modify") { - //.cast() + val df = peopleDf -// private val df2 = dataFrameOf( -// "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10", -// )( -// 45, 12, 78, 34, 90, 23, 67, 89, 56, 43, -// 87, 34, 56, 78, 12, 45, 90, 23, 67, 89, -// 23, 67, 89, 45, 78, 90, 12, 56, 34, 78, -// 90, 45, 23, 67, 34, 78, 89, 12, 56, 23, -// 12, 89, 45, 90, 56, 34, 78, 67, 23, 90, -// 78, 56, 12, 23, 89, 67, 34, 90, 45, 12, -// 34, 90, 67, 12, 45, 23, 56, 78, 89, 67, -// 56, 23, 34, 89, 67, 12, 45, 34, 78, 90, -// 89, 78, 90, 56, 23, 89, 67, 45, 12, 34, -// 67, 45, 78, 12, 90, 56, 23, 89, 34, 78, -// ) + private val df2 = dataFrameOf( + "col1", "col2", "col3", "col4", "col5", "col6", "col7", "col8", "col9", "col10", + )( + 45, 12, 78, 34, 90, 23, 67, 89, 56, 43, + 87, 34, 56, 78, 12, 45, 90, 23, 67, 89, + 23, 67, 89, 45, 78, 90, 12, 56, 34, 78, + 90, 45, 23, 67, 34, 78, 89, 12, 56, 23, + 12, 89, 45, 90, 56, 34, 78, 67, 23, 90, + 78, 56, 12, 23, 89, 67, 34, 90, 45, 12, + 34, 90, 67, 12, 45, 23, 56, 78, 89, 67, + 56, 23, 34, 89, 67, 12, 45, 34, 78, 90, + 89, 78, 90, 56, 23, 89, 67, 45, 12, 34, + 67, 45, 78, 12, 90, 56, 23, 89, 34, 78, + ) -// @Suppress("UNCHECKED_CAST") -// @Test -// fun formatExample_strings() { -// // SampleStart -// val ageMin = df.min { "age"() } -// val ageMax = df.max { "age"() } -// -// df -// .format().with { bold and textColor(black) and background(white) } -// .format("name").with { underline } -// .format { "name"["lastName"] }.with { italic } -// .format("isHappy").with { -// background(if (it as Boolean) green else red) -// } -// .format("weight").notNull().with { linearBg(it as Int, 50 to blue, 90 to red) } -// .format("age").perRowCol { row, col -> -// col as DataColumn -// textColor( -// linear(value = col[row], from = ageMin to blue, to = ageMax to green), -// ) -// } -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun formatExample_properties() { -// // SampleStart -// val ageMin = df.age.min() -// val ageMax = df.age.max() -// -// df -// .format().with { bold and textColor(black) and background(white) } -// .format { name }.with { underline } -// .format { name.lastName }.with { italic } -// .format { isHappy }.with { background(if (it) green else red) } -// .format { weight }.notNull().linearBg(50 to FormattingDsl.blue, 90 to FormattingDsl.red) -// .format { age }.perRowCol { row, col -> -// textColor( -// linear(value = col[row], from = ageMin to blue, to = ageMax to green), -// ) -// } -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun formatExampleNumbers() { -// // SampleStart -// df2.format().perRowCol { row, col -> -// val rowIndex = row.index() -// val colIndex = row.df().getColumnIndex(col) -// if ((rowIndex - colIndex) % 3 == 0) { -// background(darkGray) and textColor(white) -// } else { -// background(white) and textColor(black) -// } -// } -// // SampleEnd -// .saveDfHtmlSample() -// } + @Suppress("UNCHECKED_CAST") + @Test + fun formatExample_strings() { + // SampleStart + val ageMin = df.min { "age"() } + val ageMax = df.max { "age"() } + + df + .format().with { bold and textColor(black) and background(white) } + .format("name").with { underline } + .format { "name"["lastName"] }.with { italic } + .format("isHappy").with { + background(if (it as Boolean) green else red) + } + .format("weight").notNull().with { linearBg(it as Int, 50 to blue, 90 to red) } + .format("age").perRowCol { row, col -> + col as DataColumn + textColor( + linear(value = col[row], from = ageMin to blue, to = ageMax to green), + ) + } + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun formatExample_properties() { + // SampleStart + val ageMin = df.age.min() + val ageMax = df.age.max() + + df + .format().with { bold and textColor(black) and background(white) } + .format { name }.with { underline } + .format { name.lastName }.with { italic } + .format { isHappy }.with { background(if (it) green else red) } + .format { weight }.notNull().linearBg(50 to FormattingDsl.blue, 90 to FormattingDsl.red) + .format { age }.perRowCol { row, col -> + textColor( + linear(value = col[row], from = ageMin to blue, to = ageMax to green), + ) + } + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun formatExampleNumbers() { + // SampleStart + df2.format().perRowCol { row, col -> + val rowIndex = row.index() + val colIndex = row.df().getColumnIndex(col) + if ((rowIndex - colIndex) % 3 == 0) { + background(darkGray) and textColor(white) + } else { + background(white) and textColor(black) + } + } + // SampleEnd + .saveDfHtmlSample() + } } diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt index d9e38391c..21ef7478a 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Read.kt @@ -150,6 +150,7 @@ class Read { }, ) // SampleEnd - } catch (_: Exception) {} + } catch (_: Exception) { + } } } diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt index 71a130f1b..73e95c1c9 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt @@ -11,22 +11,23 @@ import org.jetbrains.kotlinx.dataframe.api.group import org.jetbrains.kotlinx.dataframe.api.into @Suppress("ktlint:standard:argument-list-wrapping") -public open class TestBase { +interface TestBase { - private val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( - "Alice", "Cooper", 15, "London", 54, true, - "Bob", "Dylan", 45, "Dubai", 87, true, - "Charlie", "Daniels", 20, "Moscow", null, false, - "Charlie", "Chaplin", 40, "Milan", null, true, - "Bob", "Marley", 30, "Tokyo", 68, true, - "Alice", "Wolf", 20, null, 55, false, - "Charlie", "Byrd", 30, "Moscow", 90, true, - ).group { firstName and lastName }.into("name")//.cast() + val peopleDf + get() = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( + "Alice", "Cooper", 15, "London", 54, true, + "Bob", "Dylan", 45, "Dubai", 87, true, + "Charlie", "Daniels", 20, "Moscow", null, false, + "Charlie", "Chaplin", 40, "Milan", null, true, + "Bob", "Marley", 30, "Tokyo", 68, true, + "Alice", "Wolf", 20, null, 55, false, + "Charlie", "Byrd", 30, "Moscow", 90, true, + ).group { firstName and lastName }.into("name").cast() @DataSchema interface Name { - val firstNameA: String - val lastNameA: String + val firstName: String + val lastName: String } @DataSchema diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt index 03683c944..6cc3311aa 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Write.kt @@ -24,7 +24,9 @@ import org.junit.Test import java.io.File import kotlin.io.path.deleteExisting -class Write : TestBase() { +class Write : TestBase { + + val df = peopleDf @Test fun writeCsv() { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt index 005da8c63..62b135f44 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/collectionsInterop/AssociateBySamples.kt @@ -1,40 +1,15 @@ package org.jetbrains.kotlinx.dataframe.samples.api.collectionsInterop -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.api.associateBy -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.into import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper +import org.jetbrains.kotlinx.dataframe.samples.api.firstName +import org.jetbrains.kotlinx.dataframe.samples.api.lastName +import org.jetbrains.kotlinx.dataframe.samples.api.name import org.junit.Test class AssociateBySamples : DataFrameSampleHelper("associateBy", "api/collectionsInterop") { - @DataSchema - interface Name { - val firstName: String - val lastName: String - } - - @DataSchema - interface Person { - val age: Int - val city: String? - val name: DataRow - val weight: Int? - val isHappy: Boolean - } - - private val df = dataFrameOf( - "firstName" to listOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"), - "lastName" to listOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"), - "age" to listOf(15, 45, 20, 40, 30, 20, 30), - "city" to listOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"), - "weight" to listOf(54, 87, null, null, 68, 55, 90), - "isHappy" to listOf(true, true, false, true, true, false, true), - ).group { firstName and lastName }.into("name").cast() + private val df = peopleDf @Test fun notebook_test_associateBy_1() { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/info/TailSamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/info/TailSamples.kt index 63e00a828..91f195e76 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/info/TailSamples.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/info/TailSamples.kt @@ -1,20 +1,12 @@ package org.jetbrains.kotlinx.dataframe.samples.api.info -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.tail import org.jetbrains.kotlinx.dataframe.samples.DataFrameSampleHelper import org.junit.Test class TailSamples : DataFrameSampleHelper("tail", "api") { - private val df = dataFrameOf( - "firstName" to listOf("Alice", "Bob", "Charlie", "Charlie", "Bob", "Alice", "Charlie"), - "lastName" to listOf("Cooper", "Dylan", "Daniels", "Chaplin", "Marley", "Wolf", "Byrd"), - "age" to listOf(15, 45, 20, 40, 30, 20, 30), - "city" to listOf("London", "Dubai", "Moscow", "Milan", "Tokyo", null, "Moscow"), - "weight" to listOf(54, 87, null, null, 68, 55, 90), - "isHappy" to listOf(true, true, false, true, true, false, true), - ) + private val df = peopleDf @Test fun notebook_test_tail_1() { diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt deleted file mode 100644 index 101a20164..000000000 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/tmp.kt +++ /dev/null @@ -1,13 +0,0 @@ -package org.jetbrains.kotlinx.dataframe.samples.api - -import org.jetbrains.kotlinx.dataframe.api.* - -private val a = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( - "Alice", "Cooper", 15, "London", 54, true, - "Bob", "Dylan", 45, "Dubai", 87, true, - "Charlie", "Daniels", 20, "Moscow", null, false, - "Charlie", "Chaplin", 40, "Milan", null, true, - "Bob", "Marley", 30, "Tokyo", 68, true, - "Alice", "Wolf", 20, null, 55, false, - "Charlie", "Byrd", 30, "Moscow", 90, true, -).group { firstName and lastName }.into("name") diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/ChunkedSamples.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/ChunkedSamples.kt index 2d5f104ac..b6797c9f3 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/ChunkedSamples.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/utils/ChunkedSamples.kt @@ -1,5 +1,6 @@ package org.jetbrains.kotlinx.dataframe.samples.api.utils +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.chunked import org.jetbrains.kotlinx.dataframe.api.dataFrameOf @@ -8,6 +9,7 @@ import org.junit.Test class ChunkedSamples : DataFrameSampleHelper("chunked", "api") { + @DataSchema interface SimplePerson { val name: String val age: Int diff --git a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt index 28aa8811f..6d68c5c7e 100644 --- a/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt +++ b/samples/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/guides/quickstart.kt @@ -33,188 +33,216 @@ import org.junit.Test class QuickStartGuide : DataFrameSampleHelper("quickstart", "guides") { @DataSchema - interface Repositories { - val full_name: String + interface Repositories : SelectedRepositories { val html_url: java.net.URL + val watchers: Int + } + + @DataSchema + interface SelectedRepositories { + val full_name: String val stargazers_count: Int val topics: String - val watchers: Int + } + + @DataSchema + interface RenamedSelectedRepositories { + val name: String + val starsCount: Int + val topics: String + } + + @DataSchema + interface UpdatedRepositories { + val name: String + val starsCount: Int + val topics: List + } + + @DataSchema + interface IsIntellijRepositories { + val name: String + val starsCount: Int + val topics: List + val isIntellij: Boolean } private val df = DataFrame.readCsv( "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", ).cast() - interface DFUpdatedSchema - -// private val dfSelected = df.select { full_name and stargazers_count and topics } -// private val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } -// private val dfRenamed = dfFiltered.rename { full_name }.into("name") -// // And "stargazers_count" into "starsCount" -// .rename { stargazers_count }.into("starsCount") -// private val dfUpdated = dfRenamed -// // Update "name" values with only its second part (after '/') -// .update { name }.with { it.split("/")[1] } -// // Convert "topics" `String` values into `List` by splitting: -// .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } as DataFrame -// private val dfWithIsIntellij = dfUpdated.add("isIntellij") { -// name.contains("intellij") || "intellij" in topics -// } -// private val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } -// private val dfTop10 = dfWithIsIntellij -// // Sort by "starsCount" value descending -// .sortByDesc { starsCount }.take(10) - -// @Test -// fun notebook_test_quickstart_2() { -// // SampleStart -// val df = DataFrame.readCsv( -// "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", -// ) -// // SampleEnd -// } -// -// @Test -// fun notebook_test_quickstart_3() { -// // SampleStart -// df -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_4() { -// // SampleStart -// df.describe() -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_5() { -// // SampleStart -// // Select "full_name", "stargazers_count" and "topics" columns -// val dfSelected = df.select { full_name and stargazers_count and topics } -// dfSelected -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_6() { -// // SampleStart -// // Keep only rows where "stargazers_count" value is more than 1000 -// val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } -// dfFiltered -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_7() { -// // SampleStart -// // Rename "full_name" column into "name" -// val dfRenamed = dfFiltered.rename { full_name }.into("name") -// // And "stargazers_count" into "starsCount" -// .rename { stargazers_count }.into("starsCount") -// dfRenamed -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_8() { -// // SampleStart -// val dfUpdated = dfRenamed -// // Update "name" values with only its second part (after '/') -// .update { name }.with { it.split("/")[1] } -// // Convert "topics" `String` values into `List` by splitting: -// .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } -// dfUpdated -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_9() { -// // SampleStart -// dfUpdated.topics.type() -// // SampleEnd -// } -// -// @Test -// fun notebook_test_quickstart_10() { -// // SampleStart -// // Add a `Boolean` column indicating whether the `name` contains the "intellij" substring -// // or the topics include "intellij". -// val dfWithIsIntellij = dfUpdated.add("isIntellij") { -// name.contains("intellij") || "intellij" in topics -// } -// dfWithIsIntellij -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_11() { -// // SampleStart -// val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } -// groupedByIsIntellij -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_12() { -// // SampleStart -// groupedByIsIntellij.count() -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_13() { -// // SampleStart -// groupedByIsIntellij.aggregate { -// // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns -// sumOf { starsCount } into "sumStars" -// maxOf { starsCount } into "maxStars" -// } -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_14() { -// // SampleStart -// val dfTop10 = dfWithIsIntellij -// // Sort by "starsCount" value descending -// .sortByDesc { starsCount }.take(10) -// dfTop10 -// // SampleEnd -// .saveDfHtmlSample() -// } -// -// @Test -// fun notebook_test_quickstart_16() { -// // SampleStart -// dfTop10.plot { -// bars { -// x(name) -// y(starsCount) -// } -// -// layout.title = "Top 10 JetBrains repositories by stars count" -// } -// // SampleEnd -// .savePlotSVGSample() -// } -// -// @Ignore -// @Test -// fun notebook_test_quickstart_17() { -// // SampleStart -// dfWithIsIntellij.writeExcel("jb_repos.xlsx") -// // SampleEnd -// } + private val dfSelected = df.select { full_name and stargazers_count and topics }.cast() + private val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } + private val dfRenamed = dfFiltered.rename { full_name }.into("name") + // And "stargazers_count" into "starsCount" + .rename { stargazers_count }.into("starsCount") + .cast() + private val dfUpdated = dfRenamed + // Update "name" values with only its second part (after '/') + .update { name }.with { it.split("/")[1] } + // Convert "topics" `String` values into `List` by splitting: + .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } + .cast() + + private val dfWithIsIntellij = dfUpdated.add("isIntellij") { + name.contains("intellij") || "intellij" in topics + }.cast() + private val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } + private val dfTop10 = dfWithIsIntellij + // Sort by "starsCount" value descending + .sortByDesc { starsCount }.take(10) + + @Test + fun notebook_test_quickstart_2() { + // SampleStart + val df = DataFrame.readCsv( + "https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv", + ) + // SampleEnd + } + + @Test + fun notebook_test_quickstart_3() { + // SampleStart + df + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_4() { + // SampleStart + df.describe() + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_5() { + // SampleStart + // Select "full_name", "stargazers_count" and "topics" columns + val dfSelected = df.select { full_name and stargazers_count and topics } + dfSelected + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_6() { + // SampleStart + // Keep only rows where "stargazers_count" value is more than 1000 + val dfFiltered = dfSelected.filter { stargazers_count >= 1000 } + dfFiltered + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_7() { + // SampleStart + // Rename "full_name" column into "name" + val dfRenamed = dfFiltered.rename { full_name }.into("name") + // And "stargazers_count" into "starsCount" + .rename { stargazers_count }.into("starsCount") + dfRenamed + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_8() { + // SampleStart + val dfUpdated = dfRenamed + // Update "name" values with only its second part (after '/') + .update { name }.with { it.split("/")[1] } + // Convert "topics" `String` values into `List` by splitting: + .convert { topics }.with { it.removePrefix("[").removeSuffix("]").split(", ") } + dfUpdated + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_9() { + // SampleStart + dfUpdated.topics.type() + // SampleEnd + } + + @Test + fun notebook_test_quickstart_10() { + // SampleStart + // Add a `Boolean` column indicating whether the `name` contains the "intellij" substring + // or the topics include "intellij". + val dfWithIsIntellij = dfUpdated.add("isIntellij") { + name.contains("intellij") || "intellij" in topics + } + dfWithIsIntellij + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_11() { + // SampleStart + groupedByIsIntellij + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_12() { + // SampleStart + groupedByIsIntellij.count() + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_13() { + // issue #1454 + val groupedByIsIntellij = dfWithIsIntellij.groupBy { isIntellij } + // SampleStart + groupedByIsIntellij.aggregate { + // Compute sum and max of "starsCount" within each group into "sumStars" and "maxStars" columns + sumOf { starsCount } into "sumStars" + maxOf { starsCount } into "maxStars" + } + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_14() { + // SampleStart + val dfTop10 = dfWithIsIntellij + // Sort by "starsCount" value descending + .sortByDesc { starsCount }.take(10) + dfTop10 + // SampleEnd + .saveDfHtmlSample() + } + + @Test + fun notebook_test_quickstart_16() { + // SampleStart + dfTop10.plot { + bars { + x(name) + y(starsCount) + } + + layout.title = "Top 10 JetBrains repositories by stars count" + } + // SampleEnd + .savePlotSVGSample() + } + + @Ignore + @Test + fun notebook_test_quickstart_17() { + // SampleStart + dfWithIsIntellij.writeExcel("jb_repos.xlsx") + // SampleEnd + } } From a96e2ac43c1f8dace59c83b79d0538995029b548 Mon Sep 17 00:00:00 2001 From: Jolan Rensen Date: Thu, 18 Sep 2025 20:40:07 +0200 Subject: [PATCH 5/7] updated quickstart.kt with more compiler plugin magic --- .../api/tail/notebook_test_tail_1.html | 1 + .../api/tail/notebook_test_tail_2.html | 1 + .../api/tail/notebook_test_tail_3.html | 1 + samples/build.gradle.kts | 5 +- .../samples/DataFrameSampleHelper.kt | 3 +- .../kotlinx/dataframe/samples/api/Generate.kt | 20 ++-- .../kotlinx/dataframe/samples/api/Modify.kt | 5 - .../kotlinx/dataframe/samples/api/TestBase.kt | 7 +- .../collectionsInterop/AssociateSamples.kt | 34 +------ .../dataframe/samples/guides/quickstart.kt | 98 +++++++++---------- 10 files changed, 73 insertions(+), 102 deletions(-) diff --git a/docs/StardustDocs/resources/api/tail/notebook_test_tail_1.html b/docs/StardustDocs/resources/api/tail/notebook_test_tail_1.html index e9d458b70..4d410874c 100644 --- a/docs/StardustDocs/resources/api/tail/notebook_test_tail_1.html +++ b/docs/StardustDocs/resources/api/tail/notebook_test_tail_1.html @@ -459,6 +459,7 @@ /*