2 changes: 2 additions & 0 deletions core/api/core.api
@@ -4462,7 +4462,9 @@ public final class org/jetbrains/kotlinx/dataframe/api/TakeKt {
}

public final class org/jetbrains/kotlinx/dataframe/api/ToDataFrameKt {
public static final fun toDataFrame (Ljava/util/List;Ljava/util/List;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun toDataFrame (Ljava/util/Map;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static synthetic fun toDataFrame$default (Ljava/util/List;Ljava/util/List;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun toDataFrameAnyColumn (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun toDataFrameColumnPathAnyNullable (Ljava/lang/Iterable;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
public static final fun toDataFrameColumnPathAnyNullable (Ljava/util/Map;)Lorg/jetbrains/kotlinx/dataframe/DataFrame;
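For reference, the two added entries are the JVM descriptors of the new list-of-lists overload introduced by this PR (receiver list, the new header list, the containsColumns flag, plus the synthetic $default bridge for its default value). In Kotlin terms they correspond to the declaration added later in this diff:

public fun <T> List<List<T>>.toDataFrame(header: List<String>?, containsColumns: Boolean = false): AnyFrame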
@@ -2525,4 +2525,4 @@ public fun <T, C> Convert<T, List<List<C>>>.toDataFrames(containsColumns: Boolean
* @return A new [DataColumn] with the values converted to [DataFrame].
*/
public fun <T> DataColumn<List<List<T>>>.toDataFrames(containsColumns: Boolean = false): DataColumn<AnyFrame> =
map { it.toDataFrame(containsColumns) }
map { it.toDataFrame(containsColumns = containsColumns) }
@@ -256,3 +256,49 @@ public fun Map<ColumnPath, Iterable<Any?>>.toDataFrame(): AnyFrame =
}.toDataFrameFromPairs<Unit>()

// endregion

/**
* Converts a list of lists into a [DataFrame].
*
* By default, each inner list is treated as a row. If [header] is `null`, the first inner list supplies the column names, and the remaining lists are treated as data.
*
* With [containsColumns] = `true`, each inner list is interpreted as a column.
* If [header] is `null`, the first element of each list is used as its column name, and the remaining elements as its values.
*
* @param T The type of elements contained in the nested lists.
* @param header Explicit column names. When provided, no names are extracted from the lists, and all values are treated as data;
* when `null`, names are taken from the lists as described above.
* @param containsColumns If `true`, treats each nested list as a column (empty inner lists are skipped).
* Otherwise, each nested list is a row.
* Defaults to `false`.
* @return A [DataFrame] containing the data from the nested list structure.
* Returns an empty [DataFrame] if the input is empty.
*/
@Refine
@Interpretable("ValuesListsToDataFrame")
public fun <T> List<List<T>>.toDataFrame(header: List<String>?, containsColumns: Boolean = false): AnyFrame =
when {
containsColumns -> {
mapIndexedNotNull { index, list ->
if (list.isEmpty()) return@mapIndexedNotNull null
val name = header?.get(index) ?: list[0].toString()
val values = if (header == null) list.drop(1) else list
createColumnGuessingType(name, values)
}.toDataFrame()
}

isEmpty() -> DataFrame.Empty

else -> {
val data = if (header == null) drop(1) else this
(header ?: get(0).map { it.toString() }).mapIndexed { colIndex, name ->
val values = data.map { row ->
if (row.size <= colIndex) {
null
} else {
row[colIndex]
}
}
createColumnGuessingType(name, values)
}.toDataFrame()
}
}
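A minimal usage sketch of the new overload, assuming the dataframe api package is on the classpath; the value lists below are illustrative only, not taken from the PR:

import org.jetbrains.kotlinx.dataframe.api.toDataFrame

// Row-major input: with header = null, the first inner list supplies the column names.
val rows = listOf(
    listOf("name", "age"),
    listOf("Alice", "23"),
    listOf("Bob", "42"),
)
val byRows = rows.toDataFrame(header = null) // columns "name" and "age", 2 rows

// Column-major input with an explicit header: every element is treated as data.
val cols = listOf(
    listOf("Alice", "Bob"),
    listOf("23", "42"),
)
val byCols = cols.toDataFrame(header = listOf("name", "age"), containsColumns = true)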
49 changes: 7 additions & 42 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/common.kt
@@ -4,10 +4,10 @@ import org.apache.commons.io.input.BOMInputStream
import org.jetbrains.kotlinx.dataframe.AnyFrame
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnGuessingType
import org.jetbrains.kotlinx.dataframe.util.IS_URL
import org.jetbrains.kotlinx.dataframe.util.IS_URL_IMPORT
import org.jetbrains.kotlinx.dataframe.util.IS_URL_REPLACE
import org.jetbrains.kotlinx.dataframe.util.LISTS_TO_DATAFRAME_MIGRATION
import java.io.File
import java.io.InputStream
import java.net.HttpURLConnection
@@ -45,48 +45,13 @@ public fun catchHttpResponse(url: URL, body: (InputStream) -> AnyFrame): AnyFrame
}
}

/**
* Converts a list of lists into a [DataFrame].
*
* By default, treats the first inner list as a header (column names), and the remaining lists as rows.
* If [containsColumns] is `true`, interprets each inner list as a column,
* where the first element is used as the column name, and the remaining elements as values.
*
* @param T The type of elements contained in the nested lists.
* @param containsColumns If `true`, treats each nested list as a column with its first element as the column name.
* Otherwise, the first list is treated as the header.
* Defaults to `false`.
* @return A [DataFrame] containing the data from the nested list structure.
* Returns an empty [DataFrame] if the input is empty or invalid.
*/
@Deprecated(
LISTS_TO_DATAFRAME_MIGRATION,
ReplaceWith("this.toDataFrame(header = null, containsColumns)", "org.jetbrains.kotlinx.dataframe.api.toDataFrame"),
level = DeprecationLevel.WARNING,
)
public fun <T> List<List<T>>.toDataFrame(containsColumns: Boolean = false): AnyFrame =
when {
containsColumns -> {
mapNotNull {
if (it.isEmpty()) return@mapNotNull null
val name = it[0].toString()
val values = it.drop(1)
createColumnGuessingType(name, values)
}.toDataFrame()
}

isEmpty() -> DataFrame.Empty

else -> {
val header = get(0).map { it.toString() }
val data = drop(1)
header.mapIndexed { colIndex, name ->
val values = data.map { row ->
if (row.size <= colIndex) {
null
} else {
row[colIndex]
}
}
createColumnGuessingType(name, values)
}.toDataFrame()
}
}
toDataFrame(header = null, containsColumns)
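For existing callers, the migration suggested by the ReplaceWith above looks roughly like this (nestedLists is a placeholder name with placeholder data):

import org.jetbrains.kotlinx.dataframe.api.toDataFrame

val nestedLists: List<List<String>> = listOf(listOf("a", "b"), listOf("1", "2")) // placeholder data

// Before: the deprecated overload from org.jetbrains.kotlinx.dataframe.io
// val df = nestedLists.toDataFrame(containsColumns = true)

// After: the api-package overload with the explicit header parameter
val df = nestedLists.toDataFrame(header = null, containsColumns = true)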

@Deprecated(
message = IS_URL,
@@ -257,6 +257,9 @@ internal const val GET_ROWS_RANGE_REPLACE = "df().getRows(indices)"
internal const val GET_ROW_OR_NULL_REPLACE = "df().getRowOrNull(index)"
internal const val COPY_REPLACE = "columns().toDataFrame().cast()"

internal const val LISTS_TO_DATAFRAME_MIGRATION =
"Function moved from io to api package, and a new `header` parameter is introduced. $MESSAGE_1_1"

// endregion

// region keep across releases
@@ -700,4 +700,92 @@ class CreateDataFrameTests {
val df = list.toDataFrame(maxDepth = 2)
df["map"].type() shouldBe typeOf<Map<String, Int>>()
}

@Test
fun `parsing row-major lines into structured dataframe`() {
// Data that embeds its own header row like this is probably rare, hence the optional `header` parameter.
val lines = buildList {
addAll(listOf("stamp", "header", "data"))
repeat(33) { row ->
add("stamp $row")
add("header $row")
add("data $row")
}
}

val df = lines.chunked(3).toDataFrame(header = null)

df.columnNames() shouldBe listOf("stamp", "header", "data")
df.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
df.rowsCount() shouldBe 33
df[0].values() shouldBe listOf("stamp 0", "header 0", "data 0")
}

@Test
fun `parsing srt lines into structured dataframe`() {
// *.srt subtitle file format
val lines = buildList {
repeat(33) { row ->
add("stamp $row")
add("header $row")
add("data $row")
add("\n")
}
}

val df = lines.chunked(4).map { it.dropLast(1) }.toDataFrame(header = listOf("stamp", "header", "data"))

df.columnNames() shouldBe listOf("stamp", "header", "data")
df.columnTypes() shouldBe listOf(typeOf<String>(), typeOf<String>(), typeOf<String>())
df.rowsCount() shouldBe 33
df[0].values() shouldBe listOf("stamp 0", "header 0", "data 0")

// An alternative approach; the dropLast-based variant above reads better.
lines.chunked(4)
.toDataFrame(header = listOf("stamp", "header", "data", "whitespace"))
.remove("whitespace") shouldBe df
}

@Test
fun `parsing column-major lines into structured dataframe`() {
val lines = buildList {
repeat(4) { col ->
repeat(5) { row ->
add("data$col $row")
}
add("\n")
}
}

val header = List(4) { "col $it" }
val df = lines
.chunked(6)
.map { it.dropLast(1) }
.toDataFrame(header = header, containsColumns = true)
df.columnNames() shouldBe header
df.columnTypes() shouldBe List(4) { typeOf<String>() }
df["col 0"].values() shouldBe listOf("data0 0", "data0 1", "data0 2", "data0 3", "data0 4")
}

@Test
fun `parsing column-major lines with header into structured dataframe`() {
val lines = buildList {
repeat(4) { col ->
add("col $col")
repeat(5) { row ->
add("data$col $row")
}
add("\n")
}
}

val header = List(4) { "col $it" }
val df = lines
.chunked(7)
.map { it.dropLast(1) }
.toDataFrame(header = null, containsColumns = true)
df.columnNames() shouldBe header
df.columnTypes() shouldBe List(4) { typeOf<String>() }
df["col 0"].values() shouldBe listOf("data0 0", "data0 1", "data0 2", "data0 3", "data0 4")
}
}
@@ -425,4 +425,22 @@ class Create : TestBase() {
val df = files.toDataFrame(columnName = "data")
// SampleEnd
}

@Test
@TransformDataFrameExpressions
fun toDataFrameLists() {
// SampleStart
val lines = """
1
00:00:05,000 --> 00:00:07,500
This is the first subtitle.

2
00:00:08,000 --> 00:00:10,250
This is the second subtitle.
""".trimIndent().lines()

lines.chunked(4) { it.take(3) }.toDataFrame(header = listOf("n", "timestamp", "text"))
// SampleEnd
}
}