Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,27 @@ public fun <T, C> MoveClause<T, C>.under(
@Interpretable("MoveTo")
public fun <T, C> MoveClause<T, C>.to(columnIndex: Int): DataFrame<T> = moveTo(columnIndex)

/**
* Moves columns, previously selected with [move] to a new position specified
* by [columnIndex] within the column group (remaining inside the group).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*if insideGroup == true that is

*
* Returns a new [DataFrame] with updated columns structure.
*
* For more information: {@include [DocumentationUrls.Move]}
*
* ### Examples:
* ```kotlin
* df.move { age and weight }.to(0)
* df.move("age", "weight").to(2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about an example with insideGroup = true?

* ```
*
* @param [columnIndex] The index specifying the position in the [ColumnGroup] columns
* * where the selected columns will be moved.
Copy link
Collaborator

@Jolanrensen Jolanrensen Oct 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

floating "*"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no information about @param insideGroup

*/
@Refine
@Interpretable("MoveTo")
public fun <T, C> MoveClause<T, C>.to(columnIndex: Int, insideGroup: Boolean): DataFrame<T> = moveTo(columnIndex, insideGroup)

/**
* Moves columns, previously selected with [move] to the top-level within the [DataFrame].
* Moved columns name can be specified via special ColumnSelectionDsl.
Expand Down Expand Up @@ -691,6 +712,24 @@ public fun <T, C> MoveClause<T, C>.toLeft(): DataFrame<T> = to(0)
@Interpretable("MoveToStart0")
public fun <T, C> MoveClause<T, C>.toStart(): DataFrame<T> = to(0)

/**
* Moves columns, previously selected with [move] to the start of their [ColumnGroup] (remaining inside the group).
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only if insideGroup = true

*
* Returns a new [DataFrame] with updated columns.
*
* For more information: {@include [DocumentationUrls.Move]}
*
* ### Examples:
* ```kotlin
* df.move { age and weight }.toStart()
* df.move { colsOf<String>() }.toStart()
* df.move("age", "weight").toStart()
* ```
*/
@Refine
@Interpretable("MoveToStart0")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need to add compiler plugin support for the insideGroup argument. Currently, we get an empty schema when this is called. We will do it after it's merged

public fun <T, C> MoveClause<T, C>.toStart(insideGroup: Boolean): DataFrame<T> = to(0, insideGroup)

@Deprecated(TO_RIGHT, ReplaceWith(TO_RIGHT_REPLACE), DeprecationLevel.ERROR)
public fun <T, C> MoveClause<T, C>.toRight(): DataFrame<T> = to(df.ncol)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl
import org.jetbrains.kotlinx.dataframe.api.MoveClause
import org.jetbrains.kotlinx.dataframe.api.after
import org.jetbrains.kotlinx.dataframe.api.asColumnGroup
import org.jetbrains.kotlinx.dataframe.api.before
import org.jetbrains.kotlinx.dataframe.api.cast
import org.jetbrains.kotlinx.dataframe.api.getColumn
import org.jetbrains.kotlinx.dataframe.api.getColumnGroup
import org.jetbrains.kotlinx.dataframe.api.getColumnWithPath
import org.jetbrains.kotlinx.dataframe.api.getColumns
import org.jetbrains.kotlinx.dataframe.api.move
import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor
import org.jetbrains.kotlinx.dataframe.api.toDataFrame
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath
Expand All @@ -23,6 +26,7 @@ import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnWithPath
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.ColumnPosition
import org.jetbrains.kotlinx.dataframe.impl.columns.tree.getOrPut
import org.jetbrains.kotlinx.dataframe.indices
import org.jetbrains.kotlinx.dataframe.path

internal fun <T, C> MoveClause<T, C>.afterOrBefore(column: ColumnSelector<T, *>, isAfter: Boolean): DataFrame<T> {
Expand Down Expand Up @@ -124,3 +128,65 @@ internal fun <T, C> MoveClause<T, C>.moveTo(columnIndex: Int): DataFrame<T> {
}
return newColumnList.toDataFrame().cast()
}

internal fun <T, C> MoveClause<T, C>.moveTo(columnIndex: Int, insideGroup: Boolean): DataFrame<T> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine to merge moveTo(columnIndex: Int) and moveTo(columnIndex: Int, insideGroup: Boolean). They are internal after all :) You could name it moveToImpl if you like, to adhere to our internal implementation functions convention

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I think it would genuinely be easier if insideGroup = true to update the group column as follows:

  • convert the group to a DataFrame
  • use the existing moveTo logic
  • convert it back to a column group with the original name

I think that would be easier than reinventing the moving behavior again for moving inside groups :) Of course, some of the checks (like that they share the same parent) should be here. WDYT? Did you try something like that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for reviewing! IMO it is a good idea but i haven't tried yet a similar approach. I thought that exploiting after and before would have been a good idea because they implement the logic of a movement between nested levels, so i thought of moveTo as a particular case of moveAfter/before.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In some way that's true, however, moveTo is a bit simpler than move after/before and a bit different in some other cases.
For moveTo there are two cases now:

  • either we move a subsection of columns to a certain index at the top level
    • these columns can be from any depth and of different parents
  • or we move a subsection of columns to a certain index within the same group/level
    • these columns need to have the same parent

for move after/before there's only one case, and it's more complicated than either of those above:

  • we move a subsection of columns from any depth and of potentially different parents into another place with a potentially different parent.

(btw, did we test the case move { cols of different parents }.before { some nested col } as well in the last PR?)

So while it would be possible to reuse move after/before for move to, it may be more efficient to avoid it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(btw, did we test the case move { cols of different parents }.before { some nested col } as well in the last PR?)
Yes, it is tested in core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt.

However, I understand that using moveTo instead of moveAfter/before is for the best. I'll try to follow your advice and implement it.

val columnsToMove = df.getColumns(columns)

// check if columns to move have the same parent
val columnsToMoveParents = columnsToMove.map { it.path.dropLast() }
val parentOfFirst = columnsToMoveParents.first()
if (columnsToMoveParents.any { it != parentOfFirst }) {
throw IllegalArgumentException(
"Cannot move columns with different parent to an index",
)
}

// if columns will be moved to top level or columns to move are at top level
if (!insideGroup || parentOfFirst.isEmpty()) {
return moveTo(columnIndex)
}

// columns are nested and will be eventually moved inside their own group
// finding reference column
val parentPath = df[parentOfFirst].path
val referenceAndSiblings = df[parentOfFirst].asColumnGroup().columns()
val referenceAndSiblingsPaths = referenceAndSiblings.map { parentPath + it.path }
val reference = if (columnIndex >= referenceAndSiblingsPaths.size){
referenceAndSiblingsPaths.last()
} else {
referenceAndSiblingsPaths.get(columnIndex)
}

// two cases : columns to move are before, or after, the reference
val columnsBeforeReferenceToMove = mutableListOf<ColumnPath>()
val columnsAfterReferenceToMove = mutableListOf<ColumnPath>()
val columnsToMovePath = columnsToMove.map { it.path }
var referenceWasIterated = false
referenceAndSiblingsPaths.forEach {
if (it.path.last() == reference.path.last())
referenceWasIterated = true
else {
if (columnsToMovePath.contains(it)){
if (referenceWasIterated)
columnsAfterReferenceToMove.add(it)
else
columnsBeforeReferenceToMove.add(it)
}
}
}

// move columns
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably easier to view the logic using a when

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and lift return out of the when-statement

if (columnsBeforeReferenceToMove.isNotEmpty() && columnsAfterReferenceToMove.isNotEmpty()) {
val intermediateDf = df.move { columnsBeforeReferenceToMove.toColumnSet() }.after { reference }
val newReference = columnsBeforeReferenceToMove.last()
val finalDf = intermediateDf.move { columnsAfterReferenceToMove.toColumnSet() }.after { newReference }
return finalDf
}
if (columnsBeforeReferenceToMove.isNotEmpty())
return df.move { columnsBeforeReferenceToMove.toColumnSet() }.after { reference }
if (columnsAfterReferenceToMove.isNotEmpty())
return df.move { columnsAfterReferenceToMove.toColumnSet() }.before { reference }

// if it is not needed to move any of the nested columns
return df
}
83 changes: 83 additions & 0 deletions core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,87 @@ class MoveTests {
grouped.move { "a"["b"] }.before { "a"["b"] }
}.message shouldBe "Cannot move column 'a/b' before its own child column 'a/b'"
}

@Test
fun `move single nested column to the start remaining inside the group`() {
val df = grouped.move { "b"["d"] }.to(0, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
df["b"].asColumnGroup().columnNames() shouldBe listOf("d", "c")
}

@Test
fun `move single nested column to the end remaining inside the group`() {
val df = grouped.move { "b"["c"] }.to(2, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
df["b"].asColumnGroup().columnNames() shouldBe listOf("d", "c")
}

//not working
@Test
fun `move single nested column between columns remaining inside the group`() {
// creating an appropriate df for the test
val groupedModified = grouped.move("r").before { "b"["c"] }
groupedModified["b"].asColumnGroup().columnNames() shouldBe listOf("r", "c", "d")
// test itself
val df = groupedModified.move { "b"["r"] }.to(1, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e")
df["b"].asColumnGroup().columnNames() shouldBe listOf("c", "r", "d")
}

@Test
fun `move single nested column to the end remaining inside the group, need to switch group's columns`() {
val df = grouped.move { "b"["c"] }.to(1, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
df["b"].asColumnGroup().columnNames() shouldBe listOf("d", "c")
}

@Test
fun `move single nested column to current index of the column itself`() {
val df = grouped.move { "b"["d"] }.to(1, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r")
df["b"].asColumnGroup().columnNames() shouldBe listOf("c", "d")
}

@Test
fun `move multiple nested columns to the start`() {
// creating an appropriate df for the test
val groupedModified = grouped.move("r").before { "b"["c"] }
groupedModified["b"].asColumnGroup().columnNames() shouldBe listOf("r", "c", "d")
// test itself
val df = groupedModified.move { "b"["c"] and "b"["d"] }.to(0, true)
df.columnNames() shouldBe listOf("q", "a", "b", "w", "e")
df["b"].asColumnGroup().columnNames() shouldBe listOf("c", "d", "r")
}

@Test
fun `move multiple non bordering nested columns`() {
// creating an appropriate df for the test
val groupedModified = grouped.move("r" , "q").before { "b"["c"] }
groupedModified["b"].asColumnGroup().columnNames() shouldBe listOf("r", "q", "c", "d")
// test itself
val df = groupedModified.move { "b"["r"] and "b"["d"] }.to(1, true)
df.columnNames() shouldBe listOf("a", "b", "w", "e")
df["b"].asColumnGroup().columnNames() shouldBe listOf("q", "r", "d", "c")
}

@Test
fun `move single top level column to the start`() {
val df = grouped.move("e").to(0, true)
df.columnNames() shouldBe listOf("e", "q", "a", "b", "w", "r")
df["e"].asColumnGroup().columnNames() shouldBe listOf("f")
}

@Test
fun `move multiple top level columns between columns`() {
val df = grouped.move("w", "e").to(1, true)
df.columnNames() shouldBe listOf("q", "w", "e", "a", "b", "r")
df["e"].asColumnGroup().columnNames() shouldBe listOf("f")
}

@Test
fun `should throw when moving columns of different groups`() {
shouldThrow<IllegalArgumentException> {
grouped.move { "a"["b"] and "b"["c"] }.to(0, true)
}.message shouldBe "Cannot move columns with different parent to an index"
}
}
Loading