Skip to content

Commit 4db19bf

Browse files
compare is ready to use
1 parent e052646 commit 4db19bf

File tree

2 files changed

+164
-57
lines changed

2 files changed

+164
-57
lines changed

core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/compareDataFrames.kt

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,58 @@
11
package org.jetbrains.kotlinx.dataframe.impl.api
22

33
import org.jetbrains.kotlinx.dataframe.DataFrame
4+
import org.jetbrains.kotlinx.dataframe.api.DataRowSchema
5+
import org.jetbrains.kotlinx.dataframe.api.concat
6+
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
7+
import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame
48
import org.jetbrains.kotlinx.dataframe.nrow
59

10+
internal class ComparisonDescription(
11+
val rowAtIndex: Int,
12+
val of: String,
13+
val wasRemoved: Boolean?,
14+
val wasInserted: Boolean?,
15+
val afterRow: Int?,
16+
) : DataRowSchema
17+
618
/**
7-
* returns a DataFrame whose rows communicate the differences between dfA and dfB
19+
* Returns a DataFrame whose rows explain the differences between dfA and dfB.
20+
* The returned rows form an edit script whose commands must be read as if they were all executed simultaneously.
821
*/
9-
internal fun <T> compareDataFramesImpl(dfA: DataFrame<T>, dfB: DataFrame<T>): DataFrame<*> {
22+
internal fun <T> compareDataFramesImpl(dfA: DataFrame<T>, dfB: DataFrame<T>): DataFrame<ComparisonDescription> {
23+
var comparisonDf = emptyDataFrame<ComparisonDescription>()
24+
// compute the shortest edit script using Myers' difference algorithm
1025
val shortestEditScript = myersDifferenceAlgorithmImpl(dfA, dfB)
1126
var x: Int?
1227
var y: Int?
1328
var xPrev: Int?
1429
var yPrev: Int?
15-
16-
for(i in 1 until shortestEditScript.size) {
17-
x=shortestEditScript[i].first
18-
y=shortestEditScript[i].second
19-
xPrev=shortestEditScript[i-1].first
20-
yPrev=shortestEditScript[i-1].second
30+
for (i in 1 until shortestEditScript.size) {
31+
x = shortestEditScript[i].first
32+
y = shortestEditScript[i].second
33+
xPrev = shortestEditScript[i - 1].first
34+
yPrev = shortestEditScript[i - 1].second
2135
when {
22-
xPrev+1==x&&yPrev+1==y -> //row in position 'x' of dfA was not removed
23-
24-
xPrev+1==x -> //row in position 'x' of dfA was removed
36+
// row at index 'x - 1' of dfA was removed
37+
xPrev + 1 == x && yPrev + 1 != y -> {
38+
comparisonDf = comparisonDf.concat(
39+
dataFrameOf
40+
(ComparisonDescription(x-1, "dfA", true, null, null)),
41+
)
42+
}
2543

26-
yPrev+1==y -> //row in position 'y' of dfB was inserted after row in position 'x' of dfA
44+
// row at index 'y - 1' of dfB was inserted after row at index 'x - 1' of dfA
45+
yPrev + 1 == y && xPrev + 1 != x -> {
46+
comparisonDf = comparisonDf.concat(
47+
dataFrameOf(
48+
ComparisonDescription
49+
(y-1, "dfB", null, true, x-1),
50+
),
51+
)
52+
}
2753
}
2854
}
55+
return comparisonDf
2956
}
3057

3158
/**
Lines changed: 125 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,109 @@
11
package org.jetbrains.kotlinx.dataframe.api
22

33
import io.kotest.matchers.shouldBe
4+
import org.jetbrains.kotlinx.dataframe.impl.api.ComparisonDescription
5+
import org.jetbrains.kotlinx.dataframe.impl.api.compareDataFramesImpl
46
import org.jetbrains.kotlinx.dataframe.impl.api.myersDifferenceAlgorithmImpl
57
import org.junit.Test
68
import kotlin.Pair
79

10+
private class SchemaForThisTest(val integer: Int, val string: String) : DataRowSchema
11+
812
class CompareDataFramesTest {
13+
14+
// compareDataFrames region
15+
916
@Test
1017
fun `Need both to delete and insert rows, preserving some rows`() {
11-
//dfA
12-
val x by columnOf(0, 1, 2, 0, 1, 1, 0)
13-
val y by columnOf("a", "b", "c", "a", "b", "b", "a")
14-
val dfA = dataFrameOf(x, y)
15-
//dfB
16-
val k by columnOf(2, 1, 0, 1, 0, 2)
17-
val z by columnOf("c", "b", "a", "b", "a", "c")
18-
val dfB = dataFrameOf(k, z)
18+
val dfA = dataFrameOf(
19+
SchemaForThisTest(0, "a"),
20+
SchemaForThisTest(1, "b"),
21+
SchemaForThisTest(2, "c"),
22+
SchemaForThisTest(0, "a"),
23+
SchemaForThisTest(1, "b"),
24+
SchemaForThisTest(1, "b"),
25+
SchemaForThisTest(0, "a"),
26+
)
27+
val dfB = dataFrameOf(
28+
SchemaForThisTest(2, "c"),
29+
SchemaForThisTest(1, "b"),
30+
SchemaForThisTest(0, "a"),
31+
SchemaForThisTest(1, "b"),
32+
SchemaForThisTest(0, "a"),
33+
SchemaForThisTest(2, "c"),
34+
)
35+
val comparison = compareDataFramesImpl(dfA, dfB)
36+
comparison shouldBe dataFrameOf(
37+
ComparisonDescription(0, "dfA", true, null, null),
38+
ComparisonDescription(1, "dfA", true, null, null),
39+
ComparisonDescription(1, "dfB", null, true, 2),
40+
ComparisonDescription(5, "dfA", true, null, null),
41+
ComparisonDescription(5, "dfB", null, true, 6),
42+
)
43+
}
44+
45+
@Test
46+
fun `need to do nothing`() {
47+
val dfA = dataFrameOf(
48+
SchemaForThisTest(0, "a"),
49+
SchemaForThisTest(0, "a"),
50+
SchemaForThisTest(0, "a"),
51+
)
52+
val dfB = dataFrameOf(
53+
SchemaForThisTest(0, "a"),
54+
SchemaForThisTest(0, "a"),
55+
SchemaForThisTest(0, "a"),
56+
)
57+
val comparison = compareDataFramesImpl(dfA, dfB)
58+
comparison shouldBe emptyDataFrame()
59+
}
60+
61+
@Test
62+
fun `need to remove each row of dfA and insert each row of dfB`() {
63+
val dfA = dataFrameOf(
64+
SchemaForThisTest(0, "a"),
65+
SchemaForThisTest(1, "b"),
66+
SchemaForThisTest(2, "c"),
67+
)
68+
val dfB = dataFrameOf(
69+
SchemaForThisTest(3, "d"),
70+
SchemaForThisTest(4, "e"),
71+
SchemaForThisTest(5, "f"),
72+
)
73+
val comparison = compareDataFramesImpl(dfA, dfB)
74+
comparison shouldBe dataFrameOf(
75+
ComparisonDescription(0, "dfA", true, null, null),
76+
ComparisonDescription(1, "dfA", true, null, null),
77+
ComparisonDescription(2, "dfA", true, null, null),
78+
ComparisonDescription(0, "dfB", null, true, 2),
79+
ComparisonDescription(1, "dfB", null, true, 2),
80+
ComparisonDescription(2, "dfB", null, true, 2),
81+
)
82+
}
83+
84+
// end region
85+
86+
// Myers algorithm region
87+
88+
@Test
89+
fun `Need both to delete and insert rows, preserving some rows, Myers algorithm`() {
90+
val dfA = dataFrameOf(
91+
SchemaForThisTest(0, "a"),
92+
SchemaForThisTest(1, "b"),
93+
SchemaForThisTest(2, "c"),
94+
SchemaForThisTest(0, "a"),
95+
SchemaForThisTest(1, "b"),
96+
SchemaForThisTest(1, "b"),
97+
SchemaForThisTest(0, "a"),
98+
)
99+
val dfB = dataFrameOf(
100+
SchemaForThisTest(2, "c"),
101+
SchemaForThisTest(1, "b"),
102+
SchemaForThisTest(0, "a"),
103+
SchemaForThisTest(1, "b"),
104+
SchemaForThisTest(0, "a"),
105+
SchemaForThisTest(2, "c"),
106+
)
19107
val path = myersDifferenceAlgorithmImpl(dfA, dfB)
20108
path shouldBe listOf(
21109
Pair(0, 0),
@@ -32,15 +120,17 @@ class CompareDataFramesTest {
32120
}
33121

34122
@Test
35-
fun `need to do nothing`() {
36-
//dfA
37-
val x by columnOf(0, 0, 0)
38-
val y by columnOf("a", "a", "a")
39-
val dfA = dataFrameOf(x, y)
40-
//dfB
41-
val k by columnOf(0, 0, 0)
42-
val z by columnOf("a", "a", "a")
43-
val dfB = dataFrameOf(k, z)
123+
fun `need to do nothing, Myers algorithm`() {
124+
val dfA = dataFrameOf(
125+
SchemaForThisTest(0, "a"),
126+
SchemaForThisTest(0, "a"),
127+
SchemaForThisTest(0, "a"),
128+
)
129+
val dfB = dataFrameOf(
130+
SchemaForThisTest(0, "a"),
131+
SchemaForThisTest(0, "a"),
132+
SchemaForThisTest(0, "a"),
133+
)
44134
val path = myersDifferenceAlgorithmImpl(dfA, dfB)
45135
path shouldBe listOf(
46136
Pair(0, 0),
@@ -51,15 +141,17 @@ class CompareDataFramesTest {
51141
}
52142

53143
@Test
54-
fun `need to remove each row of dfA and insert each row of dfB`() {
55-
//dfA
56-
val x by columnOf(0, 1, 2)
57-
val y by columnOf("a", "b", "c")
58-
val dfA = dataFrameOf(x, y)
59-
//dfB
60-
val k by columnOf(3, 4, 5)
61-
val z by columnOf("d", "e", "f")
62-
val dfB = dataFrameOf(k, z)
144+
fun `need to remove each row of dfA and insert each row of dfB, Myers Algorithm`() {
145+
val dfA = dataFrameOf(
146+
SchemaForThisTest(0, "a"),
147+
SchemaForThisTest(1, "b"),
148+
SchemaForThisTest(2, "c"),
149+
)
150+
val dfB = dataFrameOf(
151+
SchemaForThisTest(3, "d"),
152+
SchemaForThisTest(4, "e"),
153+
SchemaForThisTest(5, "f"),
154+
)
63155
val path = myersDifferenceAlgorithmImpl(dfA, dfB)
64156
path shouldBe listOf(
65157
Pair(0, 0),
@@ -73,15 +165,13 @@ class CompareDataFramesTest {
73165
}
74166

75167
@Test
76-
fun `need to add each row`() {
77-
//dfA
78-
val x by columnOf(listOf())
79-
val y by columnOf(listOf())
80-
val dfA = dataFrameOf(x, y)
81-
//dfB
82-
val k by columnOf(0, 1, 2)
83-
val z by columnOf("a", "b", "c")
84-
val dfB = dataFrameOf(k, z)
168+
fun `need to add each row, Myers algorithm`() {
169+
val dfA = emptyDataFrame<SchemaForThisTest>()
170+
val dfB = dataFrameOf(
171+
SchemaForThisTest(0, "a"),
172+
SchemaForThisTest(1, "b"),
173+
SchemaForThisTest(2, "c"),
174+
)
85175
val path = myersDifferenceAlgorithmImpl(dfA, dfB)
86176
path shouldBe listOf(
87177
Pair(0, 0),
@@ -90,14 +180,4 @@ class CompareDataFramesTest {
90180
Pair(0, 3),
91181
)
92182
}
93-
94-
@Test
95-
fun `describe`() {
96-
//dfA
97-
val x by columnOf(0, 1, 2, 0, 1, 1, 0)
98-
val y by columnOf("a", "b", "c", "a", "b", "b", "a")
99-
val dfA = dataFrameOf(x, y)
100-
val r = dfA.describe()
101-
r shouldBe emptyDataFrame()
102-
}
103183
}

0 commit comments

Comments
 (0)