Skip to content

Commit b3317c1

Browse files
rithwikgarapatichenlicaXiao-zhen-Liu
authored
feat(operator): Add strip chart visualization operator (#3913)
# The Title of the proposal Author: [email protected] Purpose: Implementing an operator as part of the onboarding process to understand the codebase. # Important Implementation Details This PR introduces a new visualization operator, Strip Chart, which visualizes the distribution of individual data points as a strip plot, allowing users to compare how values are spread across categories. The operator can generate one or multiple charts depending on the input data. X-axis: Maps the selected column to the horizontal axis. Y-axis: Maps the selected column to the vertical axis. Facet column (optional): Splits the data into multiple subplots based on a categorical column. Color-by (optional): Applies different colors to data points based on a categorical or grouping attribute. # Testing <img width="1512" height="853" alt="StripChartPt" src="https://github.com/user-attachments/assets/e65422a1-b636-46a6-ad44-213e7a6adc91" /> Co-authored-by: Chen Li <[email protected]> Co-authored-by: Xiaozhen Liu <[email protected]>
1 parent da67968 commit b3317c1

File tree

3 files changed

+123
-0
lines changed

3 files changed

+123
-0
lines changed

common/workflow-operator/src/main/scala/org/apache/amber/operator/LogicalOp.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ import org.apache.amber.operator.visualization.volcanoPlot.VolcanoPlotOpDesc
130130
import org.apache.amber.operator.visualization.waterfallChart.WaterfallChartOpDesc
131131
import org.apache.amber.operator.visualization.wordCloud.WordCloudOpDesc
132132
import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder}
133+
import org.apache.amber.operator.visualization.stripChart.StripChartOpDesc
133134

134135
import java.util.UUID
135136
import scala.util.Try
@@ -170,6 +171,7 @@ trait StateTransferFunc
170171
new Type(value = classOf[RegexOpDesc], name = "Regex"),
171172
new Type(value = classOf[SpecializedFilterOpDesc], name = "Filter"),
172173
new Type(value = classOf[ProjectionOpDesc], name = "Projection"),
174+
new Type(value = classOf[StripChartOpDesc], name = "StripChart"),
173175
new Type(value = classOf[UnionOpDesc], name = "Union"),
174176
new Type(value = classOf[KeywordSearchOpDesc], name = "KeywordSearch"),
175177
new Type(value = classOf[SubstringSearchOpDesc], name = "SubstringSearch"),
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.amber.operator.visualization.stripChart
21+
22+
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
23+
import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle
24+
import org.apache.amber.core.tuple.{AttributeType, Schema}
25+
import org.apache.amber.core.workflow.OutputPort.OutputMode
26+
import org.apache.amber.core.workflow.{InputPort, OutputPort, PortIdentity}
27+
import org.apache.amber.operator.PythonOperatorDescriptor
28+
import org.apache.amber.operator.metadata.annotations.AutofillAttributeName
29+
import org.apache.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
30+
31+
/**
  * Descriptor for the Strip Chart visualization operator.
  *
  * The operator is implemented as a generated Python UDF that builds a Plotly Express
  * strip plot (`px.strip`) from the input table and emits the rendered figure as a single
  * HTML string in the `html-content` column.
  *
  * The fields are mutable `var`s because they are populated from the operator's JSON
  * properties via the Jackson annotations.
  */
class StripChartOpDesc extends PythonOperatorDescriptor {

  @JsonProperty(value = "x", required = true)
  @JsonSchemaTitle("X-Axis Column")
  @JsonPropertyDescription("Column containing numeric values for the x-axis")
  @AutofillAttributeName
  var x: String = ""

  @JsonProperty(value = "y", required = true)
  @JsonSchemaTitle("Y-Axis Column")
  @JsonPropertyDescription("Column containing categorical values for the y-axis")
  @AutofillAttributeName
  var y: String = ""

  @JsonProperty(value = "colorBy", required = false)
  @JsonSchemaTitle("Color By")
  @JsonPropertyDescription("Optional - Color points by category")
  @AutofillAttributeName
  var colorBy: String = ""

  @JsonProperty(value = "facetColumn", required = false)
  @JsonSchemaTitle("Facet Column")
  @JsonPropertyDescription("Optional - Create separate subplots for each category")
  @AutofillAttributeName
  var facetColumn: String = ""

  /**
    * Declares the output schema: a single STRING attribute named `html-content`,
    * bound to the operator's first (and only declared) output port.
    *
    * @param inputSchemas schemas of the input ports; not consulted, the output is fixed
    * @return a map from the first output port's id to the `html-content` schema
    */
  override def getOutputSchemas(
      inputSchemas: Map[PortIdentity, Schema]
  ): Map[PortIdentity, Schema] = {
    val outputSchema = Schema()
      .add("html-content", AttributeType.STRING)
    Map(operatorInfo.outputPorts.head.id -> outputSchema)
  }

  /**
    * Operator metadata: display name, description, group, one input port and one
    * output port declared with `OutputMode.SINGLE_SNAPSHOT`.
    */
  override def operatorInfo: OperatorInfo =
    OperatorInfo(
      "Strip Chart",
      "Visualize distribution of data points as a strip plot",
      OperatorGroupConstants.VISUALIZATION_STATISTICAL_GROUP,
      inputPorts = List(InputPort()),
      outputPorts = List(OutputPort(mode = OutputMode.SINGLE_SNAPSHOT))
    )

  /**
    * Renders `value` as a single-quoted Python string literal.
    *
    * Backslashes, single quotes and line breaks are escaped so that a column name can
    * never terminate the literal early or inject extra statements into the generated
    * script. The backslash replacement must run first so later escapes are not doubled.
    */
  private def toPythonLiteral(value: String): String = {
    val escaped = value
      .replace("\\", "\\\\")
      .replace("'", "\\'")
      .replace("\n", "\\n")
      .replace("\r", "\\r")
    s"'$escaped'"
  }

  /**
    * Generates the Python UDF source that draws the strip chart.
    *
    * The optional `colorBy` / `facetColumn` settings are resolved here, on the Scala side:
    * a `null` or empty value contributes nothing to the generated script. (Previously a
    * `null` field was interpolated as the truthy Python string `'null'`, which made the
    * script look up a non-existent `null` column.)
    *
    * @return Python source code defining `ProcessTableOperator`
    */
  override def generatePythonCode(): String = {
    // Jackson may leave a field as null; treat that the same as "not set".
    val xLit = toPythonLiteral(Option(x).getOrElse(""))
    val yLit = toPythonLiteral(Option(y).getOrElse(""))
    val colorLit = Option(colorBy).filter(_.nonEmpty).map(toPythonLiteral)
    val facetLit = Option(facetColumn).filter(_.nonEmpty).map(toPythonLiteral)

    val colorByParam = colorLit.map(lit => s", color=$lit").getOrElse("")
    val facetColParam = facetLit.map(lit => s", facet_col=$lit").getOrElse("")

    // One `data[...] = table[...]` line per configured optional column. Every line carries
    // its own `|` margin so that `stripMargin` treats it like the rest of the template.
    val optionalColumnLines = Seq(colorLit, facetLit).flatten.distinct
      .map(lit => s"|        data[$lit] = table[$lit]")
      .mkString("\n")

    s"""from pytexera import *
       |import plotly.express as px
       |import plotly.io as pio
       |
       |class ProcessTableOperator(UDFTableOperator):
       |
       |    @overrides
       |    def process_table(self, table: Table, port: int) -> Iterator[Optional[TableLike]]:
       |        x_values = table[$xLit]
       |        y_values = table[$yLit]
       |
       |        # Create data dictionary
       |        data = {$xLit: x_values, $yLit: y_values}
       |
       |        # Add optional color / facet columns if specified
$optionalColumnLines
       |
       |        # Create strip chart
       |        fig = px.strip(
       |            data,
       |            x=$xLit,
       |            y=$yLit$colorByParam$facetColParam
       |        )
       |
       |        # Update layout for better visualization
       |        fig.update_traces(marker=dict(size=8, line=dict(width=0.5, color='DarkSlateGrey')))
       |        fig.update_layout(
       |            xaxis_title=$xLit,
       |            yaxis_title=$yLit,
       |            hovermode='closest'
       |        )
       |
       |        # Convert to HTML
       |        html = pio.to_html(fig, include_plotlyjs='cdn', full_html=False)
       |        yield {'html-content': html}
       |""".stripMargin
  }
}
44.7 KB
Loading

0 commit comments

Comments
 (0)