<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<!-- Meta tags for social media banners; these should be filled in appropriately, as they are your "business card" -->
<!-- Replace the content tag with appropriate information -->
<meta name="description" content="Protecting multimodal large language models against misleading visualizations">
<meta property="og:title" content="Protecting multimodal LLMs against misleading visualizations"/>
<meta property="og:description" content="LLM, misleading visualization, question answering"/>
<meta property="og:url" content="https://ukplab.github.io/misleading-visualizations/"/>
<!-- Path to banner image, which should be placed at the path listed below. Optimal dimensions are 1200x630 -->
<meta property="og:image" content="static/image/your_twitter_banner_image.png" />
<meta property="og:image:width" content="1200"/>
<meta property="og:image:height" content="630"/>
<!-- Keywords for your paper to be indexed by-->
<meta name="keywords" content="Multimodal Learning, LLM, MLLM, VLM, Misleading visualization, Misinformation">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Protecting multimodal large language models against misleading visualizations</title>
<link rel="icon" type="image/x-icon" href="static/images/favicon.ico">
<link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
rel="stylesheet">
<link rel="stylesheet" href="static/css/bulma.min.css">
<link rel="stylesheet" href="static/css/bulma-carousel.min.css">
<link rel="stylesheet" href="static/css/bulma-slider.min.css">
<link rel="stylesheet" href="static/css/fontawesome.all.min.css">
<link rel="stylesheet"
href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
<link rel="stylesheet" href="static/css/index.css">
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
<script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
<script defer src="static/js/fontawesome.all.min.js"></script>
<script src="static/js/bulma-carousel.min.js"></script>
<script src="static/js/bulma-slider.min.js"></script>
<script src="static/js/index.js"></script>
</head>
<body>
<!-- <div class="logo-container">
<img src="static/images/tuda_ukp_logo.png" alt="Logo" class="logo" width="600" style="margin-left: auto; margin-right: auto; display: block;">
</div> -->
<section class="hero">
<div class="hero-body">
<div class="container is-max-desktop">
<div class="columns is-centered">
<div class="column has-text-centered">
<h1 class="title is-1 publication-title"><img src="static/images/project_icon.png" width="50" /> Protecting multimodal large language models against misleading visualizations</h1>
<div class="is-size-5 publication-authors">
<!-- Paper authors -->
<span class="author-block">
<a href="https://jtonglet.github.io/" target="_blank"><b>Jonathan Tonglet</b></a><sup>1,2,3</sup>,</span>
<span class="author-block">
<a href="https://www.esat.kuleuven.be/psi/TT" target="_blank"><b>Tinne Tuytelaars</b></a><sup>2</sup>,</span>
<span class="author-block">
<a href="https://people.cs.kuleuven.be/~sien.moens/" target="_blank"><b>Marie-Francine Moens</b></a><sup>3</sup>,</span>
<span class="author-block">
<a href="https://www.informatik.tu-darmstadt.de/ukp/ukp_home/head_ukp/index.en.jsp" target="_blank"><b>Iryna Gurevych</b></a><sup>1</sup>
</span>
</div>
<div class="is-size-5 publication-authors">
<span class="author-block">
<sup>1</sup>UKP Lab, TU Darmstadt  
<sup>2</sup>Electrical Engineering, KU Leuven 
<sup>3</sup>Computer Science, KU Leuven 
</span>
</div>
<div class="column has-text-centered">
<div class="publication-links">
<!-- Arxiv PDF link -->
<span class="link-block">
<a href="https://arxiv.org/pdf/2502.20503" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Paper</span>
</a>
</span>
<!-- Supplementary PDF link
<span class="link-block">
<a href="static/pdfs/supplementary_material.pdf" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fas fa-file-pdf"></i>
</span>
<span>Supplementary</span>
</a>
</span> -->
<!-- Github link -->
<span class="link-block">
<a href="https://github.com/UKPLab/arxiv2025-misleading-visualizations" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>
</span>
<!-- ArXiv abstract Link -->
<span class="link-block">
<a href="https://arxiv.org/abs/2502.20503" target="_blank"
class="external-link button is-normal is-rounded is-dark">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>
</span>
</div>
</div>
</div>
</div>
</div>
</div>
</section>
<!-- Teaser image-->
<section class="hero teaser">
<div class="container is-max-desktop">
<div class="hero-body">
<h2 class="title is-3 has-text-centered">What are misleading visualizations?</h2>
<h2 class="subtitle has-text-justified">
Charts are useful tools to communicate data insights. However, deceptive design patterns, such as truncated, inverted, or dual axes, can lead readers to inaccurate interpretations of the data.
Such misleading visualizations have been used to propagate and increase belief in misinformation during crises, and they are effective at deceiving human readers. <br>
<span style="font-weight:bold;"> What about multimodal large language models (MLLMs)? Are they vulnerable too? </span>
Yes! 😧 Their chart question-answering accuracy drops to the level of the random baseline, up to 65.5 percentage points below their accuracy on the ChartQA benchmark.
</h2>
<img src="static/images/real_world_examples.png" height="60%"/>
<!-- <h2 class="subtitle has-text-centered">Example tasks in <span style="font-weight:bold;">BLINK</span>.</h2> -->
<h2 class="hero-body has-text-centered">
<!-- <br> -->
Two examples of misleading visualizations with QA pairs.
</h2>
</div>
</div>
</section>
<!-- End teaser image -->
<!-- Paper abstract -->
<section class="section hero is-light">
<div class="container is-max-desktop">
<div class="columns is-centered has-text-centered">
<div class="column is-four-fifths">
<h2 class="title is-3">Abstract</h2>
<div class="content has-text-justified">
<p>
We assess the vulnerability of multimodal large language models (MLLMs) to misleading visualizations: charts that distort the underlying data using techniques such as truncated or inverted axes, leading readers to draw inaccurate conclusions that may support misinformation or conspiracy theories. Our analysis shows that these distortions severely harm MLLMs, reducing their question-answering accuracy to the level of the random baseline. To mitigate this vulnerability, we introduce six inference-time methods that improve the performance of MLLMs on misleading visualizations while preserving their accuracy on non-misleading ones. The most effective approach involves (1) extracting the underlying data table and (2) using a text-only large language model to answer questions based on the table. This method improves performance on misleading visualizations by 15.4 to 19.6 percentage points.
</p>
</div>
</div>
</div>
</div>
</section>
<!-- End paper abstract -->
<!-- Accuracy -->
<section class="section hero">
<div class="container is-max-desktop">
<div class="columns is-centered has-text-centered">
<div class="column is-five-sixths">
<h2 class="title is-3">MLLMs are vulnerable to misleading visualizations</h2>
<img src="static/images/accuracy.png" width="70%"/>
<h2 class="content has-text-justified">
We compare the chart question-answering accuracy of 13 MLLMs on three datasets: (1) a collection of misleading visualizations, (2) a collection of non-misleading visualizations, and (3) the ChartQA reference benchmark, which contains non-misleading visualizations.
MLLMs perform much worse on misleading visualizations than on non-misleading ones. In fact, on average they perform no better than the random baseline.
</h2>
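<div class="content has-text-justified">
<p>
As a rough illustration of this evaluation setup, the sketch below computes per-dataset accuracy and compares it across the three collections. It assumes a generic <code>model.answer(image, question)</code> interface and simple exact-match scoring; these names are illustrative and not the paper's actual evaluation code.
</p>
<pre><code># Minimal sketch of the evaluation described above (illustrative assumptions,
# not the paper's actual code): compare chart-QA accuracy across datasets.

def accuracy(model, examples):
    """examples: list of dicts with 'image', 'question', 'answer' keys (assumed format)."""
    correct = 0
    for ex in examples:
        prediction = model.answer(ex["image"], ex["question"])
        correct += int(prediction.strip().lower() == ex["answer"].strip().lower())
    return correct / len(examples)

def compare(model, datasets):
    """datasets: e.g. {'misleading': [...], 'non_misleading': [...], 'chartqa': [...]}."""
    return {name: accuracy(model, examples) for name, examples in datasets.items()}</code></pre>
</div>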
</div>
</div>
</div>
</section>
<!-- Correction methods -->
<section class="section hero is-light">
<div class="container is-max-desktop">
<div class="columns is-centered has-text-centered">
<div class="column is-five-sixths">
<h2 class="title is-3">How to mitigate this vulnerability?</h2>
<img src="static/images/correction_methods.png" width="70%"/>
<h2 class="content has-text-justified">
We propose six inference-time correction methods that mitigate the negative effects of misleading visualizations while preserving high performance on non-misleading ones.
</h2>
</div>
</div>
</div>
</section>
<!-- Correction results -->
<section class="section hero">
<div class="container is-max-desktop">
<div class="columns is-centered has-text-centered">
<div class="column is-five-sixths">
<h2 class="title is-3">Table-based QA is the best correction method</h2>
<img src="static/images/correction_method_results.png" width="70%"/>
<h2 class="content has-text-justified">
Among all correction methods, the most promising is to extract the underlying table with an MLLM and then provide the table, without the chart image, to a text-only LLM, framing the task as table QA.
However, gains are modest (~15–20 percentage points) and depend on the quality of table extraction, highlighting the need for further research!
Learn more by reading our paper.
</h2>
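<div class="content has-text-justified">
<p>
The sketch below illustrates this two-step, table-based QA pipeline. The <code>mllm_generate</code> and <code>llm_generate</code> callables stand in for any MLLM and text-only LLM inference calls; they, and the prompt wording, are assumptions made for illustration rather than the paper's actual implementation.
</p>
<pre><code># Minimal sketch of the table-based QA correction method described above.
# mllm_generate / llm_generate are placeholder callables, not the paper's code.

EXTRACTION_PROMPT = (
    "Extract the underlying data table of this chart. "
    "Return it as a markdown table with a header row."
)

def answer_with_table(chart_image, question, mllm_generate, llm_generate):
    # Step 1: the MLLM reads the chart and reconstructs its data table.
    table = mllm_generate(image=chart_image, prompt=EXTRACTION_PROMPT)
    # Step 2: a text-only LLM answers from the table alone, so the potentially
    # misleading chart design never influences the final answer.
    qa_prompt = (
        "Answer the question using only this table.\n\n"
        + table
        + "\n\nQuestion: " + question
    )
    return llm_generate(prompt=qa_prompt)</code></pre>
</div>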
</div>
</div>
</div>
</section>
<!--BibTex citation -->
<section class="section" id="BibTeX">
<div class="container is-max-desktop content">
<h2 class="title">BibTeX</h2>
<pre><code>@article{tonglet2025misleadingvisualizations,
title={Protecting multimodal LLMs against misleading visualizations},
author={Tonglet, Jonathan and Tuytelaars, Tinne and Moens, Marie-Francine and Gurevych, Iryna},
journal={arXiv preprint arXiv:2502.20503},
year={2025},
url={https://arxiv.org/abs/2502.20503},
doi={10.48550/arXiv.2502.20503}
}</code></pre>
</div>
</section>
<!--End BibTex citation -->
<footer class="footer">
<div class="container">
<div class="columns is-centered">
<div class="column is-8">
<div class="content">
<p>
This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a>, which was adapted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
</p>
<p>
<a href="https://www.flaticon.com/free-icons/benefit" title="benefit icons">Benefit icons created by Vectorslab - Flaticon</a>
</p>
</div>
</div>
</div>
</div>
</footer>
<!-- Statcounter tracking code -->
<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->
<!-- End of Statcounter Code -->
</body>
</html>