From 84228e0ec8c62853633479e0be2405bea4b7ca4e Mon Sep 17 00:00:00 2001 From: Sylwia Budzynska <102833689+sylwia-budzynska@users.noreply.github.com> Date: Tue, 27 May 2025 13:10:39 +0200 Subject: [PATCH 1/4] Add Pandas SQLi sinks --- python/ql/lib/semmle/python/frameworks/Pandas.qll | 11 +++++++++++ .../src/change-notes/2025-05-26-pandas-sqli-sinks.md | 4 ++++ .../frameworks/pandas/dataframe_query.py | 8 +++++--- 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md diff --git a/python/ql/lib/semmle/python/frameworks/Pandas.qll b/python/ql/lib/semmle/python/frameworks/Pandas.qll index eb6c3c44409c..fecfb58674f0 100644 --- a/python/ql/lib/semmle/python/frameworks/Pandas.qll +++ b/python/ql/lib/semmle/python/frameworks/Pandas.qll @@ -151,4 +151,15 @@ private module Pandas { override DataFlow::Node getCode() { result = this.getParameter(0, "expr").asSink() } } + + /** + * A Call to `pandas.read_sql` or `pandas.read_sql_query` + * which allows for executing raw SQL queries against a database. + * See https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html + */ + class ReadSQLCall extends SqlExecution::Range, DataFlow::CallCfgNode { + ReadSQLCall() { this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall() } + + override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] } + } } diff --git a/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md b/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md new file mode 100644 index 000000000000..a230dcc63ec3 --- /dev/null +++ b/python/ql/src/change-notes/2025-05-26-pandas-sqli-sinks.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added SQL injection models from the `pandas` PyPI package. diff --git a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py index a524fa214459..6ad7a6101c51 100644 --- a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py +++ b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py @@ -1,5 +1,5 @@ import pandas as pd - +import sqlite3 df = pd.DataFrame({'temp_c': [17.0, 25.0]}, index=['Portland', 'Berkeley']) df.sample().query("query") # $getCode="query" @@ -55,11 +55,13 @@ df.query("query") # $getCode="query" df.eval("query") # $getCode="query" -df = pd.read_sql_query("filepath", 'postgres:///db_name') +connection = sqlite3.connect("pets.db") +df = pd.read_sql_query("sql query", connection) # $getSql="sql query" df.query("query") # $getCode="query" df.eval("query") # $getCode="query" -df = pd.read_sql("filepath", 'postgres:///db_name') +connection = sqlite3.connect("pets.db") +df = pd.read_sql("sql query", connection) # $getSql="sql query" df.query("query") # $getCode="query" df.eval("query") # $getCode="query" From 55c70a4cae77e06bdc8db8cce5920127e126f863 Mon Sep 17 00:00:00 2001 From: Sylwia Budzynska <102833689+sylwia-budzynska@users.noreply.github.com> Date: Tue, 27 May 2025 13:44:21 +0200 Subject: [PATCH 2/4] Fix nitpicks --- .../ql/test/library-tests/frameworks/pandas/dataframe_query.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py index 6ad7a6101c51..e9a368f8c1ba 100644 --- a/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py +++ b/python/ql/test/library-tests/frameworks/pandas/dataframe_query.py @@ -60,7 +60,6 @@ df.query("query") # $getCode="query" df.eval("query") # $getCode="query" -connection = sqlite3.connect("pets.db") df = pd.read_sql("sql query", connection) # $getSql="sql query" df.query("query") # $getCode="query" df.eval("query") # $getCode="query" From 8a1c323a9880b2859fddef47c4c9e79bc1f59822 Mon Sep 17 00:00:00 2001 From: Sylwia Budzynska <102833689+sylwia-budzynska@users.noreply.github.com> Date: Tue, 27 May 2025 13:45:40 +0200 Subject: [PATCH 3/4] Change naming to PascalCase --- python/ql/lib/semmle/python/frameworks/Pandas.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Pandas.qll b/python/ql/lib/semmle/python/frameworks/Pandas.qll index fecfb58674f0..2c3601546719 100644 --- a/python/ql/lib/semmle/python/frameworks/Pandas.qll +++ b/python/ql/lib/semmle/python/frameworks/Pandas.qll @@ -157,8 +157,8 @@ private module Pandas { * which allows for executing raw SQL queries against a database. * See https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html */ - class ReadSQLCall extends SqlExecution::Range, DataFlow::CallCfgNode { - ReadSQLCall() { this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall() } + class ReadSqlCall extends SqlExecution::Range, DataFlow::CallCfgNode { + ReadSqlCall() { this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall() } override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] } } From e66659276ba3c4f1f679580fbba98374704fba79 Mon Sep 17 00:00:00 2001 From: Sylwia Budzynska <102833689+sylwia-budzynska@users.noreply.github.com> Date: Tue, 27 May 2025 13:51:03 +0200 Subject: [PATCH 4/4] Fix formatting --- python/ql/lib/semmle/python/frameworks/Pandas.qll | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Pandas.qll b/python/ql/lib/semmle/python/frameworks/Pandas.qll index 2c3601546719..d4c94f3e8386 100644 --- a/python/ql/lib/semmle/python/frameworks/Pandas.qll +++ b/python/ql/lib/semmle/python/frameworks/Pandas.qll @@ -158,7 +158,9 @@ private module Pandas { * See https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html */ class ReadSqlCall extends SqlExecution::Range, DataFlow::CallCfgNode { - ReadSqlCall() { this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall() } + ReadSqlCall() { + this = API::moduleImport("pandas").getMember(["read_sql", "read_sql_query"]).getACall() + } override DataFlow::Node getSql() { result in [this.getArg(0), this.getArgByName("sql")] } }