From b784246fadaab322a3575c1aafb14fcbd5aa6062 Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Wed, 2 Apr 2025 11:01:42 +0200 Subject: [PATCH 1/8] Use `PreparedStatement` instead of `Statement` for queries. --- .../jetbrains/kotlinx/dataframe/io/db/util.kt | 4 ++-- .../jetbrains/kotlinx/dataframe/io/readJdbc.kt | 16 ++++++++-------- .../kotlinx/dataframe/io/local/imdbTest.kt | 2 +- .../kotlinx/dataframe/io/local/mariadbTest.kt | 2 +- .../kotlinx/dataframe/io/local/postgresTest.kt | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt index 369250c938..45487b5586 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt @@ -25,8 +25,8 @@ public fun extractDBTypeFromConnection(connection: Connection): DbType { // works only for H2 version 2 val modeQuery = "SELECT SETTING_VALUE FROM INFORMATION_SCHEMA.SETTINGS WHERE SETTING_NAME = 'MODE'" var mode = "" - connection.createStatement().use { st -> - st.executeQuery(modeQuery).use { rs -> + connection.prepareStatement(modeQuery).use { st -> + st.executeQuery().use { rs -> if (rs.next()) { mode = rs.getString("SETTING_VALUE") logger.debug { "Fetched H2 DB mode: $mode" } diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index bb47209d10..18d16bf2bd 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -158,10 +158,10 @@ public fun DataFrame.Companion.readSqlTable( "SELECT * FROM $tableName" } - connection.createStatement().use { st -> + connection.prepareStatement(selectAllQuery).use { st -> logger.debug { "Connection with url:$url is established successfully." } - st.executeQuery(selectAllQuery).use { rs -> + st.executeQuery().use { rs -> val tableColumns = getTableColumnsMetadata(rs) return fetchAndConvertDataFromResultSet(tableColumns, rs, determinedDbType, limit, inferNullability) } @@ -228,8 +228,8 @@ public fun DataFrame.Companion.readSqlQuery( logger.debug { "Executing SQL query: $internalSqlQuery" } - connection.createStatement().use { st -> - st.executeQuery(internalSqlQuery).use { rs -> + connection.prepareStatement(internalSqlQuery).use { st -> + st.executeQuery().use { rs -> val tableColumns = getTableColumnsMetadata(rs) return fetchAndConvertDataFromResultSet(tableColumns, rs, determinedDbType, limit, inferNullability) } @@ -571,8 +571,8 @@ public fun DataFrame.Companion.getSchemaForSqlTable( val sqlQuery = "SELECT * FROM $tableName" val selectFirstRowQuery = determinedDbType.sqlQueryLimit(sqlQuery, limit = 1) - connection.createStatement().use { st -> - st.executeQuery(selectFirstRowQuery).use { rs -> + connection.prepareStatement(selectFirstRowQuery).use { st -> + st.executeQuery().use { rs -> val tableColumns = getTableColumnsMetadata(rs) return buildSchemaByTableColumns(tableColumns, determinedDbType) } @@ -616,8 +616,8 @@ public fun DataFrame.Companion.getSchemaForSqlQuery( ): DataFrameSchema { val determinedDbType = dbType ?: extractDBTypeFromConnection(connection) - connection.createStatement().use { st -> - st.executeQuery(sqlQuery).use { rs -> + connection.prepareStatement(sqlQuery).use { st -> + st.executeQuery().use { rs -> val tableColumns = getTableColumnsMetadata(rs) return buildSchemaByTableColumns(tableColumns, determinedDbType) } diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt index 1d25648e67..9e216a9b5f 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt @@ -37,7 +37,7 @@ interface RankedMoviesWithGenres { val genres: String? } -@Ignore +//@Ignore class ImdbTestTest { @Test fun `read table`() { diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt index c79c7f0be8..c1addab482 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt @@ -115,7 +115,7 @@ private const val JSON_STRING = " \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" + "{\"description\": \"The Lou\", \"price\": 24.75}]}" -@Ignore +//@Ignore class MariadbTest { companion object { private lateinit var connection: Connection diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt index 9dddd6521f..19b732576c 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt @@ -257,7 +257,7 @@ internal fun clearTestData(connection: Connection) { connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") } } -@Ignore +//@Ignore class PostgresTest { companion object { private lateinit var connection: Connection From 01c49dad157746bf79008cb6fd689f14352a32ef Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Wed, 2 Apr 2025 13:08:00 +0200 Subject: [PATCH 2/8] Add SQL injection validation. Phase 1 --- .../kotlinx/dataframe/io/readJdbc.kt | 88 ++++++++++++++++++- .../kotlinx/dataframe/io/h2/h2Test.kt | 64 ++++++++++++++ 2 files changed, 148 insertions(+), 4 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 18d16bf2bd..8303089103 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -138,6 +138,8 @@ public fun DataFrame.Companion.readSqlTable( * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [connection]. + * @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the data from the SQL table. * * @see DriverManager.getConnection @@ -148,7 +150,17 @@ public fun DataFrame.Companion.readSqlTable( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true ): AnyFrame { + if (strictValidation) { + require(isValidTableName(tableName)) { + "The provided table name '$tableName' is invalid. Please ensure it matches a valid table name in the database schema." + } + } else { + logger.warn { "Strict validation is disabled. Make sure the table name '$tableName' is correct." } + } + + val url = connection.metaData.url val determinedDbType = dbType ?: extractDBTypeFromConnection(connection) @@ -217,7 +229,7 @@ public fun DataFrame.Companion.readSqlQuery( inferNullability: Boolean = true, dbType: DbType? = null, ): AnyFrame { - require(isValid(sqlQuery)) { + require(isValidSqlQuery(sqlQuery)) { "SQL query should start from SELECT and contain one query for reading data without any manipulation. " + "Also it should not contain any separators like `;`." } @@ -330,11 +342,79 @@ public fun Connection.readDataFrame( } /** SQL query is accepted only if it starts from SELECT */ -private fun isValid(sqlQuery: String): Boolean { +private fun isValidSqlQuery(sqlQuery: String): Boolean { val normalizedSqlQuery = sqlQuery.trim().uppercase() - return normalizedSqlQuery.startsWith(START_OF_READ_SQL_QUERY) && - !normalizedSqlQuery.contains(MULTIPLE_SQL_QUERY_SEPARATOR) + // Check if the query starts with SELECT + if (!normalizedSqlQuery.startsWith("SELECT")) { + return false + } + + // Check that the query does not contain forbidden symbols or constructions + val forbiddenPatterns = listOf( + ";", // Separator for multiple SQL queries + "--", // Single-line comments + "/*", // Multi-line comments + "'''", // Prevent triple quotes + "'", // Single quotes + "\"", // Unnecessary double quotes + " OR ", // Logical operator for potential injections + "DROP", // Command for dropping objects + "DELETE", // Command for deleting data + "INSERT", // Command for inserting data + "UPDATE", // Command for updating data + "EXEC", // Execute stored procedures + "EXECUTE", // Alternative for EXEC + "CREATE", // Command for creating objects + "ALTER", // Command for altering structures + "GRANT", // Command for granting permissions + "REVOKE", // Command for revoking permissions + "MERGE" // Command for data modification + ) + + // Check for the presence of forbidden patterns + for (pattern in forbiddenPatterns) { + if (normalizedSqlQuery.contains(pattern)) { + return false + } + } + + // If all checks pass, the query is considered safe + return true +} + +/** Validates if the given SQL table name is safe */ +private fun isValidTableName(tableName: String): Boolean { + val normalizedTableName = tableName.trim().uppercase() + + // Disallow forbidden patterns or keywords + val forbiddenPatterns = listOf( + ";", // Separator for SQL statements + "--", // Single-line comments + "/*", // Multi-line comments + "DROP", // Command for dropping objects + "DELETE", // Command for deleting data + "INSERT", // Command for inserting data + "UPDATE", // Command for updating data + "EXEC", // Command for executing stored procedures + "EXECUTE", // Alternative for EXEC + "CREATE", // Command for creating objects + "ALTER", // Command for altering structures + "GRANT", // Command for granting permissions + "REVOKE", // Command for revoking permissions + "MERGE" // Command for sophisticated data modifications + ) + + // Ensure the table name does not contain forbidden patterns + for (pattern in forbiddenPatterns) { + if (normalizedTableName.contains(pattern)) { + return false + } + } + + // Ensure the table name contains only valid characters + val tableNameRegex = Regex("^[A-Z0-9_]+$") + return tableNameRegex.matches(normalizedTableName) } /** diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index b5d8a931dd..d1aa04e993 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -598,6 +598,70 @@ class JdbcTest { } } + @Test + fun `detect SQL injection attempts`() { + + // SQL injection patterns + @Language("SQL") + val injectionUnion = """ + SELECT * FROM users WHERE id = 1 UNION SELECT * FROM admin_users + """ + + @Language("SQL") + val injectionAlwaysTrueCondition = """ + SELECT * FROM users WHERE username = 'admin' OR 1=1 + """ + + @Language("SQL") + val injectionComment = """ + SELECT * FROM accounts WHERE email = 'admin@example.com' -- AND password = 'password' + """ + + @Language("SQL") + val injectionMultilineComment = """ + SELECT * FROM users /* Possible malicious comment */ WHERE id = 1 + """ + + @Language("SQL") + val injectionSemicolon = """ + SELECT * FROM users WHERE id = 1; DROP TABLE users + """ + + @Language("SQL") + val injectionUnescapedString = """ + SELECT * FROM users WHERE name = 'test' OR '' = '' + """ + + @Language("SQL") + val injectionSQLWithSingleQuote = """ + SELECT * FROM users WHERE username = 'admin' AND password = 'pass' OR '1'='1' + """ + + @Language("SQL") + val injectionUsingDropCommand = """ + DROP TABLE users; SELECT * FROM orders + """ + + // List all SQL injection patterns + val sqlInjectionQueries = listOf( + injectionUnion, + injectionAlwaysTrueCondition, + injectionComment, + injectionMultilineComment, + injectionSemicolon, + injectionUnescapedString, + injectionSQLWithSingleQuote, + injectionUsingDropCommand + ) + + // Test each injection query + sqlInjectionQueries.forEach { query -> + shouldThrow { + DataFrame.readSqlQuery(connection, query) + } + } + } + @Test fun `read from table with name from reserved SQL keywords`() { // Create table Sale From 5a81763212f07d86cd5be012bb3286c12c5e4d0e Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Thu, 3 Apr 2025 18:49:18 +0200 Subject: [PATCH 3/8] Add strict validation parameter to SQL reading functions Introduces a new `strictValidation` parameter to SQL reading methods, defaulting to `true`, to ensure table names and queries follow a valid format. If disabled, a warning is logged, granting users flexibility while maintaining safety by default. --- .../kotlinx/dataframe/io/readJdbc.kt | 34 ++++++++++++++++--- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 8303089103..3840b07e31 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -115,6 +115,8 @@ public data class DbConnectionConfig(val url: String, val user: String = "", val * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. + * @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the data from the SQL table. */ public fun DataFrame.Companion.readSqlTable( @@ -123,9 +125,10 @@ public fun DataFrame.Companion.readSqlTable( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true, ): AnyFrame { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return readSqlTable(connection, tableName, limit, inferNullability, dbType) + return readSqlTable(connection, tableName, limit, inferNullability, dbType, strictValidation) } } @@ -192,17 +195,21 @@ public fun DataFrame.Companion.readSqlTable( * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. + * @param [strictValidation] if `true`, the method validates that the provided query is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. */ + public fun DataFrame.Companion.readSqlQuery( dbConfig: DbConnectionConfig, sqlQuery: String, limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true, ): AnyFrame { DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return readSqlQuery(connection, sqlQuery, limit, inferNullability, dbType) + return readSqlQuery(connection, sqlQuery, limit, inferNullability, dbType, strictValidation) } } @@ -218,6 +225,8 @@ public fun DataFrame.Companion.readSqlQuery( * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [connection]. + * @param [strictValidation] if `true`, the method validates that the provided query is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. * * @see DriverManager.getConnection @@ -228,10 +237,15 @@ public fun DataFrame.Companion.readSqlQuery( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true, ): AnyFrame { - require(isValidSqlQuery(sqlQuery)) { - "SQL query should start from SELECT and contain one query for reading data without any manipulation. " + - "Also it should not contain any separators like `;`." + if (strictValidation) { + require(isValidSqlQuery(sqlQuery)) { + "SQL query should start from SELECT and contain one query for reading data without any manipulation. " + + "Also it should not contain any separators like `;`." + } + } else { + logger.warn { "Strict validation is disabled. Ensure the SQL query '$sqlQuery' is correct and safe." } } val determinedDbType = dbType ?: extractDBTypeFromConnection(connection) @@ -259,6 +273,8 @@ public fun DataFrame.Companion.readSqlQuery( * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [DbConnectionConfig]. + * @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. */ public fun DbConnectionConfig.readDataFrame( @@ -266,6 +282,7 @@ public fun DbConnectionConfig.readDataFrame( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true, ): AnyFrame = when { isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery( @@ -274,6 +291,7 @@ public fun DbConnectionConfig.readDataFrame( limit, inferNullability, dbType, + strictValidation, ) isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable( @@ -282,6 +300,7 @@ public fun DbConnectionConfig.readDataFrame( limit, inferNullability, dbType, + strictValidation, ) else -> throw IllegalArgumentException( @@ -311,6 +330,8 @@ private fun isSqlTableName(sqlQueryOrTableName: String): Boolean { * @param [inferNullability] indicates how the column nullability should be inferred. * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [Connection]. + * @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format. + * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. */ public fun Connection.readDataFrame( @@ -318,6 +339,7 @@ public fun Connection.readDataFrame( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, + strictValidation: Boolean = true, ): AnyFrame = when { isSqlQuery(sqlQueryOrTableName) -> DataFrame.readSqlQuery( @@ -326,6 +348,7 @@ public fun Connection.readDataFrame( limit, inferNullability, dbType, + strictValidation ) isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable( @@ -334,6 +357,7 @@ public fun Connection.readDataFrame( limit, inferNullability, dbType, + strictValidation ) else -> throw IllegalArgumentException( From 076e7283d64f2c4dee752cf23290483d4567c292 Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Thu, 3 Apr 2025 21:12:17 +0200 Subject: [PATCH 4/8] Improve SQL injection prevention and validation mechanisms --- .../kotlinx/dataframe/io/readJdbc.kt | 131 +++++++------- .../kotlinx/dataframe/io/h2/h2Test.kt | 160 +++++++++++++----- 2 files changed, 185 insertions(+), 106 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 3840b07e31..f214cccb38 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -56,15 +56,6 @@ private const val DEFAULT_LIMIT = Int.MIN_VALUE */ private const val START_OF_READ_SQL_QUERY = "SELECT" -/** - * Constant representing the separator used to separate multiple SQL queries. - * - * This separator is used when multiple SQL queries need to be executed together. - * Each query should be separated by this separator to indicate the end of one query - * and the start of the next query. - */ -private const val MULTIPLE_SQL_QUERY_SEPARATOR = ";" - /** * Represents a column in a database table to keep all required meta-information. * @@ -365,80 +356,94 @@ public fun Connection.readDataFrame( ) } -/** SQL query is accepted only if it starts from SELECT */ +/** + * Checks if a given string contains forbidden patterns or keywords. + * Logs a clear and friendly message if any forbidden pattern is found. + */ +private fun containsForbiddenPatterns(input: String): Boolean { + // List of forbidden patterns or commands + val forbiddenPatterns = listOf( + ";", // Separator for SQL statements + "--", // Single-line comments + "/*", // Start of multi-line comments + "*/", // End of multi-line comments + "DROP", "DELETE", "INSERT", "UPDATE", + "EXEC", "EXECUTE", "CREATE", "ALTER", + "GRANT", "REVOKE", "MERGE" // Dangerous SQL commands + ) + + for (pattern in forbiddenPatterns) { + if (input.contains(pattern)) { + logger.error { "Validation failed: The input contains a forbidden element '$pattern'. Please review the input: '$input'." } + return true + } + } + return false +} + +/** + * Validates if the SQL query is safe and starts with SELECT. + * Ensures proper syntax structure, checks for balanced quotes, and disallows dangerous commands or patterns. + */ private fun isValidSqlQuery(sqlQuery: String): Boolean { val normalizedSqlQuery = sqlQuery.trim().uppercase() - // Check if the query starts with SELECT + // Log the query being validated + logger.warn { "Validating SQL query: '$sqlQuery'" } + + // Ensure the query starts with "SELECT" if (!normalizedSqlQuery.startsWith("SELECT")) { + logger.error { "Validation failed: The SQL query must start with 'SELECT'. Given query: '$sqlQuery'." } return false } - // Check that the query does not contain forbidden symbols or constructions - val forbiddenPatterns = listOf( - ";", // Separator for multiple SQL queries - "--", // Single-line comments - "/*", // Multi-line comments - "'''", // Prevent triple quotes - "'", // Single quotes - "\"", // Unnecessary double quotes - " OR ", // Logical operator for potential injections - "DROP", // Command for dropping objects - "DELETE", // Command for deleting data - "INSERT", // Command for inserting data - "UPDATE", // Command for updating data - "EXEC", // Execute stored procedures - "EXECUTE", // Alternative for EXEC - "CREATE", // Command for creating objects - "ALTER", // Command for altering structures - "GRANT", // Command for granting permissions - "REVOKE", // Command for revoking permissions - "MERGE" // Command for data modification - ) + // Validate against forbidden patterns + if (containsForbiddenPatterns(normalizedSqlQuery)) { + return false + } - // Check for the presence of forbidden patterns - for (pattern in forbiddenPatterns) { - if (normalizedSqlQuery.contains(pattern)) { - return false - } + // Check if there are balanced quotes (single and double) + val singleQuotes = sqlQuery.count { it == '\'' } + val doubleQuotes = sqlQuery.count { it == '"' } + if (singleQuotes % 2 != 0) { + logger.error { "Validation failed: Unbalanced single quotes in the SQL query. Please correct the query: '$sqlQuery'." } + return false + } + if (doubleQuotes % 2 != 0) { + logger.error { "Validation failed: Unbalanced double quotes in the SQL query. Please correct the query: '$sqlQuery'." } + return false } - // If all checks pass, the query is considered safe + logger.warn { "SQL query validation succeeded for query: '$sqlQuery'." } return true } -/** Validates if the given SQL table name is safe */ +/** + * Validates if the given SQL table name is safe and logs any validation violations. + */ private fun isValidTableName(tableName: String): Boolean { val normalizedTableName = tableName.trim().uppercase() - // Disallow forbidden patterns or keywords - val forbiddenPatterns = listOf( - ";", // Separator for SQL statements - "--", // Single-line comments - "/*", // Multi-line comments - "DROP", // Command for dropping objects - "DELETE", // Command for deleting data - "INSERT", // Command for inserting data - "UPDATE", // Command for updating data - "EXEC", // Command for executing stored procedures - "EXECUTE", // Alternative for EXEC - "CREATE", // Command for creating objects - "ALTER", // Command for altering structures - "GRANT", // Command for granting permissions - "REVOKE", // Command for revoking permissions - "MERGE" // Command for sophisticated data modifications - ) + // Log the table name being validated + logger.warn { "Validating SQL table name: '$tableName'" } - // Ensure the table name does not contain forbidden patterns - for (pattern in forbiddenPatterns) { - if (normalizedTableName.contains(pattern)) { - return false + // Validate against forbidden patterns + if (containsForbiddenPatterns(normalizedTableName)) { + return false + } + + // Validate the table name structure: letters, numbers, underscores, and dots are allowed + val tableNameRegex = Regex("^[A-Z0-9_]+(\\.[A-Z0-9_]+)*$") + if (!tableNameRegex.matches(normalizedTableName)) { + logger.error { + "Validation failed: The table name contains invalid characters. " + + "Only letters, numbers, underscores, and dots are allowed. Provided name: '$tableName'." } + return false } - // Ensure the table name contains only valid characters - val tableNameRegex = Regex("^[A-Z0-9_]+$") - return tableNameRegex.matches(normalizedTableName) + logger.warn { "Table name validation passed for table: '$tableName'." } + return true } /** diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index d1aa04e993..dcb08defba 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -599,62 +599,59 @@ class JdbcTest { } @Test - fun `detect SQL injection attempts`() { - - // SQL injection patterns - @Language("SQL") - val injectionUnion = """ - SELECT * FROM users WHERE id = 1 UNION SELECT * FROM admin_users - """ + fun `readFromTable should reject invalid table names to prevent SQL injections`() { + // Invalid table names that attempt SQL injection + val invalidTableNames = listOf( + "Customer; DROP TABLE Customer", // Injection using semicolon + "Sale -- Comment", // Injection using single-line comment + "/* Multi-line comment */ Customer", // Injection using multi-line comment + "Sale WHERE 1=1", // Injection using always-true condition + "Sale UNION SELECT * FROM Customer" // UNION injection + ) - @Language("SQL") - val injectionAlwaysTrueCondition = """ - SELECT * FROM users WHERE username = 'admin' OR 1=1 - """ + invalidTableNames.forEach { tableName -> + shouldThrow { + DataFrame.readSqlTable(connection, tableName) + } + } + } + @Test + fun `readSqlQuery should reject malicious SQL queries to prevent SQL injections`() { + // Malicious SQL queries attempting injection @Language("SQL") val injectionComment = """ - SELECT * FROM accounts WHERE email = 'admin@example.com' -- AND password = 'password' - """ + SELECT * FROM Sale WHERE amount = 100.0 -- AND id = 5 + """ @Language("SQL") val injectionMultilineComment = """ - SELECT * FROM users /* Possible malicious comment */ WHERE id = 1 - """ + SELECT * FROM Customer /* Possible malicious comment */ WHERE id = 1 + """ @Language("SQL") val injectionSemicolon = """ - SELECT * FROM users WHERE id = 1; DROP TABLE users - """ - - @Language("SQL") - val injectionUnescapedString = """ - SELECT * FROM users WHERE name = 'test' OR '' = '' - """ + SELECT * FROM Sale WHERE amount = 500.0; DROP TABLE Customer + """ @Language("SQL") val injectionSQLWithSingleQuote = """ - SELECT * FROM users WHERE username = 'admin' AND password = 'pass' OR '1'='1' - """ + SELECT * FROM Sale WHERE id = 1 AND amount = 100.0 OR '1'='1 + """ @Language("SQL") val injectionUsingDropCommand = """ - DROP TABLE users; SELECT * FROM orders - """ + DROP TABLE Customer; SELECT * FROM Sale + """ - // List all SQL injection patterns val sqlInjectionQueries = listOf( - injectionUnion, - injectionAlwaysTrueCondition, injectionComment, injectionMultilineComment, injectionSemicolon, - injectionUnescapedString, injectionSQLWithSingleQuote, injectionUsingDropCommand ) - // Test each injection query sqlInjectionQueries.forEach { query -> shouldThrow { DataFrame.readSqlQuery(connection, query) @@ -663,25 +660,102 @@ class JdbcTest { } @Test - fun `read from table with name from reserved SQL keywords`() { - // Create table Sale + fun `readFromTable should work with non-standard table names when strictValidation is disabled`() { + // Non-standard table names that are still valid but may appear strange + val nonStandardTableNames = listOf( + "`Customer With Space`", // Table name with spaces + "`Important-Data`", // Table name with hyphens + "`[123TableName]`" // Table name that resembles a special syntax + ) + + try { + // Create these tables to ensure they exist for the test + connection.createStatement().use { stmt -> + nonStandardTableNames.forEach { tableName -> + stmt.execute("CREATE TABLE IF NOT EXISTS $tableName (id INT, name VARCHAR(255))") + } + } + + // Read from these tables with strictValidation disabled + nonStandardTableNames.forEach { tableName -> + DataFrame.readSqlTable(connection, tableName, strictValidation = false) + } + } finally { + // Clean up by deleting all created tables + connection.createStatement().use { stmt -> + nonStandardTableNames.forEach { tableName -> + stmt.execute("DROP TABLE IF EXISTS $tableName") + } + } + } + } + + @Test + fun `readSqlQuery should execute DROP TABLE when validation is disabled`() { + // Query to create a temporary test table @Language("SQL") - val createAlterTableQuery = """ - CREATE TABLE "ALTER" ( - id INT PRIMARY KEY, - description TEXT - ) + val createTableQuery = """ + CREATE TABLE IF NOT EXISTS TestTable ( + id INT PRIMARY KEY, + data VARCHAR(255) + ) + """ + + // Query to drop the test table + @Language("SQL") + val dropTableQuery = """ + SELECT * FROM TestTable; DROP TABLE TestTable; + """ - connection.createStatement().execute(createAlterTableQuery) + try { + // Create the test table + connection.createStatement().use { stmt -> + stmt.execute(createTableQuery) // Create table for the test case + } + + // Execute the DROP TABLE command with validation disabled + DataFrame.readSqlQuery(connection, dropTableQuery, strictValidation = false) + // Verify that the table has been successfully dropped + connection.createStatement().use { stmt -> + shouldThrow { + stmt.executeQuery("SELECT * FROM TestTable") + } + } + } finally { + // Cleanup: Ensure the table is removed in case of failure + connection.createStatement().use { stmt -> + stmt.execute("DROP TABLE IF EXISTS TestTable") + } + } + } + + @Test + fun `read from table with name from reserved SQL keywords`() { @Language("SQL") - val selectFromWeirdTableSQL = """ - SELECT * from "ALTER" + val createAlterTableQuery = """ + CREATE TABLE "ALTER" ( + id INT PRIMARY KEY, + description TEXT + ) """ - DataFrame.readSqlQuery(connection, selectFromWeirdTableSQL).rowsCount() shouldBe 0 - connection.createStatement().execute("DROP TABLE \"ALTER\"") + @Language("SQL") + val selectFromWeirdTableSQL = """SELECT * from "ALTER"""" + + try { + connection.createStatement().execute(createAlterTableQuery) + // with enabled strictValidation + shouldThrow { + DataFrame.readSqlQuery(connection, selectFromWeirdTableSQL) + } + // with disabled strictValidation + DataFrame.readSqlQuery(connection, selectFromWeirdTableSQL, strictValidation = false).rowsCount() shouldBe 0 + } finally { + connection.createStatement().execute("DROP TABLE IF EXISTS \"ALTER\"") + } + } @Test From 3d361e0d47c406768d04b0670c1f6b245d3575da Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Thu, 3 Apr 2025 21:18:33 +0200 Subject: [PATCH 5/8] Format code and apply `@Ignore` to ignored tests --- .../kotlinx/dataframe/io/readJdbc.kt | 44 +++++++++++++------ .../kotlinx/dataframe/io/h2/h2Test.kt | 9 ++-- .../kotlinx/dataframe/io/local/imdbTest.kt | 2 +- .../kotlinx/dataframe/io/local/mariadbTest.kt | 2 +- .../dataframe/io/local/postgresTest.kt | 2 +- 5 files changed, 37 insertions(+), 22 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index f214cccb38..734322acae 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -144,7 +144,7 @@ public fun DataFrame.Companion.readSqlTable( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, - strictValidation: Boolean = true + strictValidation: Boolean = true, ): AnyFrame { if (strictValidation) { require(isValidTableName(tableName)) { @@ -154,7 +154,6 @@ public fun DataFrame.Companion.readSqlTable( logger.warn { "Strict validation is disabled. Make sure the table name '$tableName' is correct." } } - val url = connection.metaData.url val determinedDbType = dbType ?: extractDBTypeFromConnection(connection) @@ -339,7 +338,7 @@ public fun Connection.readDataFrame( limit, inferNullability, dbType, - strictValidation + strictValidation, ) isSqlTableName(sqlQueryOrTableName) -> DataFrame.readSqlTable( @@ -348,7 +347,7 @@ public fun Connection.readDataFrame( limit, inferNullability, dbType, - strictValidation + strictValidation, ) else -> throw IllegalArgumentException( @@ -363,18 +362,29 @@ public fun Connection.readDataFrame( private fun containsForbiddenPatterns(input: String): Boolean { // List of forbidden patterns or commands val forbiddenPatterns = listOf( - ";", // Separator for SQL statements - "--", // Single-line comments - "/*", // Start of multi-line comments - "*/", // End of multi-line comments - "DROP", "DELETE", "INSERT", "UPDATE", - "EXEC", "EXECUTE", "CREATE", "ALTER", - "GRANT", "REVOKE", "MERGE" // Dangerous SQL commands + ";", // Separator for SQL statements + "--", // Single-line comments + "/*", // Start of multi-line comments + "*/", // End of multi-line comments + "DROP", + "DELETE", + "INSERT", + "UPDATE", + "EXEC", + "EXECUTE", + "CREATE", + "ALTER", + "GRANT", + "REVOKE", + "MERGE", ) for (pattern in forbiddenPatterns) { if (input.contains(pattern)) { - logger.error { "Validation failed: The input contains a forbidden element '$pattern'. Please review the input: '$input'." } + logger.error { + "Validation failed: The input contains a forbidden element '$pattern'. " + + "Please review the input: '$input'." + } return true } } @@ -406,11 +416,17 @@ private fun isValidSqlQuery(sqlQuery: String): Boolean { val singleQuotes = sqlQuery.count { it == '\'' } val doubleQuotes = sqlQuery.count { it == '"' } if (singleQuotes % 2 != 0) { - logger.error { "Validation failed: Unbalanced single quotes in the SQL query. Please correct the query: '$sqlQuery'." } + logger.error { + "Validation failed: Unbalanced single quotes in the SQL query. " + + "Please correct the query: '$sqlQuery'." + } return false } if (doubleQuotes % 2 != 0) { - logger.error { "Validation failed: Unbalanced double quotes in the SQL query. Please correct the query: '$sqlQuery'." } + logger.error { + "Validation failed: Unbalanced double quotes in the SQL query. " + + "Please correct the query: '$sqlQuery'." + } return false } diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index dcb08defba..722910d0f9 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -606,7 +606,7 @@ class JdbcTest { "Sale -- Comment", // Injection using single-line comment "/* Multi-line comment */ Customer", // Injection using multi-line comment "Sale WHERE 1=1", // Injection using always-true condition - "Sale UNION SELECT * FROM Customer" // UNION injection + "Sale UNION SELECT * FROM Customer", // UNION injection ) invalidTableNames.forEach { tableName -> @@ -649,7 +649,7 @@ class JdbcTest { injectionMultilineComment, injectionSemicolon, injectionSQLWithSingleQuote, - injectionUsingDropCommand + injectionUsingDropCommand, ) sqlInjectionQueries.forEach { query -> @@ -664,8 +664,8 @@ class JdbcTest { // Non-standard table names that are still valid but may appear strange val nonStandardTableNames = listOf( "`Customer With Space`", // Table name with spaces - "`Important-Data`", // Table name with hyphens - "`[123TableName]`" // Table name that resembles a special syntax + "`Important-Data`", // Table name with hyphens + "`[123TableName]`", // Table name that resembles a special syntax ) try { @@ -755,7 +755,6 @@ class JdbcTest { } finally { connection.createStatement().execute("DROP TABLE IF EXISTS \"ALTER\"") } - } @Test diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt index 9e216a9b5f..1d25648e67 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/imdbTest.kt @@ -37,7 +37,7 @@ interface RankedMoviesWithGenres { val genres: String? } -//@Ignore +@Ignore class ImdbTestTest { @Test fun `read table`() { diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt index c1addab482..c79c7f0be8 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/mariadbTest.kt @@ -115,7 +115,7 @@ private const val JSON_STRING = " \t\"favorites\": [{\"description\": \"Pepperoni deep dish\", \"price\": 18.75}, \n" + "{\"description\": \"The Lou\", \"price\": 24.75}]}" -//@Ignore +@Ignore class MariadbTest { companion object { private lateinit var connection: Connection diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt index 19b732576c..9dddd6521f 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/local/postgresTest.kt @@ -257,7 +257,7 @@ internal fun clearTestData(connection: Connection) { connection.createStatement().use { st -> st.execute("DROP TABLE IF EXISTS table2") } } -//@Ignore +@Ignore class PostgresTest { companion object { private lateinit var connection: Connection From 57711dbf93797510811bfd1aecc4b481f152cae8 Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Thu, 3 Apr 2025 21:29:42 +0200 Subject: [PATCH 6/8] Format code and apply `@Ignore` to ignored tests --- dataframe-jdbc/api/dataframe-jdbc.api | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index 94cb132607..552c069316 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -54,26 +54,26 @@ public final class org/jetbrains/kotlinx/dataframe/io/ReadJdbcKt { public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map; public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; - public static final fun readDataFrame (Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readDataFrame (Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readDataFrame (Ljava/sql/ResultSet;Ljava/sql/Connection;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readDataFrame (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;IZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readDataFrame (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readDataFrame$default (Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readDataFrame (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readDataFrame$default (Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Ljava/sql/Connection;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readDataFrame$default (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;IZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readDataFrame$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;IZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Ljava/sql/Connection;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static synthetic fun readResultSet$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;IZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; - public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readSqlQuery$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static final fun readSqlTable (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; + public static synthetic fun readSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; } public final class org/jetbrains/kotlinx/dataframe/io/TableColumnMetadata { From 03239a14b0836f1a648ea8e09c9c925564328e90 Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Mon, 14 Apr 2025 13:24:19 +0200 Subject: [PATCH 7/8] Support Unicode table names in SQL operations Added handling and tests for tables with Unicode names in `readSqlTable`. Updated the table name validation regex to support Unicode letters, enhancing compatibility with diverse database naming conventions. --- .../kotlinx/dataframe/io/readJdbc.kt | 23 +++++++++--- .../kotlinx/dataframe/io/h2/h2Test.kt | 35 +++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 734322acae..7bd110ba73 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -51,10 +51,25 @@ private val logger = KotlinLogging.logger {} private const val DEFAULT_LIMIT = Int.MIN_VALUE /** - * Constant variable indicating the start of an SQL read query. - * The value of this variable is "SELECT". + * A regular expression defining the valid pattern for SQL table names. + * + * This pattern enforces that table names must: + * - Contain only Unicode letters, Unicode digits, or underscores. + * - Optionally be segmented by dots to indicate schema and table separation. + * + * It ensures compatibility with most SQL database naming conventions, thus minimizing risks of invalid names + * or injection vulnerabilities. + * + * Example of valid table names: + * - `my_table` + * - `schema1.table2` + * + * Example of invalid table names: + * - `my-table` (contains a dash) + * - `table!name` (contains special characters) + * - `.startWithDot` (cannot start with a dot) */ -private const val START_OF_READ_SQL_QUERY = "SELECT" +private const val TABLE_NAME_VALID_PATTERN = "^[\\p{L}\\p{N}_]+(\\.[\\p{L}\\p{N}_]+)*$" /** * Represents a column in a database table to keep all required meta-information. @@ -449,7 +464,7 @@ private fun isValidTableName(tableName: String): Boolean { } // Validate the table name structure: letters, numbers, underscores, and dots are allowed - val tableNameRegex = Regex("^[A-Z0-9_]+(\\.[A-Z0-9_]+)*$") + val tableNameRegex = Regex(TABLE_NAME_VALID_PATTERN) if (!tableNameRegex.matches(normalizedTableName)) { logger.error { "Validation failed: The table name contains invalid characters. " + diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index 722910d0f9..f95f0a141b 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -690,6 +690,41 @@ class JdbcTest { } } + @Test + fun `read from Unicode table names`() { + val unicodeTableNames = listOf( + "Таблица", // Russian Cyrillic + "表", // Chinese character + "テーブル", // Japanese Katakana + "عربي", // Arabic + "Δοκιμή", // Greek + ) + + try { + // Create tables with Unicode names + connection.createStatement().use { stmt -> + unicodeTableNames.forEach { tableName -> + stmt.execute("CREATE TABLE IF NOT EXISTS `$tableName` (id INT PRIMARY KEY, name VARCHAR(255))") + stmt.execute("INSERT INTO `$tableName` (id, name) VALUES (1, 'TestName')") + } + } + + // Read from the tables and validate correctness + unicodeTableNames.forEach { tableName -> + val df = DataFrame.readSqlTable(connection, tableName) + df.rowsCount() shouldBe 1 + df[0][1] shouldBe "TestName" + } + } finally { + // Drop the Unicode tables + connection.createStatement().use { stmt -> + unicodeTableNames.forEach { tableName -> + stmt.execute("DROP TABLE IF EXISTS `$tableName`") + } + } + } + } + @Test fun `readSqlQuery should execute DROP TABLE when validation is disabled`() { // Query to create a temporary test table From fbaba685880ba11f78e07e2e6e00dab55713b766 Mon Sep 17 00:00:00 2001 From: Alexey Zinoviev Date: Mon, 14 Apr 2025 13:26:44 +0200 Subject: [PATCH 8/8] Fixing linter --- .../kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index f95f0a141b..408ca65dc9 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -694,10 +694,10 @@ class JdbcTest { fun `read from Unicode table names`() { val unicodeTableNames = listOf( "Таблица", // Russian Cyrillic - "表", // Chinese character + "表", // Chinese character "テーブル", // Japanese Katakana - "عربي", // Arabic - "Δοκιμή", // Greek + "عربي", // Arabic + "Δοκιμή", // Greek ) try {