Skip to content

Commit

Permalink
pgcdc: validate quoted identifier
Browse files Browse the repository at this point in the history
  • Loading branch information
rockwotj committed Dec 13, 2024
1 parent ec0d162 commit 781d5bc
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 12 deletions.
38 changes: 26 additions & 12 deletions internal/impl/postgresql/pglogicalstream/sanitize/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,26 +366,38 @@ func SQLQuery(sql string, args ...any) (string, error) {
// ValidatePostgresIdentifier checks if a string is a valid PostgreSQL identifier
// This follows PostgreSQL's standard naming rules
func ValidatePostgresIdentifier(name string) error {
if parts := strings.Split(name, "."); len(parts) == 2 {
if err := ValidatePostgresIdentifier(parts[0]); err != nil {
return fmt.Errorf("invalid schema identifier: %s", err)
}
name = parts[1]
}

// Strip quotes if they are present
if strings.HasPrefix(name, "\"") && strings.HasSuffix(name, "\"") {
name = strings.Trim(name, "\"")
}

if len(name) == 0 {
return errors.New("empty identifier is not allowed")
}

// It's unclear whether PostgreSQL applies the maximum length before or after the quotes
// are unescaped. To be safe we check the length of the raw, still-quoted name here; it
// seems unlikely that folks are using identifiers this large.
if len(name) > MaxIdentifierLength {
return fmt.Errorf("identifier length exceeds maximum of %d characters", MaxIdentifierLength)
}

// Handle quoted identifiers.
if strings.HasPrefix(name, `"`) && strings.HasSuffix(name, `"`) && len(name) >= 2 {
name := name[1 : len(name)-1]
if name == "" {
return errors.New("quoted identifiers cannot be empty")
}
for i := 0; i < len(name); i++ {
if name[i] != '"' {
continue
}
if i+1 >= len(name) {
return fmt.Errorf("invalid quoted identifier: %s", name)
}
if name[i+1] != '"' {
return fmt.Errorf("invalid quoted identifier: %s", name)
}
i++ // Skip over the next character to handle triple quotes
}
return nil
}

// First character must be a letter or underscore
if !unicode.IsLetter(rune(name[0])) && name[0] != '_' {
return errors.New("identifier must start with a letter or underscore")
Expand All @@ -398,5 +410,7 @@ func ValidatePostgresIdentifier(name string) error {
}
}

// TODO(cdc): We should also ensure that this is not a reserved keyword.

return nil
}
38 changes: 38 additions & 0 deletions internal/impl/postgresql/pglogicalstream/sanitize/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
package sanitize_test

import (
"strings"
"testing"
"time"

"github.com/redpanda-data/connect/v4/internal/impl/postgresql/pglogicalstream/sanitize"
"github.com/stretchr/testify/require"
)

func TestNewQuery(t *testing.T) {
Expand Down Expand Up @@ -250,3 +252,39 @@ func TestQuerySanitize(t *testing.T) {
}
}
}

func TestIdentifierValidation(t *testing.T) {
successfulTests := []string{
`"FooBar"`,
`"Foo""Bar"`,
`"Foo""""Bar"`,
`_Foobar`,
strings.Repeat("a", 63),
}

for _, i := range successfulTests {
i := i
t.Run(i, func(t *testing.T) {
require.NoError(t, sanitize.ValidatePostgresIdentifier(i))
})
}

errorTests := []string{
``,
`"`,
`""`,
`"""`,
`"foo"""bar"`,
`"foo"bar"`,
`"foobar""`,
`""foobar""`,
strings.Repeat("a", 64),
}

for _, i := range errorTests {
i := i
t.Run(i, func(t *testing.T) {
require.Error(t, sanitize.ValidatePostgresIdentifier(i))
})
}
}

0 comments on commit 781d5bc

Please sign in to comment.