Skip to content

Commit

Permalink
Search indexes (#2152)
Browse files Browse the repository at this point in the history
* Add support for Search Indexes

* Reverted order of where clause

* Add tests and fix formatting

* Fixed tests and addressed review comments

* Addressing comments and simplifying code

* Fixed formatting
  • Loading branch information
n-d-joshi authored Feb 3, 2025
1 parent 4e82f2b commit 79c9fe9
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ public void prettyPrint(Appendable appendable) throws IOException {

private void prettyPrintPg(Appendable appendable) throws IOException {
appendable.append("CREATE");
if (unique()) {
if (type() != null && (type().equals("SEARCH"))) {
appendable.append(" " + type());
} else if (unique()) {
appendable.append(" UNIQUE");
}
appendable
Expand All @@ -111,13 +113,42 @@ private void prettyPrintPg(Appendable appendable) throws IOException {
appendable.append(" INCLUDE (").append(storingString).append(")");
}

if (partitionBy() != null) {
String partitionByString =
partitionBy().stream()
.map(c -> quoteIdentifier(c, dialect()))
.collect(Collectors.joining(","));

if (!partitionByString.isEmpty()) {
appendable.append(" PARTITION BY ").append(partitionByString);
}
}

if (orderBy() != null) {
String orderByString =
orderBy().stream()
.map(c -> quoteIdentifier(c, dialect()))
.collect(Collectors.joining(","));

if (!orderByString.isEmpty()) {
appendable.append(" ORDER BY ").append(orderByString);
}
}

if (interleaveIn() != null) {
appendable.append(" INTERLEAVE IN ").append(quoteIdentifier(interleaveIn(), dialect()));
}

if (filter() != null && !filter().isEmpty()) {
appendable.append(" WHERE ").append(filter());
}

if (options() != null) {
String optionsString = String.join(",", options());
if (!optionsString.isEmpty()) {
appendable.append(" WITH (").append(optionsString).append(")");
}
}
}

private void prettyPrintGsql(Appendable appendable) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ public Ddl scan() {
Map<String, NavigableMap<String, Index.Builder>> indexes = Maps.newHashMap();
listIndexes(indexes);
listIndexColumns(builder, indexes);
if (dialect == Dialect.GOOGLE_STANDARD_SQL) {
listIndexOptions(builder, indexes);
}
listIndexOptions(builder, indexes);

for (Map.Entry<String, NavigableMap<String, Index.Builder>> tableEntry : indexes.entrySet()) {
String tableName = tableEntry.getKey();
Expand Down Expand Up @@ -364,10 +362,7 @@ private void listColumns(Ddl.Builder builder) {
dialect == Dialect.GOOGLE_STANDARD_SQL
? resultSet.getBoolean(15)
: resultSet.getString(15).equalsIgnoreCase("YES");
boolean isPlacementKey =
dialect == Dialect.GOOGLE_STANDARD_SQL
? resultSet.getBoolean(16)
: resultSet.getBoolean(16);
boolean isPlacementKey = resultSet.getBoolean(16);

builder
.createTable(tableName)
Expand Down Expand Up @@ -463,20 +458,15 @@ private void listIndexes(Map<String, NavigableMap<String, Index.Builder>> indexe
: resultSet.getString(5).equalsIgnoreCase("YES");
String filter = resultSet.isNull(6) ? null : resultSet.getString(6);

// Note that 'type' is only queried from GoogleSQL and is not from Postgres and
// the number of columns will be different.
String type =
(dialect == Dialect.GOOGLE_STANDARD_SQL && !resultSet.isNull(7))
? resultSet.getString(7)
: null;
String type = !resultSet.isNull(7) ? resultSet.getString(7) : null;

ImmutableList<String> searchPartitionBy =
(dialect == Dialect.GOOGLE_STANDARD_SQL && !resultSet.isNull(8))
!resultSet.isNull(8)
? ImmutableList.<String>builder().addAll(resultSet.getStringList(8)).build()
: null;

ImmutableList<String> searchOrderBy =
(dialect == Dialect.GOOGLE_STANDARD_SQL && !resultSet.isNull(9))
!resultSet.isNull(9)
? ImmutableList.<String>builder().addAll(resultSet.getStringList(9)).build()
: null;

Expand Down Expand Up @@ -513,10 +503,11 @@ Statement listIndexesSQL() {
case POSTGRESQL:
return Statement.of(
"SELECT t.table_schema, t.table_name, t.index_name, t.parent_table_name, t.is_unique,"
+ " t.is_null_filtered, t.filter FROM information_schema.indexes AS t "
+ " t.is_null_filtered, t.filter, t.index_type, t.search_partition_by, t.search_order_by"
+ " FROM information_schema.indexes AS t "
+ " WHERE t.table_schema NOT IN "
+ " ('information_schema', 'spanner_sys', 'pg_catalog')"
+ " AND t.index_type='INDEX' AND t.spanner_is_managed = 'NO' "
+ " AND (t.index_type='INDEX' OR t.index_type='SEARCH') AND t.spanner_is_managed = 'NO' "
+ " ORDER BY t.table_name, t.index_name");
default:
throw new IllegalArgumentException("Unrecognized dialect: " + dialect);
Expand All @@ -533,8 +524,8 @@ private void listIndexColumns(
String columnName = resultSet.getString(2);
String ordering = resultSet.isNull(3) ? null : resultSet.getString(3);
String indexLocalName = resultSet.getString(4);
String indexType = dialect == Dialect.GOOGLE_STANDARD_SQL ? resultSet.getString(5) : null;
String spannerType = dialect == Dialect.GOOGLE_STANDARD_SQL ? resultSet.getString(6) : null;
String indexType = resultSet.getString(5);
String spannerType = resultSet.getString(6);

if (indexLocalName.equals("PRIMARY_KEY")) {
IndexColumn.IndexColumnsBuilder<Table.Builder> pkBuilder =
Expand All @@ -546,8 +537,10 @@ private void listIndexColumns(
}
pkBuilder.end().endTable();
} else {
String tokenlistType = dialect == Dialect.POSTGRESQL ? "spanner.tokenlist" : "TOKENLIST";
if (indexType != null && ordering != null) {
if ((indexType.equals("SEARCH") && !spannerType.equals("TOKENLIST"))
// Non-tokenlist columns should not be included in the key for Search Indexes.
if ((indexType.equals("SEARCH") && !spannerType.contains(tokenlistType))
|| (indexType.equals("VECTOR") && !spannerType.startsWith("ARRAY"))) {
continue;
}
Expand All @@ -567,8 +560,9 @@ private void listIndexColumns(
}
IndexColumn.IndexColumnsBuilder<Index.Builder> indexColumnsBuilder =
indexBuilder.columns().create().name(columnName);
// Tokenlist columns do not have ordering.
if (spannerType != null
&& (spannerType.equals("TOKENLIST") || spannerType.startsWith("ARRAY"))) {
&& (spannerType.equals(tokenlistType) || spannerType.startsWith("ARRAY"))) {
indexColumnsBuilder.none();
} else if (ordering == null) {
indexColumnsBuilder.storing();
Expand Down Expand Up @@ -605,7 +599,8 @@ Statement listIndexColumnsSQL() {
+ "ORDER BY t.table_name, t.index_name, t.ordinal_position");
case POSTGRESQL:
return Statement.of(
"SELECT t.table_schema, t.table_name, t.column_name, t.column_ordering, t.index_name "
"SELECT t.table_schema, t.table_name, t.column_name, t.column_ordering, t.index_name,"
+ " t.index_type, t.spanner_type "
+ "FROM information_schema.index_columns AS t "
+ "WHERE t.table_schema NOT IN "
+ "('information_schema', 'spanner_sys', 'pg_catalog') "
Expand Down Expand Up @@ -674,6 +669,14 @@ Statement listIndexOptionsSQL() {
+ " WHERE t.table_schema NOT IN"
+ " ('INFORMATION_SCHEMA', 'SPANNER_SYS')"
+ " ORDER BY t.table_name, t.index_name, t.option_name");
case POSTGRESQL:
return Statement.of(
"SELECT t.table_schema, t.table_name, t.index_name, t.index_type,"
+ " t.option_name, t.option_type, t.option_value"
+ " FROM information_schema.index_options AS t"
+ " WHERE t.table_schema NOT IN"
+ " ('information_schema', 'spanner_sys', 'pg_catalog') "
+ " ORDER BY t.table_name, t.index_name, t.option_name");
default:
throw new IllegalArgumentException("Unrecognized dialect: " + dialect);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,10 @@ public void pgSimple() {
.asc("last_name")
.end()
.indexes(
ImmutableList.of("CREATE INDEX \"UsersByFirstName\" ON \"Users\" (\"first_name\")"))
ImmutableList.of(
"CREATE INDEX \"UsersByFirstName\" ON \"Users\" (\"first_name\")",
"CREATE SEARCH INDEX \"SearchIndex\" ON \"Users\" (\"tokens\")"
+ " WITH (sort_order_sharding=TRUE)"))
.foreignKeys(
ImmutableList.of(
"ALTER TABLE \"Users\" ADD CONSTRAINT \"fk\" FOREIGN KEY (\"first_name\")"
Expand Down Expand Up @@ -496,6 +499,12 @@ public void pgSimple() {
assertThat(
avroSchema.getProp(SPANNER_INDEX + "0"),
equalTo("CREATE INDEX \"UsersByFirstName\" ON \"Users\" (\"first_name\")"));

assertThat(
avroSchema.getProp(SPANNER_INDEX + "1"),
equalTo(
"CREATE SEARCH INDEX \"SearchIndex\" ON \"Users\" (\"tokens\") WITH (sort_order_sharding=TRUE)"));

assertThat(
avroSchema.getProp(SPANNER_FOREIGN_KEY + "0"),
equalTo(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,9 +278,16 @@ private void testPostgresSpannerToGCSAvroBase(
+ " \"NameTokens\" spanner.tokenlist generated always as (spanner.tokenize_fulltext(\"FirstName\")) stored hidden,\n"
+ "PRIMARY KEY(\"Id\"))",
testName);
String createSearchIndexStatement =
String.format(
"CREATE SEARCH INDEX \"%s_SearchIndex\"\n"
+ " ON \"%s_Singers\"(\"NameTokens\") ORDER BY \"Id\" WHERE \"Id\" IS NOT NULL\n"
+ " WITH (sort_order_sharding=TRUE, disable_automatic_uid_column=TRUE)",
testName, testName);

spannerResourceManager.executeDdlStatement(createEmptyTableStatement);
spannerResourceManager.executeDdlStatement(createSingersTableStatement);
spannerResourceManager.executeDdlStatement(createSearchIndexStatement);
List<Mutation> expectedData = generateTableRows(String.format("%s_Singers", testName));
spannerResourceManager.write(expectedData);
PipelineLauncher.LaunchConfig.Builder options =
Expand All @@ -305,6 +312,10 @@ private void testPostgresSpannerToGCSAvroBase(
List<Artifact> emptyArtifacts =
gcsClient.listArtifacts(
"output/", Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "Empty")));
List<Artifact> searchIndexArtifacts =
gcsClient.listArtifacts(
"output/",
Pattern.compile(String.format(".*/%s_%s.*\\.avro.*", testName, "SearchIndex")));
assertThat(singersArtifacts).isNotEmpty();
assertThat(emptyArtifacts).isNotEmpty();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,40 @@ public void testSearchIndex() {
+ " STORING (`Data`) PARTITION BY `UserId`, INTERLEAVE IN `Users` OPTIONS (sort_order_sharding=TRUE)"));
}

@Test
public void testpgSearchIndex() {
  // Expected PostgreSQL DDL for a SEARCH index with two tokenlist key columns,
  // one stored column, plus PARTITION BY / ORDER BY / INTERLEAVE IN / WITH clauses.
  String expectedDdl =
      "CREATE SEARCH INDEX \"SearchIndex\" ON \"Messages\"(\"subject_tokens\" , \"body_tokens\" )"
          + " INCLUDE (\"data\") PARTITION BY \"userid\" ORDER BY \"orderid\" INTERLEAVE IN \"Users\" WITH (sort_order_sharding=TRUE)";

  // Configure the index-level properties first.
  Index.Builder indexBuilder =
      Index.builder(Dialect.POSTGRESQL)
          .name("SearchIndex")
          .type("SEARCH")
          .table("Messages")
          .interleaveIn("Users")
          .partitionBy(ImmutableList.of("userid"))
          .orderBy(ImmutableList.of("orderid"))
          .options(ImmutableList.of("sort_order_sharding=TRUE"));

  // Then the columns: tokenlist key columns carry no ordering (none()),
  // and "data" is a stored (INCLUDE) column.
  indexBuilder
      .columns()
      .create().name("subject_tokens").none().endIndexColumn()
      .create().name("body_tokens").none().endIndexColumn()
      .create().name("data").storing().endIndexColumn()
      .end();

  assertThat(indexBuilder.build().prettyPrint(), equalToCompressingWhiteSpace(expectedDdl));
}

@Test
public void testVectorIndex() {
Index.Builder builder =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,38 @@ public void searchIndexes() throws Exception {
assertThat(ddl.prettyPrint(), equalToCompressingWhiteSpace(String.join("", statements)));
}

@Test
public void pgSearchIndexes() throws Exception {
  // NOTE: the leading spaces on the later statements are deliberate — they
  // prefix the statements so their ordering matches prettyPrint() output.
  String usersTable =
      "CREATE TABLE \"Users\" ("
          + " \"userid\" bigint NOT NULL,"
          + " PRIMARY KEY (\"userid\")"
          + " )";
  String messagesTable =
      " CREATE TABLE \"Messages\" ("
          + " \"userid\" bigint NOT NULL,"
          + " \"messageid\" bigint NOT NULL,"
          + " \"orderid\" bigint NOT NULL,"
          + " \"subject\" character varying,"
          + " \"subject_tokens\" spanner.tokenlist GENERATED ALWAYS AS (spanner.tokenize_fulltext(subject)) STORED HIDDEN,"
          + " \"body\" character varying,"
          + " \"body_tokens\" spanner.tokenlist GENERATED ALWAYS AS (spanner.tokenize_fulltext(body)) STORED HIDDEN,"
          + " \"data\" character varying,"
          + " PRIMARY KEY (\"userid\", \"messageid\")"
          + " ) INTERLEAVE IN PARENT \"Users\"";
  String searchIndex =
      " CREATE SEARCH INDEX \"SearchIndex\" ON \"Messages\"(\"subject_tokens\" , \"body_tokens\" )"
          + " INCLUDE (\"data\")"
          + " PARTITION BY \"userid\""
          + " ORDER BY \"orderid\""
          + " INTERLEAVE IN \"Users\""
          + " WITH (sort_order_sharding=TRUE)";
  List<String> statements = Arrays.asList(usersTable, messagesTable, searchIndex);

  SPANNER_SERVER.createPgDatabase(dbId, statements);
  Ddl ddl = getPgDatabaseDdl();

  // The scanned schema must round-trip: prettyPrint() should reproduce the
  // concatenation of the DDL statements used to create the database.
  assertThat(ddl.prettyPrint(), equalToCompressingWhiteSpace(String.join("", statements)));
}

@Test
public void vectorIndexes() throws Exception {
List<String> statements =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,10 +100,11 @@ public void testListIndexesSQL() {
postgresSQLInfoScanner.listIndexesSQL().getSql(),
equalToCompressingWhiteSpace(
"SELECT t.table_schema, t.table_name, t.index_name, t.parent_table_name, t.is_unique,"
+ " t.is_null_filtered, t.filter FROM information_schema.indexes AS t "
+ " t.is_null_filtered, t.filter, t.index_type, t.search_partition_by, t.search_order_by"
+ " FROM information_schema.indexes AS t "
+ " WHERE t.table_schema NOT IN "
+ " ('information_schema', 'spanner_sys', 'pg_catalog')"
+ " AND t.index_type='INDEX' AND t.spanner_is_managed = 'NO' "
+ " AND (t.index_type='INDEX' OR t.index_type='SEARCH') AND t.spanner_is_managed = 'NO' "
+ " ORDER BY t.table_name, t.index_name"));
}

Expand All @@ -122,7 +123,8 @@ public void testListIndexColumnsSQL() {
assertThat(
postgresSQLInfoScanner.listIndexColumnsSQL().getSql(),
equalToCompressingWhiteSpace(
"SELECT t.table_schema, t.table_name, t.column_name, t.column_ordering, t.index_name "
"SELECT t.table_schema, t.table_name, t.column_name, t.column_ordering, t.index_name, "
+ "t.index_type, t.spanner_type "
+ "FROM information_schema.index_columns AS t "
+ "WHERE t.table_schema NOT IN "
+ "('information_schema', 'spanner_sys', 'pg_catalog') "
Expand Down

0 comments on commit 79c9fe9

Please sign in to comment.