Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti

### Changes

- The `gcpServiceAccount` configuration value now affects Polaris behavior (enables service account impersonation). This value was previously defined but unused. This change may affect existing deployments that have populated this property.
- `client.region` is no longer considered a "credential" property (related to Iceberg REST Catalog API).
- Relaxed the requirements for S3 storage's ARN to allow Polaris to connect to more non-AWS S3 storage appliances.
- Added checksum to helm deployment so that it will restart when the configmap has changed.
Expand Down
1 change: 1 addition & 0 deletions gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ cel-bom = { module = "org.projectnessie.cel:cel-bom", version = "0.5.3" }
commons-lang3 = { module = "org.apache.commons:commons-lang3", version = "3.20.0" }
commons-text = { module = "org.apache.commons:commons-text", version = "1.15.0" }
errorprone = { module = "com.google.errorprone:error_prone_core", version = "2.45.0" }
google-cloud-iamcredentials = { module = "com.google.cloud:google-cloud-iamcredentials", version = "2.60.0" }
google-cloud-storage-bom = { module = "com.google.cloud:google-cloud-storage-bom", version = "2.60.0" }
guava = { module = "com.google.guava:guava", version = "33.5.0-jre" }
h2 = { module = "com.h2database:h2", version = "2.4.240" }
Expand Down
1 change: 1 addition & 0 deletions polaris-core/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ dependencies {
implementation("org.apache.iceberg:iceberg-gcp")
implementation(platform(libs.google.cloud.storage.bom))
implementation("com.google.cloud:google-cloud-storage")
implementation(libs.google.cloud.iamcredentials)

testCompileOnly(project(":polaris-immutables"))
testAnnotationProcessor(project(":polaris-immutables", configuration = "processor"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,25 @@

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.auth.http.HttpTransportFactory;
import com.google.auth.oauth2.AccessToken;
import com.google.auth.oauth2.CredentialAccessBoundary;
import com.google.auth.oauth2.DownscopedCredentials;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.iam.credentials.v1.GenerateAccessTokenRequest;
import com.google.cloud.iam.credentials.v1.GenerateAccessTokenResponse;
import com.google.cloud.iam.credentials.v1.IamCredentialsClient;
import com.google.cloud.iam.credentials.v1.IamCredentialsSettings;
import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.Duration;
import com.google.protobuf.Timestamp;
import jakarta.annotation.Nonnull;
import java.io.IOException;
import java.net.URI;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
Expand All @@ -55,6 +64,9 @@ public class GcpCredentialsStorageIntegration
extends InMemoryStorageIntegration<GcpStorageConfigurationInfo> {
private static final Logger LOGGER =
LoggerFactory.getLogger(GcpCredentialsStorageIntegration.class);
public static final String SERVICE_ACCOUNT_PREFIX = "projects/-/serviceAccounts/";
public static final String IMPERSONATION_SCOPE =
"https://www.googleapis.com/auth/devstorage.read_write";

private final GoogleCredentials sourceCredentials;
private final HttpTransportFactory transportFactory;
Expand Down Expand Up @@ -84,18 +96,20 @@ public StorageAccessConfig getSubscopedCreds(
throw new RuntimeException("Unable to refresh GCP credentials", e);
}

GoogleCredentials credentialsToDownscope = getBaseCredentials();

CredentialAccessBoundary accessBoundary =
generateAccessBoundaryRules(
allowListOperation, allowedReadLocations, allowedWriteLocations);
DownscopedCredentials credentials =
DownscopedCredentials.newBuilder()
.setHttpTransportFactory(transportFactory)
.setSourceCredential(sourceCredentials)
.setSourceCredential(credentialsToDownscope)
.setCredentialAccessBoundary(accessBoundary)
.build();
AccessToken token;
try {
token = credentials.refreshAccessToken();
token = refreshAccessToken(credentials);
} catch (IOException e) {
LOGGER
.atError()
Expand Down Expand Up @@ -123,6 +137,46 @@ public StorageAccessConfig getSubscopedCreds(
return accessConfig.build();
}

/**
* Returns the credential to be used as the source for downscoping. If a specific service account
* is configured, it impersonates that account first.
*/
private GoogleCredentials getBaseCredentials() {
if (config().getGcpServiceAccount() != null) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting that this config value was defined before, but not used 🤔

Does anyone have insight into why it was this way?

Technically, now that GcpServiceAccount config values start affecting Polaris behaviour, this could be a breaking change in existing deployments that may have (possibly accidental and/or incorrect) values in that config property.

I do believe that such a situation is unlikely, so it should be fine to proceed with this PR. However, it probably deserves mentioning as a Change in CHANGELOG.md

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this is in the CLI doc for a while, https://polaris.apache.org/in-dev/unreleased/command-line-interface/#catalogs

      --service-account  (Only for GCS) The service account to use when connecting to GCS

Agreed with @dimas-b, not particularly a big concern, but +1 on adding this to the CHANGELOG.md.

return createImpersonatedCredentials(sourceCredentials, config().getGcpServiceAccount());
}
return sourceCredentials;
}

private GoogleCredentials createImpersonatedCredentials(
GoogleCredentials source, String targetServiceAccount) {
try (IamCredentialsClient iamCredentialsClient = createIamCredentialsClient(source)) {
GenerateAccessTokenRequest request =
GenerateAccessTokenRequest.newBuilder()
.setName(SERVICE_ACCOUNT_PREFIX + targetServiceAccount)
.addAllDelegates(new ArrayList<>())
// 'cloud-platform' is often preferred for impersonation,
// but devstorage.read_write is sufficient for GCS specific operations.
// See https://docs.cloud.google.com/storage/docs/oauth-scopes
.addScope(IMPERSONATION_SCOPE)
.setLifetime(Duration.newBuilder().setSeconds(3600).build())
.build();

GenerateAccessTokenResponse response = iamCredentialsClient.generateAccessToken(request);

Timestamp expirationTime = response.getExpireTime();
// Use Instant to avoid precision loss or overflow issues with Date multiplication
Date expirationDate =
Date.from(Instant.ofEpochSecond(expirationTime.getSeconds(), expirationTime.getNanos()));

AccessToken accessToken = new AccessToken(response.getAccessToken(), expirationDate);
return GoogleCredentials.create(accessToken);
} catch (IOException e) {
throw new RuntimeException(
"Unable to impersonate GCP service account: " + targetServiceAccount, e);
}
}

private String convertToString(CredentialAccessBoundary accessBoundary) {
try {
return new ObjectMapper().writeValueAsString(accessBoundary);
Expand Down Expand Up @@ -211,6 +265,20 @@ public static CredentialAccessBoundary generateAccessBoundaryRules(
return accessBoundaryBuilder.build();
}

@VisibleForTesting
protected AccessToken refreshAccessToken(DownscopedCredentials credentials) throws IOException {
return credentials.refreshAccessToken();
}

@VisibleForTesting
protected IamCredentialsClient createIamCredentialsClient(GoogleCredentials credentials)
throws IOException {
return IamCredentialsClient.create(
IamCredentialsSettings.newBuilder()
.setCredentialsProvider(FixedCredentialsProvider.create(credentials))
.build());
}

private static String bucketResource(String bucket) {
return "//storage.googleapis.com/projects/_/buckets/" + bucket;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,18 @@
import com.google.auth.http.HttpTransportFactory;
import com.google.auth.oauth2.AccessToken;
import com.google.auth.oauth2.CredentialAccessBoundary;
import com.google.auth.oauth2.DownscopedCredentials;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.ServiceOptions;
import com.google.cloud.iam.credentials.v1.GenerateAccessTokenRequest;
import com.google.cloud.iam.credentials.v1.GenerateAccessTokenResponse;
import com.google.cloud.iam.credentials.v1.IamCredentialsClient;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageException;
import com.google.cloud.storage.StorageOptions;
import com.google.protobuf.Timestamp;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
Expand All @@ -55,6 +60,7 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mockito;

class GcpCredentialsStorageIntegrationTest extends BaseStorageIntegrationTest {

Expand Down Expand Up @@ -309,6 +315,67 @@ public void testRefreshCredentialsEndpointIsReturned() throws IOException {
.isEqualTo(REFRESH_ENDPOINT);
}

@Test
public void testImpersonation() throws IOException {
String serviceAccount = "[email protected]";
GcpStorageConfigurationInfo config =
GcpStorageConfigurationInfo.builder()
.addAllAllowedLocations(List.of("gs://bucket/path"))
.gcpServiceAccount(serviceAccount)
.build();

IamCredentialsClient mockIamClient = Mockito.mock(IamCredentialsClient.class);
GenerateAccessTokenResponse mockResponse =
GenerateAccessTokenResponse.newBuilder()
.setAccessToken("impersonated-token")
.setExpireTime(
Timestamp.newBuilder().setSeconds(System.currentTimeMillis() / 1000 + 3600).build())
.build();
Mockito.when(mockIamClient.generateAccessToken(Mockito.any(GenerateAccessTokenRequest.class)))
.thenReturn(mockResponse);

GoogleCredentials mockCreds = Mockito.mock(GoogleCredentials.class);
Mockito.when(mockCreds.createScoped(Mockito.any(String.class))).thenReturn(mockCreds);

GcpCredentialsStorageIntegration integration =
new GcpCredentialsStorageIntegration(
config,
mockCreds,
ServiceOptions.getFromServiceLoader(
HttpTransportFactory.class, NetHttpTransport::new)) {
@Override
protected IamCredentialsClient createIamCredentialsClient(GoogleCredentials credentials) {
return mockIamClient;
}

@Override
protected AccessToken refreshAccessToken(DownscopedCredentials credentials) {
return new AccessToken("downscoped-token", new Date());
}
};

integration.getSubscopedCreds(
EMPTY_REALM_CONFIG,
true,
Set.of("gs://bucket/path"),
Set.of("gs://bucket/path"),
Optional.empty());

Mockito.verify(mockIamClient)
.generateAccessToken(
Mockito.argThat(
request ->
request
.getName()
.equals(
GcpCredentialsStorageIntegration.SERVICE_ACCOUNT_PREFIX
+ serviceAccount)
&& request.getScopeCount() > 0
&& request
.getScope(0)
.equals(GcpCredentialsStorageIntegration.IMPERSONATION_SCOPE)));
}

private boolean isNotNull(JsonNode node) {
return node != null && !node.isNull();
}
Expand Down