
Commit 0279ea7

[Iceberg] Support setting warehouse data directory for Hadoop catalog
1 parent 57dc966 commit 0279ea7

19 files changed, +931 −46 lines

presto-iceberg/pom.xml

Lines changed: 22 additions & 0 deletions
@@ -263,6 +263,28 @@
             </exclusions>
         </dependency>
 
+        <dependency>
+            <groupId>org.testcontainers</groupId>
+            <artifactId>testcontainers</artifactId>
+            <scope>test</scope>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>slf4j-api</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-core</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>com.amazonaws</groupId>
+            <artifactId>aws-java-sdk-s3</artifactId>
+        </dependency>
+
         <dependency>
             <groupId>org.apache.iceberg</groupId>
             <artifactId>iceberg-core</artifactId>

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergConfig.java

Lines changed: 14 additions & 0 deletions
@@ -50,6 +50,7 @@ public class IcebergConfig
     private HiveCompressionCodec compressionCodec = GZIP;
     private CatalogType catalogType = HIVE;
     private String catalogWarehouse;
+    private String catalogWarehouseDataDir;
     private int catalogCacheSize = 10;
     private int maxPartitionsPerWriter = 100;
     private List<String> hadoopConfigResources = ImmutableList.of();
@@ -127,6 +128,19 @@ public IcebergConfig setCatalogWarehouse(String catalogWarehouse)
         return this;
     }
 
+    public String getCatalogWarehouseDataDir()
+    {
+        return catalogWarehouseDataDir;
+    }
+
+    @Config("iceberg.catalog.warehouse.datadir")
+    @ConfigDescription("Iceberg catalog default root data writing directory")
+    public IcebergConfig setCatalogWarehouseDataDir(String catalogWarehouseDataDir)
+    {
+        this.catalogWarehouseDataDir = catalogWarehouseDataDir;
+        return this;
+    }
+
     @Min(1)
     public int getCatalogCacheSize()
     {
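
Note on usage: the new iceberg.catalog.warehouse.datadir property complements iceberg.catalog.warehouse, so table metadata stays under the warehouse root while newly written data files go to the configured data directory. A minimal catalog properties sketch (bucket names are illustrative, not part of this commit):

    connector.name=iceberg
    iceberg.catalog.type=HADOOP
    iceberg.catalog.warehouse=s3://metadata-bucket/warehouse/
    iceberg.catalog.warehouse.datadir=s3://data-bucket/warehouse-data/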

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeCatalogFactory.java

Lines changed: 7 additions & 0 deletions
@@ -52,6 +52,7 @@ public class IcebergNativeCatalogFactory
     private final String catalogName;
     protected final CatalogType catalogType;
     private final String catalogWarehouse;
+    private final String catalogWarehouseDataDir;
     protected final IcebergConfig icebergConfig;
 
     private final List<String> hadoopConfigResources;
@@ -69,6 +70,7 @@ public IcebergNativeCatalogFactory(
         this.icebergConfig = requireNonNull(config, "config is null");
         this.catalogType = config.getCatalogType();
         this.catalogWarehouse = config.getCatalogWarehouse();
+        this.catalogWarehouseDataDir = config.getCatalogWarehouseDataDir();
         this.hadoopConfigResources = icebergConfig.getHadoopConfigResources();
         this.s3ConfigurationUpdater = requireNonNull(s3ConfigurationUpdater, "s3ConfigurationUpdater is null");
         this.gcsConfigurationInitialize = requireNonNull(gcsConfigurationInitialize, "gcsConfigurationInitialize is null");
@@ -90,6 +92,11 @@ public Catalog getCatalog(ConnectorSession session)
         }
     }
 
+    public String getCatalogWarehouseDataDir()
+    {
+        return this.catalogWarehouseDataDir;
+    }
+
     public SupportsNamespaces getNamespaces(ConnectorSession session)
     {
         Catalog catalog = getCatalog(session);

presto-iceberg/src/main/java/com/facebook/presto/iceberg/IcebergNativeMetadata.java

Lines changed: 25 additions & 2 deletions
@@ -56,6 +56,7 @@
 import java.util.concurrent.ConcurrentMap;
 import java.util.stream.Stream;
 
+import static com.facebook.presto.iceberg.CatalogType.HADOOP;
 import static com.facebook.presto.iceberg.IcebergSessionProperties.getCompressionCodec;
 import static com.facebook.presto.iceberg.IcebergTableProperties.getFileFormat;
 import static com.facebook.presto.iceberg.IcebergTableProperties.getPartitioning;
@@ -83,12 +84,14 @@
 import static java.util.Objects.requireNonNull;
 import static java.util.stream.Collectors.toList;
 import static java.util.stream.Collectors.toMap;
+import static org.apache.iceberg.TableProperties.WRITE_DATA_LOCATION;
 
 public class IcebergNativeMetadata
         extends IcebergAbstractMetadata
 {
     private static final String VIEW_DIALECT = "presto";
 
+    private final Optional<String> warehouseDataDir;
     private final IcebergNativeCatalogFactory catalogFactory;
     private final CatalogType catalogType;
     private final ConcurrentMap<SchemaTableName, View> icebergViews = new ConcurrentHashMap<>();
@@ -107,6 +110,7 @@ public IcebergNativeMetadata(
         super(typeManager, functionResolution, rowExpressionService, commitTaskCodec, nodeVersion, filterStatsCalculatorService, statisticsFileCache);
         this.catalogFactory = requireNonNull(catalogFactory, "catalogFactory is null");
         this.catalogType = requireNonNull(catalogType, "catalogType is null");
+        this.warehouseDataDir = Optional.ofNullable(catalogFactory.getCatalogWarehouseDataDir());
     }
 
     @Override
@@ -316,20 +320,30 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
         try {
             TableIdentifier tableIdentifier = toIcebergTableIdentifier(schemaTableName, catalogFactory.isNestedNamespaceEnabled());
             String targetPath = getTableLocation(tableMetadata.getProperties());
+            Map<String, String> tableProperties = populateTableProperties(tableMetadata, fileFormat, session, catalogType);
+            if (!tableProperties.containsKey(WRITE_DATA_LOCATION)) {
+                Optional<String> dataLocation = getDataLocationBasedOnWarehouseDataDir(schemaTableName);
+                if (dataLocation.isPresent()) {
+                    ImmutableMap.Builder<String, String> propertiesBuilder = ImmutableMap.builder();
+                    tableProperties = propertiesBuilder.putAll(tableProperties)
+                            .put(WRITE_DATA_LOCATION, dataLocation.get())
+                            .build();
+                }
+            }
             if (!isNullOrEmpty(targetPath)) {
                 transaction = catalogFactory.getCatalog(session).newCreateTableTransaction(
                         tableIdentifier,
                         schema,
                         partitionSpec,
                         targetPath,
-                        populateTableProperties(tableMetadata, fileFormat, session, catalogType));
+                        tableProperties);
             }
             else {
                 transaction = catalogFactory.getCatalog(session).newCreateTableTransaction(
                         tableIdentifier,
                         schema,
                         partitionSpec,
-                        populateTableProperties(tableMetadata, fileFormat, session, catalogType));
+                        tableProperties);
             }
         }
         catch (AlreadyExistsException e) {
@@ -379,4 +393,13 @@ public void unregisterTable(ConnectorSession clientSession, SchemaTableName sche
     {
         catalogFactory.getCatalog(clientSession).dropTable(toIcebergTableIdentifier(schemaTableName, catalogFactory.isNestedNamespaceEnabled()), false);
     }
+
+    private Optional<String> getDataLocationBasedOnWarehouseDataDir(SchemaTableName schemaTableName)
+    {
+        if (!catalogType.equals(HADOOP)) {
+            return Optional.empty();
+        }
+        return Optional.ofNullable(warehouseDataDir.map(base -> base + schemaTableName.getSchemaName() + "/" + schemaTableName.getTableName())
+                .orElse(null));
+    }
 }
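
For tables created without an explicit write.data.path, the new getDataLocationBasedOnWarehouseDataDir method derives one by plain string concatenation, so the configured directory is expected to carry its own trailing separator. A standalone sketch of the derivation (values are illustrative; WRITE_DATA_LOCATION is org.apache.iceberg.TableProperties.WRITE_DATA_LOCATION, i.e. "write.data.path"):

    // Derivation performed for a HADOOP catalog when the table property is absent
    String warehouseDataDir = "s3://data-bucket/warehouse-data/";  // iceberg.catalog.warehouse.datadir
    String schemaName = "tpch";
    String tableName = "orders";
    String dataLocation = warehouseDataDir + schemaName + "/" + tableName;
    // => s3://data-bucket/warehouse-data/tpch/orders, stored as the table's write.data.path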

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedSmokeTestBase.java

Lines changed: 5 additions & 5 deletions
@@ -24,6 +24,7 @@
 import com.facebook.presto.testing.assertions.Assert;
 import com.facebook.presto.tests.AbstractTestIntegrationSmokeTest;
 import com.google.common.collect.ImmutableMap;
+import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.UpdateProperties;
 import org.intellij.lang.annotations.Language;
@@ -32,7 +33,6 @@
 
 import java.io.IOException;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.util.function.BiConsumer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -738,7 +738,7 @@ private void testSchemaEvolution(Session session, FileFormat fileFormat)
     }
 
     @Test
-    private void testCreateTableLike()
+    protected void testCreateTableLike()
     {
         Session session = getSession();
         String schemaName = session.getSchema().get();
@@ -892,7 +892,7 @@ private void testWithAllFormatVersions(BiConsumer<String, String> test)
         test.accept("2", "merge-on-read");
     }
 
-    private String getTablePropertiesString(String tableName)
+    protected String getTablePropertiesString(String tableName)
     {
         MaterializedResult showCreateTable = computeActual("SHOW CREATE TABLE " + tableName);
         String createTable = (String) getOnlyElement(showCreateTable.getOnlyColumnAsSet());
@@ -1225,8 +1225,8 @@ protected String getLocation(String schema, String table)
 
     protected Path getCatalogDirectory()
     {
-        Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
-        return getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false);
+        java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
+        return new Path(getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile().toURI());
     }
 
     protected Table getIcebergTable(ConnectorSession session, String namespace, String tableName)
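
Returning org.apache.hadoop.fs.Path instead of java.nio.file.Path means the catalog directory is no longer assumed to live on the local filesystem. A short sketch of the conversion the new code performs (the local path is illustrative):

    // A local directory round-trips through a file: URI into a Hadoop Path
    java.nio.file.Path local = java.nio.file.Paths.get("/tmp/iceberg_data/catalog");
    org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(local.toFile().toURI());
    // hadoopPath.toString() -> "file:/tmp/iceberg_data/catalog"
    // A subclass backed by object storage could instead return e.g. new Path("s3://bucket/catalog")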

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergDistributedTestBase.java

Lines changed: 27 additions & 18 deletions
@@ -32,6 +32,9 @@
 import com.facebook.presto.hive.HiveHdfsConfiguration;
 import com.facebook.presto.hive.MetastoreClientConfig;
 import com.facebook.presto.hive.authentication.NoHdfsAuthentication;
+import com.facebook.presto.hive.s3.HiveS3Config;
+import com.facebook.presto.hive.s3.PrestoS3ConfigurationUpdater;
+import com.facebook.presto.hive.s3.S3ConfigurationUpdater;
 import com.facebook.presto.iceberg.delete.DeleteFile;
 import com.facebook.presto.metadata.CatalogMetadata;
 import com.facebook.presto.metadata.Metadata;
@@ -63,6 +66,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.CatalogUtil;
 import org.apache.iceberg.FileScanTask;
@@ -96,7 +100,6 @@
 import java.lang.reflect.Field;
 import java.net.URI;
 import java.nio.ByteBuffer;
-import java.nio.file.Path;
 import java.time.LocalDateTime;
 import java.time.LocalTime;
 import java.time.format.DateTimeFormatter;
@@ -1679,14 +1682,14 @@ public void testMetadataVersionsMaintainingProperties()
         // Table `test_table_with_default_setting_properties`'s current metadata record all 5 previous metadata files
         assertEquals(defaultTableMetadata.previousFiles().size(), 5);
 
-        FileSystem fileSystem = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), new org.apache.hadoop.fs.Path(settingTable.location()));
+        FileSystem fileSystem = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), new Path(settingTable.location()));
 
         // Table `test_table_with_setting_properties`'s all existing metadata files count is 2
-        FileStatus[] settingTableFiles = fileSystem.listStatus(new org.apache.hadoop.fs.Path(settingTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION));
+        FileStatus[] settingTableFiles = fileSystem.listStatus(new Path(settingTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION));
         assertEquals(settingTableFiles.length, 2);
 
         // Table `test_table_with_default_setting_properties`'s all existing metadata files count is 6
-        FileStatus[] defaultTableFiles = fileSystem.listStatus(new org.apache.hadoop.fs.Path(defaultTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION));
+        FileStatus[] defaultTableFiles = fileSystem.listStatus(new Path(defaultTable.location(), "metadata"), name -> name.getName().contains(METADATA_FILE_EXTENSION));
         assertEquals(defaultTableFiles.length, 6);
     }
     finally {
@@ -2261,12 +2264,12 @@ private void testCheckDeleteFiles(Table icebergTable, int expectedSize, List<Fil
     private void writePositionDeleteToNationTable(Table icebergTable, String dataFilePath, long deletePos)
             throws IOException
     {
-        Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
+        java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
         File metastoreDir = getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile();
-        org.apache.hadoop.fs.Path metadataDir = new org.apache.hadoop.fs.Path(metastoreDir.toURI());
+        Path metadataDir = new Path(metastoreDir.toURI());
         String deleteFileName = "delete_file_" + randomUUID();
         FileSystem fs = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), metadataDir);
-        org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(metadataDir, deleteFileName);
+        Path path = new Path(metadataDir, deleteFileName);
         PositionDeleteWriter<Record> writer = Parquet.writeDeletes(HadoopOutputFile.fromPath(path, fs))
                 .createWriterFunc(GenericParquetWriter::buildWriter)
                 .forTable(icebergTable)
@@ -2293,13 +2296,13 @@ private void writeEqualityDeleteToNationTable(Table icebergTable, Map<String, Ob
     private void writeEqualityDeleteToNationTable(Table icebergTable, Map<String, Object> overwriteValues, Map<String, Object> partitionValues)
             throws Exception
     {
-        Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
+        java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
         File metastoreDir = getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile();
-        org.apache.hadoop.fs.Path metadataDir = new org.apache.hadoop.fs.Path(metastoreDir.toURI());
+        Path metadataDir = new Path(metastoreDir.toURI());
         String deleteFileName = "delete_file_" + randomUUID();
         FileSystem fs = getHdfsEnvironment().getFileSystem(new HdfsContext(SESSION), metadataDir);
         Schema deleteRowSchema = icebergTable.schema().select(overwriteValues.keySet());
-        Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(HadoopOutputFile.fromPath(new org.apache.hadoop.fs.Path(metadataDir, deleteFileName), fs))
+        Parquet.DeleteWriteBuilder writerBuilder = Parquet.writeDeletes(HadoopOutputFile.fromPath(new Path(metadataDir, deleteFileName), fs))
                 .forTable(icebergTable)
                 .rowSchema(deleteRowSchema)
                 .createWriterFunc(GenericParquetWriter::buildWriter)
@@ -2320,13 +2323,19 @@ private void writeEqualityDeleteToNationTable(Table icebergTable, Map<String, Ob
         icebergTable.newRowDelta().addDeletes(writer.toDeleteFile()).commit();
     }
 
-    public static HdfsEnvironment getHdfsEnvironment()
+    protected HdfsEnvironment getHdfsEnvironment()
     {
         HiveClientConfig hiveClientConfig = new HiveClientConfig();
         MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig();
-        HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, metastoreClientConfig),
-                ImmutableSet.of(),
-                hiveClientConfig);
+        HiveS3Config hiveS3Config = new HiveS3Config();
+        return getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config);
+    }
+
+    public static HdfsEnvironment getHdfsEnvironment(HiveClientConfig hiveClientConfig, MetastoreClientConfig metastoreClientConfig, HiveS3Config hiveS3Config)
+    {
+        S3ConfigurationUpdater s3ConfigurationUpdater = new PrestoS3ConfigurationUpdater(hiveS3Config);
+        HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveClientConfig, metastoreClientConfig, s3ConfigurationUpdater, ignored -> {}),
+                ImmutableSet.of(), hiveClientConfig);
         return new HdfsEnvironment(hdfsConfiguration, metastoreClientConfig, new NoHdfsAuthentication());
     }
 
@@ -2348,18 +2357,18 @@ protected Table loadTable(String tableName)
 
     protected Map<String, String> getProperties()
    {
-        File metastoreDir = getCatalogDirectory();
+        Path metastoreDir = getCatalogDirectory();
         return ImmutableMap.of("warehouse", metastoreDir.toString());
     }
 
-    protected File getCatalogDirectory()
+    protected Path getCatalogDirectory()
     {
-        Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
+        java.nio.file.Path dataDirectory = getDistributedQueryRunner().getCoordinator().getDataDirectory();
         switch (catalogType) {
             case HIVE:
             case HADOOP:
             case NESSIE:
-                return getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile();
+                return new Path(getIcebergDataDirectoryPath(dataDirectory, catalogType.name(), new IcebergConfig().getFileFormat(), false).toFile().toURI());
         }
 
         throw new PrestoException(NOT_SUPPORTED, "Unsupported Presto Iceberg catalog type " + catalogType);
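
The new static getHdfsEnvironment overload lets subclasses wire S3 settings into the Hadoop configuration, presumably for tests that write to an S3-compatible store (the testcontainers and AWS SDK dependencies added in the pom point the same way). A hypothetical usage sketch, assuming the standard presto-hive HiveS3Config setters:

    // Illustrative only: endpoint and credentials are placeholders for a local S3-compatible service
    HiveS3Config s3Config = new HiveS3Config()
            .setS3AwsAccessKey("accesskey")
            .setS3AwsSecretKey("secretkey")
            .setS3Endpoint("http://127.0.0.1:9000")
            .setS3PathStyleAccess(true);
    HdfsEnvironment hdfsEnvironment = getHdfsEnvironment(new HiveClientConfig(), new MetastoreClientConfig(), s3Config);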

presto-iceberg/src/test/java/com/facebook/presto/iceberg/IcebergQueryRunner.java

Lines changed: 6 additions & 7 deletions
@@ -43,6 +43,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.Optional;
 import java.util.OptionalInt;
@@ -202,13 +203,11 @@ public static DistributedQueryRunner createIcebergQueryRunner(
         String catalogType = extraConnectorProperties.getOrDefault("iceberg.catalog.type", HIVE.name());
         Path icebergDataDirectory = getIcebergDataDirectoryPath(queryRunner.getCoordinator().getDataDirectory(), catalogType, format, addStorageFormatToPath);
 
-        Map<String, String> icebergProperties = ImmutableMap.<String, String>builder()
-                .put("iceberg.file-format", format.name())
-                .putAll(getConnectorProperties(CatalogType.valueOf(catalogType), icebergDataDirectory))
-                .putAll(extraConnectorProperties)
-                .build();
-
-        queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
+        Map<String, String> icebergProperties = new HashMap<>();
+        icebergProperties.put("iceberg.file-format", format.name());
+        icebergProperties.putAll(getConnectorProperties(CatalogType.valueOf(catalogType), icebergDataDirectory));
+        icebergProperties.putAll(extraConnectorProperties);
+        queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", ImmutableMap.copyOf(icebergProperties));
 
         if (addJmxPlugin) {
             queryRunner.installPlugin(new JmxPlugin());
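
The switch from an ImmutableMap builder to a HashMap appears intended to let extraConnectorProperties override the defaults: Guava's ImmutableMap.Builder rejects duplicate keys at build() time, while a HashMap takes the last write. A minimal illustration:

    Map<String, String> props = new HashMap<>();
    props.put("iceberg.file-format", "PARQUET");   // default set first
    props.put("iceberg.file-format", "ORC");       // override; last write wins in a HashMap
    Map<String, String> frozen = ImmutableMap.copyOf(props);  // {iceberg.file-format=ORC}
    // The former ImmutableMap.<String, String>builder()...build() chain would have
    // thrown IllegalArgumentException on the duplicate key.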

presto-iceberg/src/test/java/com/facebook/presto/iceberg/TestIcebergConfig.java

Lines changed: 3 additions & 0 deletions
@@ -49,6 +49,7 @@ public void testDefaults()
                 .setCompressionCodec(GZIP)
                 .setCatalogType(HIVE)
                 .setCatalogWarehouse(null)
+                .setCatalogWarehouseDataDir(null)
                 .setCatalogCacheSize(10)
                 .setHadoopConfigResources(null)
                 .setHiveStatisticsMergeFlags("")
@@ -81,6 +82,7 @@
                 .put("iceberg.compression-codec", "NONE")
                 .put("iceberg.catalog.type", "HADOOP")
                 .put("iceberg.catalog.warehouse", "path")
+                .put("iceberg.catalog.warehouse.datadir", "path_data_dir")
                 .put("iceberg.catalog.cached-catalog-num", "6")
                 .put("iceberg.hadoop.config.resources", "/etc/hadoop/conf/core-site.xml")
                 .put("iceberg.max-partitions-per-writer", "222")
@@ -110,6 +112,7 @@
                 .setCompressionCodec(NONE)
                 .setCatalogType(HADOOP)
                 .setCatalogWarehouse("path")
+                .setCatalogWarehouseDataDir("path_data_dir")
                 .setCatalogCacheSize(6)
                 .setHadoopConfigResources("/etc/hadoop/conf/core-site.xml")
                 .setMaxPartitionsPerWriter(222)
