diff --git a/CMakeLists.txt b/CMakeLists.txt
index 06edeaff54..277f534ec9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -808,6 +808,13 @@
 if(NOT NETCDF_ENABLE_BYTERANGE AND NETCDF_ENABLE_HDF5_ROS3)
   set(NETCDF_ENABLE_HDF5_ROS3 OFF CACHE BOOL "ROS3 support" FORCE)
 endif()
 
+# Optional helper library used by the HDF5 ROS3 S3 integration.
+if(NETCDF_ENABLE_HDF5_ROS3)
+  add_subdirectory(libs3util)
+  # Scoped define instead of mutating the global CMAKE_<LANG>_FLAGS strings.
+  add_compile_definitions(S3_UTIL)
+endif()
+
 ##
 # Enable Tests
 ##
diff --git a/libdispatch/CMakeLists.txt b/libdispatch/CMakeLists.txt
index 0f5d66d085..79fc0eb62e 100644
--- a/libdispatch/CMakeLists.txt
+++ b/libdispatch/CMakeLists.txt
@@ -100,6 +100,10 @@
     endif()
   endif()
 endif()
 
+if(NETCDF_ENABLE_HDF5_ROS3)
+  target_include_directories(dispatch PRIVATE ../libs3util)
+endif()
+
 if(NETCDF_ENABLE_TESTS)
   BUILD_BIN_TEST(ncrandom)
 endif()
diff --git a/libdispatch/dfile.c b/libdispatch/dfile.c
index cd70d174a5..b8f12261fa 100644
--- a/libdispatch/dfile.c
+++ b/libdispatch/dfile.c
@@ -44,6 +44,9 @@
 #define nulldup(s) ((s)?strdup(s):NULL)
 #endif
 
+#ifdef S3_UTIL
+#include "s3util.h"
+#endif
 
 /* User-defined formats. */
 NC_Dispatch *UDF0_dispatch_table = NULL;
@@ -2162,7 +2165,13 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
 	    goto done;
 	}
     }
-
+#ifdef S3_UTIL
+    /* Native S3 object paths are read through the HDF5 ROS3 dispatcher. */
+    if (is_s3_link(path)) {
+        dispatcher = HDF5_dispatch_table;
+        remove_mode(path); /* strip any trailing '#mode=' fragment */
+    }
+#endif
    /* If we can't figure out what dispatch table to use, give up. */
    if (!dispatcher) {stat = NC_ENOTNC; goto done;}
diff --git a/libhdf5/CMakeLists.txt b/libhdf5/CMakeLists.txt
index 9281a258ad..0a885bd4bb 100644
--- a/libhdf5/CMakeLists.txt
+++ b/libhdf5/CMakeLists.txt
@@ -32,6 +32,11 @@
 if(STATUS_PARALLEL)
   target_link_libraries(netcdfhdf5 PUBLIC MPI::MPI_C)
 endif(STATUS_PARALLEL)
 
+if(NETCDF_ENABLE_HDF5_ROS3)
+  target_include_directories(netcdfhdf5 PRIVATE ../libs3util)
+  # Link the library target; do not also recompile s3util.c into this target.
+  target_link_libraries(netcdfhdf5 PRIVATE s3util)
+endif()
+
 target_link_libraries(netcdfhdf5 PUBLIC HDF5::HDF5)
 
 # Remember to package this file for CMake builds.
diff --git a/libhdf5/hdf5open.c b/libhdf5/hdf5open.c
index 082d528a0a..de7ea59cd0 100644
--- a/libhdf5/hdf5open.c
+++ b/libhdf5/hdf5open.c
@@ -29,6 +29,12 @@
 #include "ncs3sdk.h"
 #endif
 
+#ifdef S3_UTIL
+#include "s3util.h"
+#else
+#define is_s3_link(val) 0
+#endif
+
 /*Nemonic */
 #define FILTERACTIVE 1
 
@@ -880,7 +886,7 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid)
             BAIL(NC_EHDFERR);
     }
 #ifdef NETCDF_ENABLE_BYTERANGE
-    else if(h5->byterange) { /* Arrange to use the byte-range drivers */
+    else if(h5->byterange || is_s3_link(path)) { /* Arrange to use the byte-range drivers */
         char* newpath = NULL;
 #ifdef NETCDF_ENABLE_HDF5_ROS3
         H5FD_ros3_fapl_t fa;
@@ -889,13 +895,59 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid)
         const char* profile0 = NULL;
 
         int iss3 = NC_iss3(h5->uri,NULL);
 
-	fa.version = H5FD_CURR_ROS3_FAPL_T_VERSION;
+        fa.version = H5FD_CURR_ROS3_FAPL_T_VERSION;
         fa.authenticate = (hbool_t)0;
         fa.aws_region[0] = '\0';
         fa.secret_id[0] = '\0';
         fa.secret_key[0] = '\0';
+#ifdef S3_UTIL
+        /* Region remembered for the s3:// -> https:// URL rewrite at open. */
+        char aws_region[128];
+        aws_region[0] = '\0';
+#endif
+        /* Non-zero until the s3util credential path succeeds for this file. */
+        int s3util_status = 1;
+#ifdef S3_UTIL
+        if (is_s3_link(path)) {
+            char aws_config_file[4096];
+            char *accessKey = NULL;
+            char *secretKey = NULL;
+            char *sessionToken = NULL;
+            char *region = NULL;
+            const char *home = getenv("HOME");
+            if (home == NULL)
+                BAIL(NC_EACCESS); /* cannot locate ~/.aws without $HOME */
+            snprintf(aws_config_file, sizeof(aws_config_file), "%s/.aws/config", home);
+            s3util_status = find_profile(path, &accessKey, &secretKey, &sessionToken,
+                                         &region, aws_config_file);
+            if (s3util_status != 0) {
+                fprintf(stderr, "-E-: No valid AWS credentials found for %s. See %s:%d\n",
+                        path, __FILE__, __LINE__);
+                BAIL(NC_EACCESS); /* no bare return: would leak fapl_id */
+            }
+            /* snprintf guarantees NUL-termination, unlike strncpy */
+            snprintf(fa.aws_region, sizeof(fa.aws_region), "%s", region);
+            snprintf(fa.secret_id, sizeof(fa.secret_id), "%s", accessKey);
+            snprintf(fa.secret_key, sizeof(fa.secret_key), "%s", secretKey);
+            snprintf(aws_region, sizeof(aws_region), "%s", region);
+            free(accessKey);
+            free(secretKey);
+            free(region);
+            fa.authenticate = (hbool_t)1;
+            if (H5Pset_fapl_ros3(fapl_id, &fa) < 0) {
+                free(sessionToken);
+                BAIL(NC_EHDFERR);
+            }
+            if (sessionToken != NULL) {
+                int tok = H5Pset_fapl_ros3_token(fapl_id, sessionToken);
+                free(sessionToken);
+                if (tok < 0)
+                    BAIL(NC_EHDFERR);
+            }
+        }
+#endif
 
-	if(iss3) {
+        if(iss3 && s3util_status) {
             NCS3INFO s3;
             NCURI* newuri = NULL;
             /* Rebuild the URL */
@@ -929,15 +981,27 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid)
             /* create and set fapl entry */
             if(H5Pset_fapl_ros3(fapl_id, &fa) < 0)
                 BAIL(NC_EHDFERR);
-	} else
+        } else if(s3util_status)
 #endif /*NETCDF_ENABLE_ROS3*/
         {/* Configure FAPL to use our byte-range file driver */
             if (H5Pset_fapl_http(fapl_id) < 0)
                 BAIL(NC_EHDFERR);
         }
-        /* Open the HDF5 file. */
-        if ((h5->hdfid = nc4_H5Fopen((newpath?newpath:path), flags, fapl_id)) < 0)
+        /* Open the HDF5 file. */
+#ifdef S3_UTIL
+        if (is_s3_link(path)) {
+            char s3_hdf5_url[S3UTIL_URL_MAX];
+            if (get_https_s3_link(path, s3_hdf5_url, aws_region) != 0) {
+                fprintf(stderr, "S3 Link conversion failed. See %s:%d\n", __FILE__, __LINE__);
+                BAIL(NC_EURL); /* a library must never exit() */
+            }
+            if ((h5->hdfid = nc4_H5Fopen(s3_hdf5_url, flags, fapl_id)) < 0)
+                BAIL(NC_EHDFERR);
+        }
+        else
+#endif
+        if ((h5->hdfid = nc4_H5Fopen((newpath?newpath:path), flags, fapl_id)) < 0)
             BAIL(NC_EHDFERR);
         nullfree(newpath);
     }
 #endif
@@ -2926,7 +2990,13 @@ nc4_H5Fopen(const char *filename0, unsigned flags, hid_t fapl_id)
     hid_t hid;
     char* localname = NULL;
     char* filename = NULL;
-
+#ifdef S3_UTIL
+    /* Remote S3 URLs are not local paths: skip the UTF-8/locale fixups. */
+    if (is_s3_link(filename0)) {
+        hid = H5Fopen(filename0, flags, fapl_id);
+        goto done;
+    }
+#endif
 #ifdef HDF5_UTF8_PATHS
     NCpath2utf8(filename0,&filename);
 #else
diff --git a/libs3util/CMakeLists.txt b/libs3util/CMakeLists.txt
new file mode 100644
index 0000000000..b65d032360
--- /dev/null
+++ b/libs3util/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.12)
+
+find_package(CURL REQUIRED)
+
+add_library(s3util s3util.c)
+
+set_target_properties(s3util PROPERTIES PUBLIC_HEADER "s3util.h")
+
+target_include_directories(s3util PRIVATE ${CURL_INCLUDE_DIRS})
+target_link_libraries(s3util PRIVATE ${CURL_LIBRARIES})
+
+install(TARGETS s3util
+        LIBRARY DESTINATION lib
+        ARCHIVE DESTINATION lib
+        PUBLIC_HEADER DESTINATION include)
diff --git a/libs3util/s3util.c b/libs3util/s3util.c
new file mode 100644
index 0000000000..ced10ca1f7
--- /dev/null
+++ b/libs3util/s3util.c
@@ -0,0 +1,297 @@
+/*
+ * s3util: helpers for opening S3-hosted HDF5 files via the ROS3 VFD.
+ * Provides S3 URL detection/rewriting and AWS credential discovery
+ * (environment variables first, then ~/.aws/credentials + ~/.aws/config),
+ * with access probed through libcurl SigV4 HEAD requests.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <curl/curl.h>
+#include "s3util.h"
+
+#define MAX_LINE_LENGTH 4096
+#define MAX_NUMBER_OF_PROFILES 16
+#define LOG 0
+
+static int profile_count = 0;
+
+typedef struct Aws_profile {
+    char aws_access_key_id[256];
+    char aws_secret_access_key[512];
+    char aws_session_token[4096];
+    char profile_name[128];
+    char region[128];
+} aws_prof;
+
+static aws_prof profiles[MAX_NUMBER_OF_PROFILES];
+
+/* strdup() that tolerates NULL (unset environment variables). */
+static char *dup_or_null(const char *s) { return s ? strdup(s) : NULL; }
+
+/* Copy the token following `keyword` in `line` into `value` (size
+ * `value_size`), stopping at whitespace.  Returns 1 if found, else 0. */
+static int extract_keyword(const char *line, const char *keyword,
+                           char *value, size_t value_size) {
+    const char *begin = strstr(line, keyword);
+    if (begin == NULL || value_size == 0)
+        return 0;
+    begin += strlen(keyword);
+    size_t len = strcspn(begin, " \t\r\n");
+    if (len >= value_size)
+        len = value_size - 1;
+    memcpy(value, begin, len);
+    value[len] = '\0';
+    return 1;
+}
+
+/* Try both "key = value" and "key=value" INI spellings for member `var`
+ * of the most recently started profile. */
+#define GET_CREDS(var) \
+    do { \
+        char extract_key[160]; \
+        snprintf(extract_key, sizeof(extract_key), "%s = ", #var); \
+        int res = extract_keyword(line, extract_key, \
+                                  profiles[profile_count - 1].var, \
+                                  sizeof(profiles[profile_count - 1].var)); \
+        if (res == 0) { \
+            snprintf(extract_key, sizeof(extract_key), "%s=", #var); \
+            res = extract_keyword(line, extract_key, \
+                                  profiles[profile_count - 1].var, \
+                                  sizeof(profiles[profile_count - 1].var)); \
+        } \
+        if (res && LOG) { \
+            printf("Key word %s = %s with length %zu\n", #var, \
+                   profiles[profile_count - 1].var, \
+                   strlen(profiles[profile_count - 1].var)); \
+        } \
+    } while (0)
+
+/* Return a malloc'd copy of `str` with every `old_sub` replaced by
+ * `new_sub`, or NULL on bad input / allocation failure. */
+static char *replace_substring(const char *str, const char *old_sub,
+                               const char *new_sub) {
+    if (!str || !old_sub || !new_sub || !*old_sub)
+        return NULL;
+    size_t old_len = strlen(old_sub);
+    size_t new_len = strlen(new_sub);
+    size_t count = 0;
+    for (const char *p = str; (p = strstr(p, old_sub)) != NULL; p += old_len)
+        count++;
+    size_t res_len = strlen(str) + count * new_len - count * old_len;
+    char *result = malloc(res_len + 1);
+    if (result == NULL)
+        return NULL;
+    char *out = result;
+    const char *pos = str;
+    const char *hit;
+    while ((hit = strstr(pos, old_sub)) != NULL) {
+        memcpy(out, pos, (size_t)(hit - pos));
+        out += hit - pos;
+        memcpy(out, new_sub, new_len);
+        out += new_len;
+        pos = hit + old_len;
+    }
+    strcpy(out, pos);
+    return result;
+}
+
+/* Return 1 when `link` looks like an S3 object reference:
+ * either an s3:// URI or an https:// URL containing ".s3.". */
+int is_s3_link(const char *link) {
+    if (link == NULL)
+        return 0;
+    if (strstr(link, "s3://") != NULL)
+        return 1;
+    return (strstr(link, ".s3.") != NULL) && (strstr(link, "https://") != NULL);
+}
+
+/* Strip a trailing '#mode=...' fragment in place. */
+int remove_mode(char *link) {
+    if (link == NULL)
+        return 1;
+    char *pound = strchr(link, '#');
+    if (pound != NULL)
+        *pound = '\0';
+    return 0;
+}
+
+/* Rewrite an s3://bucket/key URI as a virtual-hosted https URL:
+ * https://{bucket}.s3.{region}.amazonaws.com/{key}.  https inputs pass
+ * through unchanged.  `ros3_hdf5_link` must hold S3UTIL_URL_MAX bytes.
+ * Returns 0 on success, 1 on bad input or truncation. */
+int get_https_s3_link(const char *s3Url, char *ros3_hdf5_link, const char *region) {
+    if (!is_s3_link(s3Url) || ros3_hdf5_link == NULL || region == NULL)
+        return 1;
+    const char *bucket = strstr(s3Url, "s3://");
+    if (bucket == NULL) {
+        int n = snprintf(ros3_hdf5_link, S3UTIL_URL_MAX, "%s", s3Url);
+        return (n < 0 || n >= S3UTIL_URL_MAX) ? 1 : 0;
+    }
+    bucket += 5; /* skip "s3://" */
+    const char *slash = strchr(bucket, '/');
+    if (slash == NULL)
+        return 1; /* bucket with no object key */
+    char key[S3UTIL_URL_MAX];
+    snprintf(key, sizeof(key), "%s", slash + 1);
+    remove_mode(key); /* drop any '#mode=' fragment */
+    int n = snprintf(ros3_hdf5_link, S3UTIL_URL_MAX, "https://%.*s.s3.%s.amazonaws.com/%s",
+                     (int)(slash - bucket), bucket, region, key);
+    return (n < 0 || n >= S3UTIL_URL_MAX) ? 1 : 0;
+}
+
+/* Copy the "[name]" section header from `line` into `value`; 1 if found. */
+static int is_it_profile(const char *line, char *value, size_t value_size) {
+    const char *begin = strchr(line, '[');
+    const char *end = begin ? strchr(begin + 1, ']') : NULL;
+    if (begin == NULL || end == NULL || value_size == 0)
+        return 0;
+    size_t len = (size_t)(end - begin - 1);
+    if (len >= value_size)
+        len = value_size - 1;
+    memcpy(value, begin + 1, len);
+    value[len] = '\0';
+    return 1;
+}
+
+/* Load profiles from ~/.aws/credentials (keys/tokens) and then pick up
+ * each profile's region from `aws_profile` (~/.aws/config).
+ * Returns 0 on success, 1 if either file cannot be read. */
+static int parse_aws_cred(const char *aws_profile) {
+    char *aws_creds = replace_substring(aws_profile, "config", "credentials");
+    if (aws_creds == NULL)
+        return 1;
+    FILE *file = fopen(aws_creds, "r");
+    if (file == NULL) {
+        perror("Error opening credentials file");
+        free(aws_creds);
+        return 1;
+    }
+    char line[MAX_LINE_LENGTH];
+    while (fgets(line, sizeof(line), file) != NULL) {
+        if (profile_count < MAX_NUMBER_OF_PROFILES &&
+            is_it_profile(line, profiles[profile_count].profile_name,
+                          sizeof(profiles[profile_count].profile_name))) {
+            profiles[profile_count].aws_access_key_id[0] = '\0';
+            profiles[profile_count].aws_secret_access_key[0] = '\0';
+            profiles[profile_count].aws_session_token[0] = '\0';
+            profiles[profile_count].region[0] = '\0';
+            profile_count++;
+        }
+        if (profile_count > 0) {
+            GET_CREDS(aws_access_key_id);
+            GET_CREDS(aws_secret_access_key);
+            GET_CREDS(aws_session_token);
+        }
+    }
+    fclose(file);
+    free(aws_creds);
+
+    file = fopen(aws_profile, "r");
+    if (file == NULL) {
+        perror("Error opening config file");
+        return 1;
+    }
+    int prof_to_fill = -1;
+    while (fgets(line, sizeof(line), file) != NULL) {
+        char config_profile_name[128] = "";
+        if (is_it_profile(line, config_profile_name, sizeof(config_profile_name))) {
+            /* "[profile foo]" in config matches credentials section "foo" */
+            for (int iprof = 0; iprof < profile_count; iprof++) {
+                if (strstr(config_profile_name, profiles[iprof].profile_name) != NULL)
+                    prof_to_fill = iprof;
+            }
+        }
+        if (prof_to_fill != -1) {
+            if (extract_keyword(line, "region = ", profiles[prof_to_fill].region,
+                                sizeof(profiles[prof_to_fill].region)) == 0)
+                extract_keyword(line, "region=", profiles[prof_to_fill].region,
+                                sizeof(profiles[prof_to_fill].region));
+        }
+    }
+    fclose(file);
+    return 0;
+}
+
+/* HEAD-probe `s3_url` with SigV4-signed curl using the given credentials.
+ * Returns 0 when the object is readable (HTTP 200), 1 otherwise. */
+int check_access(const char *s3_url, const char *access_key, const char *secret_key,
+                 const char *session_token, const char *region) {
+    if (access_key == NULL || secret_key == NULL || region == NULL)
+        return 1;
+    char url[S3UTIL_URL_MAX];
+    if (get_https_s3_link(s3_url, url, region) != 0) {
+        fprintf(stderr, "-E-: couldn't convert the link %s\n", s3_url);
+        return 1;
+    }
+    CURL *curl = curl_easy_init();
+    if (curl == NULL)
+        return 1;
+    char auth[800];
+    snprintf(auth, sizeof(auth), "%s:%s", access_key, secret_key);
+    char provider[256];
+    snprintf(provider, sizeof(provider), "aws:amz:%s:s3", region);
+    curl_easy_setopt(curl, CURLOPT_URL, url);
+    curl_easy_setopt(curl, CURLOPT_USERPWD, auth);
+    curl_easy_setopt(curl, CURLOPT_AWS_SIGV4, provider);
+    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); /* HEAD request */
+    struct curl_slist *headers = NULL;
+    if (session_token != NULL && *session_token != '\0') {
+        char token_header[4200];
+        snprintf(token_header, sizeof(token_header),
+                 "x-amz-security-token: %s", session_token);
+        headers = curl_slist_append(headers, token_header);
+    }
+    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
+    CURLcode res = curl_easy_perform(curl);
+    long response_code = 0;
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);
+    /* Only an explicit 200 proves these credentials can read the object;
+     * redirects, 403 and 404 all count as failure. */
+    int ok = (res == CURLE_OK && response_code == 200);
+    curl_slist_free_all(headers);
+    curl_easy_cleanup(curl);
+    return ok ? 0 : 1;
+}
+
+/* Find working credentials for `s3_url`: environment variables first,
+ * then every profile found via `aws_profile` (path to ~/.aws/config).
+ * On success returns 0 and fills malloc'd strings the caller must free
+ * (session_token may be NULL).  On failure returns non-zero and the out
+ * pointers are NULL. */
+int find_profile(const char *s3_url, char **access_key, char **secret_key,
+                 char **session_token, char **region, const char *aws_profile) {
+    *access_key = dup_or_null(getenv("AWS_ACCESS_KEY_ID"));
+    *secret_key = dup_or_null(getenv("AWS_SECRET_ACCESS_KEY"));
+    *session_token = dup_or_null(getenv("AWS_SESSION_TOKEN"));
+    *region = dup_or_null(getenv("AWS_REGION"));
+    int status = 1;
+    if (*access_key != NULL && *secret_key != NULL && *region != NULL)
+        status = check_access(s3_url, *access_key, *secret_key, *session_token, *region);
+    if (status == 0)
+        return 0;
+    free(*access_key);    *access_key = NULL;
+    free(*secret_key);    *secret_key = NULL;
+    free(*session_token); *session_token = NULL;
+    free(*region);        *region = NULL;
+    if (aws_profile == NULL)
+        return status;
+    if (parse_aws_cred(aws_profile) != 0)
+        return 1;
+    for (int iprof = 0; iprof < profile_count; iprof++) {
+        status = check_access(s3_url, profiles[iprof].aws_access_key_id,
+                              profiles[iprof].aws_secret_access_key,
+                              profiles[iprof].aws_session_token,
+                              profiles[iprof].region);
+        if (status == 0) {
+            *access_key = strdup(profiles[iprof].aws_access_key_id);
+            *secret_key = strdup(profiles[iprof].aws_secret_access_key);
+            *session_token = profiles[iprof].aws_session_token[0] != '\0'
+                                 ? strdup(profiles[iprof].aws_session_token)
+                                 : NULL;
+            *region = strdup(profiles[iprof].region);
+            break;
+        }
+    }
+    return status;
+}
diff --git a/libs3util/s3util.h b/libs3util/s3util.h
new file mode 100644
index 0000000000..a0e8d1cd90
--- /dev/null
+++ b/libs3util/s3util.h
@@ -0,0 +1,34 @@
+#ifndef S3UTIL_H
+#define S3UTIL_H
+
+/* Minimum size in bytes of any URL output buffer passed to s3util. */
+#define S3UTIL_URL_MAX 1024
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Return 1 when `link` looks like an S3 object reference. */
+int is_s3_link(const char *link);
+
+/* Strip a trailing '#mode=...' fragment in place; 0 on success. */
+int remove_mode(char *link);
+
+/* Rewrite an s3:// URI as a virtual-hosted https URL for `region`.
+ * `ros3_hdf5_link` must hold at least S3UTIL_URL_MAX bytes; 0 on success. */
+int get_https_s3_link(const char *s3Url, char *ros3_hdf5_link, const char *region);
+
+/* Locate working credentials for `s3_url` (environment, then profiles in
+ * `aws_profile`).  On success (0) the out strings are malloc'd and owned
+ * by the caller; `session_token` may be NULL. */
+int find_profile(const char *s3_url, char **access_key, char **secret_key,
+                 char **session_token, char **region, const char *aws_profile);
+
+/* HEAD-probe `s3_url` with the given credentials; 0 means readable. */
+int check_access(const char *s3_url, const char *access_key, const char *secret_key,
+                 const char *session_token, const char *region);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* S3UTIL_H */