diff --git a/ingestion/source-connectors/google-drive.mdx b/ingestion/source-connectors/google-drive.mdx
index b0cfd327..b72a84a2 100644
--- a/ingestion/source-connectors/google-drive.mdx
+++ b/ingestion/source-connectors/google-drive.mdx
@@ -24,8 +24,4 @@ import GoogleDrivePyV2 from '/snippets/source_connectors/google_drive.v2.py.mdx'
import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
-
-
-import GoogleCredentialsFileAsString from '/snippets/general-shared-text/google-credentials-file-as-string.mdx';
-
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/snippets/general-shared-text/google-credentials-file-as-string.mdx b/snippets/general-shared-text/google-credentials-file-as-string.mdx
deleted file mode 100644
index e74f963c..00000000
--- a/snippets/general-shared-text/google-credentials-file-as-string.mdx
+++ /dev/null
@@ -1,44 +0,0 @@
-## Output a key file's contents as a string
-
-If you need to convert the contents of a `credentials.json` file into a string, you could use a Python script such as the following.
-This script takes the local path to the key file as input and outputs the key file's contents as a string.
-
-```python Python
-# Filename: json_file_to_string.py
-
-import json
-import sys
-
-def json_file_to_string(file_path):
- try:
- # Read the JSON file.
- with open(file_path, 'r') as file:
- # Load the JSON data.
- data = json.load(file)
-
- # Convert the JSON data back to a string, with no whitespace.
- json_string = json.dumps(data, separators=(',', ':'))
-
- return json_string
-
- except FileNotFoundError:
- print(f"Error: File '{file_path}' not found.")
- return None
- except json.JSONDecodeError:
- print(f"Error: '{file_path}' is not a valid JSON file.")
- return None
- except Exception as e:
- print(f"An unexpected error occurred: {e}")
- return None
-
-if __name__ == "__main__":
- if len(sys.argv) != 2:
- print("Usage: python json_file_to_string.py ")
- sys.exit(1)
-
- file_path = sys.argv[1]
- result = json_file_to_string(file_path)
-
- if result:
- print(result)
-```
\ No newline at end of file
diff --git a/snippets/general-shared-text/google-drive-api-placeholders.mdx b/snippets/general-shared-text/google-drive-api-placeholders.mdx
index 548b478e..9807c644 100644
--- a/snippets/general-shared-text/google-drive-api-placeholders.mdx
+++ b/snippets/general-shared-text/google-drive-api-placeholders.mdx
@@ -1,6 +1,6 @@
- `` (_required_) - A unique name for this connector.
- `` - The ID for the target Google Drive folder or drive.
-- `` - The contents of the `credentials.json` key file as a single-line string.
+- `` - The contents of the `credentials.json` key file.
- For `extensions`, set one or more `` values (such as `pdf` or `docx`) to process files with only those extensions. The default is to include all extensions.
diff --git a/snippets/general-shared-text/google-drive-cli-api.mdx b/snippets/general-shared-text/google-drive-cli-api.mdx
index 497e8591..ff561132 100644
--- a/snippets/general-shared-text/google-drive-cli-api.mdx
+++ b/snippets/general-shared-text/google-drive-cli-api.mdx
@@ -14,7 +14,7 @@ The following environment variables:
- One of the following:
- `GCP_SERVICE_ACCOUNT_KEY_FILEPATH` - The path to the `credentials.json` key file, represented by `--service-account-key-path` (CLI) or `service_account_key_path` (Python).
- - `GCP_SERVICE_ACCOUNT_KEY_STRING` - The contents of the `credentials.json` key file as a string, represented by `--service-account-key` (CLI) or `service_account_key` (Python).
+ - `GCP_SERVICE_ACCOUNT_KEY_STRING` - The contents of the `credentials.json` key file, represented by `--service-account-key` (CLI) or `service_account_key` (Python).
To use `--extensions` with a comma-separated list (CLI) or `extensions` with an array of strings (Python) to process files with only those extensions,
diff --git a/snippets/general-shared-text/google-drive.mdx b/snippets/general-shared-text/google-drive.mdx
index 00e47ae7..acfa9fa1 100644
--- a/snippets/general-shared-text/google-drive.mdx
+++ b/snippets/general-shared-text/google-drive.mdx
@@ -15,23 +15,52 @@ allowfullscreen
[Create a service account](https://developers.google.com/workspace/guides/create-credentials#create_a_service_account).
[Create credentials for a service account](https://developers.google.com/workspace/guides/create-credentials#create_credentials_for_a_service_account).
- To ensure maximum compatibility across Unstructured service offerings, you should give the service account key information to Unstructured as
- a single-line string that contains the contents of the downloaded service account key file (and not the service account key file itself).
- To print this single-line string without line breaks, suitable for copying, you can run one of the following commands from your Terminal or Command Prompt.
- In this command, replace `` with the path to the `credentials.json` key file that you downloaded by following the preceding instructions.
+ To ensure maximum compatibility across Unstructured service offerings, you should give Unstructured
+ the contents of the downloaded service account key file (and not the service account key file itself). How you
+ provide these contents depends on how you intend to call Unstructured and on the operating system you're using, as follows.
- - For macOS or Linux:
+ - For the [Unstructured user interface (UI)](/ui/overview):
- ```text
+ Print the contents of the downloaded service account key file as a single-line string without line breaks, suitable for copying, by running one of the following commands from your Terminal or Command Prompt.
+ In this command, replace `` with the path to the `credentials.json` key file that you downloaded by following the preceding instructions.
+
+ For macOS or Linux:
+
+ ```bash Bash
tr -d '\n' <
```
- - For Windows:
+ For Windows:
- ```text
+ ```powershell PowerShell
(Get-Content -Path "" -Raw).Replace("`r`n", "").Replace("`n", "")
```
+ Copy the output of this command into the **Account Key** field in the Unstructured UI.
+
+ - For the [Unstructured API](/api-reference/overview) and [Unstructured Ingest](/ingestion/overview):
+
+ Print the contents of the downloaded service account key file as a Base64-encoded string by running one of the following commands from your Terminal or Command Prompt.
+ In this command, replace `` with the path to the `credentials.json` key file that you downloaded by following the preceding instructions.
+
+ For macOS or Linux:
+
+ ```bash Bash
+ base64 -i <
+ ```
+
+ For Windows:
+
+ ```powershell PowerShell
+ [Convert]::ToBase64String([IO.File]::ReadAllBytes(""))
+ ```
+
+ Set the value of the `GCP_SERVICE_ACCOUNT_KEY_STRING` environment variable to the output of this command.
+
+ Then, in your code or script, before making your Unstructured API or Unstructured Ingest request,
+ Base64-decode the value of this environment variable and add the decoded string to the request. In some cases,
+ you must also escape all double quotes in the decoded string. For more information, see the following code examples.
+
- A Google Drive [shared folder](https://support.google.com/drive/answer/2375091) or [shared drive](https://support.google.com/a/users/answer/7212025).
- Give the service account access to the shared folder or shared drive. To do this, share the folder or drive with the service account's email address.
[Learn how](https://support.google.com/drive/answer/7166529).
diff --git a/snippets/source_connectors/google_drive.sh.mdx b/snippets/source_connectors/google_drive.sh.mdx
index 6d1da5a4..36e2a1ea 100644
--- a/snippets/source_connectors/google_drive.sh.mdx
+++ b/snippets/source_connectors/google_drive.sh.mdx
@@ -3,19 +3,24 @@
# Chunking and embedding are optional.
+service_account_key=$(python3 -c "import base64, json, os; \
+decoded = base64.b64decode(os.environ['GCP_SERVICE_ACCOUNT_KEY_STRING']).decode('utf-8'); \
+parsed = json.loads(decoded); \
+print(parsed)" | sed "s/'/\"/g")
+
unstructured-ingest \
google-drive \
- --download-dir $LOCAL_FILE_DOWNLOAD_DIR \
- --drive-id $GOOGLE_DRIVE_FOLDER_ID \
+ --download-dir "$LOCAL_FILE_DOWNLOAD_DIR" \
--service-account-key-path $GCP_SERVICE_ACCOUNT_KEY_FILEPATH \ # Or
- --service-account-key $GCP_SERVICE_ACCOUNT_KEY_STRING \
+ --service-account-key "$service_account_key" \
+ --drive-id "$GOOGLE_DRIVE_FOLDER_ID" \
--partition-by-api \
- --api-key $UNSTRUCTURED_API_KEY \
- --partition-endpoint $UNSTRUCTURED_API_URL \
+ --api-key "$UNSTRUCTURED_API_KEY" \
+ --partition-endpoint "$UNSTRUCTURED_API_URL" \
--strategy hi_res \
--chunking-strategy by_title \
--embedding-provider huggingface \
--additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
local \
- --output-dir $LOCAL_FILE_OUTPUT_DIR
+ --output-dir "$LOCAL_FILE_OUTPUT_DIR"
```
diff --git a/snippets/source_connectors/google_drive.v2.py.mdx b/snippets/source_connectors/google_drive.v2.py.mdx
index b198053a..acfc42fd 100644
--- a/snippets/source_connectors/google_drive.v2.py.mdx
+++ b/snippets/source_connectors/google_drive.v2.py.mdx
@@ -14,6 +14,8 @@ from unstructured_ingest.processes.chunker import ChunkerConfig
from unstructured_ingest.processes.embedder import EmbedderConfig
from unstructured_ingest.processes.connectors.local import LocalUploaderConfig
+import base64
+
# Chunking and embedding are optional.
if __name__ == "__main__":
@@ -24,7 +26,7 @@ if __name__ == "__main__":
source_connection_config=GoogleDriveConnectionConfig(
access_config=GoogleDriveAccessConfig(
service_account_key_path=os.getenv("GCP_SERVICE_ACCOUNT_KEY_FILEPATH"), # Or
- service_account_key=os.getenv("GCP_SERVICE_ACCOUNT_KEY_STRING")
+ service_account_key=base64.b64decode(s=os.getenv("GCP_SERVICE_ACCOUNT_KEY_STRING")).decode(encoding="utf-8")
),
drive_id=os.getenv("GOOGLE_DRIVE_FOLDER_ID"),
),
diff --git a/snippets/source_connectors/google_drive_sdk.mdx b/snippets/source_connectors/google_drive_sdk.mdx
index 136391d1..68c11bce 100644
--- a/snippets/source_connectors/google_drive_sdk.mdx
+++ b/snippets/source_connectors/google_drive_sdk.mdx
@@ -9,6 +9,8 @@ from unstructured_client.models.shared import (
GoogleDriveSourceConnectorConfigInput
)
+import base64
+
with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client:
response = client.sources.create_source(
request=CreateSourceRequest(
@@ -17,7 +19,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien
type=SourceConnectorType.GOOGLE_DRIVE,
config=GoogleDriveSourceConnectorConfigInput(
drive_id="",
- service_account_key="",
+ service_account_key=base64.b64decode(s=os.getenv("GCP_SERVICE_ACCOUNT_KEY_STRING")).decode(encoding="utf-8").replace('"', '\\"'),
extensions=[
"",
""