Skip to content

Commit d3ca154

Browse files
text_generation samples for shell (google-gemini#430)
* Start on text_generation samples for shell
* Add example for one image in text gen
* Add streaming example for one image
* Adding rest of text generation examples
* Change to gemini-1.5-flash
* Add updates to text generation scripts
* Using file api to upload audio and video
* Delete audio_output.txt
* Debugged audio example
* Uploading videos now working for text generation
* Delete file_info.json
* Remove stray tag.

---------

Co-authored-by: Mark Daoust <[email protected]>
1 parent 5b31be7 commit d3ca154

File tree

1 file changed

+247
-0
lines changed

1 file changed

+247
-0
lines changed

samples/rest/text_generation.sh

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
# Shared setup for the text-generation REST samples in this script.
# Needs bash (the script uses [[ ]]) plus curl, jq, base64, file and realpath.
# GOOGLE_API_KEY must be exported; because of `set -u` the first request that
# expands it aborts the script when it is missing.
set -eu

# Sample media lives in third_party/, two levels above this script's directory.
SCRIPT_DIR=$(dirname "$0")
MEDIA_DIR=$(realpath "${SCRIPT_DIR}/../../third_party")

IMG_PATH=${MEDIA_DIR}/organ.jpg
AUDIO_PATH=${MEDIA_DIR}/sample.mp3
VIDEO_PATH=${MEDIA_DIR}/Big_Buck_Bunny.mp4

BASE_URL="https://generativelanguage.googleapis.com"

# base64 implementations disagree on their command line: the FreeBSD variant
# takes the file via --input, every other variant is asked for unwrapped
# output with -w0 so the result can be embedded in a JSON string.
if [[ "$(base64 --version 2>&1)" = *"FreeBSD"* ]]; then
  B64FLAGS="--input"
else
  B64FLAGS="-w0"
fi
# Sample: single text-only prompt sent to generateContent.
# The JSON response is printed to stdout; curl's own stderr output is discarded.
echo "[START text_gen_text_only_prompt]"
# [START text_gen_text_only_prompt]
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts":[{"text": "Write a story about a magic backpack."}]
    }]
  }' 2> /dev/null
# [END text_gen_text_only_prompt]
# Sample: text-only prompt sent to streamGenerateContent.
# alt=sse requests server-sent events and --no-buffer makes curl print each
# chunk as soon as it arrives instead of buffering the output.
echo "[START text_gen_text_only_prompt_streaming]"
# [START text_gen_text_only_prompt_streaming]
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?alt=sse&key=${GOOGLE_API_KEY}" \
  -H 'Content-Type: application/json' \
  --no-buffer \
  -d '{ "contents":[{"parts":[{"text": "Write a story about a magic backpack."}]}]}'
# [END text_gen_text_only_prompt_streaming]
# Sample: text prompt plus one JPEG image sent inline to generateContent.
# Reads IMG_PATH and B64FLAGS, which are set earlier in this script.
echo "[START text_gen_multimodal_one_image_prompt]"
# [START text_gen_multimodal_one_image_prompt]
# The single-quoted JSON is closed around the command substitution so the
# base64 text is spliced into the "data" string; the substitution itself is
# double-quoted so a media path containing whitespace is not word-split.
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts":[
        {"text": "Tell me about this instrument"},
        {
          "inline_data": {
            "mime_type":"image/jpeg",
            "data": "'"$(base64 "${B64FLAGS}" "${IMG_PATH}")"'"
          }
        }
      ]
    }]
  }' 2> /dev/null
# [END text_gen_multimodal_one_image_prompt]
# Sample: text prompt plus one inline JPEG image sent to streamGenerateContent.
# Reads IMG_PATH and B64FLAGS, which are set earlier in this script.
echo "[START text_gen_multimodal_one_image_prompt_streaming]"
# [START text_gen_multimodal_one_image_prompt_streaming]
# --no-buffer (as in the text-only streaming sample) lets curl print chunks as
# they arrive. The base64 substitution is double-quoted so a media path
# containing whitespace is not word-split.
curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  --no-buffer \
  -X POST \
  -d '{
    "contents": [{
      "parts":[
        {"text": "Tell me about this instrument"},
        {
          "inline_data": {
            "mime_type":"image/jpeg",
            "data": "'"$(base64 "${B64FLAGS}" "${IMG_PATH}")"'"
          }
        }
      ]
    }]
  }' 2> /dev/null
# [END text_gen_multimodal_one_image_prompt_streaming]
# Sample: upload an audio file through the File API, then ask the model to
# describe it. Reads AUDIO_PATH and BASE_URL, which are set earlier in this
# script. Writes upload-header.tmp (removed again), file_info.json and
# response.json in the current directory.
echo "[START text_gen_multimodal_audio]"
# [START text_gen_multimodal_audio]
# Use File API to upload audio data to API request.
MIME_TYPE=$(file -b --mime-type "${AUDIO_PATH}")
NUM_BYTES=$(wc -c < "${AUDIO_PATH}")
DISPLAY_NAME=AUDIO

tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload URL comes back in the response headers, so dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D "${tmp_header_file}" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

# Header lines end in CRLF; strip the carriage return from the URL.
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm -- "${tmp_header_file}"

# Without this check a missing header would only surface as a silent curl
# failure below, because curl's stderr is discarded.
if [[ -z "${upload_url}" ]]; then
  echo "No x-goog-upload-url header in the upload start response." >&2
  exit 1
fi

# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${AUDIO_PATH}" 2> /dev/null > file_info.json

# jq is used without -r on purpose: the value keeps its JSON double quotes and
# is spliced into the request body below as a ready-made JSON string.
file_uri=$(jq ".file.uri" file_info.json)
echo "file_uri=${file_uri}"

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts":[
        {"text": "Please describe this file."},
        {"file_data":{"mime_type": "audio/mpeg", "file_uri": '"${file_uri}"'}}]
    }]
  }' 2> /dev/null > response.json

cat response.json
echo

jq ".candidates[].content.parts[].text" response.json
# [END text_gen_multimodal_audio]
# Sample: upload a video through the File API, wait until it has been
# processed, then ask the model to describe it. Reads VIDEO_PATH and BASE_URL,
# which are set earlier in this script. Writes upload-header.tmp (removed
# again), file_info.json and response.json in the current directory.
echo "[START text_gen_multimodal_video_prompt]"
# [START text_gen_multimodal_video_prompt]
# Use File API to upload video data to API request.
MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}")
NUM_BYTES=$(wc -c < "${VIDEO_PATH}")
DISPLAY_NAME=VIDEO

# Defined here so this region does not depend on another sample having run.
tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload URL comes back in the response headers, so dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D "${tmp_header_file}" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

# Header lines end in CRLF; strip the carriage return from the URL.
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm -- "${tmp_header_file}"

# Without this check a missing header would only surface as a silent curl
# failure below, because curl's stderr is discarded.
if [[ -z "${upload_url}" ]]; then
  echo "No x-goog-upload-url header in the upload start response." >&2
  exit 1
fi

# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json

# jq is used without -r on purpose: the value keeps its JSON double quotes and
# is spliced into the request body below as a ready-made JSON string.
file_uri=$(jq ".file.uri" file_info.json)
echo "file_uri=${file_uri}"

state=$(jq ".file.state" file_info.json)
echo "state=${state}"

# Raw output (-r): the name goes into a URL path, so it must not carry JSON
# quotes. NOTE(review): the name is expected to already start with "files/",
# which is why the poll URL below does not add that segment - confirm.
name=$(jq -r ".file.name" file_info.json)
echo "name=${name}"

while [[ "${state}" = *"PROCESSING"* ]]; do
  echo "Processing video..."
  sleep 5
  # Get the file of interest to check state
  curl "${BASE_URL}/v1beta/${name}?key=${GOOGLE_API_KEY}" 2> /dev/null > file_info.json
  # The upload response nests the file under "file"; the lookup response is
  # expected to be the bare file object, so accept either shape.
  state=$(jq ".file.state // .state" file_info.json)
done

if [[ "${state}" = *"FAILED"* ]]; then
  echo "Video processing failed." >&2
  exit 1
fi

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts":[
        {"text": "Please describe this file."},
        {"file_data":{"mime_type": "video/mp4", "file_uri": '"${file_uri}"'}}]
    }]
  }' 2> /dev/null > response.json

cat response.json
echo

jq ".candidates[].content.parts[].text" response.json
# [END text_gen_multimodal_video_prompt]
# Sample: upload a video through the File API, wait until it has been
# processed, then ask the model to describe it via streamGenerateContent.
# Reads VIDEO_PATH and BASE_URL, which are set earlier in this script. Writes
# upload-header.tmp (removed again), file_info.json and response.json in the
# current directory.
echo "[START text_gen_multimodal_video_prompt_streaming]"
# [START text_gen_multimodal_video_prompt_streaming]
# Use File API to upload video data to API request.
MIME_TYPE=$(file -b --mime-type "${VIDEO_PATH}")
NUM_BYTES=$(wc -c < "${VIDEO_PATH}")
DISPLAY_NAME=VIDEO_PATH

# Defined here so this region does not depend on another sample having run.
tmp_header_file=upload-header.tmp

# Initial resumable request defining metadata.
# The upload URL comes back in the response headers, so dump them to a file.
curl "${BASE_URL}/upload/v1beta/files?key=${GOOGLE_API_KEY}" \
  -D "${tmp_header_file}" \
  -H "X-Goog-Upload-Protocol: resumable" \
  -H "X-Goog-Upload-Command: start" \
  -H "X-Goog-Upload-Header-Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Header-Content-Type: ${MIME_TYPE}" \
  -H "Content-Type: application/json" \
  -d "{'file': {'display_name': '${DISPLAY_NAME}'}}" 2> /dev/null

# Header lines end in CRLF; strip the carriage return from the URL.
upload_url=$(grep -i "x-goog-upload-url: " "${tmp_header_file}" | cut -d" " -f2 | tr -d "\r")
rm -- "${tmp_header_file}"

# Without this check a missing header would only surface as a silent curl
# failure below, because curl's stderr is discarded.
if [[ -z "${upload_url}" ]]; then
  echo "No x-goog-upload-url header in the upload start response." >&2
  exit 1
fi

# Upload the actual bytes.
curl "${upload_url}" \
  -H "Content-Length: ${NUM_BYTES}" \
  -H "X-Goog-Upload-Offset: 0" \
  -H "X-Goog-Upload-Command: upload, finalize" \
  --data-binary "@${VIDEO_PATH}" 2> /dev/null > file_info.json

# jq is used without -r on purpose: the value keeps its JSON double quotes and
# is spliced into the request body below as a ready-made JSON string.
file_uri=$(jq ".file.uri" file_info.json)
echo "file_uri=${file_uri}"

state=$(jq ".file.state" file_info.json)
echo "state=${state}"

# Read the name of THIS upload; otherwise the loop below would poll whatever
# file a previous sample left in $name (or hit an unset variable under set -u).
# Raw output (-r) because the name goes into a URL path.
# NOTE(review): the name is expected to already start with "files/" - confirm.
name=$(jq -r ".file.name" file_info.json)
echo "name=${name}"

while [[ "${state}" = *"PROCESSING"* ]]; do
  echo "Processing video..."
  sleep 5
  # Get the file of interest to check state
  curl "${BASE_URL}/v1beta/${name}?key=${GOOGLE_API_KEY}" 2> /dev/null > file_info.json
  # The upload response nests the file under "file"; the lookup response is
  # expected to be the bare file object, so accept either shape.
  state=$(jq ".file.state // .state" file_info.json)
done

if [[ "${state}" = *"FAILED"* ]]; then
  echo "Video processing failed." >&2
  exit 1
fi

curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:streamGenerateContent?key=$GOOGLE_API_KEY" \
  -H 'Content-Type: application/json' \
  -X POST \
  -d '{
    "contents": [{
      "parts":[
        {"text": "Please describe this file."},
        {"file_data":{"mime_type": "video/mp4", "file_uri": '"${file_uri}"'}}]
    }]
  }' 2> /dev/null > response.json

cat response.json
echo
# [END text_gen_multimodal_video_prompt_streaming]

0 commit comments

Comments
 (0)