|
19 | 19 | { |
20 | 20 | "attachments": {}, |
21 | 21 | "cell_type": "markdown", |
| 22 | + "id": "5831c1ac", |
22 | 23 | "metadata": {}, |
23 | 24 | "source": [ |
24 | 25 | "<div class=\"alert alert-block alert-warning\">\n", |
|
38 | 39 | "3. Demonstrate powerful AI Functions for text processing and analysis\n", |
39 | 40 | "\n", |
40 | 41 | "**Prerequisites**: Ensure AI Functions are enabled on your deployment (AI Services > AI & ML Functions)." |
41 | | - ], |
42 | | - "id": "5831c1ac" |
| 42 | + ] |
43 | 43 | }, |
44 | 44 | { |
45 | 45 | "attachments": {}, |
46 | 46 | "cell_type": "markdown", |
| 47 | + "id": "ea429156", |
47 | 48 | "metadata": {}, |
48 | 49 | "source": [ |
49 | 50 | "## Create some simple tables\n", |
50 | 51 | "\n", |
51 | 52 | "This setup establishes a basic relational structure to store some reviews for restaurants. Ensure you have selected a database and have CREATE permissions to create/delete tables." |
52 | | - ], |
53 | | - "id": "ea429156" |
| 53 | + ] |
54 | 54 | }, |
55 | 55 | { |
56 | 56 | "cell_type": "code", |
57 | 57 | "execution_count": 1, |
| 58 | + "id": "1f8ccd75", |
58 | 59 | "metadata": {}, |
59 | 60 | "outputs": [ |
60 | 61 | { |
|
97 | 98 | " Summary TEXT,\n", |
98 | 99 | " Text TEXT\n", |
99 | 100 | ");" |
100 | | - ], |
101 | | - "id": "1f8ccd75" |
| 101 | + ] |
102 | 102 | }, |
103 | 103 | { |
104 | 104 | "attachments": {}, |
105 | 105 | "cell_type": "markdown", |
| 106 | + "id": "6a2118dd", |
106 | 107 | "metadata": {}, |
107 | 108 | "source": [ |
108 | 109 | "## Install the required packages" |
109 | | - ], |
110 | | - "id": "6a2118dd" |
| 110 | + ] |
111 | 111 | }, |
112 | 112 | { |
113 | 113 | "cell_type": "code", |
114 | 114 | "execution_count": 2, |
| 115 | + "id": "40350277", |
115 | 116 | "metadata": {}, |
116 | 117 | "outputs": [ |
117 | 118 | { |
|
143 | 144 | ], |
144 | 145 | "source": [ |
145 | 146 | "!pip install kagglehub pandas" |
146 | | - ], |
147 | | - "id": "40350277" |
| 147 | + ] |
148 | 148 | }, |
149 | 149 | { |
150 | 150 | "attachments": {}, |
151 | 151 | "cell_type": "markdown", |
| 152 | + "id": "97437a79", |
152 | 153 | "metadata": {}, |
153 | 154 | "source": [ |
154 | 155 | "## Download and Load Dataset" |
155 | | - ], |
156 | | - "id": "97437a79" |
| 156 | + ] |
157 | 157 | }, |
158 | 158 | { |
159 | 159 | "cell_type": "code", |
160 | 160 | "execution_count": 3, |
| 161 | + "id": "cf62cc7e", |
161 | 162 | "metadata": {}, |
162 | 163 | "outputs": [ |
163 | 164 | { |
|
349 | 350 | "print(f\"Columns: {list(df.columns)}\")\n", |
350 | 351 | "print(\"\\nFirst few rows:\")\n", |
351 | 352 | "df.head()" |
352 | | - ], |
353 | | - "id": "cf62cc7e" |
| 353 | + ] |
354 | 354 | }, |
355 | 355 | { |
356 | 356 | "attachments": {}, |
357 | 357 | "cell_type": "markdown", |
| 358 | + "id": "0c938c99", |
358 | 359 | "metadata": {}, |
359 | 360 | "source": [ |
360 | 361 | "## Load Data into SingleStore" |
361 | | - ], |
362 | | - "id": "0c938c99" |
| 362 | + ] |
363 | 363 | }, |
364 | 364 | { |
365 | 365 | "cell_type": "code", |
366 | 366 | "execution_count": 4, |
| 367 | + "id": "4d427d08", |
367 | 368 | "metadata": {}, |
368 | 369 | "outputs": [ |
369 | 370 | { |
|
396 | 397 | ")\n", |
397 | 398 | "\n", |
398 | 399 | "print(\"Data loaded successfully!\")" |
399 | | - ], |
400 | | - "id": "4d427d08" |
| 400 | + ] |
401 | 401 | }, |
402 | 402 | { |
403 | 403 | "attachments": {}, |
404 | 404 | "cell_type": "markdown", |
| 405 | + "id": "ee21f51b", |
405 | 406 | "metadata": {}, |
406 | 407 | "source": [ |
407 | 408 | "## Verify Data Load" |
408 | | - ], |
409 | | - "id": "ee21f51b" |
| 409 | + ] |
410 | 410 | }, |
411 | 411 | { |
412 | 412 | "cell_type": "code", |
413 | 413 | "execution_count": 5, |
| 414 | + "id": "8423c269", |
414 | 415 | "metadata": {}, |
415 | 416 | "outputs": [ |
416 | 417 | { |
|
458 | 459 | "%%sql\n", |
459 | 460 | "-- Check the number of reviews loaded\n", |
460 | 461 | "SELECT COUNT(*) as total_reviews FROM reviews;" |
461 | | - ], |
462 | | - "id": "8423c269" |
| 462 | + ] |
463 | 463 | }, |
464 | 464 | { |
465 | 465 | "attachments": {}, |
466 | 466 | "cell_type": "markdown", |
| 467 | + "id": "d6c8e487", |
467 | 468 | "metadata": {}, |
468 | 469 | "source": [ |
469 | 470 | "## Sample Data Preview" |
470 | | - ], |
471 | | - "id": "d6c8e487" |
| 471 | + ] |
472 | 472 | }, |
473 | 473 | { |
474 | 474 | "cell_type": "code", |
475 | 475 | "execution_count": 6, |
| 476 | + "id": "ccefec53", |
476 | 477 | "metadata": {}, |
477 | 478 | "outputs": [ |
478 | 479 | { |
|
602 | 603 | "SELECT Id, ProductId, Score, Summary, LEFT(Text, 100) as Review_Preview\n", |
603 | 604 | "FROM reviews\n", |
604 | 605 | "LIMIT 10;" |
605 | | - ], |
606 | | - "id": "ccefec53" |
| 606 | + ] |
607 | 607 | }, |
608 | 608 | { |
609 | 609 | "attachments": {}, |
610 | 610 | "cell_type": "markdown", |
| 611 | + "id": "0bb3deb8", |
611 | 612 | "metadata": {}, |
612 | 613 | "source": [ |
613 | 614 | "## AI Functions Demonstrations\n", |
614 | 615 | "\n", |
615 | 616 | "Now let's explore the power of SingleStore AI Functions for text analysis and processing.\n", |
616 | 617 | "Ensure that AI Functions are enabled for your organization and that you can list the available AI functions." |
617 | | - ], |
618 | | - "id": "0bb3deb8" |
| 618 | + ] |
619 | 619 | }, |
620 | 620 | { |
621 | 621 | "cell_type": "code", |
622 | 622 | "execution_count": 7, |
| 623 | + "id": "bd293861", |
623 | 624 | "metadata": {}, |
624 | 625 | "outputs": [ |
625 | 626 | { |
|
769 | 770 | "%%sql\n", |
770 | 771 | "USE cluster;\n", |
771 | 772 | "SHOW functions;" |
772 | | - ], |
773 | | - "id": "bd293861" |
| 773 | + ] |
774 | 774 | }, |
775 | 775 | { |
776 | 776 | "cell_type": "code", |
777 | 777 | "execution_count": 8, |
| 778 | + "id": "05d5d27a", |
778 | 779 | "metadata": {}, |
779 | 780 | "outputs": [ |
780 | 781 | { |
|
824 | 825 | "SELECT cluster.AI_COMPLETE(\n", |
825 | 826 | " 'What is SingleStore?'\n", |
826 | 827 | ") AS completion;" |
827 | | - ], |
828 | | - "id": "05d5d27a" |
| 828 | + ] |
829 | 829 | }, |
830 | 830 | { |
831 | 831 | "cell_type": "code", |
832 | 832 | "execution_count": 9, |
| 833 | + "id": "9f842a0d", |
833 | 834 | "metadata": {}, |
834 | 835 | "outputs": [ |
835 | 836 | { |
|
888 | 889 | "%%sql\n", |
889 | 890 | "-- AI_SENTIMENT: Analyze sentiment of customer reviews for a specific product\n", |
890 | 891 | "-- WHERE ProductId = <Your choice>\n", |
891 | | - "-- Remember to specific the datbase name. In this example 'temp' is the Database name\n", |
| 892 | + "-- Remember to specify the database name. In this example 'temp' is the database name\n", |
892 | 893 | "SELECT\n", |
893 | 894 | " Id,\n", |
894 | 895 | " ProductId,\n", |
|
898 | 899 | "FROM temp.reviews\n", |
899 | 900 | "WHERE ProductId = 'B000NY8ODS'\n", |
900 | 901 | "LIMIT 10;" |
901 | | - ], |
902 | | - "id": "9f842a0d" |
| 902 | + ] |
903 | 903 | }, |
904 | 904 | { |
905 | 905 | "cell_type": "code", |
906 | 906 | "execution_count": 10, |
| 907 | + "id": "56ff7a17", |
907 | 908 | "metadata": {}, |
908 | 909 | "outputs": [ |
909 | 910 | { |
|
1015 | 1016 | " review_count,\n", |
1016 | 1017 | " cluster.AI_SENTIMENT(combined_text) as overall_sentiment\n", |
1017 | 1018 | "FROM grouped_reviews;" |
1018 | | - ], |
1019 | | - "id": "56ff7a17" |
| 1019 | + ] |
1020 | 1020 | }, |
1021 | 1021 | { |
1022 | 1022 | "cell_type": "code", |
1023 | 1023 | "execution_count": 11, |
| 1024 | + "id": "b9786b66", |
1024 | 1025 | "metadata": {}, |
1025 | 1026 | "outputs": [ |
1026 | 1027 | { |
|
1122 | 1123 | " 15\n", |
1123 | 1124 | " ) AS summary\n", |
1124 | 1125 | "FROM long_reviews;" |
1125 | | - ], |
1126 | | - "id": "b9786b66" |
| 1126 | + ] |
1127 | 1127 | }, |
1128 | 1128 | { |
1129 | 1129 | "cell_type": "code", |
1130 | 1130 | "execution_count": 12, |
| 1131 | + "id": "4febc8e0", |
1131 | 1132 | "metadata": {}, |
1132 | 1133 | "outputs": [ |
1133 | 1134 | { |
|
1263 | 1264 | " '[quality, price, shipping, taste]'\n", |
1264 | 1265 | " ) AS classification\n", |
1265 | 1266 | "FROM negative_reviews;" |
1266 | | - ], |
1267 | | - "id": "4febc8e0" |
| 1267 | + ] |
1268 | 1268 | }, |
1269 | 1269 | { |
1270 | 1270 | "cell_type": "code", |
1271 | 1271 | "execution_count": 13, |
| 1272 | + "id": "40f4cd14", |
1272 | 1273 | "metadata": {}, |
1273 | 1274 | "outputs": [ |
1274 | 1275 | { |
|
1431 | 1432 | " 'Does this customer indicate they will buy this product again? Answer with yes, no, or unclear only'\n", |
1432 | 1433 | " ) AS repeat_purchase_intent\n", |
1433 | 1434 | "FROM positive_reviews;" |
1434 | | - ], |
1435 | | - "id": "40f4cd14" |
| 1435 | + ] |
1436 | 1436 | }, |
1437 | 1437 | { |
1438 | 1438 | "cell_type": "code", |
1439 | 1439 | "execution_count": 14, |
| 1440 | + "id": "a09f2d5b", |
1440 | 1441 | "metadata": {}, |
1441 | 1442 | "outputs": [ |
1442 | 1443 | { |
|
1585 | 1586 | " 'Is this customer at high risk of not purchasing again? Answer with high, medium, or low only'\n", |
1586 | 1587 | " ) AS churn_risk\n", |
1587 | 1588 | "FROM low_rated_reviews;" |
1588 | | - ], |
1589 | | - "id": "a09f2d5b" |
| 1589 | + ] |
1590 | 1590 | }, |
1591 | 1591 | { |
1592 | 1592 | "cell_type": "code", |
1593 | 1593 | "execution_count": 15, |
| 1594 | + "id": "3d78f449", |
1594 | 1595 | "metadata": {}, |
1595 | 1596 | "outputs": [ |
1596 | 1597 | { |
|
1685 | 1686 | " 'spanish'\n", |
1686 | 1687 | " ) AS spanish_translation\n", |
1687 | 1688 | "FROM translatable_reviews;" |
1688 | | - ], |
1689 | | - "id": "3d78f449" |
| 1689 | + ] |
1690 | 1690 | }, |
1691 | 1691 | { |
1692 | 1692 | "cell_type": "code", |
1693 | 1693 | "execution_count": 16, |
| 1694 | + "id": "082dc59a", |
1694 | 1695 | "metadata": {}, |
1695 | 1696 | "outputs": [ |
1696 | 1697 | { |
|
1860 | 1861 | " cluster.AI_CLASSIFY(Text, '[quality, value, taste, packaging]') as category,\n", |
1861 | 1862 | " cluster.AI_SUMMARIZE(Text, 'aifunctions_chat_default', 10) as brief_summary\n", |
1862 | 1863 | "FROM product_reviews;" |
1863 | | - ], |
1864 | | - "id": "082dc59a" |
| 1864 | + ] |
1865 | 1865 | }, |
1866 | 1866 | { |
1867 | 1867 | "cell_type": "markdown", |
|
0 commit comments