Skip to content

Commit be1e6e6

Browse files
authored
Merge pull request #63 from marklogic/feature/release-1.1-examples
Added Jupyter notebook for release 1.1.0
2 parents 7776f1a + f8796c8 commit be1e6e6

File tree

3 files changed

+1163
-271
lines changed

3 files changed

+1163
-271
lines changed

examples/release-1.1-examples.ipynb

+217
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "e57c4da7",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# Uncomment and run the following line to install the MarkLogic Python client.\n",
11+
"# %pip install marklogic_python_client\n",
12+
"\n",
13+
"# Create an instance of the MarkLogic Python client, pointing at the out-of-the-box Documents database.\n",
14+
"\n",
15+
"from marklogic import Client\n",
16+
"client = Client(\"http://localhost:8000\", digest=(\"python-user\", \"pyth0n\"))"
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": null,
22+
"id": "3872a3ae",
23+
"metadata": {},
24+
"outputs": [],
25+
"source": [
26+
"# Insert a MarkLogic TDE (Template Driven Extraction) template into the Schemas database; it defines a view that projects rows from documents in the \"employee\" collection.\n",
27+
"\n",
28+
"from marklogic.documents import Document\n",
29+
"tde_view = {\n",
30+
" \"template\": {\n",
31+
" \"context\": \"/\",\n",
32+
" \"collections\": [\"employee\"],\n",
33+
" \"rows\": [{\n",
34+
" \"schemaName\": \"example\",\n",
35+
" \"viewName\": \"employee\",\n",
36+
" \"columns\": [\n",
37+
" {\"name\": \"lastName\", \"scalarType\": \"string\", \"val\": \"Surname\"},\n",
38+
" {\"name\": \"firstName\", \"scalarType\": \"string\", \"val\": \"GivenName\"},\n",
39+
" {\"name\": \"state\", \"scalarType\": \"string\", \"val\": \"State\"},\n",
40+
" {\"name\": \"department\", \"scalarType\": \"string\", \"val\": \"Department\"},\n",
41+
" {\"name\": \"salary\", \"scalarType\": \"int\", \"val\": \"BaseSalary\"}\n",
42+
" ]\n",
43+
" }]\n",
44+
" }\n",
45+
"}\n",
46+
"\n",
47+
"client.documents.write(\n",
48+
" Document(\n",
49+
" \"/tde/employees.json\", tde_view, \n",
50+
" permissions={\"rest-reader\": [\"read\", \"update\"]}, \n",
51+
" collections=[\"http://marklogic.com/xdmp/tde\"]\n",
52+
" ),\n",
53+
" params={\"database\": \"Schemas\"}\n",
54+
")"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"id": "c72a2506",
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"# Load 500 JSON documents into the \"employee\" collection.\n",
65+
"\n",
66+
"from marklogic.documents import Document, DefaultMetadata\n",
67+
"import requests\n",
68+
"import json\n",
69+
"r = requests.get('https://raw.githubusercontent.com/marklogic/marklogic-spark-connector/master/src/test/resources/500-employees.json')\n",
70+
"\n",
71+
"docs = [\n",
72+
" DefaultMetadata(permissions={\"rest-reader\": [\"read\", \"update\"]}, collections=[\"employee\"])\n",
73+
"]\n",
74+
"\n",
75+
"for employee in json.loads(r.text):\n",
76+
" docs.append(Document(employee['uri'], json.dumps(employee['value'])))\n",
77+
"\n",
78+
"client.documents.write(docs)"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": null,
84+
"id": "ef22f938",
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"# Query the view using MarkLogic's Optic query language.\n",
89+
"\n",
90+
"client.rows.query(\"op.fromView('example', 'employee', '').limit(3)\")[\"rows\"]"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"id": "ae929676",
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# The view can also be queried with SQL.\n",
101+
"\n",
102+
"client.rows.query(sql=\"select * from example.employee order by lastName limit 3\")[\"rows\"]"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"id": "1905651c",
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"# The view can also be queried with GraphQL.\n",
113+
"\n",
114+
"client.rows.query(graphql=\"query myQuery { example_employee { lastName firstName } }\")"
115+
]
116+
},
117+
{
118+
"cell_type": "code",
119+
"execution_count": null,
120+
"id": "3fb93d17",
121+
"metadata": {},
122+
"outputs": [],
123+
"source": [
124+
"# Rows can be returned as CSV for integration with pandas.\n",
125+
"\n",
126+
"import io\n",
127+
"import pandas\n",
128+
"\n",
129+
"csv_data = client.rows.query(\"op.fromView('example', 'employee', '')\", format=\"csv\")\n",
130+
"df = pandas.read_csv(io.StringIO(csv_data))\n",
131+
"df\n"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": null,
137+
"id": "711caba0",
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"# Enable inline rendering of matplotlib plots in the notebook (matplotlib must already be installed).\n",
142+
"\n",
143+
"%matplotlib inline"
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": null,
149+
"id": "43532cff",
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"# Simple bar chart showing the number of employees in each department.\n",
154+
"\n",
155+
"df['department'].value_counts().plot(kind='bar')"
156+
]
157+
},
158+
{
159+
"cell_type": "code",
160+
"execution_count": null,
161+
"id": "473000f1",
162+
"metadata": {},
163+
"outputs": [],
164+
"source": [
165+
"# The MarkLogic Spark connector can also be used from Python.\n",
166+
"# First create a Spark session that has access to the MarkLogic Spark connector jar file; change the jar path below to match its location on your machine.\n",
167+
"\n",
168+
"import os\n",
169+
"os.environ['PYSPARK_SUBMIT_ARGS'] = '--jars \"/Users/rudin/marklogic-spark-connector-2.2.0.jar\" pyspark-shell'\n",
170+
"\n",
171+
"%pip install pyspark\n",
172+
"from pyspark.sql import SparkSession\n",
173+
"spark = SparkSession.builder.master(\"local[*]\").appName('My Notebook').getOrCreate()\n",
174+
"spark.sparkContext.setLogLevel(\"WARN\")\n",
175+
"spark"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"id": "dfdd727d",
182+
"metadata": {},
183+
"outputs": [],
184+
"source": [
185+
"# Create a Spark DataFrame via a MarkLogic Optic query.\n",
186+
"\n",
187+
"df = spark.read.format(\"marklogic\") \\\n",
188+
" .option(\"spark.marklogic.client.uri\", \"python-user:pyth0n@localhost:8000\") \\\n",
189+
" .option(\"spark.marklogic.read.opticQuery\", \"op.fromView('example', 'employee', '')\") \\\n",
190+
" .load()\n",
191+
"\n",
192+
"df.show()"
193+
]
194+
}
195+
],
196+
"metadata": {
197+
"kernelspec": {
198+
"display_name": "Python 3 (ipykernel)",
199+
"language": "python",
200+
"name": "python3"
201+
},
202+
"language_info": {
203+
"codemirror_mode": {
204+
"name": "ipython",
205+
"version": 3
206+
},
207+
"file_extension": ".py",
208+
"mimetype": "text/x-python",
209+
"name": "python",
210+
"nbconvert_exporter": "python",
211+
"pygments_lexer": "ipython3",
212+
"version": "3.11.5"
213+
}
214+
},
215+
"nbformat": 4,
216+
"nbformat_minor": 5
217+
}

0 commit comments

Comments
 (0)