Skip to content

Commit f09df20

Browse files
authored
Add files via upload
1 parent c7982ef commit f09df20

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

MungeolHeo.ipynb

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"source": [
1111
"import random\n",
1212
"\n",
13+
"# Prepare data\n",
1314
"data1 = [(1,'a'),(3,'a'),(5,'c')]\n",
1415
"df1 = sc.parallelize(data1) \n",
1516
"\n",
@@ -26,11 +27,12 @@
2627
},
2728
"outputs": [],
2829
"source": [
30+
"\n",
2931
"def func1(vals):\n",
3032
" output =[]\n",
31-
" uniqueId = random.random() # random ID for each list\n",
33+
" uniqueId = random.random() # create token\n",
3234
" for value in vals:\n",
33-
" output.append((value,uniqueId))\n",
35+
" output.append((value,uniqueId)) # assign the same token to every element of the same list\n",
3436
" \n",
3537
" return ([x for x in output])\n",
3638
" \n",
@@ -151,6 +153,8 @@
151153
" v.append(entry[1])\n",
152154
" \n",
153155
" return (k,set(v))\n",
156+
"\n",
157+
"# Final result\n",
154158
"lines.mapValues(unzip).values().collect()\n",
155159
" "
156160
]

0 commit comments

Comments
 (0)