Skip to content

Commit 91246d9

Browse files
committed
More attempts at writing... keyword: attempt
1 parent ac629fc commit 91246d9

File tree

2 files changed

+167
-0
lines changed

2 files changed

+167
-0
lines changed

schema-evolution/BLOG_DRAFT.md

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Cassandra Schema Evolution with Spark
2+
3+
* Change Wording - Make it BETTER *
4+
Changing the frequency with which you store data may change your data model. Cassandra users may find that they need to split up partitions given this new need. Since CQL does not currently support a bulk operation like ``SELECT INTO`` (like in [Apache Hive]()), alternatives need to be used.
5+
6+
Integration with [Apache Spark]() offers an option for a variety of bulk operations that are short to write and provide solid throughput.
7+
8+
## Problem
9+
10+
Our application is taking in sensor data and storing it in Cassandra. The measurement frequency of the individual sensors increased (from one reading every 60 seconds to one every 5 seconds). This increases the number of values per partition and changes the performance of our data model.
11+
12+
We want to reduce the values per partition. We can do this by changing the partition key (this can split up values per partition to a manageable amount).
13+
14+
15+
## Solution
16+
17+
### Set the table (w/ current table schema)
18+
### Discuss the code
19+
### Show Schema change
20+
### Outline the transform
21+
22+
## Putting It All Together
23+
24+
## Improvements
25+
26+
Read & write with `ConsistencyLevel.LOCAL_QUORUM`
27+
28+
Tweak partitions ... tweak values per partition

schema-evolution/FAIL.md

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
Welcome to
2+
____ __
3+
/ __/__ ___ _____/ /__
4+
_\ \/ _ \/ _ `/ __/ '_/
5+
/___/ .__/\_,_/_/ /_/\_\ version 1.1.0
6+
/_/
7+
8+
Using Scala version 2.10.4 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_67)
9+
Type in expressions to have them evaluated.
10+
Type :help for more information.
11+
Creating SparkContext...
12+
Created spark context..
13+
Spark context available as sc.
14+
Type in expressions to have them evaluated.
15+
Type :help for more information.
16+
17+
scala> :paste
18+
// Entering paste mode (ctrl-D to finish)
19+
20+
import org.apache.spark._
21+
import org.apache.spark.SparkContext._
22+
import java.util.UUID
23+
import java.util.Date
24+
import java.util.Calendar
25+
26+
//import javax.xml.bind.DatatypeConverter
27+
import com.datastax.driver.core.utils.UUIDs
28+
29+
30+
import com.datastax.spark.connector._
31+
32+
33+
// Exiting paste mode, now interpreting.
34+
35+
import org.apache.spark._
36+
import org.apache.spark.SparkContext._
37+
import java.util.UUID
38+
import java.util.Date
39+
import java.util.Calendar
40+
import com.datastax.driver.core.utils.UUIDs
41+
import com.datastax.spark.connector._
42+
43+
scala> :paste
44+
// Entering paste mode (ctrl-D to finish)
45+
46+
case class ReadingsByMonth(arrayId: UUID, arrayName: String, year: Int,
47+
month: Int, sensor: String, measuredAt: Date,
48+
value: Float, unit: String, location: String)
49+
50+
case class ReadingsByMonthHour(arrayId: UUID, arrayName: String,
51+
monthHour: Date, sensor: String,
52+
measuredAt: Date, value: Float,
53+
unit: String, location: String)
54+
55+
def convertTo(in: ReadingsByMonth): ReadingsByMonthHour = {
56+
var cal = Calendar.getInstance()
57+
cal.setTime(in.measuredAt)
58+
cal.set(Calendar.SECOND, 0)
59+
cal.set(Calendar.MINUTE, 0)
60+
//
61+
return ReadingsByMonthHour(in.arrayId, in.arrayName, cal.getTime,
62+
in.sensor, in.measuredAt, in.value, in.unit,
63+
in.location)
64+
}
65+
66+
// Exiting paste mode, now interpreting.
67+
68+
defined class ReadingsByMonth
69+
defined class ReadingsByMonthHour
70+
convertTo: (in: ReadingsByMonth)ReadingsByMonthHour
71+
72+
scala> val tableData = sc.cassandraTable[ReadingsByMonth]("ts_data", "readings_by_month")
73+
tableData: com.datastax.spark.connector.rdd.CassandraRDD[ReadingsByMonth] = CassandraRDD[0] at RDD at CassandraRDD.scala:47
74+
75+
76+
scala> val reading = ReadingsByMonth(UUID.fromString("867981c2-17f9-44a9-9cc3-6d70d2fe052f"), "R5-650", 2014, 11, "Freezer", new Date(), -1.7f, "C", "32, -110")
77+
reading: ReadingsByMonth = ReadingsByMonth(867981c2-17f9-44a9-9cc3-6d70d2fe052f,R5-650,2014,11,Freezer,Tue Nov 04 10:55:21 MST 2014,-1.7,C,32, -110)
78+
79+
scala> reading.getClass
80+
res2: Class[_ <: ReadingsByMonth] = class $iwC$$iwC$ReadingsByMonth
81+
82+
scala> val one = tableData.take(1)
83+
one: Array[ReadingsByMonth] = Array(ReadingsByMonth(867981c2-17f9-44a9-9cc3-6d70d2fe052f,R65-011,2014,10,greenhouse,Fri Oct 31 05:40:09 MST 2014,89.3,F,32.221667, -110.926389))
84+
85+
scala> one.getClass
86+
res3: Class[_ <: Array[ReadingsByMonth]] = class [L$iwC$$iwC$ReadingsByMonth;
87+
88+
scala> one(0).getClass
89+
res4: Class[_ <: ReadingsByMonth] = class $iwC$$iwC$ReadingsByMonth
90+
91+
scala> val n1 = convertTo(reading)
92+
<console>:84: error: type mismatch;
93+
found : ReadingsByMonth
94+
required: ReadingsByMonth
95+
val n1 = convertTo(reading)
96+
^
97+
98+
scala> val n2 = covertTo(one(0))
99+
<console>:82: error: not found: value covertTo
100+
val n2 = covertTo(one(0))
101+
^
102+
103+
scala> val n2 = convertTo(one(0))
104+
<console>:86: error: type mismatch;
105+
found : ReadingsByMonth
106+
required: ReadingsByMonth
107+
val n2 = convertTo(one(0))
108+
^
109+
110+
scala> one(0).getClass == reading.getClass
111+
res5: Boolean = true
112+
113+
scala> convertTo(readings).tpe
114+
<console>:83: error: not found: value readings
115+
convertTo(readings).tpe
116+
^
117+
118+
scala> convertTo(readings).type
119+
<console>:1: error: identifier expected but 'type' found.
120+
convertTo(readings).type
121+
^
122+
123+
scala> import scala.reflect.runtime.{universe => ru}
124+
import scala.reflect.runtime.{universe=>ru}
125+
126+
scala> convertTo(readings).tpe
127+
<console>:84: error: not found: value readings
128+
convertTo(readings).tpe
129+
^
130+
131+
scala> convertTo _
132+
res8: ReadingsByMonth => ReadingsByMonthHour = <function1>
133+
134+
scala> one(0).getClass.isAssignableFrom(reading.getClass)
135+
res9: Boolean = true
136+
137+
scala> reading.getClass.isAssignableFrom(one(0).getClass)
138+
res10: Boolean = true
139+

0 commit comments

Comments
 (0)