forked from miguno/kafka-storm-starter
Add Spark Streaming example that reads from Kafka and writes to Kafka
Showing 9 changed files with 366 additions and 4 deletions.
src/main/scala/com/miguno/kafkastorm/kafka/PooledKafkaProducerAppFactory.scala (32 additions, 0 deletions)
@@ -0,0 +1,32 @@
package com.miguno.kafkastorm.kafka

import org.apache.commons.pool2.impl.DefaultPooledObject
import org.apache.commons.pool2.{PooledObject, BasePooledObjectFactory}

/**
 * An object factory for Kafka producer apps, which is used to create a pool of such producers (think: DB connection
 * pool).
 *
 * We use this class in our Spark Streaming examples when writing data to Kafka. A pool is typically the preferred
 * pattern to minimize TCP connection overhead when talking to Kafka from a Spark cluster. Another reason is to
 * reduce the number of TCP connections being established with the cluster in order not to strain the cluster.
 *
 * See the Spark Streaming Programming Guide, section "Design Patterns for using foreachRDD" in
 * [[http://spark.apache.org/docs/1.1.0/streaming-programming-guide.html#output-operations-on-dstreams Output Operations on DStreams]]
 */
// TODO: Time out / shutdown producers if they haven't been used in a while.
class PooledKafkaProducerAppFactory(val factory: KafkaProducerAppFactory)
  extends BasePooledObjectFactory[KafkaProducerApp] with Serializable {

  override def create(): KafkaProducerApp = factory.newInstance()

  override def wrap(obj: KafkaProducerApp): PooledObject[KafkaProducerApp] = new DefaultPooledObject(obj)

  // From the Commons Pool docs: "Invoked on every instance when it is being "dropped" from the pool. There is no
  // guarantee that the instance being destroyed will be considered active, passive or in a generally consistent state."
  override def destroyObject(p: PooledObject[KafkaProducerApp]): Unit = {
    p.getObject.shutdown()
    super.destroyObject(p)
  }

}
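To illustrate the pattern the scaladoc above describes, here is a hedged usage sketch: it wires the factory into a Commons Pool 2 GenericObjectPool and borrows/returns a producer per partition inside foreachRDD. The names `producerFactory` and `dstream`, and the `send(...)` call, are illustrative assumptions for this sketch, not APIs from this commit.

import org.apache.commons.pool2.impl.GenericObjectPool

// Lazily create one pool per executor JVM, so the (non-serializable) pool is not
// shipped from the driver inside the closure.
object ProducerPool {
  // Assumption: `producerFactory` is a KafkaProducerAppFactory configured elsewhere.
  lazy val get = new GenericObjectPool[KafkaProducerApp](new PooledKafkaProducerAppFactory(producerFactory))
}

dstream.foreachRDD { rdd =>
  rdd.foreachPartition { partition =>
    val producer = ProducerPool.get.borrowObject() // reuse a pooled producer for the whole partition
    try partition.foreach(bytes => producer.send(bytes)) // assumed send-style API
    finally ProducerPool.get.returnObject(producer) // return for reuse instead of closing the connection
  }
}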
...main/scala/com/miguno/kafkastorm/spark/serialization/KafkaSparkStreamingRegistrator.scala (27 additions, 0 deletions)
@@ -0,0 +1,27 @@
package com.miguno.kafkastorm.spark.serialization

import com.esotericsoftware.kryo.Kryo
import com.miguno.avro.Tweet
import com.twitter.chill.avro.AvroSerializer
import org.apache.avro.generic.GenericRecord
import org.apache.spark.serializer.KryoRegistrator

/**
 * We register custom classes with Kryo; see the explanations in the
 * [[http://spark.apache.org/docs/1.1.0/tuning.html#data-serialization Tuning Spark]] guide.
 *
 * "If you don't register your custom classes, Kryo will still work, but it will have to store the full class name with
 * each object, which is wasteful."
 */
class KafkaSparkStreamingRegistrator extends KryoRegistrator {

  override def registerClasses(kryo: Kryo) {
    // Registers a serializer for any generic Avro records. The kafka-storm-starter project does not yet include
    // examples that work on generic Avro records, but we keep this registration for the convenience of our readers.
    kryo.register(classOf[GenericRecord], AvroSerializer.GenericRecordSerializer[GenericRecord]())
    // Registers a serializer specifically for the, well, specific Avro record `Tweet`.
    kryo.register(classOf[Tweet], AvroSerializer.SpecificRecordSerializer[Tweet])
    ()
  }

}
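For this registrator to take effect, Spark must be configured to use Kryo and pointed at the registrator class. A minimal configuration sketch follows; the two setting keys are standard Spark configuration, and the app name is illustrative.

import org.apache.spark.SparkConf

val sparkConf = new SparkConf()
  .setAppName("kafka-spark-streaming") // illustrative app name
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator", "com.miguno.kafkastorm.spark.serialization.KafkaSparkStreamingRegistrator")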
src/test/scala/com/miguno/kafkastorm/integration/IntegrationSuite.scala (3 additions, 1 deletion)
@@ -1,9 +1,11 @@
 package com.miguno.kafkastorm.integration

+import com.miguno.kafkastorm.spark.KafkaSparkStreamingSpec
 import org.scalatest.Stepwise

 class IntegrationSuite extends Stepwise(
   new KafkaSpec,
   new StormSpec,
-  new KafkaStormSpec
+  new KafkaStormSpec,
+  new KafkaSparkStreamingSpec
 )
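Note: Stepwise runs its nested suites sequentially, so the new KafkaSparkStreamingSpec executes only after KafkaSpec, StormSpec, and KafkaStormSpec have completed, even when ScalaTest is otherwise running suites in parallel.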