Skip to content

Commit

Permalink
Merge pull request apache#14 from bzhang02/avro-example
Browse files Browse the repository at this point in the history
[YSPARK-1522] Add an Avro example
  • Loading branch information
Sanket Chintapalli authored and GitHub Enterprise committed Apr 1, 2020
2 parents 543c8ba + fa473d5 commit 00cf4bc
Show file tree
Hide file tree
Showing 11 changed files with 48 additions and 0 deletions.
Binary file added src/main/resources/avro/randomBoolean.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomBytes.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomDouble.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomFloat.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomInt.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomLong.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomLongMap.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomString.avro
Binary file not shown.
Binary file added src/main/resources/avro/randomStringArray.avro
Binary file not shown.
Binary file added src/main/resources/avro/users.avro
Binary file not shown.
48 changes: 48 additions & 0 deletions src/main/scala/com/yahoo/spark/starter/SparkAvroExample.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package com.yahoo.spark.starter

import org.apache.spark.sql.{SparkSession, DataFrame}
import org.apache.spark.sql.avro._

object SparkAvroExample {

  /** Entry point: demonstrates reading and writing Avro files with Spark SQL.
    *
    * Reads `users.avro` from `avro_test/resources/`, projects two columns,
    * writes the result back out as Avro, then round-trips a set of
    * single-column Avro files covering the primitive Avro types plus a map
    * and an array, printing the first value of each.
    *
    * NOTE(review): input/output paths are hard-coded relative paths — they
    * resolve against the executor's working directory (or the default
    * filesystem on a cluster); confirm they exist before running.
    *
    * @param args unused command-line arguments
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("Spark Avro Example")
      .getOrCreate()

    // Ensure the session is released even if a read/write below fails.
    try {
      val inputDir = "avro_test/resources/"
      val outputDir = "avro_test/output/"

      // Read, query and write for a normal (record-based) data frame.
      val usersDF = spark.read.format("avro").load(inputDir + "users.avro")
      usersDF.show()

      usersDF.select("name", "favorite_color").write.format("avro").save(outputDir + "namesAndFavColors.avro")

      // Read the projection back to verify the written output.
      val namesAndFavColorsDF = spark.read.format("avro").load(outputDir + "namesAndFavColors.avro")
      namesAndFavColorsDF.show()

      /** Round-trips a single-column Avro file: load it, show it, write it to
        * the output dir, re-read the written copy and return that DataFrame.
        */
      def readAndWritePrimitive(filename: String): DataFrame = {
        val df = spark.read.format("avro").load(inputDir + filename)
        df.show()
        df.write.format("avro").save(outputDir + filename)

        val df2 = spark.read.format("avro").load(outputDir + filename)
        df2.show()

        df2
      }

      // Exercise each primitive type plus map and array columns; print the
      // first value of each round-tripped file.
      println(readAndWritePrimitive("randomBoolean.avro").head().getBoolean(0))
      println(readAndWritePrimitive("randomBytes.avro").head().getAs[Array[Byte]](0))
      println(readAndWritePrimitive("randomDouble.avro").head().getDouble(0))
      println(readAndWritePrimitive("randomFloat.avro").head().getFloat(0))
      println(readAndWritePrimitive("randomInt.avro").head().getInt(0))
      println(readAndWritePrimitive("randomLong.avro").head().getLong(0))
      println(readAndWritePrimitive("randomString.avro").head().getString(0))
      println(readAndWritePrimitive("randomLongMap.avro").head().getAs[Map[String, Long]](0))
      println(readAndWritePrimitive("randomStringArray.avro").head().getAs[Array[String]](0))
    } finally {
      spark.stop()
    }
  }
}

0 comments on commit 00cf4bc

Please sign in to comment.