Pacific-Design.com

    
Home Index

1. Apache Spark

2. spark-shell

Apache Spark / spark-shell /

Dynamic DataFrame

/* spark-shell -i create-dynamic-df.scala
 *
 * Builds a DataFrame dynamically from comma-separated header and data
 * strings. Relies on the spark-shell-provided `sc` (SparkContext) and
 * `sqlContext` (SQLContext) bindings.
 */

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

val headerDescs = "name, age, location"
val types = "StringType, IntegerType, StringType"
val data = "Peter, 32, Los Angeles"

// Hand-written typed schema (age as IntegerType).
// NOTE: to use this schema with createDataFrame below, the age cell must be
// converted to Int first (e.g. x(1).toInt) — Row field types are checked
// against the schema at runtime, and split() only produces Strings.
val schema = StructType(Array(
                                  StructField("name", StringType, true)
                                , StructField("age", IntegerType, true)
                                , StructField("location", StringType, true)
                             ))

// Trim each token: "Peter, 32, Los Angeles".split(",") yields " 32" and
// " Los Angeles" — without trim the leading spaces leak into the cell values.
val dataRDD = sc.parallelize(Seq(data.split(",").map(_.trim)))
                .map(x => Row(x(0), x(1), x(2)))

// All-string schema derived dynamically from the header line. Trim each
// field name so the columns are "age"/"location", not " age"/" location".
val headerSchema = StructType(headerDescs.split(",")
                    .map(fieldName => StructField(fieldName.trim, StringType, true)))

val headerDf = sqlContext.createDataFrame(dataRDD, headerSchema)
// Typed alternative — requires converting the age cell to Int (see NOTE above):
// val headerDf = sqlContext.createDataFrame(dataRDD, schema)

headerDf.printSchema
headerDf.show()

sys.exit