Pacific-Design.com

    
Home Index

1. Spark

2. SparkSession

Spark / SparkSession /

Configure a SparkSession with spark.scheduler.listenerbus.eventqueue.size = 10,000 (default = 1,000)

import org.apache.spark.sql.SparkSession
import org.apache.spark.{SparkConf, SparkContext}

  /**
    * Builds a Hive-enabled [[SparkSession]] (via `getOrCreate()`) from a
    * freshly assembled `SparkConf`.
    *
    * The master URL is read from the `spark.master` key of `config`, which is
    * defined outside this method (presumably a Typesafe Config instance in the
    * enclosing scope -- confirm). Kryo serialization is enabled, several
    * S3-related properties are set, and the listener-bus event queue size is
    * set to 10000.
    *
    * When both `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are present in
    * the process environment, they are copied into the S3A credential keys.
    * If either one is missing, neither key is set.
    *
    * @param appName application name passed to `SparkConf.setAppName`
    * @return the session returned by `SparkSession.builder ... getOrCreate()`
    */
  def setupSparkSession(appName: String): SparkSession = {

    // NOTE(review): the Hadoop-level keys below (mapred.*, fs.s3.*, fs.s3a.*)
    // are set without the `spark.hadoop.` prefix -- confirm they reach the
    // Hadoop configuration on every code path you rely on.
    // NOTE(review): newer Spark releases appear to name the listener-bus
    // setting `spark.scheduler.listenerbus.eventqueue.capacity` -- confirm the
    // key matches the Spark version in use.
    val sparkConf = new SparkConf()
      .setMaster(config.getString("spark.master"))
      .setAppName(appName)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("mapred.job.natives3filesystem.globstatus.use", "true")
      .set("fs.s3.inputpathprocessor", "true")
      .set("fs.s3.impl", "org.apache.hadoop.fs.s3native.NativeS3FileSystem")
      .set("spark.scheduler.listenerbus.eventqueue.size", "10000")

    // Read each credential exactly once; Option(...) maps a missing (null)
    // environment variable to None, so the body runs only when both exist.
    for {
      accessKey <- Option(System.getenv("AWS_ACCESS_KEY_ID"))
      secretKey <- Option(System.getenv("AWS_SECRET_ACCESS_KEY"))
    } {
      sparkConf.set("fs.s3a.access.key", accessKey)
      sparkConf.set("fs.s3a.secret.key", secretKey)
    }

    SparkSession.builder
      .config(sparkConf)
      .enableHiveSupport()
      .getOrCreate()
  }