Pacific-Design.com

    
Home Index

1. Spark

2. UDF

Spark / UDF /

Spark User-Defined Function with Two Parameters

/**
 * Spark UDF that summarises the age of a record as a single string.
 *
 * The first 10 characters of `col1` are parsed as a `yyyy-MM-dd` date and
 * compared with today's date (system default time zone). The result has the
 * form `"<label> days<N> prank<R> <col2>"` where:
 *
 *  - `<label>` is `"over3years"` when the date is more than 3 * 365 days old,
 *    `"up3year"` otherwise, and `"no3years"` when the date is null or cannot
 *    be parsed;
 *  - `<N>` is the age in whole days (0 on failure);
 *  - `<R>` is `120 - 10 * log2(days)` truncated to an Int and clamped to
 *    the range 0..120 (0 on failure);
 *  - `<col2>` is appended unchanged.
 *
 * NOTE(review): `log2` is defined outside this snippet; it is presumed to be
 * a base-2 logarithm taking a Double — confirm against its definition.
 */
val func2 = udf((col1 : String, col2 : String) => {

  val maxRank       = 120
  val threeYearDays = 365 * 3

  val (label, days, rank) =
    try {
      val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
      // Only the leading "yyyy-MM-dd" part matters; any time suffix is dropped.
      val beginDate = LocalDate.parse(col1.take(10), formatter)
      val ageInDays = LocalDate.now().toEpochDay - beginDate.toEpochDay

      // Older than three years => "over3years"; anything newer => "up3year".
      val ageLabel = if (ageInDays > threeYearDays) "over3years" else "up3year"

      // log2 is undefined for ages below one day (today or a future date):
      // log2(0) is -Infinity and would overflow the Int arithmetic below, so
      // such rows are given the maximum rank directly.
      val rawRank =
        if (ageInDays < 1) maxRank
        else maxRank - (log2(ageInDays.toDouble) * 10).toInt

      (ageLabel, ageInDays, math.max(0, math.min(maxRank, rawRank)))
    } catch {
      // A null or malformed date falls back to the "unknown" result; fatal
      // JVM errors (OutOfMemoryError, InterruptedException, ...) propagate.
      case scala.util.control.NonFatal(_) => ("no3years", 0L, 0)
    }

  label + " days" + days.toInt.toString + " prank" + rank.toString + " " + col2

})

// Apply func2 to the "published" and "score" columns of df1 and store the
// resulting string in a column named "hashtags".
val video = {
  val hashtagsColumn = func2(col("published"), col("score"))
  df1.withColumn("hashtags", hashtagsColumn)
}