Pacific-Design.com

    
Home Index

1. Apache Spark

2. UDF

Apache Spark / UDF /

Spark User-Defined Function (UDF) with two parameters


/**
 * Spark UDF that combines a publication date and a second column value into one
 * descriptive tag string.
 *
 * Output format: `"<label> days<ageInDays> prank<rank> <col2>"`, where
 *  - `label` is "over3years" when the date is more than 3 * 365 days before today,
 *    "up3year" otherwise, and "no3years" when the date cannot be parsed;
 *  - `ageInDays` is the whole number of days between the parsed date and today
 *    (0 when the date cannot be parsed);
 *  - `rank` is `120 - (log2(ageInDays) * 10).toInt`, clamped to the range 0..120
 *    (0 when the date cannot be parsed).
 *
 * @param col1 date text whose first 10 characters are expected to be `yyyy-MM-dd`;
 *             `null` or malformed input produces the "no3years" fallback
 * @param col2 value appended verbatim at the end of the result
 * @return the tag string described above
 */
val func2 = udf((col1: String, col2: String) => {
  import scala.util.control.NonFatal

  val threeYearsInDays = 365 * 3
  val maxRank          = 120

  val (label, ageInDays, rank) =
    try {
      val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
      // Only the leading date part is used; any trailing time component is dropped.
      val published = LocalDate.parse(col1.take(10), formatter)
      // LocalDate.now() uses the JVM default time zone, the same zone the previous
      // SimpleDateFormat/Date round trip relied on.
      val days      = LocalDate.now().toEpochDay - published.toEpochDay

      // The two labels used to be swapped: dates older than three years were tagged
      // "up3year" and newer ones "over3years".
      val ageLabel = if (days > threeYearsInDays) "over3years" else "up3year"

      // A logarithm is undefined for ages below one day. Previously an age of 0
      // overflowed the Int arithmetic and ended up with rank 0; such rows now get
      // the maximum rank, like one-day-old rows do.
      // NOTE(review): log2 is defined outside this snippet; assumed to be a
      // Double => Double base-2 logarithm.
      val score =
        if (days < 1) maxRank
        else math.max(0, math.min(maxRank, maxRank - (log2(days.toDouble) * 10).toInt))

      (ageLabel, days, score)
    } catch {
      // NonFatal lets OutOfMemoryError, InterruptedException, etc. propagate while
      // still turning null or unparseable dates into the fallback tag.
      case NonFatal(_) => ("no3years", 0L, 0)
    }

  s"$label days$ageInDays prank$rank $col2"
})

// Adds a "hashtags" column to df1 by applying func2 to the "published" and
// "score" columns of each row.
val video = {
  val hashtagColumn = func2(col("published"), col("score"))
  df1.withColumn("hashtags", hashtagColumn)
}