// Dataset API for Scala: equi-join t1 with t2 on the shared "id" column
// (the default join type for the usingColumn variant is inner),
// then keep only the rows with an even id.
spark.table("t1")
.join(spark.table("t2"), "id")
.where($"id" % 2 === 0)
// SQL: the equivalent query written directly in Spark SQL —
// both forms compile to the same logical plan.
sql("""SELECT t1.* FROM t1
INNER JOIN t2 ON t1.id = t2.id
WHERE t1.id % 2 = 0""")
What a nice method signature in Scala, isn't it?
def eval(input: InternalRow = null): Any
import org.apache.spark.sql.SparkSession
// Build (or reuse) a SparkSession with custom extension rules plugged in.
// Extensions must be registered on the builder BEFORE getOrCreate.
val spark = SparkSession
.builder
.withExtensions { extensions =>
// Inject an extra analyzer (resolution) rule; the callback receives the
// active SparkSession and returns the rule instance (elided here).
extensions.injectResolutionRule { session =>
...
}
// Inject an extra logical-optimizer rule (body likewise elided).
extensions.injectOptimizerRule { session =>
...
}
}
.getOrCreate
// Brings the debug / debugCodegen extension methods into scope.
import org.apache.spark.sql.execution.debug._
// NOTE(review): 'id uses the Symbol-based column syntax, which is deprecated
// since Spark 3 — $"id" or col("id") is the current form.
val q = spark.range(10).where('id === 4)
// Prints the query plan annotated with per-operator runtime metrics.
q.debug
// NOTE(review): re-declaring `q` like this only works in a REPL/notebook
// session; in a compiled source file it would be a duplicate definition.
val q = sql("SELECT * FROM RANGE(10) WHERE id = 4")
// Prints the whole-stage-codegen Java code generated for the query.
q.debugCodegen