libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.4.4"
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.streaming.DataStreamReader
val streamReader: DataStreamReader = spark.readStream
// Describe the source (format) and its options, then load the streaming Dataset.
val dataset: DataFrame = streamReader.load
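For example, a minimal sketch using the built-in rate source (rowsPerSecond is one of its supported options; the source's two-column schema of timestamp and value is fixed):

// Rate source: generates rows of (timestamp, value) at the given rate.
val rates: DataFrame = spark.readStream
  .format("rate")
  .option("rowsPerSecond", 2)
  .load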
import org.apache.spark.sql.Row
import org.apache.spark.sql.streaming.DataStreamWriter
val dataset: DataFrame = ...
val streamWriter: DataStreamWriter[Row] = dataset.writeStream
queryName(queryName: String): DataStreamWriter[T]
val streamWriter: DataStreamWriter[Row] = ...
val namedStreamWriter: DataStreamWriter[Row] = streamWriter.queryName("name")
outputMode(outputMode: String): DataStreamWriter[T]
outputMode(outputMode: OutputMode): DataStreamWriter[T]
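A minimal sketch, assuming the streamWriter above; both the string variant and the type-safe OutputMode variant are shown:

import org.apache.spark.sql.streaming.OutputMode
val appendWriter: DataStreamWriter[Row] = streamWriter.outputMode("append")
val completeWriter: DataStreamWriter[Row] = streamWriter.outputMode(OutputMode.Complete())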
trigger(trigger: Trigger): DataStreamWriter[T]
import org.apache.spark.sql.streaming.Trigger
trigger(Trigger.ProcessingTime("10 seconds"))
import scala.concurrent.duration._
trigger(Trigger.ProcessingTime(10.seconds))
import java.util.concurrent.TimeUnit
trigger(Trigger.ProcessingTime(10, TimeUnit.SECONDS))
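Beyond processing-time triggers, Spark 2.4 also provides Trigger.Once for a single micro-batch and the experimental Trigger.Continuous; a sketch:

trigger(Trigger.Once())               // process one micro-batch, then stop
trigger(Trigger.Continuous("1 second"))  // experimental continuous processing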
foreach(writer: ForeachWriter[T]): DataStreamWriter[T]
abstract class ForeachWriter[T] extends Serializable {
  def open(partitionId: Long, version: Long): Boolean
  def process(value: T): Unit
  def close(errorOrNull: Throwable): Unit
}
import org.apache.spark.sql.ForeachWriter
val streamWriter: DataStreamWriter[Long] = ...
val streamWriterWithForeachSink: DataStreamWriter[Long] =
  streamWriter.foreach(new ForeachWriter[Long] {
    override def open(partitionId: Long, version: Long): Boolean = true
    override def process(value: Long): Unit = {
      println(s">>> $value")
    }
    override def close(errorOrNull: Throwable): Unit = {}
  })
foreachBatch(function: (Dataset[T], Long) => Unit): DataStreamWriter[T]
import org.apache.spark.sql.Dataset
spark.readStream
  .format("rate")
  .load
  .writeStream
  .foreachBatch { (output: Dataset[_], batchId: Long) =>
    println(s"Batch ID: $batchId")
    output.show
  }
  .start
start(): StreamingQuery
import org.apache.spark.sql.streaming.StreamingQuery
val counter: DataFrame = ...
// A sink has to be defined before starting, e.g. the console sink.
val query: StreamingQuery = counter.writeStream.format("console").start
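Once started, the StreamingQuery handle can be used to monitor and stop the query; a short sketch of its public API:

println(query.id)      // unique id of the query
println(query.status)  // current status of the query
query.stop()           // stop the streaming query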
import org.apache.spark.sql.streaming.StreamingQueryManager
val qm: StreamingQueryManager = spark.streams
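A minimal sketch of the manager's public API for working with all active queries in a session:

qm.active.foreach(q => println(q.id))  // all currently active streaming queries
qm.awaitAnyTermination()               // block until any query terminates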