org.apache.spark.sql.execution.streaming
Store the metadata for the specified batchId and return true if successful. If the batchId's metadata has already been stored, this method will return false.
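Because add returns false when the batch is already stored, the log can serve as a simple commit protocol: only the first writer for a given batchId succeeds. A minimal in-memory sketch of that contract (the class and names below are illustrative, not Spark's actual internals):

  import scala.collection.concurrent.TrieMap

  // Illustrative stand-in for the metadata log contract described above.
  class InMemoryMetadataLog[T] {
    private val batches = TrieMap.empty[Long, T]

    // Store metadata for batchId; return false if it was already stored.
    def add(batchId: Long, metadata: T): Boolean =
      batches.putIfAbsent(batchId, metadata).isEmpty
  }

  val log = new InMemoryMetadataLog[String]
  assert(log.add(0, "offsets-for-batch-0"))    // first write wins
  assert(!log.add(0, "conflicting-metadata"))  // duplicate commit is rejected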
Returns all files except the deleted ones.
A PathFilter to filter only batch files.
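For illustration, such a filter might accept only files whose name parses as a batch id (an assumption about the file layout; compacted logs may use a different naming scheme):

  import org.apache.hadoop.fs.{Path, PathFilter}

  // Accept only files named by a numeric batch id (e.g. "0", "17"), skipping
  // temporary files and anything else living in the metadata directory.
  val batchFilesFilter = new PathFilter {
    override def accept(path: Path): Boolean =
      scala.util.Try(path.getName.toLong).isSuccess
  }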
Filter out the obsolete logs.
If we delete the old files after compaction at once, there is a race condition in S3: other processes may see that the old files are deleted but still fail to see the compaction file via "list". The allFiles method handles this by looking for the next compaction file directly; however, a livelock may happen if compaction occurs too frequently: one process keeps deleting old files while another keeps retrying. Setting a reasonable cleanup delay avoids this.
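A sketch of the delayed-cleanup idea (the helper and its parameters are illustrative, not the exact Spark internals): instead of deleting obsolete files immediately after compaction, delete only those older than a configurable delay, so readers that have not yet discovered the compaction file can still list the old ones.

  import org.apache.hadoop.fs.{FileStatus, FileSystem}

  // Delete only obsolete log files whose modification time has passed the
  // cleanup delay; recently superseded files stay visible to slow readers.
  def deleteExpired(fs: FileSystem, obsolete: Seq[FileStatus], cleanupDelayMs: Long): Unit = {
    val expirationTime = System.currentTimeMillis() - cleanupDelayMs
    obsolete
      .filter(_.getModificationTime <= expirationTime)
      .foreach(f => fs.delete(f.getPath, false))
  }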
Return metadata for batches between startId (inclusive) and endId (inclusive). If startId is None, just return all batches before endId (inclusive).
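An in-memory sketch of the range semantics (illustrative; the actual signature may differ across versions):

  import scala.collection.immutable.SortedMap

  // Both bounds are inclusive; a missing startId means "from the beginning".
  def getRange(batches: SortedMap[Long, String],
               startId: Option[Long],
               endId: Long): Array[(Long, String)] =
    batches.range(startId.getOrElse(Long.MinValue), endId + 1).toArray

  val batches = SortedMap(0L -> "b0", 1L -> "b1", 2L -> "b2", 3L -> "b3")
  getRange(batches, Some(1L), 2L)  // Array((1, "b1"), (2, "b2"))
  getRange(batches, None, 1L)      // Array((0, "b0"), (1, "b1"))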
Return the metadata for the specified batchId if it's stored. Otherwise, return None.
The deserialized metadata in a batch file, or None if the file does not exist.
IllegalArgumentException when the path does not point to a batch file.
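The exception case can be pictured as a strict path-to-id conversion (names and mechanism illustrative, assuming batch files are named by their numeric id):

  import org.apache.hadoop.fs.Path

  // Derive a batch id from a batch file path, failing loudly on anything
  // that is not a batch file (mirroring the IllegalArgumentException above).
  def pathToBatchId(path: Path): Long =
    try path.getName.toLong
    catch {
      case _: NumberFormatException =>
        throw new IllegalArgumentException(s"File $path is not a batch file")
    }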
Return the latest batch id and its metadata if they exist.
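On restart, this is typically how a stream finds where to resume. An in-memory sketch (illustrative):

  import scala.collection.immutable.SortedMap

  // The latest batch is simply the largest stored batch id, if any.
  def getLatest[T](batches: SortedMap[Long, T]): Option[(Long, T)] =
    batches.lastOption

  getLatest(SortedMap(0L -> "b0", 2L -> "b2"))  // Some((2, "b2"))
  getLatest(SortedMap.empty[Long, String])      // None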
Get an array of [FileStatus] referencing batch files. The array is sorted from the most recent batch file to the oldest.
Removes all log entries earlier than thresholdBatchId (exclusive).
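A sketch of the exclusive-threshold semantics (illustrative, in-memory):

  import scala.collection.mutable

  // Drop every entry strictly below thresholdBatchId; the threshold batch
  // itself is kept ("exclusive" in the description above).
  def purge[T](batches: mutable.SortedMap[Long, T], thresholdBatchId: Long): Unit =
    batches.keys.toList.filter(_ < thresholdBatchId).foreach(batches.remove)

  val batches = mutable.SortedMap(0L -> "b0", 1L -> "b1", 2L -> "b2")
  purge(batches, 2L)  // only 2 -> "b2" remains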
A special log for FileStreamSink. It writes one log file for each batch. The first line of the log file is the version number, followed by one JSON line per entry; each JSON line is the JSON representation of a SinkFileStatus.
As reading from many small files is usually pretty slow, FileStreamSinkLog compacts log files every "spark.sql.sink.file.log.compactLen" batches into one big file. When doing a compaction, it reads all old log files and merges them with the new batch; during the compaction, it also drops the entries marked as deleted (via SinkFileStatus.action). When a reader uses allFiles to list all files, it gets back only the visible files (the deleted files are dropped).
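Put together, a batch log file as described would look roughly like the comment below, and compaction reduces to merging entries and dropping deleted paths. The trimmed SinkFileStatus and the "delete" literal are illustrative; the real class carries full file metadata:

  // A batch log file, roughly:
  //   v1
  //   {"path":"hdfs://out/part-00000","size":1024,...,"action":"add"}
  //   {"path":"hdfs://out/part-00001","size":2048,...,"action":"delete"}

  case class SinkFileStatus(path: String, action: String)

  // Merge all entries and drop every path marked as deleted; this is also
  // why allFiles afterwards sees only the visible files.
  def compactLogs(logs: Seq[SinkFileStatus]): Seq[SinkFileStatus] = {
    val deletedPaths = logs.filter(_.action == "delete").map(_.path).toSet
    logs.filterNot(s => deletedPaths.contains(s.path))
  }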