StratifiedSamplerErrorLimit

Instance Constructors

new StratifiedSamplerErrorLimit(_options: SampleOptions)

Type Members

type ReservoirSegment = SegmentMap[Row, StratumReservoir]

Definition Classes
StratifiedSampler
final class RowWithWeight extends Row

Attributes
protected
Definition Classes
StratifiedSampler

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def append[U](rows: Iterator[Row], init: U, processFlush: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U, rowEncoder: ExpressionEncoder[Row], partIndex: Int): Long

Definition Classes
StratifiedSamplerErrorLimit → StratifiedSampler
final def asInstanceOf[T0]: T0

Definition Classes
Any
final val castType: Int

Store the type of the column once to avoid checking it for every row at runtime.

Attributes
protected
Definition Classes
CastLongTime
def checkCacheFlush(force: Boolean): Boolean

Check whether the cache needs to be flushed. This should be invoked whenever there is a potential significant increase in memory consumption.
returns
java.lang.Boolean.TRUE if cache needs to be flushed and fully reset, java.lang.Boolean.FALSE if cache needs to be flushed but no full reset, and null if cache does not need to be flushed
def clone(): StratifiedSamplerErrorLimit

Definition Classes
StratifiedSamplerErrorLimit → AnyRef
def columnBatchSize: Int
final def concurrency: Int

Definition Classes
StratifiedSampler
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def errorLimitColumn: Int
def errorLimitPercent: Double
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def flushReservoir[U](init: U, process: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U): U

Definition Classes
StratifiedSamplerErrorLimit → StratifiedSampler
final def foldDrainSegment[U](prevReservoirSize: Int, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(init: U, seg: ReservoirSegment): U

Attributes
protected
Definition Classes
StratifiedSampler
final def foldReservoir[U](prevReservoirSize: Int, doReset: Boolean, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(bid: Int, sr: StratumReservoir, init: U): U

Attributes
protected
Definition Classes
StratifiedSampler
def getBucketId(partIndex: Int, primaryBucketIds: IntArrayList = null)(hashValue: Int): Int

Attributes
protected
Definition Classes
StratifiedSampler
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
def getNullMillis(getDefaultForNull: Boolean): Long

Attributes
protected
Definition Classes
CastLongTime
def getReservoirSegment(newQcs: Array[Int], types: Array[DataType], numColumns: Int, initialCapacity: Int, loadFactor: Double, qcsColHandler: Option[ColumnHandler], segi: Int, nsegs: Int): ReservoirSegment

Attributes
protected
Definition Classes
StratifiedSampler
def hashCode(): Int

Definition Classes
AnyRef → Any
def initCacheSize: Int
def initializeLogIfNecessary(): Unit

Attributes
protected
Definition Classes
Logging
def isBucketLocal(partIndex: Int): Boolean

Attributes
protected
Definition Classes
StratifiedSampler
final def isDebugEnabled: Boolean

Definition Classes
Logging
final def isInfoEnabled: Boolean

Definition Classes
Logging
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isTraceEnabled: Boolean

Definition Classes
Logging
def iterator(segmentStart: Int, segmentEnd: Int): Iterator[InternalRow]

Definition Classes
StratifiedSampler
def iteratorOnRegion(buckets: Set[Integer]): Iterator[InternalRow]

Definition Classes
StratifiedSampler
final var levelFlags: Int

Attributes
protected
Definition Classes
Logging
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Definition Classes
Logging
final var log_: Logger

Attributes
protected
Definition Classes
Logging
def module: String

Definition Classes
StratifiedSampler
final def name: String

Definition Classes
StratifiedSampler
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def newMutableRow(row: Row, rowEncoder: ExpressionEncoder[Row]): UnsafeRow

Attributes
protected
Definition Classes
StratifiedSampler
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
def onTruncate(): Unit

Definition Classes
StratifiedSamplerErrorLimit → StratifiedSampler
final val options: SampleOptions

Definition Classes
StratifiedSampler
final def parseMillis(row: Row, timeCol: Int, getDefaultForNull: Boolean = false): Long

Definition Classes
CastLongTime
final def parseMillisFromAny(ts: Any): Long

Attributes
protected
Definition Classes
CastLongTime
final val pendingBatch: AtomicReference[ArrayBuffer[InternalRow]]

Store pending values to be flushed in a separate buffer so that we do not end up creating ColumnBatches that are too small.
Note that this mini-cache is copy-on-write (to avoid copy-on-read for readers), so the buffer inside should never be changed; rather, the whole buffer should be replaced if required. This should happen only inside flushCache.

Attributes
protected
Definition Classes
StratifiedSampler
final def qcs: Array[Int]

Definition Classes
StratifiedSampler
final def qcsSparkPlan: Option[(CodeAndComment, ArrayBuffer[Any], Int, Array[DataType])]

Definition Classes
StratifiedSampler
def reservoirInRegion: Boolean

Definition Classes
StratifiedSampler
def resetLogger(): Unit

Attributes
protected
Definition Classes
Logging
final val rng: Random

Random number generator for sampling.

Attributes
protected
Definition Classes
StratifiedSampler
def sample(items: Iterator[InternalRow], rowEncoder: ExpressionEncoder[Row], flush: Boolean): Iterator[InternalRow]

Definition Classes
StratifiedSamplerErrorLimit → StratifiedSampler
final def schema: StructType

Definition Classes
StratifiedSampler
def setFlushStatus(doFlush: Boolean): Unit

Definition Classes
StratifiedSampler
final val strata: ConcurrentSegmentedHashMap[Row, StratumReservoir, ReservoirSegment]

Map of each stratum key (i.e. a unique combination of values of columns in qcs) to related metadata and reservoir.

Attributes
protected
Definition Classes
StratifiedSampler
def strataReservoirSize: Int

Not used for this implementation, so return init size.

Attributes
protected
Definition Classes
StratifiedSamplerErrorLimit → StratifiedSampler
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def timeColumnType: Option[DataType]

Definition Classes
StratifiedSamplerErrorLimit → CastLongTime
def timeInterval: Long
def timeSeriesColumn: Int
def toString(): String

Definition Classes
AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def waitForSamplers(waitUntil: Int, maxMillis: Long): Unit

Attributes
protected
Definition Classes
StratifiedSampler

Related Doc: package execution

final class StratifiedSamplerErrorLimit extends StratifiedSampler with CastLongTime

Instance Constructors

new StratifiedSamplerErrorLimit(_options: SampleOptions)

Type Members

type ReservoirSegment = SegmentMap[Row, StratumReservoir]

final class RowWithWeight extends Row

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def ==(arg0: Any): Boolean

def append[U](rows: Iterator[Row], init: U, processFlush: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U, rowEncoder: ExpressionEncoder[Row], partIndex: Int): Long

final def asInstanceOf[T0]: T0

final val castType: Int

def checkCacheFlush(force: Boolean): Boolean

def clone(): StratifiedSamplerErrorLimit

def columnBatchSize: Int

final def concurrency: Int

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def errorLimitColumn: Int

def errorLimitPercent: Double

def finalize(): Unit

def flushReservoir[U](init: U, process: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U): U

final def foldDrainSegment[U](prevReservoirSize: Int, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(init: U, seg: ReservoirSegment): U

final def foldReservoir[U](prevReservoirSize: Int, doReset: Boolean, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(bid: Int, sr: StratumReservoir, init: U): U

def getBucketId(partIndex: Int, primaryBucketIds: IntArrayList = null)(hashValue: Int): Int

final def getClass(): Class[_]

def getNullMillis(getDefaultForNull: Boolean): Long

def getReservoirSegment(newQcs: Array[Int], types: Array[DataType], numColumns: Int, initialCapacity: Int, loadFactor: Double, qcsColHandler: Option[ColumnHandler], segi: Int, nsegs: Int): ReservoirSegment

def hashCode(): Int

def initCacheSize: Int

def initializeLogIfNecessary(): Unit

def isBucketLocal(partIndex: Int): Boolean

final def isDebugEnabled: Boolean

final def isInfoEnabled: Boolean

final def isInstanceOf[T0]: Boolean

final def isTraceEnabled: Boolean

def iterator(segmentStart: Int, segmentEnd: Int): Iterator[InternalRow]

def iteratorOnRegion(buckets: Set[Integer]): Iterator[InternalRow]

final var levelFlags: Int

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

final var log_: Logger

def module: String

final def name: String

final def ne(arg0: AnyRef): Boolean

final def newMutableRow(row: Row, rowEncoder: ExpressionEncoder[Row]): UnsafeRow

final def notify(): Unit

final def notifyAll(): Unit

def onTruncate(): Unit

final val options: SampleOptions

final def parseMillis(row: Row, timeCol: Int, getDefaultForNull: Boolean = false): Long

final def parseMillisFromAny(ts: Any): Long

final val pendingBatch: AtomicReference[ArrayBuffer[InternalRow]]

final def qcs: Array[Int]

final def qcsSparkPlan: Option[(CodeAndComment, ArrayBuffer[Any], Int, Array[DataType])]

def reservoirInRegion: Boolean

def resetLogger(): Unit

final val rng: Random

def sample(items: Iterator[InternalRow], rowEncoder: ExpressionEncoder[Row], flush: Boolean): Iterator[InternalRow]

final def schema: StructType

def setFlushStatus(doFlush: Boolean): Unit

final val strata: ConcurrentSegmentedHashMap[Row, StratumReservoir, ReservoirSegment]

def strataReservoirSize: Int

final def synchronized[T0](arg0: ⇒ T0): T0

def timeColumnType: Option[DataType]

def timeInterval: Long

def timeSeriesColumn: Int