Class

org.apache.spark.sql.execution

StratifiedSamplerReservoir

Related Doc: package execution

Permalink

final class StratifiedSamplerReservoir extends StratifiedSampler

A simple reservoir-based stratified sampler that uses the provided reservoir size for every stratum present in the incoming rows.

Linear Supertypes
StratifiedSampler, Logging, Cloneable, Cloneable, Serializable, Serializable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. StratifiedSamplerReservoir
  2. StratifiedSampler
  3. Logging
  4. Cloneable
  5. Cloneable
  6. Serializable
  7. Serializable
  8. AnyRef
  9. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new StratifiedSamplerReservoir(_options: SampleOptions)

    Permalink

Type Members

  1. type ReservoirSegment = SegmentMap[Row, StratumReservoir]

    Permalink
    Definition Classes
    StratifiedSampler
  2. final class RowWithWeight extends Row

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. def append[U](rows: Iterator[Row], init: U, processFlush: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U, rowEncoder: ExpressionEncoder[Row], partIndex: Int): Long

    Permalink
  5. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  6. def clone(): StratifiedSamplerReservoir

    Permalink
    Definition Classes
    StratifiedSamplerReservoir → AnyRef
  7. final def concurrency: Int

    Permalink
    Definition Classes
    StratifiedSampler
  8. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  10. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  11. def flushReservoir[U](init: U, process: (U, InternalRow) ⇒ U, startBatch: (U, Int) ⇒ U, endBatch: (U) ⇒ U): U

    Permalink
  12. final def foldDrainSegment[U](prevReservoirSize: Int, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(init: U, seg: ReservoirSegment): U

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  13. final def foldReservoir[U](prevReservoirSize: Int, doReset: Boolean, fullReset: Boolean, process: (U, InternalRow) ⇒ U)(bid: Int, sr: StratumReservoir, init: U): U

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  14. def getBucketId(partIndex: Int, primaryBucketIds: IntArrayList = null)(hashValue: Int): Int

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  15. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  16. def getReservoirSegment(newQcs: Array[Int], types: Array[DataType], numColumns: Int, initialCapacity: Int, loadFactor: Double, qcsColHandler: Option[ColumnHandler], segi: Int, nsegs: Int): ReservoirSegment

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  17. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  18. def initializeLogIfNecessary(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  19. def isBucketLocal(partIndex: Int): Boolean

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  20. final def isDebugEnabled: Boolean

    Permalink
    Definition Classes
    Logging
  21. final def isInfoEnabled: Boolean

    Permalink
    Definition Classes
    Logging
  22. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  23. final def isTraceEnabled: Boolean

    Permalink
    Definition Classes
    Logging
  24. def iterator(segmentStart: Int, segmentEnd: Int): Iterator[InternalRow]

    Permalink
    Definition Classes
    StratifiedSampler
  25. def iteratorOnRegion(buckets: Set[Integer]): Iterator[InternalRow]

    Permalink
    Definition Classes
    StratifiedSampler
  26. final var levelFlags: Int

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  27. def log: Logger

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  28. def logDebug(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Definition Classes
    Logging
  29. def logDebug(msg: ⇒ String): Unit

    Permalink
    Definition Classes
    Logging
  30. def logError(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Definition Classes
    Logging
  31. def logError(msg: ⇒ String): Unit

    Permalink
    Definition Classes
    Logging
  32. def logInfo(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Definition Classes
    Logging
  33. def logInfo(msg: ⇒ String): Unit

    Permalink
    Definition Classes
    Logging
  34. def logName: String

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  35. def logTrace(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Definition Classes
    Logging
  36. def logTrace(msg: ⇒ String): Unit

    Permalink
    Definition Classes
    Logging
  37. def logWarning(msg: ⇒ String, throwable: Throwable): Unit

    Permalink
    Definition Classes
    Logging
  38. def logWarning(msg: ⇒ String): Unit

    Permalink
    Definition Classes
    Logging
  39. final var log_: Logger

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  40. def module: String

    Permalink
    Definition Classes
    StratifiedSampler
  41. final def name: String

    Permalink
    Definition Classes
    StratifiedSampler
  42. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  43. final def newMutableRow(row: Row, rowEncoder: ExpressionEncoder[Row]): UnsafeRow

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler
  44. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  45. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  46. def onTruncate(): Unit

    Permalink
  47. final val options: SampleOptions

    Permalink
    Definition Classes
    StratifiedSampler
  48. final val pendingBatch: AtomicReference[ArrayBuffer[InternalRow]]

    Permalink

    Store pending values to be flushed in a separate buffer so that we do not end up creating ColumnBatches that are too small.

    Note that this mini-cache is copy-on-write (to avoid copy-on-read for readers), so the buffer inside should never be modified in place; instead, the whole buffer should be replaced if required. This should happen only inside flushCache.

    Attributes
    protected
    Definition Classes
    StratifiedSampler
  49. final def qcs: Array[Int]

    Permalink
    Definition Classes
    StratifiedSampler
  50. final def qcsSparkPlan: Option[(CodeAndComment, ArrayBuffer[Any], Int, Array[DataType])]

    Permalink
    Definition Classes
    StratifiedSampler
  51. def reservoirInRegion: Boolean

    Permalink
    Definition Classes
    StratifiedSampler
  52. val reservoirSize: Int

    Permalink
  53. def resetLogger(): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    Logging
  54. final val rng: Random

    Permalink

    Random number generator for sampling.

    Attributes
    protected
    Definition Classes
    StratifiedSampler
  55. def sample(items: Iterator[InternalRow], rowEncoder: ExpressionEncoder[Row], flush: Boolean): Iterator[InternalRow]

    Permalink
  56. final def schema: StructType

    Permalink
    Definition Classes
    StratifiedSampler
  57. def setFlushStatus(doFlush: Boolean): Unit

    Permalink
    Definition Classes
    StratifiedSampler
  58. final val strata: ConcurrentSegmentedHashMap[Row, StratumReservoir, ReservoirSegment]

    Permalink

    Map of each stratum key (i.e. a unique combination of values of columns in qcs) to its related metadata and reservoir.

    Attributes
    protected
    Definition Classes
    StratifiedSampler
  59. def strataReservoirSize: Int

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSamplerReservoir → StratifiedSampler
  60. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  61. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  62. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  63. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  64. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  65. def waitForSamplers(waitUntil: Int, maxMillis: Long): Unit

    Permalink
    Attributes
    protected
    Definition Classes
    StratifiedSampler

Inherited from StratifiedSampler

Inherited from Logging

Inherited from Cloneable

Inherited from Cloneable

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped