Generates a method that returns true if the group-by keys exist at a given index in the associated org.apache.spark.sql.catalyst.expressions.RowBasedKeyValueBatch.
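A minimal, self-contained sketch of what such a generated equality check might do. This is an illustrative simplification, not Spark's actual generated code: it assumes two long group-by keys per row, stored consecutively in a flat array, whereas the real method compares against an UnsafeRow fetched from the RowBasedKeyValueBatch.

```java
// Illustrative sketch only; names and layout are assumptions.
public class EqualsSketch {
    // Two rows of keys: row 0 = (1, 2), row 1 = (3, 4).
    static long[] keyBatch = {1L, 2L, 3L, 4L};

    // Returns true if the group-by keys at row `idx` match the probe keys.
    static boolean keysEqual(int idx, long key0, long key1) {
        return keyBatch[2 * idx] == key0 && keyBatch[2 * idx + 1] == key1;
    }

    public static void main(String[] args) {
        System.out.println(keysEqual(0, 1L, 2L)); // true
        System.out.println(keysEqual(1, 1L, 2L)); // false
    }
}
```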
Generates a method that returns a org.apache.spark.sql.catalyst.expressions.UnsafeRow which keeps track of the aggregate value(s) for a given set of keys. If the corresponding row doesn't exist, the generated method adds the corresponding row in the associated org.apache.spark.sql.catalyst.expressions.RowBasedKeyValueBatch.
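A simplified, self-contained sketch of the find-or-insert pattern described above. This is an assumption-laden illustration, not the generated code: it uses parallel lists and a linear scan in place of the hashed probe into the RowBasedKeyValueBatch, and a plain long[] in place of the UnsafeRow aggregate buffer.

```java
import java.util.ArrayList;

// Illustrative sketch only; the real generated method probes a hashed
// RowBasedKeyValueBatch and returns an UnsafeRow.
public class FindOrInsertSketch {
    static ArrayList<Long> keys = new ArrayList<>();
    static ArrayList<long[]> values = new ArrayList<>();

    // Return the aggregate buffer for `key`, appending a fresh
    // zero-initialized buffer when the key is not yet present.
    static long[] findOrInsert(long key) {
        for (int i = 0; i < keys.size(); i++) {
            if (keys.get(i) == key) return values.get(i); // existing row
        }
        keys.add(key);                 // miss: append-only insert
        long[] buf = new long[] {0L};  // zeroed aggregate value(s)
        values.add(buf);
        return buf;
    }

    public static void main(String[] args) {
        findOrInsert(42L)[0] += 5;  // first call inserts the row
        findOrInsert(42L)[0] += 7;  // second call hits the same buffer
        System.out.println(findOrInsert(42L)[0]); // 12
    }
}
```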
Generates a method that computes a hash, currently by xor-ing all individual group-by keys. For instance, if we have 2 long group-by keys, the generated function would be of the form:
private long hash(long agg_key, long agg_key1) {
  return agg_key ^ agg_key1;
}
This is a helper class to generate an append-only row-based hash map that can act as a 'cache' for extremely fast key-value lookups while evaluating aggregates (falling back to the BytesToBytesMap if a given key isn't found). This is 'codegened' in HashAggregate to speed up aggregates with keys. We also have VectorizedHashMapGenerator, which generates an append-only vectorized hash map. We choose one of the two as the first-level, fast hash map during aggregation.
NOTE: This row-based hash map currently doesn't support nullable keys and falls back to the BytesToBytesMap to store them.