class CassandraCoGroupedRDD[T] extends RDD[Seq[Seq[T]]]
An RDD which pulls from separately provided CassandraTableScanRDDs that share the same partition key types and keyspace. These tables will be joined on READ using a merge iterator. As long as we join on the token of the partition key, the iterators should be read in order. Note: this implementation does not require the partition keys to have the same names, but they must have the same types.
- Alphabetic
- By Inheritance
- CassandraCoGroupedRDD
- RDD
- Logging
- Serializable
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new CassandraCoGroupedRDD(sc: SparkContext, scanRDDs: Seq[CassandraTableScanRDD[T]])(implicit classTag: ClassTag[T])
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
def
++(other: RDD[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
aggregate[U](zeroValue: U)(seqOp: (U, Seq[Seq[T]]) ⇒ U, combOp: (U, U) ⇒ U)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
barrier(): RDDBarrier[Seq[Seq[T]]]
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since( "2.4.0" )
-
def
cache(): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
def
cartesian[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(Seq[Seq[T]], U)]
- Definition Classes
- RDD
-
def
checkValidMergeJoin(): Unit
- Attributes
- protected
-
def
checkpoint(): Unit
- Definition Classes
- RDD
-
def
cleanShuffleDependencies(blocking: Boolean): Unit
- Definition Classes
- RDD
- Annotations
- @Experimental() @DeveloperApi() @Since( "3.1.0" )
-
def
clearDependencies(): Unit
- Attributes
- protected
- Definition Classes
- RDD
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native() @HotSpotIntrinsicCandidate()
-
def
coalesce(numPartitions: Int, shuffle: Boolean, partitionCoalescer: Option[PartitionCoalescer])(implicit ord: Ordering[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
collect[U](f: PartialFunction[Seq[Seq[T]], U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
-
def
collect(): Array[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
compute(split: Partition, context: TaskContext): Iterator[Seq[Seq[T]]]
- Definition Classes
- CassandraCoGroupedRDD → RDD
- Annotations
- @DeveloperApi()
- lazy val connector: CassandraConnector
-
def
context: SparkContext
- Definition Classes
- RDD
-
def
count(): Long
- Definition Classes
- RDD
-
def
countApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble]
- Definition Classes
- RDD
-
def
countApproxDistinct(relativeSD: Double): Long
- Definition Classes
- RDD
-
def
countApproxDistinct(p: Int, sp: Int): Long
- Definition Classes
- RDD
-
def
countByValue()(implicit ord: Ordering[Seq[Seq[T]]]): Map[Seq[Seq[T]], Long]
- Definition Classes
- RDD
-
def
countByValueApprox(timeout: Long, confidence: Double)(implicit ord: Ordering[Seq[Seq[T]]]): PartialResult[Map[Seq[Seq[T]], BoundedDouble]]
- Definition Classes
- RDD
-
final
def
dependencies: Seq[Dependency[_]]
- Definition Classes
- RDD
-
def
distinct(): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
distinct(numPartitions: Int)(implicit ord: Ordering[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
filter(f: (Seq[Seq[T]]) ⇒ Boolean): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
first(): Seq[Seq[T]]
- Definition Classes
- RDD
-
def
firstParent[U](implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[org.apache.spark]
- Definition Classes
- RDD
-
def
flatMap[U](f: (Seq[Seq[T]]) ⇒ TraversableOnce[U])(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
-
def
fold(zeroValue: Seq[Seq[T]])(op: (Seq[Seq[T]], Seq[Seq[T]]) ⇒ Seq[Seq[T]]): Seq[Seq[T]]
- Definition Classes
- RDD
-
def
foreach(f: (Seq[Seq[T]]) ⇒ Unit): Unit
- Definition Classes
- RDD
-
def
foreachPartition(f: (Iterator[Seq[Seq[T]]]) ⇒ Unit): Unit
- Definition Classes
- RDD
-
def
getCheckpointFile: Option[String]
- Definition Classes
- RDD
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
getDependencies: Seq[Dependency[_]]
- Attributes
- protected
- Definition Classes
- RDD
-
final
def
getNumPartitions: Int
- Definition Classes
- RDD
- Annotations
- @Since( "1.6.0" )
-
def
getOutputDeterministicLevel: org.apache.spark.rdd.DeterministicLevel.Value
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @DeveloperApi()
- def getPartitionKey(connector: CassandraConnector, keyspaceName: String, tableName: String): Seq[ColumnDef]
-
def
getPartitions: Array[Partition]
- Attributes
- protected
- Definition Classes
- CassandraCoGroupedRDD → RDD
-
def
getPreferredLocations(split: Partition): Seq[String]
- Definition Classes
- CassandraCoGroupedRDD → RDD
-
def
getResourceProfile(): ResourceProfile
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since( "3.1.0" )
-
def
getStorageLevel: StorageLevel
- Definition Classes
- RDD
-
def
glom(): RDD[Array[Seq[Seq[T]]]]
- Definition Classes
- RDD
-
def
groupBy[K](f: (Seq[Seq[T]]) ⇒ K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K]): RDD[(K, Iterable[Seq[Seq[T]]])]
- Definition Classes
- RDD
-
def
groupBy[K](f: (Seq[Seq[T]]) ⇒ K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Seq[Seq[T]]])]
- Definition Classes
- RDD
-
def
groupBy[K](f: (Seq[Seq[T]]) ⇒ K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[Seq[Seq[T]]])]
- Definition Classes
- RDD
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
val
id: Int
- Definition Classes
- RDD
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
intersection(other: RDD[Seq[Seq[T]]], numPartitions: Int): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
intersection(other: RDD[Seq[Seq[T]]], partitioner: Partitioner)(implicit ord: Ordering[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
intersection(other: RDD[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
lazy val
isBarrier_: Boolean
- Attributes
- protected
- Definition Classes
- RDD
- Annotations
- @transient()
-
def
isCheckpointed: Boolean
- Definition Classes
- RDD
-
def
isEmpty(): Boolean
- Definition Classes
- RDD
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def isValidMergeJoin(): Boolean
-
final
def
iterator(split: Partition, context: TaskContext): Iterator[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
keyBy[K](f: (Seq[Seq[T]]) ⇒ K): RDD[(K, Seq[Seq[T]])]
- Definition Classes
- RDD
-
def
localCheckpoint(): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
map[U](f: (Seq[Seq[T]]) ⇒ U)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
-
def
mapPartitions[U](f: (Iterator[Seq[Seq[T]]]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
-
def
mapPartitionsWithIndex[U](f: (Int, Iterator[Seq[Seq[T]]]) ⇒ Iterator[U], preservesPartitioning: Boolean)(implicit arg0: ClassTag[U]): RDD[U]
- Definition Classes
- RDD
-
def
max()(implicit ord: Ordering[Seq[Seq[T]]]): Seq[Seq[T]]
- Definition Classes
- RDD
-
def
min()(implicit ord: Ordering[Seq[Seq[T]]]): Seq[Seq[T]]
- Definition Classes
- RDD
-
var
name: String
- Definition Classes
- RDD
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
parent[U](j: Int)(implicit arg0: ClassTag[U]): RDD[U]
- Attributes
- protected[org.apache.spark]
- Definition Classes
- RDD
-
val
partitioner: Option[Partitioner]
- Definition Classes
- RDD
-
final
def
partitions: Array[Partition]
- Definition Classes
- RDD
-
def
persist(): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
def
persist(newLevel: StorageLevel): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
def
pipe(command: Seq[String], env: Map[String, String], printPipeContext: ((String) ⇒ Unit) ⇒ Unit, printRDDElement: (Seq[Seq[T]], (String) ⇒ Unit) ⇒ Unit, separateWorkingDir: Boolean, bufferSize: Int, encoding: String): RDD[String]
- Definition Classes
- RDD
-
def
pipe(command: String, env: Map[String, String]): RDD[String]
- Definition Classes
- RDD
-
def
pipe(command: String): RDD[String]
- Definition Classes
- RDD
-
final
def
preferredLocations(split: Partition): Seq[String]
- Definition Classes
- RDD
-
def
randomSplit(weights: Array[Double], seed: Long): Array[RDD[Seq[Seq[T]]]]
- Definition Classes
- RDD
-
def
reduce(f: (Seq[Seq[T]], Seq[Seq[T]]) ⇒ Seq[Seq[T]]): Seq[Seq[T]]
- Definition Classes
- RDD
-
def
repartition(numPartitions: Int)(implicit ord: Ordering[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
sample(withReplacement: Boolean, fraction: Double, seed: Long): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
saveAsObjectFile(path: String): Unit
- Definition Classes
- RDD
-
def
saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit
- Definition Classes
- RDD
-
def
saveAsTextFile(path: String): Unit
- Definition Classes
- RDD
-
def
setName(_name: String): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
def
sortBy[K](f: (Seq[Seq[T]]) ⇒ K, ascending: Boolean, numPartitions: Int)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
sparkContext: SparkContext
- Definition Classes
- RDD
-
def
subtract(other: RDD[Seq[Seq[T]]], p: Partitioner)(implicit ord: Ordering[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
subtract(other: RDD[Seq[Seq[T]]], numPartitions: Int): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
subtract(other: RDD[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
take(num: Int): Array[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
takeOrdered(num: Int)(implicit ord: Ordering[Seq[Seq[T]]]): Array[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
takeSample(withReplacement: Boolean, num: Int, seed: Long): Array[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
toDebugString: String
- Definition Classes
- RDD
-
def
toJavaRDD(): JavaRDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
toLocalIterator: Iterator[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
toString(): String
- Definition Classes
- RDD → AnyRef → Any
- def tokenExtractor(row: Row): Token
-
def
top(num: Int)(implicit ord: Ordering[Seq[Seq[T]]]): Array[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
treeAggregate[U](zeroValue: U)(seqOp: (U, Seq[Seq[T]]) ⇒ U, combOp: (U, U) ⇒ U, depth: Int)(implicit arg0: ClassTag[U]): U
- Definition Classes
- RDD
-
def
treeReduce(f: (Seq[Seq[T]], Seq[Seq[T]]) ⇒ Seq[Seq[T]], depth: Int): Seq[Seq[T]]
- Definition Classes
- RDD
-
def
union(other: RDD[Seq[Seq[T]]]): RDD[Seq[Seq[T]]]
- Definition Classes
- RDD
-
def
unpersist(blocking: Boolean): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
def
withResources(rp: ResourceProfile): CassandraCoGroupedRDD.this.type
- Definition Classes
- RDD
- Annotations
- @Experimental() @Since( "3.1.0" )
-
def
zip[U](other: RDD[U])(implicit arg0: ClassTag[U]): RDD[(Seq[Seq[T]], U)]
- Definition Classes
- RDD
-
def
zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D])(f: (Iterator[Seq[Seq[T]]], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipPartitions[B, C, D, V](rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean)(f: (Iterator[Seq[Seq[T]]], Iterator[B], Iterator[C], Iterator[D]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[D], arg3: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C])(f: (Iterator[Seq[Seq[T]]], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipPartitions[B, C, V](rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean)(f: (Iterator[Seq[Seq[T]]], Iterator[B], Iterator[C]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[C], arg2: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipPartitions[B, V](rdd2: RDD[B])(f: (Iterator[Seq[Seq[T]]], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipPartitions[B, V](rdd2: RDD[B], preservesPartitioning: Boolean)(f: (Iterator[Seq[Seq[T]]], Iterator[B]) ⇒ Iterator[V])(implicit arg0: ClassTag[B], arg1: ClassTag[V]): RDD[V]
- Definition Classes
- RDD
-
def
zipWithIndex(): RDD[(Seq[Seq[T]], Long)]
- Definition Classes
- RDD
-
def
zipWithUniqueId(): RDD[(Seq[Seq[T]], Long)]
- Definition Classes
- RDD
Deprecated Value Members
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] ) @Deprecated @deprecated
- Deprecated
(Since an unspecified version) See the corresponding Javadoc for more information.