package connector
The root package of the Cassandra connector for Apache Spark. It offers handy implicit conversions that add Cassandra-specific methods to SparkContext and RDD.
Call cassandraTable method on the SparkContext object to create a CassandraRDD exposing Cassandra tables as Spark RDDs.
Call the RDDFunctions saveToCassandra function on any RDD to save a distributed collection to a Cassandra table.
Example:
CREATE KEYSPACE test WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1 }; CREATE TABLE test.words (word text PRIMARY KEY, count int); INSERT INTO test.words(word, count) VALUES ('and', 50);
import com.datastax.spark.connector._ val sparkMasterHost = "127.0.0.1" val cassandraHost = "127.0.0.1" val keyspace = "test" val table = "words" // Tell Spark the address of one Cassandra node: val conf = new SparkConf(true).set("spark.cassandra.connection.host", cassandraHost) // Connect to the Spark cluster: val sc = new SparkContext("spark://" + sparkMasterHost + ":7077", "example", conf) // Read the table and print its contents: val rdd = sc.cassandraTable(keyspace, table) rdd.toArray().foreach(println) // Write two rows to the table: val col = sc.parallelize(Seq(("of", 1200), ("the", 863))) col.saveToCassandra(keyspace, table) sc.stop()
- Alphabetic
- By Inheritance
- connector
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Type Members
- sealed trait BatchSize extends AnyRef
- case class BytesInBatch(batchSize: Int) extends BatchSize with Product with Serializable
- class CassandraSparkExtensions extends (SparkSessionExtensions) ⇒ Unit with Logging
- final class CassandraTableScanPairRDDFunctions[K, V] extends Serializable
- final class CassandraTableScanRDDFunctions[R] extends Serializable
- implicit final class ColumnNameFunctions extends AnyVal
- sealed trait ColumnSelector extends AnyRef
-
class
DatasetFunctions[K] extends Serializable
Provides Cassandra-specific methods on org.apache.spark.sql.DataFrame
- class PairRDDFunctions[K, V] extends Serializable
-
class
RDDFunctions[T] extends WritableToCassandra[T] with Serializable
Provides Cassandra-specific methods on RDD
- case class RowsInBatch(batchSize: Int) extends BatchSize with Product with Serializable
- case class SomeColumns(columns: ColumnRef*) extends ColumnSelector with Product with Serializable
-
class
SparkContextFunctions extends Serializable
Provides Cassandra-specific methods on SparkContext
Value Members
- implicit def toCassandraTableScanFunctions[T](rdd: CassandraTableScanRDD[T]): CassandraTableScanRDDFunctions[T]
- implicit def toCassandraTableScanRDDPairFunctions[K, V](rdd: CassandraTableScanRDD[(K, V)]): CassandraTableScanPairRDDFunctions[K, V]
- implicit def toDataFrameFunctions(dataFrame: DataFrame): DatasetFunctions[Row]
- implicit def toDatasetFunctions[K](dataset: Dataset[K])(implicit arg0: Encoder[K]): DatasetFunctions[K]
- implicit def toNamedColumnRef(columnName: String): ColumnName
- implicit def toPairRDDFunctions[K, V](rdd: RDD[(K, V)]): PairRDDFunctions[K, V]
- implicit def toRDDFunctions[T](rdd: RDD[T]): RDDFunctions[T]
- implicit def toSparkContextFunctions(sc: SparkContext): SparkContextFunctions
- object AllColumns extends ColumnSelector with Product with Serializable
- object BatchSize
- object DocUtil
- object PartitionKeyColumns extends ColumnSelector with Product with Serializable
- object PrimaryKeyColumns extends ColumnSelector with Product with Serializable
- object SomeColumns extends Serializable