Start Minikube
# Give the Minikube VM enough resources (8 GiB RAM, 4 CPUs) to host the
# Spark driver and executor pods.
minikube start --memory 8192 --cpus 4
Download Spark, extract it, and set SPARK_HOME to the extracted directory.
cd $SPARK_HOME
# Capture the Kubernetes API server URL from the current kubeconfig; it is
# used below as the k8s:// master address for spark-shell / spark-submit.
# (Was `k config view`, relying on an undefined `k` alias — use kubectl.)
K8S_SERVER=$(kubectl config view --output=jsonpath='{.clusters[].cluster.server}')
Build the Spark image with the Docker daemon running inside Minikube.
# Point the local Docker CLI at Minikube's internal Docker daemon, so the
# image built below is available to pods without pushing to a registry.
eval $(minikube docker-env)
docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
Check Docker Image
$ docker images spark
REPOSITORY   TAG      IMAGE ID       CREATED        SIZE
spark        latest   3686fa10e74a   3 months ago   529MB
Start the spark-shell using the command below.
# Run spark-shell (client mode) against the Kubernetes API server; executor
# pods are created in the "spark" namespace from the image built above.
# Comments cannot follow the backslash continuations, hence this header.
./bin/spark-shell \
--master k8s://$K8S_SERVER \
--conf spark.kubernetes.container.image=spark:latest \
--conf spark.kubernetes.context=minikube \
--conf spark.kubernetes.namespace=spark \
--verbose
This may fail with an SSL error like "unable to find valid certification path to requested target". To work around it, route traffic through kubectl proxy instead of connecting to the API server directly.
Start Kubectl Proxy
$ kubectl proxy
Starting to serve on 127.0.0.1:8001
Change --master to --master k8s://http://localhost:8001 and start spark-shell again.
scala> spark.version
res0: String = 3.1.2
scala> sc.master
res1: String = k8s://http://localhost:8001
scala> val values = List(List("1", "One") ,List("2", "Two") ,List("3", "Three"),List("4","4")).map(x =>(x(0), x(1)))
values: List[(String, String)] = List((1,One), (2,Two), (3,Three), (4,4))
scala> val df = spark.createDataFrame(values.map(e => Tuple2(e._1, e._2))).toDF("col1", "col2")
df: org.apache.spark.sql.DataFrame = [col1: string, col2: string]
scala> df.show()
+----+-----+
|col1| col2|
+----+-----+
|   1|  One|
|   2|  Two|
|   3|Three|
|   4|    4|
+----+-----+
scala> df.count()
res3: Long = 4
scala> df.first()
res4: org.apache.spark.sql.Row = [1,One]
# Submit the SparkPi example (10 partitions) in client mode through the
# kubectl proxy, with one 1-core/500m executor in the "spark" namespace,
# running as the "spark-serviceaccount" service account.
# NOTE(review): in client mode the driver runs locally, so this jar path
# must exist on the host filesystem — confirm before running.
./bin/spark-submit \
--master k8s://http://localhost:8001 \
--deploy-mode client \
--name a1 \
--class org.apache.spark.examples.SparkPi \
--conf spark.kubernetes.container.image=spark:latest \
--conf spark.kubernetes.driver.pod.name=a1 \
--conf spark.executor.instances=1 \
--conf spark.kubernetes.driver.limit.cores=1 \
--conf spark.executor.cores=1 \
--conf spark.executor.memory=500m \
--conf spark.kubernetes.namespace=spark \
--conf spark.kubernetes.authenticate.driver.serviceAccountName=spark-serviceaccount \
--verbose \
local:///Users/aagarwal/dev/spark/examples/jars/spark-examples_2.12-3.1.2.jar 10