Upgrade to Pro — share decks privately, control downloads, hide ads and more …

Spark Jobserver

Sponsored · Your Podcast. Everywhere. Effortlessly. Share. Educate. Inspire. Entertain. You do you. We'll handle the rest.

Spark Jobserver

Spark Jobserver and why you should use it

Avatar for Yegor Andreenko

Yegor Andreenko

October 20, 2016
Tweet

More Decks by Yegor Andreenko

Other Decks in Technology

Transcript

  1. curl --data-binary @my-spark-job-allinone.jar jobserver:8090/jars/demo curl -d "" 'jobserver:8090/contexts/agg?num-cpu-cores=2&memory-per-node=2048m' curl -d

    "input.string = A lazy dog jumped mean dog" \ 'jobserver:8090/jobs?appName=demo&context=agg&classPath=spark.jobserver.WordCountExample' { "status": "STARTED", "result": { "jobId": "a3e0e19d-f10f-4b78-9eb3-e65d63853a74", "context": "agg" } }
  2. object WordCountExample extends SparkJob { override def validate(sc: SparkContext, config:

    Config): SparkJobValidation = { Try(config.getString("input.string")) .map(x => SparkJobValid) .getOrElse(SparkJobInvalid("No input.string")) } override def runJob(sc: SparkContext, config: Config): Any = { val dd = sc.parallelize(config.getString("input.string").split(" ").toSeq) dd.map((_, 1)).reduceByKey(_ + _).collect().toMap } }
  3. this.namedRdds.update("french_dictionary", frenchDictionaryRDD) val rdd = this.namedRdds.get[(String, String)]("french_dictionary").get val NamedRDD(frenchDictionaryRDD, _

    ,_) = namedObjects.get[NamedRDD[(String, String)]]("rdd:french_dictionary").get val NamedDataFrame(frenchDictionaryDF, _, _) = namedObjects.get[NamedDataFrame]("df:some df").get