Upgrade to Pro — share decks privately, control downloads, hide ads and more …

AI 기반 광고 추천 파이프라인에서 스파크 스트리밍의 배포 및 모니터링 전략

Sponsored · Ship Features Fearlessly Turn features on and off without deploys. Used by thousands of Ruby developers.
Avatar for kakao kakao
November 01, 2024

AI 기반 광고 추천 파이프라인에서 스파크 스트리밍의 배포 및 모니터링 전략

#카프카 #스파크 #실시간 파이프라인 #빅데이터

AI 기반 광고 추천 시스템에서 스파크 스트리밍은 실시간 데이터 처리를 위한 핵심 구성 요소로 작동합니다.
이 파이프라인은 사용자 행동 데이터를 실시간으로 수집하고 처리하여 개인화된 광고를 추천하는 데 사용됩니다.
이번에 발표할 내용은 스파크 파이프라인 아키텍처와 스파크를 효과적으로 모니터링하기 위한 방법들을 소개합니다.

발표자 : cory.doras
광고추천팀에서 실시간 데이터 프로세싱과 서빙을 맡고 있습니다.

Avatar for kakao

kakao

November 01, 2024

More Decks by kakao

Other Decks in Programming

Transcript

  1. 파이프라인 배포 아키텍처 // mongodb에 저장되어 사용되는 파이프라인 설정(JSON) { "name": "dankeRankerTest",

    "mode": "dankeRankerDruid", "config": { "sink.bootstrap.servers": "kafka.kakao.com:9092", "sink.topic": "adrec-danke-predictor-druid", "source.bootstrap.servers": "kafka.kakao.com:9092", "source.topics": "adrec-danke-serving" } } // 각 mode에서 사용될 Config 정의. 필수 옵션들을 빌더 패턴을 활용 val definition: ConfigDef.ConfigDef = new ConfigDef.ConfigDef() .define("source.bootstrap.servers") .define("source.topics") .define("sink.bootstrap.servers") .define("sink.topic")
  2. 파이프라인 배포 아키텍처 entrypoint.sh #!/bin/bash echo "Application ID: $NAME" for APP_ID in $APPLICATION_IDS;

    do echo "종료 중: $APP_ID" yarn application -kill $APP_ID done echo "Submitting spark app" spark3-submit \ --deploy-mode cluster \ --master yarn \ --name ${NAME} \ --driver-memory 2g \ --num-executors ${SPARK_NUM_EXECUTORS} \ ... 생략 echo "Running spark app: $NAME"
  3. 파이프라인 배포 아키텍처 k8s.yaml apiVersion: batch/v1 kind: Job metadata: name: danke-predictor-druid spec: template:

    spec: containers: - name: adrec-metric-pipeline:v1.0.0 imagePullPolicy: ifNotPresent env: - name: NAME value: "dankePredictorDruid" - name: SPARK_NUM_EXECUTORS value: "100" - name: SPARK_EXECUTOR_CORES value: "2" - name: SPARK_EXECUTOR_MEMORY value: "2g" restartPolicy: Never
  4. 스파크에서 제공하는 StreamingQueryListener @Evolving abstract class StreamingQueryListener extends Serializable { import StreamingQueryListener._

    // Called when a query is started. def onQueryStarted(event: QueryStartedEvent): Unit // Called when there is some status update (ingestion rate updated, etc.) def onQueryProgress(event: QueryProgressEvent): Unit // Called when a query is stopped, with or without error. def onQueryTerminated(event: QueryTerminatedEvent): Unit }
  5. { "id" : "be7898d0-895b-4f32-ad73-c07cfad4c2bb", "runId" : "9357ed50-fe79-4dd8-a327-3775663e97ba", "timestamp" : "2024-10-22T01:43:22.739Z",

    "processedRowsPerSecond" : 644.6234119782214, "sources" : [ { "description" : "KafkaV2[Subscribe[adrec-click-log]]", "startOffset" : null, "endOffset" : { "adrec-click-log" : { "92" : 1100, // 마지막으로 처리했던 ["파티션 번호" : "오프셋 번호"] 데이터 } }, "latestOffset" : { "adrec-click-log" : { "92" : 1000, } }, "numInputRows" : 5683 // 입력 row 개수 }, "sink" : { "description" : "org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@739b9fa4", "numOutputRows" : 5683 // 출력 row 개수 } }
  6. { "id" : "be7898d0-895b-4f32-ad73-c07cfad4c2bb", "runId" : "9357ed50-fe79-4dd8-a327-3775663e97ba", "timestamp" : "2024-10-22T01:43:22.739Z",

    "processedRowsPerSecond" : 644.6234119782214, "sources" : [ { "description" : "KafkaV2[Subscribe[adrec-click-log]]", "startOffset" : null, "endOffset" : { "adrec-click-log" : { "92" : 1100, // 마지막으로 처리했던 ["파티션 번호" : "오프셋 번호"] 데이터 } }, "latestOffset" : { "adrec-click-log" : { "92" : 1000, } }, "numInputRows" : 5683 // 입력 row 개수 }, "sink" : { "description" : "org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@739b9fa4", "numOutputRows" : 5683 // 출력 row 개수 } } 데이터의 input 데이터의 output 커밋에 사용할 데이터
  7. class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener { // 커밋

    전용 컨슈머 private var offsetCommitConsumer: KafkaConsumer[String, String] = _ // Spark에서 스트리밍 쿼리가 처음 시작될 때 커밋 전용 컨슈머 생성 override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = { val props = new Properties() props.put("bootstrap.servers", bootstrapServers) props.put("group.id", applicationId) props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer") props.put("enable.auto.commit", "false") offsetCommitConsumer = new KafkaConsumer[String, String](props) } // 스트리밍 쿼리가 종료될 때 커밋 전용 컨슈머 해제 override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = { if (offsetCommitConsumer != null) { offsetCommitConsumer.close() } }
  8. class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener { // 마이크로

    배치가 끝난 뒤에 매번 호출되는 함수 override def onQueryProgress(event: QueryProgressEvent): Unit = { // 소스가 2개 이상이라면 (0) 으로 선언하는게 아니라 flatMap으로 펼쳐서 사용해야함 val endOffsetJson = event.progress.sources(0).endOffset // endOffset은 json String으로 되어 있음. 데이터를 파싱해서 필요한 데이터만 추출. val parsedData = new ObjectMapper().readValue(endOffsetJson, classOf[java.util.Map[String, java.util.Map[String, Long]]]) .asScala val offsetRanges = parsedData.flatMap { case (topic, partitionOffsets) => partitionOffsets.asScala.map { case (partition, offset) => new TopicPartition(topic, partition.toInt) -> new OffsetAndMetadata(offset) } } // 커밋할 오프셋이 존재한다면 커밋 수행 if (offsetRanges.nonEmpty) { offsetCommitConsumer.commitSync(offsetRanges.asJava) } } JSON을 파싱해야함
  9. class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener { // 마이크로

    배치가 끝난 뒤에 매번 호출되는 함수 override def onQueryProgress(event: QueryProgressEvent): Unit = { // 소스가 2개 이상이라면 (0) 으로 선언하는게 아니라 flatMap으로 펼쳐서 사용해야함 val endOffsetJson = event.progress.sources(0).endOffset // endOffset은 json String으로 되어 있음. 데이터를 파싱해서 필요한 데이터만 추출. val parsedData = new ObjectMapper().readValue(endOffsetJson, classOf[java.util.Map[String, java.util.Map[String, Long]]]) .asScala val offsetRanges = parsedData.flatMap { case (topic, partitionOffsets) => partitionOffsets.asScala.map { case (partition, offset) => new TopicPartition(topic, partition.toInt) -> new OffsetAndMetadata(offset) } } // 커밋할 오프셋이 존재한다면 커밋 수행 if (offsetRanges.nonEmpty) { offsetCommitConsumer.commitSync(offsetRanges.asJava) } } Map을 만들고 Commit 수행
  10. Q&A