Upgrade to Pro — share decks privately, control downloads, hide ads and more …

AI 기반 광고 추천 파이프라인에서 스파크 스트리밍의 배포 및 모니터링 전략

kakao
November 01, 2024

AI 기반 광고 추천 파이프라인에서 스파크 스트리밍의 배포 및 모니터링 전략

#카프카 #스파크 #실시간 파이프라인 #빅데이터

AI 기반 광고 추천 시스템에서 스파크 스트리밍은 실시간 데이터 처리를 위한 핵심 구성 요소로 작동합니다.
이 파이프라인은 사용자 행동 데이터를 실시간으로 수집하고 처리하여 개인화된 광고를 추천하는 데 사용됩니다.
이번에 발표할 내용은 스파크 파이프라인 아키텍처와 스파크를 효과적으로 모니터링하기 위한 방법들을 소개합니다.

발표자 : cory.doras
광고추천팀에서 실시간 데이터 프로세싱과 서빙을 맡고 있습니다.

kakao

November 01, 2024
Tweet

More Decks by kakao

Other Decks in Programming

Transcript

  // Slide 1 — Pipeline deployment architecture (Korean title garbled in transcript).
//
// Pipeline definition (JSON) stored in MongoDB and read at deploy time
// (comment reconstructed from garbled transcript):
//
// {
//   "name": "dankeRankerTest",
//   "mode": "dankeRankerDruid",
//   "config": {
//     "sink.bootstrap.servers": "kafka.kakao.com:9092",
//     "sink.topic": "adrec-danke-predictor-druid",
//     "source.bootstrap.servers": "kafka.kakao.com:9092",
//     "source.topics": "adrec-danke-serving"
//   }
// }

// Config definition used by each pipeline mode; required options are declared
// through a builder pattern (comment reconstructed from garbled transcript —
// original: "각 mode에서 사용될 Config 정의. 필수 옵션을 빌더 패턴 활용", presumably).
val definition: ConfigDef.ConfigDef = new ConfigDef.ConfigDef()
  .define("source.bootstrap.servers")
  .define("source.topics")
  .define("sink.bootstrap.servers")
  .define("sink.topic")
  # Slide 2 — entrypoint.sh: kills any previously running YARN instances of the
# application, then submits the Spark streaming app. (Korean slide title garbled
# in transcript; the ASCII part decodes to "entrypoint.sh".)
#!/bin/bash
echo "Application ID: $NAME"

# Kill every previous application instance so only one copy of the stream runs.
# (The echoed Korean message is mojibake in the transcript; it likely reads
# "종료 중" / "terminating" — kept byte-for-byte since it is a runtime string.)
for APP_ID in $APPLICATION_IDS; do
  echo "ઙܐ ઺: $APP_ID"
  yarn application -kill $APP_ID
done

echo "Submitting spark app"
spark3-submit \
  --deploy-mode cluster \
  --master yarn \
  --name ${NAME} \
  --driver-memory 2g \
  --num-executors ${SPARK_NUM_EXECUTORS}
  # ... (remaining spark-submit options omitted on the slide — "생략")

echo "Running spark app: $NAME"
  # Slide 3 — k8s.yaml: Kubernetes Job that runs the deploy entrypoint with the
# Spark sizing parameters injected as environment variables.
apiVersion: batch/v1
kind: Job
metadata:
  name: danke-predictor-druid
spec:
  template:
    spec:
      containers:
        # NOTE(review): the slide puts the image reference in `name:`; a valid
        # container spec needs separate `name:` and `image:` fields (and container
        # names may not contain ':') — confirm against the real manifest.
        - name: adrec-metric-pipeline:v1.0.0
          imagePullPolicy: IfNotPresent  # fixed casing: Kubernetes only accepts IfNotPresent
          env:
            - name: NAME
              value: "dankePredictorDruid"
            - name: SPARK_NUM_EXECUTORS
              value: "100"
            - name: SPARK_EXECUTOR_CORES
              value: "2"
            - name: SPARK_EXECUTOR_MEMORY
              value: "2g"
      restartPolicy: Never  # the entrypoint script handles resubmission; the Job must not restart
  // Slide 4 — the StreamingQueryListener provided by Spark (garbled title decodes
// to "스파크에서 제공하는 StreamingQueryListener"). Abstract callback interface for
// observing Structured Streaming query lifecycle events; the transcript collapsed
// it onto one line, which made the `//` comments swallow the declarations.
@Evolving
abstract class StreamingQueryListener extends Serializable {
  import StreamingQueryListener._

  // Called when a query is started.
  def onQueryStarted(event: QueryStartedEvent): Unit

  // Called when there is some status update (ingestion rate updated, etc.)
  def onQueryProgress(event: QueryProgressEvent): Unit

  // Called when a query is stopped, with or without error.
  def onQueryTerminated(event: QueryTerminatedEvent): Unit
}
  // Slide 5 — sample StreamingQueryProgress event emitted by onQueryProgress.
// (Reconstructed: the transcript dropped the `]` closing "sources" and left a
// trailing comma after the latestOffset entry; Korean comments were mojibake and
// are translated with best-effort readings.)
{
  "id" : "be7898d0-895b-4f32-ad73-c07cfad4c2bb",
  "runId" : "9357ed50-fe79-4dd8-a327-3775663e97ba",
  "timestamp" : "2024-10-22T01:43:22.739Z",
  "processedRowsPerSecond" : 644.6234119782214,
  "sources" : [ {
    "description" : "KafkaV2[Subscribe[adrec-click-log]]",
    "startOffset" : null,
    "endOffset" : {
      "adrec-click-log" : {
        "92" : 1100  // last processed ["partition" : "offset"] data (reconstructed from garbled comment)
      }
    },
    "latestOffset" : {
      "adrec-click-log" : {
        "92" : 1000
      }
    },
    "numInputRows" : 5683  // number of input rows (reconstructed from garbled comment)
  } ],
  "sink" : {
    "description" : "org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@739b9fa4",
    "numOutputRows" : 5683  // number of output rows (reconstructed from garbled comment)
  }
}
  // Slide 6 — the same StreamingQueryProgress event, this time annotated with which
// fields matter: the data's input (numInputRows), the data's output (numOutputRows),
// and the data used for the offset commit (endOffset). The trailing mojibake on the
// slide decodes to "데이터의 input / 데이터의 output / 커밋에 사용할 데이터".
{
  "id" : "be7898d0-895b-4f32-ad73-c07cfad4c2bb",
  "runId" : "9357ed50-fe79-4dd8-a327-3775663e97ba",
  "timestamp" : "2024-10-22T01:43:22.739Z",
  "processedRowsPerSecond" : 644.6234119782214,
  "sources" : [ {
    "description" : "KafkaV2[Subscribe[adrec-click-log]]",
    "startOffset" : null,
    "endOffset" : {            // <-- data used for the offset commit
      "adrec-click-log" : {
        "92" : 1100            // last processed ["partition" : "offset"] data
      }
    },
    "latestOffset" : {
      "adrec-click-log" : {
        "92" : 1000
      }
    },
    "numInputRows" : 5683      // <-- the data's input (row count)
  } ],
  "sink" : {
    "description" : "org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaTable@739b9fa4",
    "numOutputRows" : 5683     // <-- the data's output (row count)
  }
}
  // Slide 7 — StreamingQueryListener that owns a Kafka consumer used solely for
// committing offsets. The transcript collapsed the slide onto one line, so the
// `//` comments commented out the code that followed them; reconstructed here.
// Korean comments were mojibake and are translated with best-effort readings.
class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener {

  // Commit-dedicated consumer (reconstructed from garbled comment "커밋 전용 컨슈머", presumably).
  private var offsetCommitConsumer: KafkaConsumer[String, String] = _

  // Create the commit-dedicated consumer when the Spark streaming query first starts.
  override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", bootstrapServers)
    props.put("group.id", applicationId)
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")
    props.put("enable.auto.commit", "false") // offsets are committed manually from onQueryProgress
    offsetCommitConsumer = new KafkaConsumer[String, String](props)
  }

  // Release the commit-dedicated consumer when the streaming query terminates.
  override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {
    if (offsetCommitConsumer != null) {
      offsetCommitConsumer.close()
    }
  }
  // (class left open on the slide — onQueryProgress is shown on the next slide)
  // Slide 8 — the onQueryProgress half of StreamListener: parse the JSON endOffset
// of the completed micro-batch and commit those offsets via the dedicated consumer.
// The slide's trailing annotation decodes to "JSON을 파싱해야함" ("the JSON must be
// parsed"). Reconstructed from the collapsed transcript; Korean comments translated
// with best-effort readings.
class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener {

  // Invoked every time a micro-batch finishes (reconstructed from garbled comment).
  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    // NOTE (from slide): with two or more sources, don't hard-code sources(0) —
    // spread over all sources with flatMap instead (reconstructed reading).
    val endOffsetJson = event.progress.sources(0).endOffset
    // endOffset is a JSON string; parse it and extract only the fields we need.
    val parsedData = new ObjectMapper()
      .readValue(endOffsetJson, classOf[java.util.Map[String, java.util.Map[String, Long]]])
      .asScala
    val offsetRanges = parsedData.flatMap { case (topic, partitionOffsets) =>
      partitionOffsets.asScala.map { case (partition, offset) =>
        new TopicPartition(topic, partition.toInt) -> new OffsetAndMetadata(offset)
      }
    }
    // Commit only when there are offsets to commit.
    if (offsetRanges.nonEmpty) {
      offsetCommitConsumer.commitSync(offsetRanges.asJava)
    }
  }
  // (class left open on the slide — other callbacks shown on the previous slide)
  // Slide 9 — the same onQueryProgress code as slide 8, repeated to highlight the
// second step: the trailing annotation decodes to "Map을 만들고 Commit 수행"
// ("build the Map and perform the Commit"). Reconstructed from the collapsed
// transcript; Korean comments translated with best-effort readings.
class StreamListener(bootstrapServers: String, applicationId: String) extends StreamingQueryListener {

  // Invoked every time a micro-batch finishes (reconstructed from garbled comment).
  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    // NOTE (from slide): with two or more sources, don't hard-code sources(0) —
    // spread over all sources with flatMap instead (reconstructed reading).
    val endOffsetJson = event.progress.sources(0).endOffset
    // endOffset is a JSON string; parse it and extract only the fields we need.
    val parsedData = new ObjectMapper()
      .readValue(endOffsetJson, classOf[java.util.Map[String, java.util.Map[String, Long]]])
      .asScala
    // Step highlighted on this slide: build the TopicPartition -> OffsetAndMetadata Map ...
    val offsetRanges = parsedData.flatMap { case (topic, partitionOffsets) =>
      partitionOffsets.asScala.map { case (partition, offset) =>
        new TopicPartition(topic, partition.toInt) -> new OffsetAndMetadata(offset)
      }
    }
    // ... and perform the commit when there is anything to commit.
    if (offsetRanges.nonEmpty) {
      offsetCommitConsumer.commitSync(offsetRanges.asJava)
    }
  }
  // (class left open on the slide — other callbacks shown on an earlier slide)
  10. Q&A