[Swift Heroes '24] Delightful on-device AI experiences

Currently, numerous APIs allow developers to run Stable Diffusion models on the server and integrate AI into their apps by making network requests.

While these APIs are great, they can become costly at scale and rule out offline-first applications. In this session, I discuss a feature I have been building for my app QReate that uses Stable Diffusion to generate art-like QR codes entirely on-device with CoreML.

Pol Piella Abadia

April 20, 2024

Transcript

  1. A neon drawing of an astronaut floating in space, digital art by Mór Than,
     unsplash contest winner, space art, sci-fi, retrowave, synthwave
  2. #2

  3. git clone https://github.com/apple/ml-stable-diffusion.git
     cd ml-stable-diffusion
     python3 -m venv venv
     source venv/bin/activate
     pip install -e .
     python -m python_coreml_stable_diffusion.torch2coreml \
       --bundle-resources-for-swift-cli \
       --attention-implementation SPLIT_EINSUM_V2 \
       --convert-unet \
       --convert-text-encoder \
       --convert-vae-decoder \
       --convert-vae-encoder \
       --model-version runwayml/stable-diffusion-v1-5 \
       --unet-support-controlnet \
       --convert-controlnet DionTimmer/controlnet_qrcode-control_v1p_sd15 \
       -o generated
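
The conversion step above writes the compiled CoreML models into the generated output folder. A minimal sketch of copying them into the Swift package created next; the exact source and destination paths are assumptions based on the -o generated flag and the .process("Resources") declaration later in the deck:

     cp -r generated/Resources \
       StableDiffusionControlNet/Sources/StableDiffusionControlNet/Resources
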
  4. // swift-tools-version: 5.9
     import PackageDescription

     let package = Package(
         name: "StableDiffusionControlNet",
         platforms: [.macOS(.v14)],
         dependencies: [],
         targets: []
     )

  5. // swift-tools-version: 5.9
     import PackageDescription

     let package = Package(
         name: "StableDiffusionControlNet",
         platforms: [.macOS(.v14)],
         dependencies: [
             .package(url: "https://github.com/apple/ml-stable-diffusion.git", exact: "1.1.0")
         ],
         targets: [
             .executableTarget(
                 name: "StableDiffusionControlNet",
                 dependencies: [.product(name: "StableDiffusion", package: "ml-stable-diffusion")],
                 resources: [.process("Resources")]
             )
         ]
     )
  6. import Foundation
     import AppKit // needed for NSImage; the slide omitted this import
     import StableDiffusion
     import CoreML

     func generate(prompt: String, startingImageURL: URL, numberOfImages: Int) async throws -> [CGImage] {
         // Locate the converted CoreML models bundled with the package.
         guard let resourcesPath = Bundle.module.url(forResource: "Resources", withExtension: nil)?.path() else {
             return []
         }
         let url = URL(fileURLWithPath: resourcesPath)

         // Use every available compute unit (CPU, GPU and Neural Engine).
         let configuration = MLModelConfiguration()
         configuration.computeUnits = .all

         let pipeline = try StableDiffusionPipeline(
             resourcesAt: url,
             controlNet: ["DiontimmerControlnetQrcodeControlV1PSd15"],
             configuration: configuration,
             disableSafety: false,
             reduceMemory: true
         )
         try pipeline.loadResources()

         // Resize the QR code to the 512x512 input the model expects.
         // resized(to:) is a custom NSImage helper not shown in the deck.
         let startingNSImage = NSImage(contentsOf: startingImageURL)?
             .resized(to: .init(width: 512, height: 512))
         guard let startingImage = startingNSImage?.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
             return []
         }

         var pipelineConfig = StableDiffusionPipeline.Configuration(prompt: prompt)
         pipelineConfig.negativePrompt = "ugly, disfigured, low quality, blurry, nsfw"
         pipelineConfig.controlNetInputs = [startingImage]
         pipelineConfig.startingImage = startingImage
         pipelineConfig.useDenoisedIntermediates = true
         pipelineConfig.strength = 0.9
         pipelineConfig.seed = UInt32.random(in: 0..<UInt32.max)
         pipelineConfig.guidanceScale = 7.5
         pipelineConfig.stepCount = 50
         pipelineConfig.originalSize = 512
         pipelineConfig.targetSize = 512
         pipelineConfig.imageCount = numberOfImages

         return try pipeline.generateImages(configuration: pipelineConfig, progressHandler: { _ in true })
             .compactMap { $0 }
     }

  7. // Same pipeline as above, but with reduceMemory set to false: the models
     // stay loaded for the whole run, trading higher peak memory for speed.
     let pipeline = try StableDiffusionPipeline(
         resourcesAt: url,
         controlNet: ["DiontimmerControlnetQrcodeControlV1PSd15"],
         configuration: configuration,
         disableSafety: false,
         reduceMemory: false
     )
  8. let prompt = """
     Style-NebMagic, award winning photo, A Dark-Eyed Junco, sitting Great Basin
     National Park, intricate, nature background, wildlife photography, hyper
     realistic, Style-LostTemple, deep shadow, high contrast, dark, sunrise,
     morning, full moon
     """

     let url = URL(filePath: "/my-qr-code.png")

     let images = try await generate(
         prompt: prompt,
         startingImageURL: url,
         numberOfImages: 1
     )
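
The call returns the generated images as in-memory CGImage values; the deck does not show how they are persisted. A minimal sketch using ImageIO, where writePNG and the output path are hypothetical additions:

     import ImageIO
     import UniformTypeIdentifiers

     // Hypothetical helper: encode a CGImage to disk as a PNG.
     func writePNG(_ image: CGImage, to url: URL) throws {
         guard let destination = CGImageDestinationCreateWithURL(
             url as CFURL, UTType.png.identifier as CFString, 1, nil
         ) else { throw CocoaError(.fileWriteUnknown) }
         CGImageDestinationAddImage(destination, image, nil)
         guard CGImageDestinationFinalize(destination) else {
             throw CocoaError(.fileWriteUnknown)
         }
     }

     for (index, image) in images.enumerated() {
         try writePNG(image, to: URL(filePath: "/generated-qr-\(index).png"))
     }
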
  9. Model from HuggingFace

     let modelURL = URL(string: "https://huggingface.co/:user/:model/resolve/main/:file.zip?download=true")!
     let (location, downloadFileResponse) = try await URLSession.shared.download(from: modelURL)
     guard let httpResponse = downloadFileResponse as? HTTPURLResponse,
           httpResponse.statusCode == 200 else {
         exit(1)
     }
     try FileManager.default.moveItem(
         at: location,
         to: URL.desktopDirectory.appending(component: "model.zip")
     )
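
The archive downloaded above still needs to be unpacked before the pipeline can load the models inside it. A minimal sketch that shells out to the system unzip tool; the destination folder is an assumption:

     // Hypothetical unpacking step: extract model.zip on the Desktop.
     let zipURL = URL.desktopDirectory.appending(component: "model.zip")
     let unzip = Process()
     unzip.executableURL = URL(filePath: "/usr/bin/unzip")
     unzip.arguments = ["-o", zipURL.path, "-d", URL.desktopDirectory.appending(component: "model").path]
     try unzip.run()
     unzip.waitUntilExit()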