@otmb
Last active March 29, 2025 10:27
Convert YOLOv9 to CoreML

Environment

  • Python 3.11.9

Installation

$ git clone https://github.com/MultimediaTechLab/YOLO.git
$ cd YOLO
$ git fetch origin pull/174/head:pr174
$ git checkout pr174
$ pip install -r requirements.txt
$ pip install coremltools==8.2
$ curl -LO https://gist.githubusercontent.com/otmb/0699625681e9d97cc47cc42e3e8a8a8c/raw/8351fec5a16987cc3f915e4784f6c69c3a622c0f/yolo_coreml.patch
$ patch -p1 < yolo_coreml.patch
$ python yolo/lazy.py task=export name=ExportCoreml model=v9-s task.format=coreml

Converted model

YOLOv9-s CoreML model
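
When the exported model is added to an Xcode project, Xcode compiles it into the v9-s.mlmodelc bundle that the code below loads. If you ship the raw model file instead, it can be compiled at runtime with CoreML's MLModel.compileModel(at:); a minimal sketch, assuming the export produced a resource named v9-s.mlpackage:

import CoreML

// Assumption: v9-s.mlpackage was bundled as an app resource.
if let packageURL = Bundle.main.url(forResource: "v9-s", withExtension: "mlpackage") {
    do {
        // compileModel(at:) writes a compiled .mlmodelc bundle to a temporary
        // location; persist it yourself if you want to skip recompiling later.
        let compiledURL = try MLModel.compileModel(at: packageURL)
        let model = try MLModel(contentsOf: compiledURL)
        print("Compiled to \(compiledURL.lastPathComponent)")
        print(model.modelDescription)
    } catch {
        print("Model compilation failed: \(error)")
    }
}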

import SwiftUI
import Vision
import CoreML
import UIKit

struct ContentView: View {
    // @StateObject keeps the detector alive across view re-renders.
    @StateObject var detection = ObjectDetection()
    var uiImage = UIImage(named: "dog.jpg")
    @State var image: UIImage?

    var body: some View {
        VStack {
            if let image = image {
                Image(uiImage: image)
                    .resizable()
                    .aspectRatio(contentMode: .fit)
            }
        }
        .onAppear {
            predict()
        }
    }

    func predict() {
        do {
            if let uiImage = uiImage {
                _ = try detection.prediction(uiImage: uiImage)
                if let img = detection.dstImage {
                    self.image = img
                }
            }
        } catch {
            print(error)
        }
    }
}
/// One detection: class index, box in original-image coordinates, and confidence.
class BBox {
    var cls = 0
    var box = CGRect.zero
    var score = 0.0
}

/// Maps boxes from the letterboxed 640x640 model space back to the original image.
class BoundingBox {
    var r = 0.0     // letterbox scale factor
    var padH = 0.0  // vertical letterbox padding
    var padW = 0.0  // horizontal letterbox padding
    var bbox = [BBox]()

    init(modelWidth: Double, modelHeight: Double, imgSize: CGSize) {
        self.r = min(modelHeight / imgSize.height, modelWidth / imgSize.width)
        if modelHeight / imgSize.height > modelWidth / imgSize.width {
            self.padH = (modelHeight - imgSize.height * r) / 2
        } else {
            self.padW = (modelWidth - imgSize.width * r) / 2
        }
    }

    /// The flat model output carries 6 values per box: [cls, x1, y1, x2, y2, score].
    func create(_ predict: [Double]) -> [BBox] {
        let boxCount = predict.count / 6
        var bbox = [BBox]()
        for num in 0..<boxCount {
            bbox.append(_create(Array(predict[num * 6..<num * 6 + 6])))
        }
        return bbox
    }

    private func _create(_ predict: [Double]) -> BBox {
        let bbox = BBox()
        bbox.cls = Int(predict[0])
        // Subtract the letterbox padding, then divide by the scale factor.
        let minX = (predict[1] - padW) / r
        let width = (predict[3] - padW) / r - minX
        let minY = (predict[2] - padH) / r
        let height = (predict[4] - padH) / r - minY
        bbox.box = CGRect(x: minX, y: minY, width: width, height: height)
        bbox.score = predict[5]
        return bbox
    }
}
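// Worked example of the letterbox math above: a 1280x960 image scaled into a
// 640x640 input gives r = min(640/960, 640/1280) = 0.5. The height ratio
// (~0.667) exceeds the width ratio (0.5), so padH = (640 - 960 * 0.5) / 2 = 80
// and padW = 0; a model-space y of 200 then maps back to
// (200 - 80) / 0.5 = 240 in original image coordinates.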
class ObjectDetection: ObservableObject {
    let modelName = "v9-s"
    private var requests = [VNRequest]()
    var originalImage: UIImage? = nil
    var dstImage: UIImage? = nil
    var bbox = [BBox]()
    var boxColor: UIColor = .white
    var boxLineWidth: CGFloat = 2

    init() {
        if let error = setupVision() {
            print(error.localizedDescription)
        }
    }

    @discardableResult
    func setupVision() -> NSError? {
        guard let modelURL = Bundle.main.url(forResource: modelName, withExtension: "mlmodelc") else {
            return NSError(domain: "Model file is missing.", code: -1)
        }
        do {
            let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL))
            let request = VNCoreMLRequest(model: visionModel, completionHandler: visionObjectDetectionResults)
            // .scaleFit letterboxes the input, matching the padding BoundingBox undoes.
            request.imageCropAndScaleOption = .scaleFit
            requests = [request]
        } catch let error as NSError {
            print("Model loading went wrong: \(error)")
            return error
        }
        return nil
    }

    func runCoreML(uiImage: UIImage, orientation: CGImagePropertyOrientation) throws {
        let cgImage = uiImage.cgImage!
        let handler = VNImageRequestHandler(cgImage: cgImage, orientation: orientation, options: [:])
        try handler.perform(requests)
    }

    func visionObjectDetectionResults(request: VNRequest, error: Error?) {
        guard let observations = request.results as? [VNCoreMLFeatureValueObservation] else { fatalError() }
        // Copy the MLMultiArray output into a plain [Double].
        let mlarray = observations[0].featureValue.multiArrayValue!
        let length = mlarray.count
        let floatPtr = mlarray.dataPointer.bindMemory(to: Float32.self, capacity: length)
        let floatBuffer = UnsafeBufferPointer(start: floatPtr, count: length)
        let boxes = floatBuffer.map { Double($0) }

        guard let uiImage = originalImage else { return }
        let boundingBox = BoundingBox(modelWidth: 640, modelHeight: 640, imgSize: uiImage.size)
        let bbox = boundingBox.create(boxes)

        // Redraw the original image with every box above a 0.5 confidence threshold.
        let dstImageSize = uiImage.size
        let dstImageFormat = UIGraphicsImageRendererFormat()
        dstImageFormat.scale = 1
        let renderer = UIGraphicsImageRenderer(size: dstImageSize, format: dstImageFormat)
        let dstImage = renderer.image { rendererContext in
            draw(image: uiImage.cgImage!, in: rendererContext.cgContext)
            for box in bbox where box.score >= 0.5 {
                draw(rect: box.box, in: rendererContext.cgContext)
            }
        }
        self.bbox = bbox
        self.dstImage = dstImage
    }

    func draw(image: CGImage, in cgContext: CGContext) {
        cgContext.saveGState()
        // Flip vertically: Core Graphics uses a bottom-left origin, UIImage a top-left one.
        cgContext.scaleBy(x: 1.0, y: -1.0)
        let drawingRect = CGRect(x: 0, y: -image.height, width: image.width, height: image.height)
        cgContext.draw(image, in: drawingRect)
        cgContext.restoreGState()
    }

    private func draw(rect box: CGRect, in cgContext: CGContext) {
        cgContext.setStrokeColor(boxColor.cgColor)
        cgContext.setLineWidth(boxLineWidth)
        cgContext.addRect(box)
        cgContext.strokePath()
    }

    func prediction(uiImage: UIImage, orientation: CGImagePropertyOrientation = .up) throws -> [BBox] {
        self.originalImage = uiImage
        try runCoreML(uiImage: uiImage, orientation: orientation)
        return self.bbox
    }
}
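
A minimal call site outside of SwiftUI, for reference (dog.jpg is the same sample asset assumed by ContentView above):

let detector = ObjectDetection()
if let sample = UIImage(named: "dog.jpg") {
    do {
        // prediction(...) runs the Vision request synchronously and returns
        // every decoded box; filter by score as needed.
        let boxes = try detector.prediction(uiImage: sample)
        for b in boxes where b.score >= 0.5 {
            print("class \(b.cls) score \(b.score) rect \(b.box)")
        }
    } catch {
        print("Detection failed: \(error)")
    }
}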