我正在使用Vision检测对象,在获得[VNRecognizedObjectObservation]之后,我在显示它们之前对标准化的矩形进行变换:

// Builds a vertical flip within `height`: a point's y is translated by -height and then negated (y -> height - y).
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -CGFloat(height))
// Scales the normalized rect up to `width` x `height`, then applies the flip built above.
VNImageRectForNormalizedRect(normalizedRect, width, height) // Displayed with SwiftUI, that's why I'm applying transform
    .applying(transform)

宽度和高度来自SwiftUI GeometryReader:

// Aspect-fit image with the detection boxes drawn in an overlay on top of it.
Image(...)
    .resizable()
    .scaledToFit()
    .overlay {
        // The GeometryReader supplies the geometry passed to calculateRect for each bounding box.
        GeometryReader { geometry in // ZStack and ForEach([VNRecognizedObjectObservation], id: \.uuid), then:
            let calculatedRect = calculateRect(boundingBox, geometry)
            // One rectangle per observation, sized and shifted by the calculated rect.
            Rectangle()
                .frame(width: calculatedRect.width, height: calculatedRect.height)
                .offset(x: calculatedRect.origin.x, y: calculatedRect.origin.y)
        }
    }

但问题是,即使是在正方形的图像上,许多盒子的位置也不正确(尽管有些是准确的).

这与模型无关,因为当我在Xcode的模型预览部分尝试时,相同的图像(使用相同的MLModel)得到的边界框相当准确.

Sample Image in my App:

Sample Image on my App

Sample Image in Xcode Preview:

Sample Image in Xcode Preview


更新(可重现的最小示例):

将此代码放在一个macOS SwiftUI项目的ContentView.swift中,并将YOLOv3Tiny.mlmodel放入项目包中,即可重现相同的结果.

import SwiftUI
import Vision
import CoreML

/// Runs the bundled YOLOv3Tiny model on a remote sample image and publishes the detections.
class Detection: ObservableObject {
    // Xcode preview generates this for the same image: https://i.imgur.com/6IPNQ8b.png
    let imgURL = URL(string: "https://i.imgur.com/EqsxxTc.jpg")!
    @Published var objects: [VNRecognizedObjectObservation] = []

    /// Loads the compiled model from the main bundle and wraps it for Vision.
    /// - Returns: The Vision model, or `nil` if the resource is missing or any loading step fails.
    func getModel() -> VNCoreMLModel? {
        guard
            let compiledURL = Bundle.main.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc"),
            let coreMLModel = try? MLModel(contentsOf: compiledURL, configuration: MLModelConfiguration())
        else {
            return nil
        }
        return try? VNCoreMLModel(for: coreMLModel)
    }

    /// Performs object detection on `imgURL` and stores the observations in `objects`.
    ///
    /// Traps if the model or the image cannot be loaded; errors thrown by the
    /// request handler are discarded.
    func detect() async {
        guard let model = getModel(), let tiff = NSImage(contentsOf: imgURL)?.tiffRepresentation else {
            // YOLOv3Tiny: https://ml-assets.apple.com/coreml/models/Image/ObjectDetection/YOLOv3Tiny/YOLOv3Tiny.mlmodel
            fatalError("Either YOLOv3Tiny.mlmodel is not in project bundle, or image failed to load.")
        }

        let detectionRequest = VNCoreMLRequest(model: model) { finished, _ in
            // Hop to the main queue before touching the published property.
            DispatchQueue.main.async {
                self.objects = (finished.results as? [VNRecognizedObjectObservation]) ?? []
            }
        }
        let handler = VNImageRequestHandler(data: tiff)
        try? handler.perform([detectionRequest])
    }

    /// Converts a normalized Vision rect into the coordinate space of `geometry`.
    /// - Parameters:
    ///   - rect: Normalized bounding box reported by Vision.
    ///   - geometry: Proxy whose size the rect is scaled to.
    /// - Returns: The scaled rect after a vertical flip within the proxy's height.
    func deNormalize(_ rect: CGRect, _ geometry: GeometryProxy) -> CGRect {
        let viewSize = geometry.size
        // Translate by -height, then negate y: y -> height - y.
        let verticalFlip = CGAffineTransform(scaleX: 1, y: -1)
            .translatedBy(x: 0, y: -CGFloat(viewSize.height))
        let scaledRect = VNImageRectForNormalizedRect(rect, Int(viewSize.width), Int(viewSize.height))
        return scaledRect.applying(verticalFlip)
    }
}

/// Loads the sample image asynchronously and overlays one red outline per detected object.
struct ContentView: View {
    @StateObject var detection = Detection()

    var body: some View {
        AsyncImage(url: detection.imgURL) { img in
            img.resizable().scaledToFit().overlay {
                // The GeometryReader's size is what deNormalize scales each bounding box to.
                GeometryReader { geometry in
                    ZStack {
                        ForEach(detection.objects, id: \.uuid) { object in
                            // Bounding box converted from normalized coordinates to the overlay's size.
                            let rect = detection.deNormalize(object.boundingBox, geometry)
                            Rectangle()
                                .stroke(lineWidth: 2)
                                .foregroundColor(.red)
                                .frame(width: rect.width, height: rect.height)
                                // NOTE(review): offset shifts the view from wherever the ZStack placed it,
                                // so rect.origin is not treated as an absolute position here — confirm.
                                .offset(x: rect.origin.x, y: rect.origin.y)
                        }
                    }
                }
            }
        } placeholder: {
            // Shown until the image has been loaded.
            ProgressView()
        }
        .onAppear {
            // Kick off detection in an unstructured task when the view appears.
            Task { await self.detection.detect() }
        }
    }
}

Edit:进一步测试发现,VN返回正确的位置,我的deNormalize()函数也返回正确的位置和大小,因此它必须与SwiftUI相关.

推荐答案

Issue 1

GeometryReader使内部的所有东西都缩小到最小尺寸.

将.border(Color.orange)加到ZStack上,您将看到类似下图的结果.

enter image description here

您可以使用.frame(maxWidth: .infinity, maxHeight: .infinity)来拉伸ZStack以占用所有可用空间.

Issue 2

position 与 offset 的区别.

offset通常以视图的中心为起点,然后按指定的量进行偏移.

position更像是origin.

将此视图的中心定位在其父视图的坐标空间中的指定坐标处.

Issue 3

position以视图的中心为基准,而origin以左上角(0,0)为基准,因此需要对坐标做相应的调整.

Issue 4

ZStack需要在X轴上翻转.

以下是完整的代码

import SwiftUI
import Vision
import CoreML
/// Runs the YOLOv3Tiny model on an asset-catalog image and publishes the detections.
///
/// Isolated to the main actor, so `objects` is only mutated there.
@MainActor
class Detection: ObservableObject {
    // Moved file to assets
    // let imgURL = URL(string: "https://i.imgur.com/EqsxxTc.jpg")! // Xcode preview generates this: https://i.imgur.com/6IPNQ8b.png
    let imageName: String = "EqsxxTc"
    @Published var objects: [VNRecognizedObjectObservation] = []

    /// Creates the Vision wrapper around the generated `YOLOv3Tiny` model class.
    /// - Returns: A Vision model ready to be used in a `VNCoreMLRequest`.
    /// - Throws: Any error raised while instantiating the Core ML model or wrapping it for Vision.
    func getModel() throws -> VNCoreMLModel {
        // Used the generated model class directly instead of loading from a URL.
        let model = try YOLOv3Tiny(configuration: .init()).model
        return try VNCoreMLModel(for: model)
    }

    /// Performs object detection on the image named `imageName` and stores the result in `objects`.
    /// - Throws: `AppError.unableToLoadImage` if the image or its TIFF data is unavailable,
    ///   or any error thrown while loading the model or performing the request.
    func detect() async throws {
        let model = try getModel()

        guard let tiff = NSImage(named: imageName)?.tiffRepresentation else {
            // YOLOv3Tiny: https://ml-assets.apple.com/coreml/models/Image/ObjectDetection/YOLOv3Tiny/YOLOv3Tiny.mlmodel
            throw AppError.unableToLoadImage
        }

        // `perform` is synchronous: once it returns, the request's results are populated.
        // Reading them here instead of bridging the completion handler through a
        // continuation avoids resuming that continuation twice when a failing request
        // is reported both to the completion handler and as a thrown error.
        // NOTE(review): this blocks the main actor while the model runs — consider
        // moving the work off the main actor if the UI stalls.
        let request = VNCoreMLRequest(model: model)
        try VNImageRequestHandler(data: tiff).perform([request])
        objects = (request.results as? [VNRecognizedObjectObservation]) ?? []
    }

    /// Scales a normalized Vision rect to the size of `geometry`.
    ///
    /// No vertical flip is applied here; the caller is responsible for flipping.
    /// - Parameters:
    ///   - rect: Normalized bounding box reported by Vision.
    ///   - geometry: Proxy whose size the rect is scaled to. The size is truncated to whole points.
    /// - Returns: The rect expressed in the proxy's point dimensions.
    func deNormalize(_ rect: CGRect, _ geometry: GeometryProxy) -> CGRect {
        return VNImageRectForNormalizedRect(rect, Int(geometry.size.width), Int(geometry.size.height))
    }
}

/// Shows the sample image with a red outline over every detected object.
struct ContentView: View {
    @StateObject var detection = Detection()

    var body: some View {
        Image(detection.imageName)
            .resizable()
            .scaledToFit()
            .overlay {
                GeometryReader { proxy in
                    ZStack {
                        ForEach(detection.objects, id: \.uuid) { observation in
                            let box = detection.deNormalize(observation.boundingBox, proxy)
                            // `position` places the view's center, so convert the
                            // rect's origin into its center point.
                            let center = CGPoint(
                                x: box.origin.x + box.width / 2,
                                y: box.origin.y + box.height / 2
                            )
                            Rectangle()
                                .stroke(lineWidth: 2)
                                .foregroundColor(.red)
                                .frame(width: box.width, height: box.height)
                                .position(center)
                        }
                    }
                    // Inside a GeometryReader the stack shrinks to its smallest size;
                    // stretch it over all the available space.
                    .frame(maxWidth: .infinity, maxHeight: .infinity)
                    // Flip the whole stack upside down.
                    .rotation3DEffect(.degrees(180), axis: (x: 1, y: 0, z: 0))
                }
                .border(Color.orange)
            }
            .task {
                do {
                    try await detection.detect()
                } catch {
                    // Errors should be surfaced to the user somehow rather than crashing
                    // or leaving them waiting on work that has failed; here it is only logged.
                    print(error)
                }
            }
    }
}
/// Xcode canvas preview for `ContentView`.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}

/// Failures the detection code can report to the UI.
enum AppError: LocalizedError {
    /// A required file could not be located.
    case cannotFindFile
    /// The source image, or its image data, could not be obtained.
    case unableToLoadImage

    /// Human-readable message, so that the `LocalizedError` conformance
    /// carries a meaningful description instead of a generic one.
    var errorDescription: String? {
        switch self {
        case .cannotFindFile:
            return "The required file could not be found."
        case .unableToLoadImage:
            return "The image could not be loaded."
        }
    }
}

enter image description here

我还更改了一些其他内容,如您所注意到的,代码中有注释.

Ios相关问答推荐

iPhone:iOS 17.4中没有气压数据

删除领域中的cartItem时出现问题

在TVOS中访问SWIFT中的objc++函数时发送给类的无法识别的 Select 符

如何根据SWIFT中的按钮点击更新文本值

如何将 Info.plist 文件添加到 Xcode for admob,错误 添加后会产生多个命令

Swift UI中如何为Text提供内部填充

SwiftUI - TextField - 在焦点上清除 Double 类型的值,在取消 Select 时恢复

如何在 SwiftUI 中对表格行使用 Transferable

过渡动画在 iOS16 中不起作用,但在 iOS15 中起作用

通过 Rosetta 打开模拟器 xcode 14 以修复滚动

如何从 Locale Swift 获取货币符号

SwiftUI:通过Sheet将数据添加到Realm DB时视图不更新

Swiftui为什么在点击第二行之前背景不起作用

DateFormatter 使用 TimeZone 和 Locale 的特定组合返回 nil

UICollectionView - 动态单元格高度?

'+entityForName: nil 不是合法的 NSManagedObjectContext 参数 - 核心数据

如何在键盘上方添加工具栏?

xcode 5.1:libCordova.a 架构问题

类 AMSupportURLConnectionDelegate 在两者中都实现

我在 xCode5-iOS7 中收到错误输入目录均不包含匹配的启动图像