This is the solution I tried, but it did not work. I believe that the size of the UIImage I get from image.size does not match the size of the image that Firebase uses. But maybe someone else has an idea how to solve this?
The result...
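To verify that suspicion, the point size SwiftUI works with and the raw pixel size can be printed side by side. UIImage.size is in points and already has imageOrientation applied, while the underlying CGImage keeps the sensor's native pixel dimensions, so a camera photo (usually .right oriented) reports swapped width and height. A small diagnostic sketch (logImageGeometry is a hypothetical helper, not part of the app):

import UIKit

func logImageGeometry(_ image: UIImage) {
    // Size in points, with imageOrientation already applied
    print("UIImage.size:", image.size)
    print("scale:", image.scale, "orientation:", image.imageOrientation.rawValue)
    // Raw pixel dimensions in the sensor's native orientation
    if let cgImage = image.cgImage {
        print("CGImage:", cgImage.width, "x", cgImage.height)
    }
}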
This is the view that does the calculation:
struct ImageScanned: View {
    var image: UIImage
    @Binding var rectangles: [DetectedRectangle]

    var body: some View {
        // TODO: fix scaling
        ZStack {
            Image(uiImage: image)
                .resizable()
                // .fit keeps the whole image visible, which is what the
                // letterboxing math in transformRectangles below assumes
                .aspectRatio(image.size, contentMode: .fit)
                .overlay(
                    GeometryReader { geometry in
                        ZStack {
                            ForEach(self.transformRectangles(geometry: geometry)) { rect in
                                DetectedRectangleView(rectangle: rect)
                            }
                        }
                    }
                )
        }
    }
    private func transformRectangles(geometry: GeometryProxy) -> [DetectedRectangle] {
        var rectangles: [DetectedRectangle] = []
        let viewSize = geometry.frame(in: .global).size

        // Compare the aspect ratios of the view and the image to find out
        // which axis limits the aspect-fit scale
        let resolutionView = viewSize.width / viewSize.height
        let resolutionImage = self.image.size.width / self.image.size.height

        // The aspect-fit scale factor from image coordinates to view coordinates
        let scale: CGFloat
        if resolutionView > resolutionImage {
            scale = viewSize.height / self.image.size.height
        } else {
            scale = viewSize.width / self.image.size.width
        }

        // The letterbox offset of the scaled image inside the view; this must
        // be computed from the image size, not from each detected rectangle
        let imageWidthScaled = self.image.size.width * scale
        let imageHeightScaled = self.image.size.height * scale
        let imagePointXScaled = (viewSize.width - imageWidthScaled) / 2
        let imagePointYScaled = (viewSize.height - imageHeightScaled) / 2

        for rect in self.rectangles {
            // Scale each detected frame and shift it by the letterbox offset
            let featureWidthScaled = rect.width * scale
            let featureHeightScaled = rect.height * scale
            let featurePointXScaled = imagePointXScaled + rect.x * scale
            let featurePointYScaled = imagePointYScaled + rect.y * scale
            rectangles.append(DetectedRectangle(width: featureWidthScaled,
                                                height: featureHeightScaled,
                                                x: featurePointXScaled,
                                                y: featurePointYScaled))
        }
        return rectangles
    }
}
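Standalone, the scaling math above can be sanity-checked with made-up numbers (all sizes below are hypothetical):

import CoreGraphics

let imageSize = CGSize(width: 3000, height: 4000) // hypothetical photo
let viewSize = CGSize(width: 300, height: 500)    // hypothetical view

// Width is the limiting axis here: 300/500 = 0.6 < 3000/4000 = 0.75
let scale = (viewSize.width / viewSize.height) > (imageSize.width / imageSize.height)
    ? viewSize.height / imageSize.height
    : viewSize.width / imageSize.width            // 0.1

// Letterbox offsets of the aspect-fitted image inside the view
let offsetX = (viewSize.width - imageSize.width * scale) / 2   // 0
let offsetY = (viewSize.height - imageSize.height * scale) / 2 // 50

// A frame reported in image coordinates...
let detectedFrame = CGRect(x: 100, y: 200, width: 500, height: 100)

// ...maps to view coordinates like this
let mapped = CGRect(x: offsetX + detectedFrame.minX * scale,
                    y: offsetY + detectedFrame.minY * scale,
                    width: detectedFrame.width * scale,
                    height: detectedFrame.height * scale)
print(mapped) // (10.0, 70.0, 50.0, 10.0)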
This is the view that draws the rectangles:
struct DetectedRectangleView: View {
    var rectangle: DetectedRectangle

    var body: some View {
        Rectangle()
            .path(in: CGRect(
                x: rectangle.x,
                y: rectangle.y,
                width: rectangle.width,
                height: rectangle.height
            ))
            .foregroundColor(Color.white)
            .opacity(0.7)
    }
}
struct DetectedRectangle: Identifiable {
    var id = UUID()
    var width: CGFloat = 0
    var height: CGFloat = 0
    var x: CGFloat = 0
    var y: CGFloat = 0
    var text: String = ""
}
This is the view that contains the views above:
struct StartScanView: View {
    @State var showCaptureImageView: Bool = false
    @State var image: UIImage? = nil
    @State var rectangles: [DetectedRectangle] = []

    var body: some View {
        ZStack {
            if showCaptureImageView {
                CaptureImageView(isShown: $showCaptureImageView, image: $image)
            } else {
                VStack {
                    Button(action: {
                        self.showCaptureImageView.toggle()
                    }) {
                        Text("Start Scanning")
                    }
                    // show here View with rectangles on top of image
                    if self.image != nil {
                        ImageScanned(image: self.image ?? UIImage(), rectangles: $rectangles)
                    }
                    Button(action: {
                        self.processImage()
                    }) {
                        Text("Process Image")
                    }
                }
            }
        }
    }
    func processImage() {
        let scaledImageProcessor = ScaledElementProcessor()
        guard let image = image else { return }
        scaledImageProcessor.process(in: image) { text in
            // Drop the results of a previous scan before appending new frames
            self.rectangles.removeAll()
            for block in text.blocks {
                for line in block.lines {
                    for element in line.elements {
                        let frame = element.frame
                        self.rectangles.append(DetectedRectangle(width: frame.width,
                                                                 height: frame.height,
                                                                 x: frame.minX,
                                                                 y: frame.minY,
                                                                 text: element.text))
                    }
                }
            }
        }
    }
}
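As an aside, the nested loops can be flattened into a single pipeline; this is just an equivalent, more compact formulation of the same traversal:

scaledImageProcessor.process(in: image) { text in
    // Flatten blocks -> lines -> elements, then map each frame to a rectangle
    let elements = text.blocks.flatMap { $0.lines }.flatMap { $0.elements }
    self.rectangles = elements.map {
        DetectedRectangle(width: $0.frame.width, height: $0.frame.height,
                          x: $0.frame.minX, y: $0.frame.minY, text: $0.text)
    }
}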
Lastly, the views that do the image capturing (included so the example is complete):
CaptureImageView
struct CaptureImageView {
    @Binding var isShown: Bool
    @Binding var image: UIImage?

    func makeCoordinator() -> Coordinator {
        return Coordinator(isShown: $isShown, image: $image)
    }
}
extension CaptureImageView: UIViewControllerRepresentable {
    func makeUIViewController(context: UIViewControllerRepresentableContext<CaptureImageView>) -> UIImagePickerController {
        let picker = UIImagePickerController()
        picker.sourceType = .camera
        picker.delegate = context.coordinator
        return picker
    }

    func updateUIViewController(_: UIImagePickerController,
                                context _: UIViewControllerRepresentableContext<CaptureImageView>) {}
}
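Setting sourceType to .camera raises an exception on devices without a camera (and in the simulator), so a defensive variant of makeUIViewController could check availability first. A sketch, falling back to the photo library:

func makeUIViewController(context: UIViewControllerRepresentableContext<CaptureImageView>) -> UIImagePickerController {
    let picker = UIImagePickerController()
    // Fall back to the photo library when no camera is available (e.g. simulator)
    picker.sourceType = UIImagePickerController.isSourceTypeAvailable(.camera) ? .camera : .photoLibrary
    picker.delegate = context.coordinator
    return picker
}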
Coordinator (for dealing with SwiftUI & UIKit images)
class Coordinator: NSObject, UINavigationControllerDelegate, UIImagePickerControllerDelegate {
    @Binding var isCoordinatorShown: Bool
    @Binding var imageInCoordinator: UIImage?

    init(isShown: Binding<Bool>, image: Binding<UIImage?>) {
        _isCoordinatorShown = isShown
        _imageInCoordinator = image
    }

    func imagePickerController(_: UIImagePickerController,
                               didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey: Any]) {
        guard let unwrapImage = info[UIImagePickerController.InfoKey.originalImage] as? UIImage else { return }
        imageInCoordinator = unwrapImage
        isCoordinatorShown = false
    }

    func imagePickerControllerDidCancel(_: UIImagePickerController) {
        isCoordinatorShown = false
    }
}
Firebase Vision Text Recognizer
class ScaledElementProcessor {
    let vision = Vision.vision()
    var textRecognizer: VisionTextRecognizer!

    init() {
        // Provide language hints
        let options = VisionCloudTextRecognizerOptions()
        options.languageHints = ["nl"]
        textRecognizer = vision.cloudTextRecognizer(options: options)
    }

    func process(in image: UIImage?,
                 callback: @escaping (_ text: VisionText) -> Void) {
        guard let image = image else { return }
        let visionImage = VisionImage(image: image)
        // Provide metadata to improve text recognition. Note: .topLeft tells
        // the recognizer the pixel data is already upright, which only holds
        // when image.imageOrientation == .up
        let metadata = VisionImageMetadata()
        metadata.orientation = .topLeft
        visionImage.metadata = metadata
        textRecognizer.process(visionImage) { result, error in
            guard
                error == nil,
                let result = result,
                !result.text.isEmpty
            else {
                return
            }
            callback(result)
        }
    }
}
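One thing worth double-checking, and my guess at the size mismatch mentioned at the top: UIImagePickerController typically returns camera photos with imageOrientation == .right, while the metadata above hard-codes .topLeft, i.e. "already upright". Redrawing the image into .up orientation before processing makes UIImage.size agree with the coordinate space the recognizer reports frames in. A sketch (withFixedOrientation is a hypothetical helper name):

extension UIImage {
    // Redraws the image so that imageOrientation == .up, i.e. the pixel data
    // matches what the recognizer assumes when metadata is .topLeft
    func withFixedOrientation() -> UIImage {
        guard imageOrientation != .up else { return self }
        UIGraphicsBeginImageContextWithOptions(size, false, scale)
        defer { UIGraphicsEndImageContext() }
        draw(in: CGRect(origin: .zero, size: size))
        return UIGraphicsGetImageFromCurrentImageContext() ?? self
    }
}

// Usage: scaledImageProcessor.process(in: image.withFixedOrientation()) { ... }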
I have already tried playing around with calculations like the ones from the Firebase example project (also UIKit), but the results were all the same: the rectangles came out too small. I hope someone has an idea.