Of course the real project is much more complex, but here is a minimal command line project which takes an array [[Int32]] and reverses both the inner and outer arrays. It prints the input followed by the output.
There are three parts:
main.swift
Shaders.metal
Bridge.h
As you can see, I needed to set up the input using buffers.
MAIN.SWIFT
// main.swift
// If you uncomment the repeat loop in Model.compute(), you can interrupt the flow
// to inspect the buffer contents.
import Foundation
import MetalKit
/// Owns the Metal device objects and the two buffers used by the demo, and
/// runs the `reverse_input` compute kernel over them.
///
/// The input buffer is filled with `bufferSize` elements of `MSLArray`, where
/// element `i` holds `(i, i+1, i+2)`. The output buffer starts zeroed.
class Model {
    /// The system default Metal device; the program cannot run without one.
    static let Device: MTLDevice = {
        guard let device = MTLCreateSystemDefaultDevice() else {
            fatalError("Metal is not available on this system")
        }
        return device
    }()

    /// The single command queue used to submit compute work.
    static let CommandQueue: MTLCommandQueue = {
        guard let queue = Device.makeCommandQueue() else {
            fatalError("Failed to create a Metal command queue")
        }
        return queue
    }()

    var mslInBuffer: MTLBuffer
    var mslOutBuffer: MTLBuffer
    /// Typed views onto the buffers' contents, used to fill and print them from the CPU.
    var mslInPointer: UnsafeMutablePointer<MSLArray>
    var mslOutPointer: UnsafeMutablePointer<MSLArray>
    /// Number of `MSLArray` elements in each buffer (also the compute grid width).
    let bufferSize = 20
    private var computePipelineState: MTLComputePipelineState!

    /// Builds the pipeline state, allocates both buffers and fills them with
    /// the initial input / zeroed output values.
    init() {
        computePipelineState = Model.makeComputePipelineState()

        let byteCount = MemoryLayout<MSLArray>.stride * bufferSize
        guard let inBuffer = Model.Device.makeBuffer(length: byteCount, options: .storageModeShared),
              let outBuffer = Model.Device.makeBuffer(length: byteCount, options: .storageModeShared)
        else {
            fatalError("Failed to allocate Metal buffers of \(byteCount) bytes")
        }
        mslInBuffer = inBuffer
        mslOutBuffer = outBuffer
        mslInPointer = inBuffer.contents().bindMemory(to: MSLArray.self, capacity: bufferSize)
        mslOutPointer = outBuffer.contents().bindMemory(to: MSLArray.self, capacity: bufferSize)

        for index in 0..<bufferSize {
            let base = Int32(index)
            mslInPointer[index] = MSLArray(ar: (base, base + 1, base + 2))
            mslOutPointer[index] = MSLArray(ar: (0, 0, 0))
        }
    }

    /// Prints the buffers, runs the kernel once over `bufferSize` threads,
    /// waits for the GPU to finish, then prints the buffers again.
    func compute() {
        print("Before:")
        // repeat {
        showResults()

        guard let commandBuffer = Model.CommandQueue.makeCommandBuffer(),
              let computeEncoder = commandBuffer.makeComputeCommandEncoder()
        else {
            // Report the failure instead of returning with no "After:" output and no explanation.
            print("Failed to create a Metal command buffer or compute encoder")
            return
        }

        computeEncoder.setComputePipelineState(computePipelineState)
        computeEncoder.setBuffer(mslInBuffer, offset: 0, index: 0)
        computeEncoder.setBuffer(mslOutBuffer, offset: 0, index: 1)

        // One thread per element; the threadgroup is never wider than the grid.
        let gridSize = MTLSize(width: bufferSize, height: 1, depth: 1)
        let threadGroupSize = min(computePipelineState.maxTotalThreadsPerThreadgroup, bufferSize)
        let threadsPerGroup = MTLSize(width: threadGroupSize, height: 1, depth: 1)
        computeEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: threadsPerGroup)
        computeEncoder.endEncoding()

        commandBuffer.commit()
        commandBuffer.waitUntilCompleted()
        if let error = commandBuffer.error {
            // Surface GPU-side failures; otherwise the output would just look unchanged.
            print("GPU execution failed: \(error.localizedDescription)")
        }

        print("After:")
        // } while true
        showResults()
    }
}
extension Model {
    /// Prints the current contents of the input buffer followed by the
    /// output buffer, each as a bracketed, comma-separated list.
    func showResults() {
        /// Prints `bufferSize` elements starting at `ptr`, prefixed with `name`.
        func display<T>(name: String, ptr: UnsafeMutablePointer<T>) {
            // View the raw pointer as a collection instead of advancing it by hand.
            let elements = UnsafeMutableBufferPointer(start: ptr, count: bufferSize)
            print("\(name): [", terminator: "")
            for element in elements {
                print(element, terminator: ", ")
            }
            print("]\n")
        }

        display(name: "mslIn", ptr: mslInPointer)
        print()
        display(name: "mslIn reversed", ptr: mslOutPointer)
        print()
    }

    /// Loads the default shader library, looks up the `reverse_input` kernel
    /// and builds a compute pipeline state for it.
    ///
    /// Any failure is fatal: the demo cannot do anything without the pipeline.
    private static func makeComputePipelineState() -> MTLComputePipelineState {
        // Create Shaders
        guard let shaderLibrary = Model.Device.makeDefaultLibrary() else {
            fatalError("Failed to obtain shader library")
        }
        // A missing or misspelled kernel name yields nil; fail with a clear message
        // rather than an anonymous force-unwrap crash.
        guard let kernelShader = shaderLibrary.makeFunction(name: "reverse_input") else {
            fatalError("Failed to find kernel function 'reverse_input' in shader library")
        }

        // Create Pipeline State
        do {
            return try Model.Device.makeComputePipelineState(function: kernelShader)
        } catch {
            fatalError(error.localizedDescription)
        }
    }
}
// Entry point: build the model (pipeline state and buffers), then run one compute pass.
// `model` is never reassigned, so it is declared with `let`.
let model = Model()
print("MSL Version, with tuple replacing array")
model.compute()
SHADERS.METAL
// Shaders.metal
#include <metal_stdlib>
using namespace metal;
#include "Bridge.h"
// Reverses the input in two ways at once: element `threadIndex` of `mslIn` is
// written to the mirrored position in `mslOut`, and its three `ar` values are
// written in reverse order.
//
// mslIn       read-only input elements, bound at buffer index 0
// mslOut      output elements, bound at buffer index 1
// threadIndex this thread's position in the 1-D grid (one thread per element)
// gridSize    total number of threads in the grid; used as the element count
//             instead of a hard-coded constant, so the kernel follows whatever
//             grid width the host dispatches.
//             NOTE(review): this relies on the host dispatching exactly one
//             thread per element (it currently uses dispatchThreads with a
//             grid width equal to the buffer's element count).
kernel void reverse_input(const device MSLArray *mslIn [[buffer(0)]],
                          device MSLArray *mslOut [[buffer(1)]],
                          uint threadIndex [[thread_position_in_grid]],
                          uint gridSize [[threads_per_grid]])
{
    // Mirror position in the outer array.
    const uint index = gridSize - 1 - threadIndex;
    // Copy the three inner values back-to-front.
    for (int i = 0; i < 3; i++) {
        mslOut[index].ar[2 - i] = mslIn[threadIndex].ar[i];
    }
}
BRIDGE.H
// Bridge.h
#ifndef Bridge_h
#define Bridge_h
/* Element type shared between the Swift host code and the Metal kernel:
 * a fixed group of three ints. The Swift side fills it from a 3-tuple of
 * Int32 values. */
typedef struct {
    int ar[3];
} MSLArray;
#endif /* Bridge_h */