FYI. Updated for Xcode 13.3.1 ...
// Vector addition on the GPU: computes vectorC = vectorA + vectorB using the Metal
// compute kernel "add_kernel" from the default library, then prints all three vectors.
// `N` (the number of Float elements per vector) must be defined before this point.
precondition(N > 0, "N must be positive: Metal cannot allocate zero-length buffers")
let bufferLength = N * MemoryLayout<Float>.size

// Use the first Metal device reported by the system; fail clearly if there is none.
let devices = MTLCopyAllDevices()
print("\(#file):\(#line) Possible devices: \(devices)")
guard let device = devices.first else {
    fatalError("No Metal devices are available on this system")
}
print("\(#file):\(#line) Running compute application on device \(device.name)")
print("\(#file):\(#line) Adding vectorA and vectorB into vectorC. Each vector is \(N) floats")

guard let commandQueue = device.makeCommandQueue() else {
    fatalError("Unable to create a Metal command queue")
}
guard let defaultLibrary = device.makeDefaultLibrary() else {
    fatalError("Unable to load the default Metal library")
}
guard let commandBuffer = commandQueue.makeCommandBuffer() else {
    fatalError("Unable to create a Metal command buffer")
}
guard let kernel = defaultLibrary.makeFunction(name: "add_kernel") else {
    fatalError("Kernel function \"add_kernel\" was not found in the default library")
}

// The synchronous, function-based overload returns the pipeline state itself.
// (The async descriptor-based overload returns a (state, reflection) tuple, which is
// why `threadExecutionWidth` appeared to be missing on the returned value.)
let pipelineResult = Result { try device.makeComputePipelineState(function: kernel) }
guard case .success(let computePipelineState) = pipelineResult else {
    fatalError("Unable to create the compute pipeline state: \(pipelineResult)")
}

// Set up the thread layout used by the command encoder: one thread per element.
// The grid is exactly N threads wide, so no thread index ever reaches N even when
// N is not a multiple of the threadgroup width.
// NOTE(review): dispatchThreads needs a GPU that supports non-uniform threadgroup
// sizes - confirm for the target hardware.
let thrdWidth = min(computePipelineState.threadExecutionWidth, N)
let thrdsPerGroup = MTLSize(width: thrdWidth, height: 1, depth: 1)
let gridSize = MTLSize(width: N, height: 1, depth: 1)

// Create the input vectors: A[i] = i, B[i] = 2 * i.
let vectorA = (0..<N).map { Float($0) }
let vectorB = (0..<N).map { Float($0 * 2) }

// Copy the inputs into Metal buffers and allocate the output buffer.
guard
    let bufferA = device.makeBuffer(bytes: vectorA, length: bufferLength, options: []),
    let bufferB = device.makeBuffer(bytes: vectorB, length: bufferLength, options: []),
    let bufferC = device.makeBuffer(length: bufferLength, options: [])
else {
    fatalError("Unable to allocate Metal buffers of \(bufferLength) bytes")
}

// Create the compute command encoder, bind the buffers and dispatch the grid.
guard let computeCommandEncoder = commandBuffer.makeComputeCommandEncoder() else {
    fatalError("Unable to create a compute command encoder")
}
computeCommandEncoder.setComputePipelineState(computePipelineState)
computeCommandEncoder.setBuffer(bufferA, offset: 0, index: 0)
computeCommandEncoder.setBuffer(bufferB, offset: 0, index: 1)
computeCommandEncoder.setBuffer(bufferC, offset: 0, index: 2)
computeCommandEncoder.dispatchThreads(gridSize, threadsPerThreadgroup: thrdsPerGroup)

// Finalize configuration and start the job.
computeCommandEncoder.endEncoding()
commandBuffer.commit()

// Wait for the job to finish and surface any GPU-side failure.
commandBuffer.waitUntilCompleted()
if let gpuError = commandBuffer.error {
    fatalError("GPU execution failed: \(gpuError)")
}

// Copy the output data back into a Swift array.
let resultPointer = bufferC.contents().bindMemory(to: Float.self, capacity: N)
let vectorC = Array(UnsafeBufferPointer(start: resultPointer, count: N))

print("\(#file):\(#line) vectorA = \(vectorA)")
print("\(#file):\(#line) vectorB = \(vectorB)")
print("\(#file):\(#line) vectorC = \(vectorC)")
exit(0)
Note that I found no replacement for the original `thrdWidth` declaration (which used `computePipelineState.threadExecutionWidth`), so I just inserted an integer.