Skip to content

Commit fafa4f0

Browse files
authored
Add PersonLab human pose estimator (tensorflow#563)
* First personlab commit * Add DepthwiseSeparableConvBloc * Finish Personlab convnet * Add heap structure for priority queue for personlab Heap code taken from this tutorial: https://www.raywenderlich.com/586-swift-algorithm-club-heap-and-priority-queue-data-structure * Start working on decoder, added some data structures and function Intermediate non-compiling state * Heavily modify data structures, get forward crawl working It compiles now * Get backward crawl working * Add checks which I had ignored from original python implementation * Unify forward and backward concepts into single outward concept * Under heavy debugging, got poses to be correct but filter is buggy * Fixed all bugs in decoder Still gotta refactor and clean debugging code * Remove a lot of debugging code * Delete some comments * Remove redundant index calculations * Temporary solution: Move Tensors to custom CPU container on decoder init * Remove some profiling code * Add SwiftCV to dependencies * Add SwiftCV image loading from disc and from webcam * Add pose drawing capability using SwiftCV * Add Personlab CLI and improve readme * Add link to checkpoint * Improve readme * Update README.md Tried it in the 0.9rc and it ran more slowly than on 0.8. * Reduce wait time between frames on webcam demo We were waiting 5 seconds between each frame, lol. * Unify all model parts into a single PersonLab model struct * Remove binary heap, which we didn't need after all. On the initial implementation we added and removed candidate keypoints through our code, turns out we can just sort them all at the beginning, so having a priority queue is unnecessary. 
* Improve some comments * Improve profiling print formatting * Move code around a bit * Rename all Personlab instances to PersonLab as is written in paper * Minor refactor * Add error checking for file loading * Update code to accommodate for `Image.resized` not adding batch dim now * Move PersonLab out of Examples/ and into root dir * Improve profiling code * Update PersonLab README * Use swift-linter on PersonLab * Capitalize PersonLab files to better match Swift standard style * Add copyright headers to PersonLab files * Remove SwiftCV dependency This removes video and image drawing support. * Add line drawing support * Run swift-format * Fix version regression on Package.swift and outdated abstract on cli tool * Add automatic checkpoint downloading from default URL * Fix merge conflicts * Remove redundant tensor declarations in PersonLab backbone
1 parent ed7923e commit fafa4f0

10 files changed

+905
-0
lines changed

Package.swift

+3
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ let package = Package(
8181
name: "MobileNetV2-Imagenette",
8282
dependencies: ["Datasets", "ImageClassificationModels", "TrainingLoop"],
8383
path: "Examples/MobileNetV2-Imagenette"),
84+
.target(
85+
name: "PersonLab", dependencies: ["Checkpoints", "ModelSupport", .product(name: "ArgumentParser", package: "swift-argument-parser")],
86+
path: "PersonLab"),
8487
.target(
8588
name: "MiniGo", dependencies: ["Checkpoints"], path: "MiniGo", exclude: ["main.swift"]),
8689
.target(

PersonLab/Backbone.swift

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import Checkpoints
16+
import TensorFlow
17+
18+
/// A MobileNetV1-style depthwise-separable convolution: a depthwise
/// convolution followed by a 1x1 ("pointwise") convolution, each using a
/// relu6 activation and `same` padding.
public struct DepthwiseSeparableConvBlock: Layer {
  var dConv: DepthwiseConv2D<Float>
  var conv: Conv2D<Float>

  /// Builds the block from pre-trained filter and bias tensors.
  ///
  /// - Parameters:
  ///   - depthWiseFilter: Filter for the depthwise convolution.
  ///   - depthWiseBias: Bias for the depthwise convolution.
  ///   - pointWiseFilter: Filter for the 1x1 pointwise convolution.
  ///   - pointWiseBias: Bias for the 1x1 pointwise convolution.
  ///   - strides: Strides of the depthwise convolution; the pointwise
  ///     convolution keeps its default (1, 1) strides.
  public init(
    depthWiseFilter: Tensor<Float>,
    depthWiseBias: Tensor<Float>,
    pointWiseFilter: Tensor<Float>,
    pointWiseBias: Tensor<Float>,
    strides: (Int, Int)
  ) {
    self.dConv = DepthwiseConv2D<Float>(
      filter: depthWiseFilter, bias: depthWiseBias, activation: relu6,
      strides: strides, padding: .same)
    self.conv = Conv2D<Float>(
      filter: pointWiseFilter, bias: pointWiseBias, activation: relu6,
      padding: .same)
  }

  /// Applies the depthwise convolution and then the pointwise convolution.
  @differentiable
  public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
    return conv(dConv(input))
  }
}
51+
52+
/// A MobileNetV1-like feature extractor whose weights are read from a
/// pre-trained TensorFlow checkpoint.
///
/// The layout mirrors the checkpoint's naming scheme: one plain convolution
/// ("Conv2d_0") followed by 13 depthwise-separable blocks
/// ("Conv2d_1" ... "Conv2d_13"). Spatial downsampling (stride 2) happens at
/// the initial convolution and at blocks 2, 4, and 6.
public struct MobileNetLikeBackbone: Layer {
  @noDerivative let ckpt: CheckpointReader

  public var convBlock0: Conv2D<Float>
  public var dConvBlock1: DepthwiseSeparableConvBlock
  public var dConvBlock2: DepthwiseSeparableConvBlock
  public var dConvBlock3: DepthwiseSeparableConvBlock
  public var dConvBlock4: DepthwiseSeparableConvBlock
  public var dConvBlock5: DepthwiseSeparableConvBlock
  public var dConvBlock6: DepthwiseSeparableConvBlock
  public var dConvBlock7: DepthwiseSeparableConvBlock
  public var dConvBlock8: DepthwiseSeparableConvBlock
  public var dConvBlock9: DepthwiseSeparableConvBlock
  public var dConvBlock10: DepthwiseSeparableConvBlock
  public var dConvBlock11: DepthwiseSeparableConvBlock
  public var dConvBlock12: DepthwiseSeparableConvBlock
  public var dConvBlock13: DepthwiseSeparableConvBlock

  /// Reads all backbone weights from `checkpoint`.
  public init(checkpoint: CheckpointReader) {
    self.ckpt = checkpoint

    self.convBlock0 = Conv2D<Float>(
      filter: checkpoint.load(from: "Conv2d_0/weights"),
      bias: checkpoint.load(from: "Conv2d_0/biases"),
      activation: relu6,
      strides: (2, 2),
      padding: .same
    )
    // The 13 separable blocks differ only in their checkpoint index and
    // stride, so they are built by a shared helper instead of 13 copies of
    // the same construction code.
    self.dConvBlock1 = Self.loadSeparableBlock(1, strides: (1, 1), from: checkpoint)
    self.dConvBlock2 = Self.loadSeparableBlock(2, strides: (2, 2), from: checkpoint)
    self.dConvBlock3 = Self.loadSeparableBlock(3, strides: (1, 1), from: checkpoint)
    self.dConvBlock4 = Self.loadSeparableBlock(4, strides: (2, 2), from: checkpoint)
    self.dConvBlock5 = Self.loadSeparableBlock(5, strides: (1, 1), from: checkpoint)
    self.dConvBlock6 = Self.loadSeparableBlock(6, strides: (2, 2), from: checkpoint)
    self.dConvBlock7 = Self.loadSeparableBlock(7, strides: (1, 1), from: checkpoint)
    self.dConvBlock8 = Self.loadSeparableBlock(8, strides: (1, 1), from: checkpoint)
    self.dConvBlock9 = Self.loadSeparableBlock(9, strides: (1, 1), from: checkpoint)
    self.dConvBlock10 = Self.loadSeparableBlock(10, strides: (1, 1), from: checkpoint)
    self.dConvBlock11 = Self.loadSeparableBlock(11, strides: (1, 1), from: checkpoint)
    self.dConvBlock12 = Self.loadSeparableBlock(12, strides: (1, 1), from: checkpoint)
    self.dConvBlock13 = Self.loadSeparableBlock(13, strides: (1, 1), from: checkpoint)
  }

  /// Loads the weights of the depthwise-separable block number `index`
  /// (checkpoint scopes "Conv2d_<index>_depthwise" and
  /// "Conv2d_<index>_pointwise") and builds the block with the given strides.
  private static func loadSeparableBlock(
    _ index: Int, strides: (Int, Int), from checkpoint: CheckpointReader
  ) -> DepthwiseSeparableConvBlock {
    return DepthwiseSeparableConvBlock(
      depthWiseFilter: checkpoint.load(from: "Conv2d_\(index)_depthwise/depthwise_weights"),
      depthWiseBias: checkpoint.load(from: "Conv2d_\(index)_depthwise/biases"),
      pointWiseFilter: checkpoint.load(from: "Conv2d_\(index)_pointwise/weights"),
      pointWiseBias: checkpoint.load(from: "Conv2d_\(index)_pointwise/biases"),
      strides: strides
    )
  }

  /// Runs the input through the initial convolution and all 13 separable
  /// blocks in order.
  @differentiable
  public func callAsFunction(_ input: Tensor<Float>) -> Tensor<Float> {
    var x = convBlock0(input)
    x = dConvBlock1(x)
    x = dConvBlock2(x)
    x = dConvBlock3(x)
    x = dConvBlock4(x)
    x = dConvBlock5(x)
    x = dConvBlock6(x)
    x = dConvBlock7(x)
    x = dConvBlock8(x)
    x = dConvBlock9(x)
    x = dConvBlock10(x)
    x = dConvBlock11(x)
    x = dConvBlock12(x)
    x = dConvBlock13(x)
    return x
  }
}

PersonLab/Decoder.swift

+196
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
// Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import Foundation
16+
import TensorFlow
17+
18+
// This whole struct should probably be merged into the PersonLab model struct when we no longer
19+
// need to do CPUTensor wrapping when SwiftRT fixes the GPU->CPU copy issue.
20+
/// Greedy decoder that turns the PersonLab network's output maps — keypoint
/// heatmap, short-range offsets, and forward/backward mid-range
/// displacements — into a list of multi-person `Pose`s.
///
/// Indexing throughout assumes the maps are laid out as
/// [height, width, channel] (established by the `shape[0]`/`shape[1]`/
/// `shape[2]` usage below).
struct PoseDecoder {
  let heatmap: CPUTensor<Float>
  let offsets: CPUTensor<Float>
  let displacementsFwd: CPUTensor<Float>
  let displacementsBwd: CPUTensor<Float>
  let config: Config

  init(for results: PersonlabHeadsResults, with config: Config) {
    // Hardcoded to batch size == 1 at the moment: only the first batch
    // element of every head output is kept.
    self.heatmap = CPUTensor<Float>(results.heatmap[0])
    self.offsets = CPUTensor<Float>(results.offsets[0])
    self.displacementsFwd = CPUTensor<Float>(results.displacementsFwd[0])
    self.displacementsBwd = CPUTensor<Float>(results.displacementsBwd[0])
    self.config = config
  }

  /// Decodes poses greedily: repeatedly take the highest-scoring remaining
  /// locally-maximum keypoint as a root, grow a skeleton out of it by
  /// following the displacement maps, and keep the resulting pose only when
  /// its non-overlapped score clears `config.poseScoreThreshold`.
  func decode() -> [Pose] {
    var poses = [Pose]()
    var sortedLocallyMaximumKeypoints = getSortedLocallyMaximumKeypoints()
    while sortedLocallyMaximumKeypoints.count > 0 {
      let rootKeypoint = sortedLocallyMaximumKeypoints.removeFirst()
      // Non-maximum suppression: skip roots that land within `nmsRadius` of
      // the corresponding keypoint of an already-decoded pose.
      if rootKeypoint.isWithinRadiusOfCorrespondingKeypoints(in: poses, radius: config.nmsRadius) {
        continue
      }

      var pose = Pose(resolution: self.config.inputImageSize)
      pose.add(rootKeypoint)

      // Recursively parse the keypoint tree going in both the forward and
      // backward directions optimally.
      recursivellyAddNextKeypoint(
        after: rootKeypoint,
        into: &pose
      )

      if getPoseScore(for: pose, considering: poses) > config.poseScoreThreshold {
        poses.append(pose)
      }
    }
    return poses
  }

  /// Depth-first walk of the keypoint graph: for every neighbour of
  /// `previousKeypoint` not yet present in `pose`, locate it by following
  /// the matching displacement map and recurse from the new keypoint.
  func recursivellyAddNextKeypoint(after previousKeypoint: Keypoint, into pose: inout Pose) {
    for (nextKeypointIndex, direction) in getNextKeypointIndexAndDirection(previousKeypoint.index) {
      if pose.getKeypoint(nextKeypointIndex) == nil {
        let nextKeypoint = followDisplacement(
          from: previousKeypoint,
          to: nextKeypointIndex,
          using: direction == .fwd ? displacementsFwd : displacementsBwd
        )
        pose.add(nextKeypoint)
        recursivellyAddNextKeypoint(after: nextKeypoint, into: &pose)
      }
    }
  }

  /// Estimates the position of `nextKeypointIndex` by adding the mid-range
  /// displacement vector read at `previousKeypoint`'s heatmap cell, then
  /// refining the landing cell with the short-range offset map.
  func followDisplacement(
    from previousKeypoint: Keypoint, to nextKeypointIndex: KeypointIndex,
    using displacements: CPUTensor<Float>
  ) -> Keypoint {
    // The displacement channel for the x component sits half the channel
    // count after the y component's channel.
    let displacementKeypointIndexY = keypointPairToDisplacementIndexMap[
      Set([previousKeypoint.index, nextKeypointIndex])]!
    let displacementKeypointIndexX = displacementKeypointIndexY + displacements.shape[2] / 2
    let displacementYIndex = getUnstridedIndex(y: previousKeypoint.y)
    let displacementXIndex = getUnstridedIndex(x: previousKeypoint.x)

    let displacementY = displacements[
      displacementYIndex,
      displacementXIndex,
      displacementKeypointIndexY
    ]
    let displacementX = displacements[
      displacementYIndex,
      displacementXIndex,
      displacementKeypointIndexX
    ]

    // Heatmap cell the displacement vector lands on.
    let displacedY = getUnstridedIndex(y: previousKeypoint.y + displacementY)
    let displacedX = getUnstridedIndex(x: previousKeypoint.x + displacementX)

    // Offset channels: y components first, then x components shifted by the
    // keypoint count.
    let yOffset = offsets[
      displacedY,
      displacedX,
      nextKeypointIndex.rawValue
    ]
    let xOffset = offsets[
      displacedY,
      displacedX,
      nextKeypointIndex.rawValue + KeypointIndex.allCases.count
    ]

    // If we are getting the offset from an exact point in the heatmap, we should add this
    // offset parting from that exact point in the heatmap, so we just nearest neighbour
    // interpolate it back, then re-stretch using the output stride, and then add said offset.
    let nextY = Float(displacedY * config.outputStride) + yOffset
    let nextX = Float(displacedX * config.outputStride) + xOffset

    return Keypoint(
      y: nextY,
      x: nextX,
      index: nextKeypointIndex,
      score: heatmap[
        displacedY, displacedX, nextKeypointIndex.rawValue
      ]
    )
  }

  /// Returns true when `score` is not beaten by any heatmap value for
  /// `keypointIndex` inside a square window of radius
  /// `config.keypointLocalMaximumRadius` centered on
  /// (`heatmapY`, `heatmapX`), with the window clamped at the map borders.
  func scoreIsMaximumInLocalWindow(heatmapY: Int, heatmapX: Int, score: Float, keypointIndex: Int)
    -> Bool
  {
    let yStart = max(heatmapY - config.keypointLocalMaximumRadius, 0)
    let yEnd = min(heatmapY + config.keypointLocalMaximumRadius, heatmap.shape[0] - 1)
    for windowY in yStart...yEnd {
      let xStart = max(heatmapX - config.keypointLocalMaximumRadius, 0)
      let xEnd = min(heatmapX + config.keypointLocalMaximumRadius, heatmap.shape[1] - 1)
      for windowX in xStart...xEnd {
        if heatmap[windowY, windowX, keypointIndex] > score {
          return false
        }
      }
    }
    return true
  }

  /// Maps an image-space y coordinate to the nearest heatmap row index,
  /// clamped to [0, height - 1].
  func getUnstridedIndex(y: Float) -> Int {
    let downScaled = y / Float(config.outputStride)
    let clamped = min(max(0, downScaled.rounded()), Float(heatmap.shape[0] - 1))
    return Int(clamped)
  }

  /// Maps an image-space x coordinate to the nearest heatmap column index,
  /// clamped to [0, width - 1].
  func getUnstridedIndex(x: Float) -> Int {
    let downScaled = x / Float(config.outputStride)
    let clamped = min(max(0, downScaled.rounded()), Float(heatmap.shape[1] - 1))
    return Int(clamped)
  }

  /// Scans the whole heatmap and collects every keypoint whose score clears
  /// `config.keypointScoreThreshold` and is a local maximum, returned in
  /// descending score order so `decode` can pop the best root first.
  func getSortedLocallyMaximumKeypoints() -> [Keypoint] {
    var sortedLocallyMaximumKeypoints = [Keypoint]()
    for heatmapY in 0..<heatmap.shape[0] {
      for heatmapX in 0..<heatmap.shape[1] {
        for keypointIndex in 0..<heatmap.shape[2] {
          let score = heatmap[heatmapY, heatmapX, keypointIndex]

          if score < config.keypointScoreThreshold { continue }
          if scoreIsMaximumInLocalWindow(
            heatmapY: heatmapY,
            heatmapX: heatmapX,
            score: score,
            keypointIndex: keypointIndex
          ) {
            sortedLocallyMaximumKeypoints.append(
              Keypoint(
                heatmapY: heatmapY,
                heatmapX: heatmapX,
                index: keypointIndex,
                score: score,
                offsets: offsets,
                outputStride: config.outputStride
              )
            )
          }
        }
      }
    }
    sortedLocallyMaximumKeypoints.sort { $0.score > $1.score }
    return sortedLocallyMaximumKeypoints
  }

  /// A pose's score is the mean over all keypoint slots of the scores of
  /// keypoints that do not overlap a corresponding keypoint of an already
  /// accepted pose (overlapped ones contribute 0).
  func getPoseScore(for pose: Pose, considering poses: [Pose]) -> Float {
    var notOverlappedKeypointScoreAccumulator: Float = 0
    for keypoint in pose.keypoints {
      // NOTE(review): the force-unwraps assume every slot of
      // `pose.keypoints` is populated by the time a pose is scored — TODO
      // confirm against `Pose.add` / the recursive crawl.
      if !keypoint!.isWithinRadiusOfCorrespondingKeypoints(in: poses, radius: config.nmsRadius) {
        notOverlappedKeypointScoreAccumulator += keypoint!.score
      }
    }
    return notOverlappedKeypointScoreAccumulator / Float(KeypointIndex.allCases.count)
  }
}

0 commit comments

Comments
 (0)