Caps-Train/Sources/ClassifierCreator.swift
2023-10-23 12:28:35 +02:00

464 lines
17 KiB
Swift

import Foundation
import CreateML
final class ClassifierCreator {
let server: URL
let configuration: Configuration
let imageDirectory: URL
let thumbnailDirectory: URL
let classifierUrl: URL
let df = DateFormatter()
// MARK: Step 1: Load configuration
init(configuration: Configuration) throws {
self.configuration = configuration
self.server = try configuration.serverUrl()
let contentDirectory = URL(fileURLWithPath: configuration.contentFolder)
self.imageDirectory = contentDirectory.appendingPathComponent("images")
self.classifierUrl = contentDirectory.appendingPathComponent("classifier.mlmodel")
self.thumbnailDirectory = contentDirectory.appendingPathComponent("thumbnails")
df.dateFormat = "yy-MM-dd-HH-mm-ss"
}
// MARK: Main function
func run() async throws {
let imagesSnapshotDate = Date()
let (classes, changedImageCount, changedMainImages) = try await loadImages()
guard !classes.isEmpty else {
print("[INFO] No image classes found, exiting...")
return
}
guard changedImageCount > 0 else {
print("[INFO] No changed images, so no new classifier trained")
await createThumbnails(changed: changedMainImages)
print("[INFO] Done")
return
}
let classifierVersion = try await getClassifierVersion()
let newVersion = classifierVersion + 1
print("[INFO] Image directory: \(imageDirectory.absoluteURL.path)")
print("[INFO] Model path: \(classifierUrl.path)")
print("[INFO] Version: \(newVersion)")
print("[INFO] Classes: \(classes.count)")
print("[INFO] Iterations: \(configuration.trainingIterations)")
try trainModel()
try await upload(version: newVersion)
try await upload(classes: classes, lastUpdate: imagesSnapshotDate)
await createThumbnails(changed: changedMainImages)
print("[INFO] Done")
}
// MARK: Step 2: Load changed images
func loadImages() async throws -> (classes: [Int], changedImageCount: Int, changedMainImages: [Int]) {
guard createFolderIfMissing(imageDirectory) else {
throw TrainingError.mainImageFolderNotCreated
}
let imageCounts = await getImageCounts()
let missingImageList: [CapImage] = imageCounts
.sorted { $0.key < $1.key }
.reduce(into: []) { list, pair in
let missingImagesForCap: [CapImage] = (0..<pair.value).compactMap { image in
let image = CapImage(cap: pair.key, image: image)
let url = imageUrl(base: imageDirectory, image: image)
guard !FileManager.default.fileExists(atPath: url.path) else {
return nil
}
return image
}
list.append(contentsOf: missingImagesForCap)
}
if missingImageList.isEmpty {
print("[INFO] No missing images to load")
} else {
print("[INFO] Loading \(missingImageList.count) missing images...")
try await loadImages(missingImageList)
}
let changedImageList = try await getChangedImageList()
let filteredChangeList = changedImageList
.filter { $0.image < imageCounts[$0.cap] ?? 0 } // Filter non-existent images
.filter { !missingImageList.contains($0) }
let imagesAlreadyLoad = changedImageList.count - filteredChangeList.count
let suffix = imagesAlreadyLoad > 0 ? " (\(imagesAlreadyLoad) already loaded)" : ""
if filteredChangeList.isEmpty {
print("[INFO] No changed images to load" + suffix)
} else {
print("[INFO] Loading \(filteredChangeList.count) changed images" + suffix)
try await loadImages(filteredChangeList)
}
let changedMainImages = changedImageList.filter { $0.image == 0 }.map { $0.cap }
let classes = imageCounts.keys.sorted()
// Delete any image folders not present as caps
try deleteUnnecessaryImageFolders(caps: classes)
return (classes, missingImageList.count + changedImageList.count, changedMainImages)
}
private func getImageCounts() async -> [Int : Int] {
guard let data: Data = await get(server.appendingPathComponent("caps.json")) else {
return [:]
}
do {
return try JSONDecoder().decode([Cap].self, from: data)
.reduce(into: [:]) { $0[$1.id] = $1.count }
} catch {
print("[ERROR] Failed to decode cap database: \(error)")
return [:]
}
}
private func deleteUnnecessaryImageFolders(caps: [Int]) throws {
let validNames = caps.map { String(format: "%04d", $0) }
let folders: [String]
do {
folders = try FileManager.default.contentsOfDirectory(atPath: imageDirectory.path)
} catch {
print("[ERROR] Failed to get list of image folders: \(error)")
throw TrainingError.failedToGetListOfImageFolders
}
for folder in folders {
if validNames.contains(folder) {
continue
}
// Not a valid cap folder
let url = imageDirectory.appendingPathComponent(folder)
do {
try FileManager.default.removeItem(at: url)
print("[INFO] Removed unused image folder '\(folder)'")
} catch {
print("[ERROR] Failed to delete unused image folder \(folder): \(error)")
throw TrainingError.failedToRemoveImageFolder
}
}
}
private func imageUrl(base: URL, image: CapImage) -> URL {
base.appendingPathComponent(String(format: "%04d/%04d-%02d.jpg", image.cap, image.cap, image.image))
}
private func load(image: CapImage) async -> Bool {
guard createFolderIfMissing(imageDirectory.appendingPathComponent(String(format: "%04d", image.cap))) else {
return false
}
let url = imageUrl(base: server.appendingPathComponent("images"), image: image)
let tempFile: URL, response: URLResponse
do {
(tempFile, response) = try await URLSession.shared.download(from: url)
} catch {
print("[ERROR] Failed to load \(image): \(error)")
return false
}
let responseCode = (response as! HTTPURLResponse).statusCode
guard responseCode == 200 else {
print("[ERROR] Failed to load \(image): Response \(responseCode)")
return false
}
do {
let localUrl = imageUrl(base: imageDirectory, image: image)
if FileManager.default.fileExists(atPath: localUrl.path) {
try FileManager.default.removeItem(at: localUrl)
}
try FileManager.default.moveItem(at: tempFile, to: localUrl)
return true
} catch {
print("[ERROR] Failed to save \(image): \(error)")
return false
}
}
private func loadImages(_ list: [CapImage]) async throws {
var loadedImages = 0
await withTaskGroup(of: Bool.self) { group in
for image in list {
group.addTask {
await self.load(image: image)
}
}
for await loaded in group {
if loaded {
loadedImages += 1
}
}
}
if loadedImages != list.count {
print("[ERROR] Only \(loadedImages) of \(list.count) images loaded")
throw TrainingError.failedToLoadImages
}
}
func getChangedImageList() async throws -> [CapImage] {
guard let string: String = await get(server.appendingPathComponent("changes.txt")) else {
print("[ERROR] Failed to get list of changed images")
throw TrainingError.failedToGetChangedImagesList
}
return string
.components(separatedBy: "\n")
.filter { $0 != "" }
.compactMap {
let parts = $0.components(separatedBy: ":")
guard parts.count == 3 else {
return nil
}
/*
guard let date = df.date(from: parts[0]) else {
print("[WARN] Invalid date \(parts[0]) in change list")
return nil
}
*/
guard let cap = Int(parts[1]) else {
print("[WARN] Invalid cap id \(parts[1]) in change list")
return nil
}
guard let image = Int(parts[2]) else {
print("[WARN] Invalid image id \(parts[2]) in change list")
return nil
}
return CapImage(cap: cap, image: image)
}
}
// MARK: Step 3: Compute version
func getClassifierVersion() async throws -> Int {
guard let string: String = await get(server.appendingPathComponent("version")) else {
print("[ERROR] Failed to get classifier version")
throw TrainingError.failedToGetClassifierVersion
}
guard let version = Int(string) else {
print("[ERROR] Invalid classifier version \(string)")
throw TrainingError.invalidClassifierVersion(string)
}
return version
}
// MARK: Step 4: Train classifier
func trainModel() throws {
var params = MLImageClassifier.ModelParameters(augmentation: [])
params.maxIterations = configuration.trainingIterations
let model: MLImageClassifier
do {
model = try MLImageClassifier(
trainingData: .labeledDirectories(at: imageDirectory),
parameters: params)
} catch {
print("[ERROR] Failed to create classifier: \(error)")
throw TrainingError.failedToCreateClassifier(error)
}
print("[INFO] Saving classifier...")
do {
try model.write(to: classifierUrl)
} catch {
print("[ERROR] Failed to save model to file: \(error)")
throw TrainingError.failedToWriteClassifier(error)
}
}
// MARK: Step 5: Upload classifier
func upload(version: Int) async throws {
print("[INFO] Uploading classifier...")
let modelData: Data
do {
modelData = try Data(contentsOf: classifierUrl)
} catch {
print("[ERROR] Failed to read classifier data: \(error)")
throw TrainingError.failedToReadClassifierData(error)
}
let success = await post(
url: server.appendingPathComponent("classifier/\(version)"),
body: modelData)
guard success else {
throw TrainingError.failedToUploadClassifier
}
}
// MARK: Step 6: Update classes
func upload(classes: [Int], lastUpdate: Date) async throws {
print("[INFO] Uploading trained classes...")
let dateString = df.string(from: lastUpdate)
let url = server.appendingPathComponent("classes/\(dateString)")
let body = classes.map(String.init).joined(separator: ",").data(using: .utf8)!
guard await post(url: url, body: body) else {
throw TrainingError.failedToUploadClassifierClasses
}
}
// MARK: Step 7: Create thumbnails
func createThumbnails(changed: [Int]) async {
guard checkMagickAvailability() else {
return
}
guard createFolderIfMissing(thumbnailDirectory) else {
print("[ERROR] Failed to create folder for thumbnails")
return
}
let capIdsOfMissingThumbnails = await getMissingThumbnailIds()
let all = Set(capIdsOfMissingThumbnails).union(changed)
print("[INFO] Creating \(all.count) thumbnails...")
for cap in all {
await createThumbnail(for: cap)
}
}
func checkMagickAvailability() -> Bool {
do {
let (code, output) = try safeShell("magick --version")
guard code == 0, let version = output.components(separatedBy: "ImageMagick ").dropFirst().first?
.components(separatedBy: " ").first else {
print("[ERROR] Magick not found, install using 'brew install imagemagick'")
return false
}
print("[INFO] Using magick \(version)")
} catch {
print("[ERROR] Failed to get version of magick: (\(error))")
return false
}
return true
}
private func getMissingThumbnailIds() async -> [Int] {
guard let string: String = await get(server.appendingPathComponent("thumbnails/missing")) else {
print("[ERROR] Failed to get missing thumbnails")
return []
}
return string.components(separatedBy: ",").compactMap(Int.init)
}
private func createThumbnail(for cap: Int) async {
let mainImage = CapImage(cap: cap, image: 0)
let inputUrl = imageUrl(base: imageDirectory, image: mainImage)
guard FileManager.default.fileExists(atPath: inputUrl.path) else {
print("[ERROR] Local main image not found for cap \(cap): \(inputUrl.path)")
return
}
let output = thumbnailDirectory.appendingPathComponent(String(format: "%04d.jpg", cap))
do {
let command = "magick convert \(inputUrl.path) -quality 70% -resize 100x100 \(output.path)"
let (code, output) = try safeShell(command)
if code != 0 {
print("Failed to create thumbnail for cap \(cap): \(output)")
return
}
} catch {
print("Failed to read created thumbnail for cap \(cap): \(error)")
return
}
let data: Data
do {
data = try Data(contentsOf: output)
} catch {
print("Failed to read created thumbnail for cap \(cap): \(error)")
return
}
guard await post(url: server.appendingPathComponent("thumbnails/\(cap)"), body: data) else {
print("Failed to upload thumbnail for cap \(cap)")
return
}
}
// MARK: Helper
@discardableResult
private func safeShell(_ command: String) throws -> (code: Int32, output: String) {
let task = Process()
let pipe = Pipe()
task.standardOutput = pipe
task.standardError = pipe
task.arguments = ["-cl", command]
task.executableURL = URL(fileURLWithPath: "/bin/zsh")
task.standardInput = nil
try task.run()
task.waitUntilExit()
let data = pipe.fileHandleForReading.readDataToEndOfFile()
let output = String(data: data, encoding: .utf8)!
return (task.terminationStatus, output)
}
private func createFolderIfMissing(_ folder: URL) -> Bool {
guard !FileManager.default.fileExists(atPath: folder.path) else {
return true
}
do {
try FileManager.default.createDirectory(at: folder, withIntermediateDirectories: true)
return true
} catch {
print("[ERROR] Failed to create directory \(folder.path): \(error)")
return false
}
}
// MARK: Requests
private func post(url: URL, body: Data) async -> Bool {
var request = URLRequest(url: url)
request.httpMethod = "POST"
request.httpBody = body
request.addValue(configuration.authenticationToken, forHTTPHeaderField: "key")
return await perform(request) != nil
}
private func perform(_ request: URLRequest) async -> Data? {
let data: Data
let response: URLResponse
do {
(data, response) = try await URLSession.shared.data(for: request)
} catch {
print("[ERROR] Request to \(request.url!.absoluteString) failed: \(error)")
return nil
}
let code = (response as! HTTPURLResponse).statusCode
guard code == 200 else {
print("[ERROR] Request to \(request.url!.absoluteString): Invalid response \(code)")
return nil
}
return data
}
private func get(_ url: URL) async -> Data? {
await perform(URLRequest(url: url))
}
private func get(_ url: URL) async -> String? {
guard let data: Data = await get(url) else {
return nil
}
guard let string = String(data: data, encoding: .utf8) else {
print("[ERROR] Invalid string response \(data)")
return nil
}
return string
}
}