Abort and report errors in training script

This commit is contained in:
Christoph Hagen 2021-12-20 20:45:27 +01:00
parent c44f521288
commit d969d168e1

View File

@@ -45,6 +45,12 @@ echo "[INFO] Working in directory ${WORK_DIR}"
echo "[INFO] Getting classifier version from server..." echo "[INFO] Getting classifier version from server..."
scp -P $SSH_PORT ${SERVER}:/${SERVER_ROOT_PATH}/classifier.version $WORK_DIR scp -P $SSH_PORT ${SERVER}:/${SERVER_ROOT_PATH}/classifier.version $WORK_DIR
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to get classifier version'
return $retVal
fi
# Read classifier version from file # Read classifier version from file
OLD_VERSION=$(< $VERSION_FILE) OLD_VERSION=$(< $VERSION_FILE)
NEW_VERSION=$(($OLD_VERSION + 1)) NEW_VERSION=$(($OLD_VERSION + 1))
@@ -53,14 +59,29 @@ echo "[INFO] Creating classifier version ${NEW_VERSION}"
echo "[INFO] Ensuring permissions for images on server..." echo "[INFO] Ensuring permissions for images on server..."
ssh -p $SSH_PORT ${SERVER} "sudo chmod -R 755 ${SERVER_ROOT_PATH}/images" ssh -p $SSH_PORT ${SERVER} "sudo chmod -R 755 ${SERVER_ROOT_PATH}/images"
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to change image permissions'
return $retVal
fi
echo "[INFO] Transferring images from server..." echo "[INFO] Transferring images from server..."
rsync -hvrPut -e "ssh -p ${SSH_PORT}" ${SERVER}:/${SERVER_ROOT_PATH}/images/ "${IMAGE_DIR}" rsync -hvrPut -e "ssh -p ${SSH_PORT}" ${SERVER}:/${SERVER_ROOT_PATH}/images/ "${IMAGE_DIR}"
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to transfer images from server'
return $retVal
fi
echo "[INFO] Training the model..." echo "[INFO] Training the model..."
swift train.swift $IMAGE_DIR $TRAINING_ITERATIONS $MODEL_FILE swift train.swift $IMAGE_DIR $TRAINING_ITERATIONS $MODEL_FILE
echo "[INFO] Backing up model..." retVal=$?
cp $MODEL_FILE "${BACKUP_DIR}/classifier${NEW_VERSION}.mlmodel" if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to train model'
return $retVal
fi
echo "[INFO] Incrementing version file..." echo "[INFO] Incrementing version file..."
echo "${NEW_VERSION}" > $VERSION_FILE echo "${NEW_VERSION}" > $VERSION_FILE
@@ -68,13 +89,40 @@ echo "${NEW_VERSION}" > $VERSION_FILE
echo "[INFO] Copying the files to the server..." echo "[INFO] Copying the files to the server..."
scp -P $SSH_PORT $MODEL_FILE $VERSION_FILE ${SERVER}:~/ scp -P $SSH_PORT $MODEL_FILE $VERSION_FILE ${SERVER}:~/
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to copy new files to server'
return $retVal
fi
echo "[INFO] Moving files into public directory..." echo "[INFO] Moving files into public directory..."
ssh -p ${SSH_PORT} ${SERVER} "sudo mv /home/pi/classifier.* ${SERVER_ROOT_PATH}/" ssh -p ${SSH_PORT} ${SERVER} "sudo mv /home/pi/classifier.* ${SERVER_ROOT_PATH}/"
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to move files on server'
return $retVal
fi
echo "[INFO] Updating permissions..." echo "[INFO] Updating permissions..."
ssh -p ${SSH_PORT} ${SERVER} "sudo chown -R www-data\:www-data ${SERVER_ROOT_PATH}/" ssh -p ${SSH_PORT} ${SERVER} "sudo chown -R www-data\:www-data ${SERVER_ROOT_PATH}/"
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[ERROR] Failed to update file permissions on server'
return $retVal
fi
echo "[INFO] Backing up model..."
mv $MODEL_FILE "${BACKUP_DIR}/classifier${NEW_VERSION}.mlmodel"
retVal=$?
if [ $retVal -ne 0 ]; then
echo '[WARNING] Failed to back up model'
rm $MODEL_FILE
fi
echo "[INFO] Cleaning up..." echo "[INFO] Cleaning up..."
rm $MODEL_FILE $VERSION_FILE rm $VERSION_FILE
echo "[INFO] Process finished" echo "[INFO] Process finished"