Skip to content

Commit

Permalink
added mlflow logging
Browse files Browse the repository at this point in the history
  • Loading branch information
RihabFekii committed Apr 9, 2023
1 parent 2efcb15 commit ee9f8ea
Show file tree
Hide file tree
Showing 9 changed files with 106 additions and 68 deletions.
14 changes: 13 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
#!make
include .env

PYTHON_INTERPRETER = python3

# Create a local virtual environment in ./env with $(PYTHON_INTERPRETER).
# `env` is a real (non-phony) target: once the ./env directory exists, make
# treats it as up to date and does not run the recipe again.
env:
	@echo ">>> Creating a python virtual environment with venv"
	$(PYTHON_INTERPRETER) -m venv env
	@# printf instead of echo: whether echo expands "\n" depends on which
	@# shell /bin/sh is, printf always emits the line break.
	@printf '%s\n' ">>> A new virtual env is created. Activate it with:" "source env/bin/activate ."


# Hand the MLflow credentials loaded by `include .env` to the environment of
# every recipe started from this Makefile.
#
# NOTE: a recipe can never change the environment of the shell that invoked
# `make`, and each recipe line runs in its own sub-shell, so `export VAR=...`
# written inside the recipe has no lasting effect. Unsilenced recipe lines are
# also echoed by make, which would print the password in clear text.
export MLFLOW_TRACKING_URI
export MLFLOW_TRACKING_USERNAME
export MLFLOW_TRACKING_PASSWORD

# Verify that all MLflow credentials are available; fails with a non-zero exit
# status (without printing any secret) when one of them is missing.
.PHONY: mlflow
mlflow:
	@echo ">>> Authenticating to MLflow remote server on DagsHub"
	@test -n "$$MLFLOW_TRACKING_URI" || { echo "MLFLOW_TRACKING_URI is not set (check .env)" >&2; exit 1; }
	@test -n "$$MLFLOW_TRACKING_USERNAME" || { echo "MLFLOW_TRACKING_USERNAME is not set (check .env)" >&2; exit 1; }
	@test -n "$$MLFLOW_TRACKING_PASSWORD" || { echo "MLFLOW_TRACKING_PASSWORD is not set (check .env)" >&2; exit 1; }
	@echo ">>> Authenticating successful!"

24 changes: 12 additions & 12 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ stages:
size: 28867246
nfiles: 1482
- path: params.yaml
md5: a061a8a3138da65df56595927ca325f6
size: 216
md5: f2fc712e78c50f898902306181630ead
size: 215
- path: src/train.py
md5: 8cde4fe436744929ddbaeaadb37aad71
size: 986
md5: 8402b07d9f68ed886e86f8b644b2a843
size: 2002
- path: src/utils.py
md5: 3ecd480350d0d8edb7e4ba533bb555ad
size: 1253
md5: 4d04addab005a6475665898aae662ace
size: 1815
outs:
- path: models/model.pt
md5: 31c95b64b4a92e32e26ca67d99ffc401
size: 22495096
md5: 7fe4c6173c334d2e926e71236366b8e0
size: 6226104
- path: reports/train_metrics.csv
md5: 30cf68a8a216e4dcfbde8717fa882e9f
size: 7056
md5: 464bce4270bb543ea8741659367b6b9d
size: 1008
- path: reports/train_params.yaml
md5: 5c58f717d9f2b23de0e7483ba7fbf2b2
size: 1502
md5: 2ea9f04ec479dd1db02f44263b2c8328
size: 1501
6 changes: 3 additions & 3 deletions params.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
model_type: yolov8s.pt
model_type: yolov8n.pt
pretrained: True
seed: 0
imgsz: 640
batch: 8
epochs: 20
epochs: 2
optimizer: SGD # other choices=['SGD', 'Adam', 'AdamW', 'RMSProp']
lr0: 0.01 # learning rate
name: 'yolov8s_exp_v0' # experiment name
name: 'yolov8n_exp_v0' # experiment name
Binary file modified reports/train_confusion_matrix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 2 additions & 20 deletions reports/train_metrics.csv
Original file line number Diff line number Diff line change
@@ -1,21 +1,3 @@
epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
0, 2.074, 3.6688, 1.7378, 0.29774, 0.32591, 0.23183, 0.08104, 2.2504, 2.6345, 1.8202, 0.070462, 0.0032821, 0.0032821
1, 1.767, 1.7208, 1.4953, 0.64651, 0.53061, 0.62556, 0.29666, 1.8773, 1.8427, 1.3876, 0.040134, 0.0062879, 0.0062879
2, 1.7171, 1.707, 1.3988, 0.06729, 0.09524, 0.04705, 0.02481, 3.0641, 4.3916, 2.5411, 0.0094766, 0.0089638, 0.0089638
3, 1.7341, 1.8335, 1.4552, 0.38806, 0.47619, 0.41508, 0.17576, 1.9845, 2.1096, 1.6549, 0.008515, 0.008515, 0.008515
4, 1.691, 1.3376, 1.4286, 0.49323, 0.42177, 0.41415, 0.16903, 2.213, 2.0351, 1.7182, 0.008515, 0.008515, 0.008515
5, 1.6586, 1.3538, 1.3919, 0.62787, 0.38095, 0.39228, 0.18293, 2.0223, 2.3359, 1.6252, 0.00802, 0.00802, 0.00802
6, 1.6037, 1.2676, 1.3543, 0.66557, 0.47619, 0.5516, 0.25586, 1.8962, 1.6098, 1.4971, 0.007525, 0.007525, 0.007525
7, 1.658, 1.2554, 1.3514, 0.58698, 0.68027, 0.6487, 0.29438, 1.7871, 1.3586, 1.4083, 0.00703, 0.00703, 0.00703
8, 1.609, 1.1162, 1.3305, 0.6504, 0.5102, 0.53742, 0.25941, 1.9, 1.6061, 1.4134, 0.006535, 0.006535, 0.006535
9, 1.5784, 1.1193, 1.3391, 0.58894, 0.70068, 0.60387, 0.26491, 1.8898, 1.466, 1.4003, 0.00604, 0.00604, 0.00604
10, 1.6081, 1.0872, 1.332, 0.54884, 0.53741, 0.56067, 0.26937, 1.89, 1.6558, 1.4405, 0.005545, 0.005545, 0.005545
11, 1.6216, 1.0673, 1.3576, 0.6775, 0.7415, 0.76254, 0.40175, 1.6952, 1.1328, 1.3126, 0.00505, 0.00505, 0.00505
12, 1.5426, 1.0814, 1.3183, 0.63694, 0.73995, 0.67857, 0.33739, 1.8035, 1.2909, 1.3621, 0.004555, 0.004555, 0.004555
13, 1.527, 0.9778, 1.2827, 0.76852, 0.76871, 0.80068, 0.40906, 1.6945, 1.0812, 1.3067, 0.00406, 0.00406, 0.00406
14, 1.4765, 0.93611, 1.2862, 0.74653, 0.84152, 0.85997, 0.47623, 1.6398, 1.0427, 1.3348, 0.003565, 0.003565, 0.003565
15, 1.4623, 0.90297, 1.2588, 0.83993, 0.80952, 0.88026, 0.47516, 1.6723, 0.98866, 1.2993, 0.00307, 0.00307, 0.00307
16, 1.4283, 0.86243, 1.2558, 0.81461, 0.82313, 0.90594, 0.46365, 1.6496, 0.93395, 1.3134, 0.002575, 0.002575, 0.002575
17, 1.4264, 0.83566, 1.2381, 0.85204, 0.82993, 0.90175, 0.49576, 1.5633, 0.94734, 1.2569, 0.00208, 0.00208, 0.00208
18, 1.3657, 0.84115, 1.214, 0.87301, 0.87755, 0.922, 0.4962, 1.5714, 0.88971, 1.2903, 0.001585, 0.001585, 0.001585
19, 1.3289, 0.8171, 1.1956, 0.85437, 0.86395, 0.92797, 0.51129, 1.5971, 0.85184, 1.2837, 0.00109, 0.00109, 0.00109
0, 1.9822, 4.1505, 1.6088, 0.00279, 0.83673, 0.04395, 0.0187, 2.0868, 4.2883, 1.5972, 0.070462, 0.0032821, 0.0032821
1, 1.7723, 2.9226, 1.3586, 0.25019, 0.33333, 0.19749, 0.07728, 2.2085, 3.4761, 1.5367, 0.037187, 0.0033408, 0.0033408
1 change: 1 addition & 0 deletions reports/train_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"0":{" epoch":0," train\/box_loss":1.9822," train\/cls_loss":4.1505," train\/dfl_loss":1.6088," metrics\/precision(B)":0.00279," metrics\/recall(B)":0.83673," metrics\/mAP50(B)":0.04395," metrics\/mAP50-95(B)":0.0187," val\/box_loss":2.0868," val\/cls_loss":4.2883," val\/dfl_loss":1.5972," lr\/pg0":0.070462," lr\/pg1":0.0032821," lr\/pg2":0.0032821},"1":{" epoch":1," train\/box_loss":1.7723," train\/cls_loss":2.9226," train\/dfl_loss":1.3586," metrics\/precision(B)":0.25019," metrics\/recall(B)":0.33333," metrics\/mAP50(B)":0.19749," metrics\/mAP50-95(B)":0.07728," val\/box_loss":2.2085," val\/cls_loss":3.4761," val\/dfl_loss":1.5367," lr\/pg0":0.037187," lr\/pg1":0.0033408," lr\/pg2":0.0033408}}
8 changes: 4 additions & 4 deletions reports/train_params.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
task: detect
mode: train
model: yolov8s.pt
model: yolov8n.pt
data: /Users/rihabfeki/Desktop/wildfire-smoke-detector/data/raw/wildfire-raw-yolov8/data.yaml
epochs: 20
epochs: 2
patience: 50
batch: 8
imgsz: 640
Expand All @@ -12,7 +12,7 @@ cache: false
device: null
workers: 8
project: null
name: yolov8s_exp_v0
name: yolov8n_exp_v0
exist_ok: false
pretrained: true
optimizer: SGD
Expand Down Expand Up @@ -92,4 +92,4 @@ copy_paste: 0.0
cfg: null
v5loader: false
tracker: botsort.yaml
save_dir: /Users/rihabfeki/Desktop/wildfire-smoke-detector/runs/detect/yolov8s_exp_v0
save_dir: /Users/rihabfeki/Desktop/wildfire-smoke-detector/runs/detect/yolov8n_exp_v0
77 changes: 52 additions & 25 deletions src/train.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,70 @@
import os
from pathlib import Path

import mlflow
import yaml
from dotenv import load_dotenv
from ultralytics import YOLO

from utils import save_metrics, save_model
from utils import convert_metrics_csv_to_json, save_metrics_and_params, save_model


ROOT_DIR = Path(__file__).resolve().parents[1] # root directory absolute path
DATA_DIR = os.path.join(ROOT_DIR, "data/raw/wildfire-raw-yolov8")
DATA_YAML = os.path.join(DATA_DIR, "data.yaml")
load_dotenv()

MLFLOW_TRACKING_URI=os.getenv('MLFLOW_TRACKING_URI')

root_dir = Path(__file__).resolve().parents[1] # root directory absolute path
data_dir = os.path.join(root_dir, "data/raw/wildfire-raw-yolov8")
data_yaml_path = os.path.join(data_dir, "data.yaml")
metrics_path = os.path.join(root_dir, 'reports/train_metrics.json')


if __name__ == '__main__':

# load the configuration file
with open(r"params.yaml") as f:
params = yaml.safe_load(f)

# load a pre-trained model
pre_trained_model = YOLO(params['model_type'])

# train
model = pre_trained_model.train(
data=DATA_YAML,
imgsz=params['imgsz'],
batch=params['batch'],
epochs=params['epochs'],
optimizer=params['optimizer'],
lr0=params['lr0'],
seed=params['seed'],
pretrained=params['pretrained'],
name=params['name']
)

# save model
save_model(experiment_name=params['name'])

# save metrics
save_metrics(experiment_name=params['name'])
# set the tracking uri
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

# start mlflow experiment
with mlflow.start_run(run_name=params['name']):
# load a pre-trained model
pre_trained_model = YOLO(params['model_type'])

# train
model = pre_trained_model.train(
data=data_yaml_path,
imgsz=params['imgsz'],
batch=params['batch'],
epochs=params['epochs'],
optimizer=params['optimizer'],
lr0=params['lr0'],
seed=params['seed'],
pretrained=params['pretrained'],
name=params['name']
)

# log params with mlflow
mlflow.log_param('model_type', params['model_type'])
mlflow.log_param('epochs',params['epochs'])
mlflow.log_param('optimizer', params['optimizer'])
mlflow.log_param('learning_rate', params['lr0'])

# save model
model_path = save_model(experiment_name=params['name'])
# log model path with mlflow
mlflow.log_artifact(model_path)

# save metrics csv file and training params
save_metrics_and_params(experiment_name=params['name'])
# convert metrics from csv to json
convert_metrics_csv_to_json(metrics_path, params['name'])







Expand Down
22 changes: 19 additions & 3 deletions src/utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,39 @@
import os
import shutil
from pathlib import Path
import pandas as pd
import json

ROOT_DIR = Path(__file__).resolve().parents[1] # root directory absolute path


def save_model(experiment_name: str):
    """Copy the best weights of a training run to ``models/model.pt``.

    Args:
        experiment_name: Name of the run directory below ``runs/detect``.

    Returns:
        The destination path returned by ``shutil.copy``, or ``None`` when
        there is no ``runs`` directory in the current working directory (in
        which case nothing is copied).

    Raises:
        FileNotFoundError: If ``runs`` exists but the run has no
            ``weights/best.pt`` file.
    """
    # NOTE: this check is resolved against the current working directory,
    # while the weights themselves are read from ROOT_DIR.
    if not os.path.isdir('runs'):
        return None

    model_weights = experiment_name + "/weights/best.pt"
    path_model_weights = os.path.join(ROOT_DIR, "runs/detect", model_weights)

    # shutil.copy does not create missing parent directories, so make sure
    # the models directory exists before copying into it.
    os.makedirs(os.path.join(ROOT_DIR, "models"), exist_ok=True)
    return shutil.copy(src=path_model_weights, dst=f'{ROOT_DIR}/models/model.pt')


def save_metrics(experiment_name: str) -> None:
def csv_to_json(csv_file_path: str, dest_file_path: str) -> None:
    """Convert a CSV file to a JSON file keyed by row index.

    The output maps each row label to an object of ``{column: value}``
    (pandas ``orient="index"``).

    Args:
        csv_file_path: Path of the CSV file to read.
        dest_file_path: Path of the JSON file to write.
    """
    df = pd.read_csv(csv_file_path)
    # The training results CSV pads its header cells with spaces; without
    # stripping, the JSON keys come out as " epoch", " train/box_loss", ...
    df = df.rename(columns=str.strip)
    df.to_json(dest_file_path, orient="index")


def convert_metrics_csv_to_json(dest_file_path: str, experiment_name:str):
    """Export a run's ``results.csv`` as JSON so the metrics can be tracked with DVC.

    Does nothing when there is no ``runs`` directory in the current working
    directory.

    Args:
        dest_file_path: Path of the JSON file to write.
        experiment_name: Name of the run directory below ``runs/detect``.
    """
    if not os.path.isdir('runs'):
        return

    results_csv = os.path.join(ROOT_DIR, "runs/detect", experiment_name, 'results.csv')
    csv_to_json(results_csv, dest_file_path)


def save_metrics_and_params(experiment_name: str) -> None:
""" saves training metrics, params and confusion matrix to the reports directory """
if os.path.isdir('runs'):
path_metrics = os.path.join(ROOT_DIR, "runs/detect", experiment_name)

# save experiment training metrics
shutil.copy(src=f'{path_metrics}/results.csv', dst=f'{ROOT_DIR}/reports/train_metrics.csv')

Expand Down

0 comments on commit ee9f8ea

Please sign in to comment.