diff --git a/README.md b/README.md index 1c44206..a8036b2 100644 --- a/README.md +++ b/README.md @@ -4,57 +4,73 @@ This document outlines the development of a mobile application that uses a DeepL ## Dataset -The data used in this study is split into training, validation, and testing sets ensuring a robust evaluation of our model's performance. The dataset consists of a set of 9027 images of three disease commonly found on grapevines: -**Black Rot**, **ESCA**, and **Leaf Blight**, balanced with equal representation across the classes. Images are in .jpeg format with dimensions of 256x256 pixels. +The data used in this study came from [Kaggle](https://www.kaggle.com/datasets/rm1000/grape-disease-dataset-original). It is split into training, validation, and testing sets ensuring a robust evaluation of our model's performance. The dataset consists of a set of 9027 images of three diseases commonly found on grapevines: +**Black Rot**, **ESCA**, and **Leaf Blight**. Classes are well balanced with a slight overrepresentation of **ESCA** and **Black Rot**. Images are in .jpeg format with dimensions of 256x256 pixels. ![Dataset Overview](./docs/images/dataset_overview.png) +![Sample](./docs/images/samples_img.png) ## Model Structure Our model is a Convolutional Neural Network (CNN) built using Keras API with TensorFlow backend. It includes several convolutional layers followed by batch normalization, ReLU activation function and max pooling for downsampling. Dropout layers are used for regularization to prevent overfitting. 
The architecture details and parameters are as follows: -| Layer (type) | Output Shape | Param # | -|--------------------------------------|-----------------------------|----------| -| sequential | (None, 224, 224, 3) | 0 | -| conv2d | (None, 224, 224, 32) | 896 | -| batch_normalization | (None, 224, 224, 32) | 128 | -| conv2d_1 | (None, 224, 224, 32) | 9248 | -| batch_normalization_1 | (None, 224, 224, 32) | 128 | -| max_pooling2d | (None, 112, 112, 32) | 0 | -| dropout | (None, 112, 112, 32) | 0 | -| conv2d_2 | (None, 112, 112, 64) | 18496 | -| batch_normalization_2 | (None, 112, 112, 64) | 256 | -| conv2d_3 | (None, 112, 112, 64) | 36864 | -| batch_normalization_3 | (None, 112, 112, 64) | 256 | -| max_pooling2d_1 | (None, 56, 56, 64) | 0 | -| dropout_1 | (None, 56, 56, 64) | 0 | -| conv2d_4 | (None, 56, 56, 128) | 73728 | -| batch_normalization_4 | (None, 56, 56, 128) | 512 | -| conv2d_5 | (None, 56, 56, 128) | 147584| -| batch_normalization_5 | (None, 56, 56, 128) | 512 | -| max_pooling2d_2 | (None, 28, 28, 128) | 0 | -| dropout_2 | (None, 28, 28, 128) | 0 | -| conv2d_6 | (None, 28, 28, 256) | 294912| -| batch_normalization_6 | (None, 28, 28, 256) | 1024 | -| conv2d_7 | (None, 28, 28, 256) | 590080| -| batch_normalization_7 | (None, 28, 28, 256) | 1024 | -| max_pooling2d_3 | (None, 14, 14, 256) | 0 | -| dropout_3 | (None, 14, 14, 256) | 0 | -| global_average_pooling2d | (None, 256) | 0 | -| dense | (None, 256) | 65792 | -| batch_normalization_8 | (None, 256) | 1024 | -| dropout_4 | (None, 256) | 0 | -| dense_1 | (None, 128) | 32768 | -| batch_normalization_9 | (None, 128) | 512 | -| dropout_5 | (None, 128) | 0 | -| dense_2 | (None, 4) | 516 | +```{python} +model = Sequential([ + data_augmentation, + + # Block 1 + layers.Conv2D(32, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.Conv2D(32, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.MaxPooling2D(pool_size=2), + 
layers.Dropout(0.25), + + # Block 2 + layers.Conv2D(64, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.Conv2D(64, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.MaxPooling2D(pool_size=2), + layers.Dropout(0.25), + + # Block 3 + layers.Conv2D(128, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.Conv2D(128, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.MaxPooling2D(pool_size=2), + layers.Dropout(0.25), + + # Block 4 + layers.Conv2D(256, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.Conv2D(256, kernel_size=3, padding='same', activation='relu'), + layers.BatchNormalization(), + layers.MaxPooling2D(pool_size=2), + layers.Dropout(0.25), + + # Classification head + layers.GlobalAveragePooling2D(), + layers.Dense(256, activation='relu'), + layers.BatchNormalization(), + layers.Dropout(0.5), + layers.Dense(128, activation='relu'), + layers.BatchNormalization(), + layers.Dropout(0.5), + layers.Dense(num_classes) +]) + +optimizer = tf.keras.optimizers.Adam(learning_rate=0.001) + +``` - Total params: 3,825,134 (14.59 MB) - Trainable params: 1,274,148 (4.86 MB) - Non-trainable params: 2,688 (10.50 KB) - Optimizer params: 2,548,298 (9.72 MB) + Total params: 3,825,134 (14.59 MB)
+ Trainable params: 1,274,148 (4.86 MB)
+ Non-trainable params: 2,688 (10.50 KB)
+ Optimizer params: 2,548,298 (9.72 MB)
## Training Details @@ -81,9 +97,9 @@ model is identifying key features for accurate classification. ### ressources: -https://www.tensorflow.org/tutorials/images/classification?hl=en -https://www.tensorflow.org/lite/convert?hl=en -https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en +https://www.tensorflow.org/tutorials/images/classification?hl=en
+https://www.tensorflow.org/lite/convert?hl=en
+https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en
AI(s) : deepseek-coder:6.7b | deepseek-r1:8b diff --git a/docs/images/dataset_overview.png b/docs/images/dataset_overview.png index dbd5df8..a431735 100644 Binary files a/docs/images/dataset_overview.png and b/docs/images/dataset_overview.png differ diff --git a/venv/src/__pycache__/data_pretreat.cpython-312.pyc b/venv/src/__pycache__/data_pretreat.cpython-312.pyc index 4420f5f..f3f763d 100644 Binary files a/venv/src/__pycache__/data_pretreat.cpython-312.pyc and b/venv/src/__pycache__/data_pretreat.cpython-312.pyc differ diff --git a/venv/src/__pycache__/load_model.cpython-312.pyc b/venv/src/__pycache__/load_model.cpython-312.pyc index a0bd659..e37ee0d 100644 Binary files a/venv/src/__pycache__/load_model.cpython-312.pyc and b/venv/src/__pycache__/load_model.cpython-312.pyc differ diff --git a/venv/src/data_explore.py b/venv/src/data_explore.py index 992a436..f3dcaef 100644 --- a/venv/src/data_explore.py +++ b/venv/src/data_explore.py @@ -2,10 +2,12 @@ import os import matplotlib.pyplot as plt import numpy as np +from data_pretreat import train_ds + # Configuration -data_dir = os.getcwd()[:-9] + "/data/datasplit/" -class_names = ['Black_Rot', 'ESCA', 'Healthy', 'Leaf_Blight'] -subsets = ['train', 'val', 'test'] +data_dir = os.getcwd()[:-9] + "/data/" +class_names = ['Black Rot', 'ESCA', 'Healthy', 'Leaf Blight'] +subsets = [dir_ for dir_ in os.listdir(data_dir)] class_counts = {subset: {class_name: 0 for class_name in class_names} for subset in subsets} @@ -34,7 +36,7 @@ for idx, subset in enumerate(subsets): print(f" {class_name}: {count} images ({percentage:.1f}%)") print(f" Total: {total_lst[idx]} images") -fig, axes = plt.subplots(1, 3, figsize=(15, 5)) +fig, axes = plt.subplots(1, len(subsets), figsize=(15, 5)) for idx, subset in enumerate(subsets): counts = [class_counts[subset][class_name] for class_name in class_names] @@ -49,7 +51,7 @@ for idx, subset in enumerate(subsets): ha='center', va='bottom', fontweight='bold') axes[idx].set_title(subset.upper()+" 
tot: "+str(total_lst[idx]), fontsize=12, fontweight='bold') - axes[idx].set_ylabel('Nombre d\'images', fontsize=10) + axes[idx].set_ylabel('Number of images', fontsize=10) axes[idx].set_xlabel('Classes', fontsize=10) axes[idx].tick_params(axis='x', rotation=45) axes[idx].grid(axis='y', alpha=0.3, linestyle='--') @@ -57,3 +59,12 @@ for idx, subset in enumerate(subsets): plt.tight_layout() plt.show() +# Load examples + +for img, lbl in train_ds.take(1): + for i in range(4): + ax = plt.subplot(2, 2, i + 1) + plt.imshow(img[i].numpy().astype("uint8")) + plt.title(class_names[lbl[i]]) + plt.axis("off") +plt.show() diff --git a/venv/src/data_pretreat.py b/venv/src/data_pretreat.py index 4b38b46..a196693 100644 --- a/venv/src/data_pretreat.py +++ b/venv/src/data_pretreat.py @@ -9,15 +9,15 @@ from tensorflow.keras.models import Sequential current_dir = os.getcwd() batch_size = 32 -img_height = 224 -img_width = 224 +img_height = 256 +img_width = 256 channels=3 epochs=100 -data_dir = current_dir[:-9]+"/data/datasplit/" +data_dir = current_dir[:-9]+"/data/train/" train_ds = tf.keras.utils.image_dataset_from_directory( - data_dir+"train/", + data_dir, validation_split=0.2, subset="training", seed=123, @@ -25,7 +25,7 @@ train_ds = tf.keras.utils.image_dataset_from_directory( batch_size=batch_size) val_ds = tf.keras.utils.image_dataset_from_directory( - data_dir+"val/", + data_dir, validation_split=0.2, subset="validation", seed=123, @@ -33,24 +33,12 @@ val_ds = tf.keras.utils.image_dataset_from_directory( batch_size=batch_size) test_ds = tf.keras.utils.image_dataset_from_directory( - data_dir+"test/", + current_dir[:-9]+"/data/test/", seed=123, image_size=(img_height, img_width), batch_size=batch_size) class_names = train_ds.class_names -print(class_names) - -# Visualize data - -# plt.figure(figsize=(10, 10)) -# for images, labels in train_ds.take(1): -# for i in range(9): -# ax = plt.subplot(3, 3, i + 1) -# plt.imshow(images[i].numpy().astype("uint8")) -# 
plt.title(class_names[labels[i]]) -# plt.axis("off") -# plt.show() #Data augmentation data_augmentation = keras.Sequential( @@ -77,4 +65,12 @@ normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y)) image_batch, labels_batch = next(iter(normalized_ds)) first_image = image_batch[0] -print("\n DONE !") +# Images name tensors +img_name_tensors = {} +for images, labels in test_ds: + for i, class_name in enumerate(class_names): + class_idx = class_names.index(class_name) + mask = labels == class_idx + + if tf.reduce_any(mask): + img_name_tensors[class_name] = images[mask][0] / 255.0 diff --git a/venv/src/evaluate_model.py b/venv/src/evaluate_model.py index ff12d2a..317fdbb 100644 --- a/venv/src/evaluate_model.py +++ b/venv/src/evaluate_model.py @@ -1,14 +1,15 @@ import os import numpy as np +import tensorflow as tf +import math import matplotlib.pyplot as plt import pandas as pd -import tensorflow as tf from tensorflow.keras.models import load_model from sklearn.metrics import confusion_matrix, classification_report import seaborn as sns -from load_model import * -from data_pretreat import * # src/ function +from load_model import select_model +from data_pretreat import test_ds, img_name_tensors, class_names model, model_dir = select_model() @@ -23,17 +24,13 @@ loss = df['loss'] val_loss = df['val_loss'] # Model testing -y_pred = model.predict(test_ds) -y_ = np.argmax(y_pred, axis=1) +y_ = model.predict(test_ds) +y_ = np.argmax(y_, axis=1) -y_test_raw = np.concatenate([y for x, y in test_ds], axis=0) - -y_test_classes = y_test_raw +y_test_classes = np.concatenate([y for x, y in test_ds], axis=0) cm = confusion_matrix(y_test_classes, y_) -class_names = ['Black_Rot', 'ESCA', 'Healthy', 'Leaf_Blight'] - plt.figure(figsize=(16, 5)) # Subplot 1 : Training Accuracy @@ -60,10 +57,42 @@ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names, cbar=False) -plt.title('Matrice de Confusion') -plt.ylabel('Vraies 
étiquettes') -plt.xlabel('Prédictions') +plt.title('Confusion Matrix') +plt.ylabel('True Classes') +plt.xlabel('Predictions') plt.tight_layout() plt.show() + +# Display images probabilities +def top_k_predictions(img, k=2): + image_batch = tf.expand_dims(img, 0) + predictions = model(image_batch) + probs = tf.nn.softmax(predictions, axis=-1) + top_probs, top_idxs = tf.math.top_k(input=probs, k=k) + + top_labels = [class_names[idx.numpy()] for idx in top_idxs[0]] + + return top_labels, top_probs[0] +# Show img with prediction +plt.figure(figsize=(14, 12)) +num_images = len(img_name_tensors) +cols = 2 +rows = math.ceil(num_images / cols) + +for n, (name, img_tensor) in enumerate(img_name_tensors.items()): + ax = plt.subplot(rows, cols, n+1) + ax.imshow(img_tensor) + + pred_labels, pred_probs = top_k_predictions(img_tensor, k=4) + + pred_text = f"Real classe: {name}\n\nPredictions:\n" + for label, prob in zip(pred_labels, pred_probs): + pred_text += f"{label}: {prob.numpy():0.1%}\n" + + ax.axis('off') + ax.text(-0.5, 0.95, pred_text, ha='left', va='top', transform=ax.transAxes) + +plt.tight_layout() +plt.show() diff --git a/venv/src/gradient.py b/venv/src/gradient.py index 7a6ed76..e5ee03f 100644 --- a/venv/src/gradient.py +++ b/venv/src/gradient.py @@ -5,62 +5,11 @@ import os import math from tensorflow.keras.models import load_model -from load_model import * -from data_pretreat import * # src/ function +from load_model import select_model +from data_pretreat import test_ds, img_name_tensors, class_names, img_height, img_width model, model_dir = select_model() -def read_image(file_name): - image = tf.io.read_file(file_name) - image = tf.io.decode_jpeg(image, channels=channels) - image = tf.image.convert_image_dtype(image, tf.float32) - image = tf.image.resize_with_pad(image, target_height=img_height, target_width=img_width) - return image - -def top_k_predictions(img, k=2): - image_batch = tf.expand_dims(img, 0) - predictions = model(image_batch) - probs = 
tf.nn.softmax(predictions, axis=-1) - top_probs, top_idxs = tf.math.top_k(input=probs, k=k) - - top_labels = [class_names[idx.numpy()] for idx in top_idxs[0]] - - return top_labels, top_probs[0] - -# Load img -img_name_tensors = {} - -for images, labels in test_ds: - for i, class_name in enumerate(class_names): - class_idx = class_names.index(class_name) - mask = labels == class_idx - - if tf.reduce_any(mask): - img_name_tensors[class_name] = images[mask][0] / 255.0 - - -# Show img with prediction -plt.figure(figsize=(14, 12)) -num_images = len(img_name_tensors) -cols = 2 -rows = math.ceil(num_images / cols) - -for n, (name, img_tensor) in enumerate(img_name_tensors.items()): - ax = plt.subplot(rows, cols, n+1) - ax.imshow(img_tensor) - - pred_labels, pred_probs = top_k_predictions(img_tensor, k=4) - - pred_text = f"Real classe: {name}\n\nPredictions:\n" - for label, prob in zip(pred_labels, pred_probs): - pred_text += f"{label}: {prob.numpy():0.1%}\n" - - ax.axis('off') - ax.text(-0.5, 0.95, pred_text, ha='left', va='top', transform=ax.transAxes) - -plt.tight_layout() -plt.show() - # Calculate Integrated Gradients def f(x): return tf.where(x < 0.8, x, 0.8) #A simplified model function. 
@@ -185,16 +134,10 @@ def plot_img_attributions(baseline, plt.tight_layout() return fig -_ = plot_img_attributions(image=img_name_tensors['Leaf_Blight'], +_ = plot_img_attributions(image=img_name_tensors[class_names[3]], baseline=baseline, target_class_idx=3, m_steps=240, cmap=plt.cm.inferno, overlay_alpha=0.4) plt.show() - - -""" -@ref : -https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en -""" diff --git a/venv/src/load_model.py b/venv/src/load_model.py index 297fdb2..89d87c7 100644 --- a/venv/src/load_model.py +++ b/venv/src/load_model.py @@ -3,12 +3,17 @@ from tensorflow.keras.models import load_model from data_pretreat import img_height, img_width, channels import sys +def menu(dir_): + print("Select a model:") + for idx, dir_ in enumerate(dir_): + print(f"({idx})\t{dir_}") + def select_model(): - # all_model_dir = "/home/jhodi/bit/Python/Grapevine_Pathology_Detection/venv/models" # Verify if a model is present on all_model_dir while True: try: - all_model_dir = input("Model dir : ") + # all_model_dir = input("Model dir : ") + all_model_dir = "/home/jhodi/bit/Python/Grapevine_Pathology_Detection/venv/models" model_found = 0 for foldername, subfolders, filenames in os.walk(all_model_dir): for filename in filenames: @@ -21,14 +26,13 @@ def select_model(): break except Exception as e: print(f"Something went wrong! {str(e)}") + sys.exit() subdirectories = [name for name in os.listdir(all_model_dir) if os.path.isdir(os.path.join(all_model_dir, name))] - print("Select a model:") - for idx, dir_ in enumerate(subdirectories): - print(f"({idx})\t{dir_}") - + # Let user make his choce while True: try: + menu(subdirectories) selected_model = int(input("-> ")) if 0 <= selected_model < len(subdirectories): break