This commit is contained in:
jhodi.avizara 2026-03-23 16:49:29 +01:00
parent 26b13a424e
commit bb9df73b5f
9 changed files with 146 additions and 147 deletions

104
README.md
View file

@ -4,57 +4,73 @@ This document outlines the development of a mobile application that uses a DeepL
## Dataset ## Dataset
The data used in this study is split into training, validation, and testing sets ensuring a robust evaluation of our model's performance. The dataset consists of a set of 9027 images of three diseases commonly found on grapevines: The data used in this study came from [kaggle](kaggle.com/datasets/rm1000/grape-disease-dataset-original). It is split into training, validation, and testing sets ensuring a robust evaluation of our model's performance. The dataset consists of a set of 9027 images of three diseases commonly found on grapevines:
**Black Rot**, **ESCA**, and **Leaf Blight**, balanced with equal representation across the classes. Images are in .jpeg format with dimensions of 256x256 pixels. **Black Rot**, **ESCA**, and **Leaf Blight**. Classes are well balanced with a slight overrepresentation of **ESCA** and **Black Rot**. Images are in .jpeg format with dimensions of 256x256 pixels.
![Dataset Overview](./docs/images/dataset_overview.png) ![Dataset Overview](./docs/images/dataset_overview.png)
![Sample](./docs/images/samples_img.png)
## Model Structure ## Model Structure
Our model is a Convolutional Neural Network (CNN) built using Keras API with TensorFlow backend. It includes several convolutional layers followed by batch normalization, ReLU activation function and max pooling for downsampling. Our model is a Convolutional Neural Network (CNN) built using Keras API with TensorFlow backend. It includes several convolutional layers followed by batch normalization, ReLU activation function and max pooling for downsampling.
Dropout layers are used for regularization to prevent overfitting. The architecture details and parameters are as follows: Dropout layers are used for regularization to prevent overfitting. The architecture details and parameters are as follows:
| Layer (type) | Output Shape | Param # | ```{python}
|--------------------------------------|-----------------------------|----------| model = Sequential([
| sequential | (None, 224, 224, 3) | 0 | data_augmentation,
| conv2d | (None, 224, 224, 32) | 896 |
| batch_normalization | (None, 224, 224, 32) | 128 | # Block 1
| conv2d_1 | (None, 224, 224, 32) | 9248 | layers.Conv2D(32, kernel_size=3, padding='same', activation='relu'),
| batch_normalization_1 | (None, 224, 224, 32) | 128 | layers.BatchNormalization(),
| max_pooling2d | (None, 112, 112, 32) | 0 | layers.Conv2D(32, kernel_size=3, padding='same', activation='relu'),
| dropout | (None, 112, 112, 32) | 0 | layers.BatchNormalization(),
| conv2d_2 | (None, 112, 112, 64) | 18496 | layers.MaxPooling2D(pool_size=2),
| batch_normalization_2 | (None, 112, 112, 64) | 256 | layers.Dropout(0.25),
| conv2d_3 | (None, 112, 112, 64) | 36864 |
| batch_normalization_3 | (None, 112, 112, 64) | 256 | # Block 2
| max_pooling2d_1 | (None, 56, 56, 64) | 0 | layers.Conv2D(64, kernel_size=3, padding='same', activation='relu'),
| dropout_1 | (None, 56, 56, 64) | 0 | layers.BatchNormalization(),
| conv2d_4 | (None, 56, 56, 128) | 73728 | layers.Conv2D(64, kernel_size=3, padding='same', activation='relu'),
| batch_normalization_4 | (None, 56, 56, 128) | 512 | layers.BatchNormalization(),
| conv2d_5 | (None, 56, 56, 128) | 147584| layers.MaxPooling2D(pool_size=2),
| batch_normalization_5 | (None, 56, 56, 128) | 512 | layers.Dropout(0.25),
| max_pooling2d_2 | (None, 28, 28, 128) | 0 |
| dropout_2 | (None, 28, 28, 128) | 0 | # Block 3
| conv2d_6 | (None, 28, 28, 256) | 294912| layers.Conv2D(128, kernel_size=3, padding='same', activation='relu'),
| batch_normalization_6 | (None, 28, 28, 256) | 1024 | layers.BatchNormalization(),
| conv2d_7 | (None, 28, 28, 256) | 590080| layers.Conv2D(128, kernel_size=3, padding='same', activation='relu'),
| batch_normalization_7 | (None, 28, 28, 256) | 1024 | layers.BatchNormalization(),
| max_pooling2d_3 | (None, 14, 14, 256) | 0 | layers.MaxPooling2D(pool_size=2),
| dropout_3 | (None, 14, 14, 256) | 0 | layers.Dropout(0.25),
| global_average_pooling2d | (None, 256) | 0 |
| dense | (None, 256) | 65792 | # Block 4
| batch_normalization_8 | (None, 256) | 1024 | layers.Conv2D(256, kernel_size=3, padding='same', activation='relu'),
| dropout_4 | (None, 256) | 0 | layers.BatchNormalization(),
| dense_1 | (None, 128) | 32768 | layers.Conv2D(256, kernel_size=3, padding='same', activation='relu'),
| batch_normalization_9 | (None, 128) | 512 | layers.BatchNormalization(),
| dropout_5 | (None, 128) | 0 | layers.MaxPooling2D(pool_size=2),
| dense_2 | (None, 4) | 516 | layers.Dropout(0.25),
# Classification head
layers.GlobalAveragePooling2D(),
layers.Dense(256, activation='relu'),
layers.BatchNormalization(),
layers.Dropout(0.5),
layers.Dense(128, activation='relu'),
layers.BatchNormalization(),
layers.Dropout(0.5),
layers.Dense(num_classes)
])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
```
Total params: 3,825,134 (14.59 MB) Total params: 3,825,134 (14.59 MB) <br>
Trainable params: 1,274,148 (4.86 MB) Trainable params: 1,274,148 (4.86 MB) <br>
Non-trainable params: 2,688 (10.50 KB) Non-trainable params: 2,688 (10.50 KB) <br>
Optimizer params: 2,548,298 (9.72 MB) Optimizer params: 2,548,298 (9.72 MB) <br>
## Training Details ## Training Details
@ -81,9 +97,9 @@ model is identifying key features for accurate classification.
### resources: ### resources:
https://www.tensorflow.org/tutorials/images/classification?hl=en https://www.tensorflow.org/tutorials/images/classification?hl=en <br>
https://www.tensorflow.org/lite/convert?hl=en https://www.tensorflow.org/lite/convert?hl=en <br>
https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en <br>
AI(s) : deepseek-coder:6.7b | deepseek-r1:8b AI(s) : deepseek-coder:6.7b | deepseek-r1:8b

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 55 KiB

View file

@ -2,10 +2,12 @@ import os
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
from data_pretreat import train_ds
# Configuration # Configuration
data_dir = os.getcwd()[:-9] + "/data/datasplit/" data_dir = os.getcwd()[:-9] + "/data/"
class_names = ['Black_Rot', 'ESCA', 'Healthy', 'Leaf_Blight'] class_names = ['Black Rot', 'ESCA', 'Healthy', 'Leaf Blight']
subsets = ['train', 'val', 'test'] subsets = [dir_ for dir_ in os.listdir(data_dir)]
class_counts = {subset: {class_name: 0 for class_name in class_names} for subset in subsets} class_counts = {subset: {class_name: 0 for class_name in class_names} for subset in subsets}
@ -34,7 +36,7 @@ for idx, subset in enumerate(subsets):
print(f" {class_name}: {count} images ({percentage:.1f}%)") print(f" {class_name}: {count} images ({percentage:.1f}%)")
print(f" Total: {total_lst[idx]} images") print(f" Total: {total_lst[idx]} images")
fig, axes = plt.subplots(1, 3, figsize=(15, 5)) fig, axes = plt.subplots(1, len(subsets), figsize=(15, 5))
for idx, subset in enumerate(subsets): for idx, subset in enumerate(subsets):
counts = [class_counts[subset][class_name] for class_name in class_names] counts = [class_counts[subset][class_name] for class_name in class_names]
@ -49,7 +51,7 @@ for idx, subset in enumerate(subsets):
ha='center', va='bottom', fontweight='bold') ha='center', va='bottom', fontweight='bold')
axes[idx].set_title(subset.upper()+" tot: "+str(total_lst[idx]), fontsize=12, fontweight='bold') axes[idx].set_title(subset.upper()+" tot: "+str(total_lst[idx]), fontsize=12, fontweight='bold')
axes[idx].set_ylabel('Nombre d\'images', fontsize=10) axes[idx].set_ylabel('Number of images', fontsize=10)
axes[idx].set_xlabel('Classes', fontsize=10) axes[idx].set_xlabel('Classes', fontsize=10)
axes[idx].tick_params(axis='x', rotation=45) axes[idx].tick_params(axis='x', rotation=45)
axes[idx].grid(axis='y', alpha=0.3, linestyle='--') axes[idx].grid(axis='y', alpha=0.3, linestyle='--')
@ -57,3 +59,12 @@ for idx, subset in enumerate(subsets):
plt.tight_layout() plt.tight_layout()
plt.show() plt.show()
# Load examples
for img, lbl in train_ds.take(1):
for i in range(4):
ax = plt.subplot(2, 2, i + 1)
plt.imshow(img[i].numpy().astype("uint8"))
plt.title(class_names[lbl[i]])
plt.axis("off")
plt.show()

View file

@ -9,15 +9,15 @@ from tensorflow.keras.models import Sequential
current_dir = os.getcwd() current_dir = os.getcwd()
batch_size = 32 batch_size = 32
img_height = 224 img_height = 256
img_width = 224 img_width = 256
channels=3 channels=3
epochs=100 epochs=100
data_dir = current_dir[:-9]+"/data/datasplit/" data_dir = current_dir[:-9]+"/data/train/"
train_ds = tf.keras.utils.image_dataset_from_directory( train_ds = tf.keras.utils.image_dataset_from_directory(
data_dir+"train/", data_dir,
validation_split=0.2, validation_split=0.2,
subset="training", subset="training",
seed=123, seed=123,
@ -25,7 +25,7 @@ train_ds = tf.keras.utils.image_dataset_from_directory(
batch_size=batch_size) batch_size=batch_size)
val_ds = tf.keras.utils.image_dataset_from_directory( val_ds = tf.keras.utils.image_dataset_from_directory(
data_dir+"val/", data_dir,
validation_split=0.2, validation_split=0.2,
subset="validation", subset="validation",
seed=123, seed=123,
@ -33,24 +33,12 @@ val_ds = tf.keras.utils.image_dataset_from_directory(
batch_size=batch_size) batch_size=batch_size)
test_ds = tf.keras.utils.image_dataset_from_directory( test_ds = tf.keras.utils.image_dataset_from_directory(
data_dir+"test/", current_dir[:-9]+"/data/test/",
seed=123, seed=123,
image_size=(img_height, img_width), image_size=(img_height, img_width),
batch_size=batch_size) batch_size=batch_size)
class_names = train_ds.class_names class_names = train_ds.class_names
print(class_names)
# Visualize data
# plt.figure(figsize=(10, 10))
# for images, labels in train_ds.take(1):
# for i in range(9):
# ax = plt.subplot(3, 3, i + 1)
# plt.imshow(images[i].numpy().astype("uint8"))
# plt.title(class_names[labels[i]])
# plt.axis("off")
# plt.show()
#Data augmentation #Data augmentation
data_augmentation = keras.Sequential( data_augmentation = keras.Sequential(
@ -77,4 +65,12 @@ normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds)) image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0] first_image = image_batch[0]
print("\n DONE !") # Images name tensors
img_name_tensors = {}
for images, labels in test_ds:
for i, class_name in enumerate(class_names):
class_idx = class_names.index(class_name)
mask = labels == class_idx
if tf.reduce_any(mask):
img_name_tensors[class_name] = images[mask][0] / 255.0

View file

@ -1,14 +1,15 @@
import os import os
import numpy as np import numpy as np
import tensorflow as tf
import math
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pandas as pd import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns import seaborn as sns
from load_model import * from load_model import select_model
from data_pretreat import * # src/ function from data_pretreat import test_ds, img_name_tensors, class_names
model, model_dir = select_model() model, model_dir = select_model()
@ -23,17 +24,13 @@ loss = df['loss']
val_loss = df['val_loss'] val_loss = df['val_loss']
# Model testing # Model testing
y_pred = model.predict(test_ds) y_ = model.predict(test_ds)
y_ = np.argmax(y_pred, axis=1) y_ = np.argmax(y_, axis=1)
y_test_raw = np.concatenate([y for x, y in test_ds], axis=0) y_test_classes = np.concatenate([y for x, y in test_ds], axis=0)
y_test_classes = y_test_raw
cm = confusion_matrix(y_test_classes, y_) cm = confusion_matrix(y_test_classes, y_)
class_names = ['Black_Rot', 'ESCA', 'Healthy', 'Leaf_Blight']
plt.figure(figsize=(16, 5)) plt.figure(figsize=(16, 5))
# Subplot 1 : Training Accuracy # Subplot 1 : Training Accuracy
@ -60,10 +57,42 @@ sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=class_names, xticklabels=class_names,
yticklabels=class_names, yticklabels=class_names,
cbar=False) cbar=False)
plt.title('Matrice de Confusion') plt.title('Confusion Matrix')
plt.ylabel('Vraies étiquettes') plt.ylabel('True Classes')
plt.xlabel('Prédictions') plt.xlabel('Predictions')
plt.tight_layout() plt.tight_layout()
plt.show() plt.show()
# Display images probabilities
def top_k_predictions(img, k=2):
image_batch = tf.expand_dims(img, 0)
predictions = model(image_batch)
probs = tf.nn.softmax(predictions, axis=-1)
top_probs, top_idxs = tf.math.top_k(input=probs, k=k)
top_labels = [class_names[idx.numpy()] for idx in top_idxs[0]]
return top_labels, top_probs[0]
# Show img with prediction
plt.figure(figsize=(14, 12))
num_images = len(img_name_tensors)
cols = 2
rows = math.ceil(num_images / cols)
for n, (name, img_tensor) in enumerate(img_name_tensors.items()):
ax = plt.subplot(rows, cols, n+1)
ax.imshow(img_tensor)
pred_labels, pred_probs = top_k_predictions(img_tensor, k=4)
pred_text = f"Real classe: {name}\n\nPredictions:\n"
for label, prob in zip(pred_labels, pred_probs):
pred_text += f"{label}: {prob.numpy():0.1%}\n"
ax.axis('off')
ax.text(-0.5, 0.95, pred_text, ha='left', va='top', transform=ax.transAxes)
plt.tight_layout()
plt.show()

View file

@ -5,62 +5,11 @@ import os
import math import math
from tensorflow.keras.models import load_model from tensorflow.keras.models import load_model
from load_model import * from load_model import select_model
from data_pretreat import * # src/ function from data_pretreat import test_ds, img_name_tensors, class_names, img_height, img_width
model, model_dir = select_model() model, model_dir = select_model()
def read_image(file_name):
image = tf.io.read_file(file_name)
image = tf.io.decode_jpeg(image, channels=channels)
image = tf.image.convert_image_dtype(image, tf.float32)
image = tf.image.resize_with_pad(image, target_height=img_height, target_width=img_width)
return image
def top_k_predictions(img, k=2):
image_batch = tf.expand_dims(img, 0)
predictions = model(image_batch)
probs = tf.nn.softmax(predictions, axis=-1)
top_probs, top_idxs = tf.math.top_k(input=probs, k=k)
top_labels = [class_names[idx.numpy()] for idx in top_idxs[0]]
return top_labels, top_probs[0]
# Load img
img_name_tensors = {}
for images, labels in test_ds:
for i, class_name in enumerate(class_names):
class_idx = class_names.index(class_name)
mask = labels == class_idx
if tf.reduce_any(mask):
img_name_tensors[class_name] = images[mask][0] / 255.0
# Show img with prediction
plt.figure(figsize=(14, 12))
num_images = len(img_name_tensors)
cols = 2
rows = math.ceil(num_images / cols)
for n, (name, img_tensor) in enumerate(img_name_tensors.items()):
ax = plt.subplot(rows, cols, n+1)
ax.imshow(img_tensor)
pred_labels, pred_probs = top_k_predictions(img_tensor, k=4)
pred_text = f"Real classe: {name}\n\nPredictions:\n"
for label, prob in zip(pred_labels, pred_probs):
pred_text += f"{label}: {prob.numpy():0.1%}\n"
ax.axis('off')
ax.text(-0.5, 0.95, pred_text, ha='left', va='top', transform=ax.transAxes)
plt.tight_layout()
plt.show()
# Calculate Integrated Gradients # Calculate Integrated Gradients
def f(x): def f(x):
return tf.where(x < 0.8, x, 0.8) #A simplified model function. return tf.where(x < 0.8, x, 0.8) #A simplified model function.
@ -185,16 +134,10 @@ def plot_img_attributions(baseline,
plt.tight_layout() plt.tight_layout()
return fig return fig
_ = plot_img_attributions(image=img_name_tensors['Leaf_Blight'], _ = plot_img_attributions(image=img_name_tensors[class_names[3]],
baseline=baseline, baseline=baseline,
target_class_idx=3, target_class_idx=3,
m_steps=240, m_steps=240,
cmap=plt.cm.inferno, cmap=plt.cm.inferno,
overlay_alpha=0.4) overlay_alpha=0.4)
plt.show() plt.show()
"""
@ref :
https://www.tensorflow.org/tutorials/interpretability/integrated_gradients?hl=en
"""

View file

@ -3,12 +3,17 @@ from tensorflow.keras.models import load_model
from data_pretreat import img_height, img_width, channels from data_pretreat import img_height, img_width, channels
import sys import sys
def menu(dir_):
print("Select a model:")
for idx, dir_ in enumerate(dir_):
print(f"({idx})\t{dir_}")
def select_model(): def select_model():
# all_model_dir = "/home/jhodi/bit/Python/Grapevine_Pathology_Detection/venv/models"
# Verify if a model is present on all_model_dir # Verify if a model is present on all_model_dir
while True: while True:
try: try:
all_model_dir = input("Model dir : ") # all_model_dir = input("Model dir : ")
all_model_dir = "/home/jhodi/bit/Python/Grapevine_Pathology_Detection/venv/models"
model_found = 0 model_found = 0
for foldername, subfolders, filenames in os.walk(all_model_dir): for foldername, subfolders, filenames in os.walk(all_model_dir):
for filename in filenames: for filename in filenames:
@ -21,14 +26,13 @@ def select_model():
break break
except Exception as e: except Exception as e:
print(f"Something went wrong! {str(e)}") print(f"Something went wrong! {str(e)}")
sys.exit()
subdirectories = [name for name in os.listdir(all_model_dir) if os.path.isdir(os.path.join(all_model_dir, name))] subdirectories = [name for name in os.listdir(all_model_dir) if os.path.isdir(os.path.join(all_model_dir, name))]
print("Select a model:") # Let user make his choce
for idx, dir_ in enumerate(subdirectories):
print(f"({idx})\t{dir_}")
while True: while True:
try: try:
menu(subdirectories)
selected_model = int(input("-> ")) selected_model = int(input("-> "))
if 0 <= selected_model < len(subdirectories): if 0 <= selected_model < len(subdirectories):
break break