# arduino-photometrics/exec/random_forest_predict.r
# 2026-01-12 11:10:24 +01:00
#
# 122 lines
# 3.5 KiB
# R

# install.packages('randomForest')
library(tidyverse)
library(ggplot2)
library(lubridate)
library(dplyr)
library(randomForest)
# Work from the project's exec/ directory so the relative data paths resolve.
setwd("~/Documents/PlatformIO/Projects/Robot_Go_West/arduino-photometrics/exec")

# Input tables: solar position data and the merged photo measurements.
# header = TRUE is already the read.csv() default, so it is not repeated.
solar_csv <- "../data/solar_pos_data/solar_data_2026-01-05_to_2026-01-10.csv"
photo_csv <- "../data/photo_measures/merged_photo_data.csv"
solar <- read.csv(solar_csv)
photo <- read.csv(photo_csv)
# Time features derived from the Unix timestamp stored in `Epoch`.
# `time` is built with an explicit origin and UTC: as.POSIXct() on a number
# requires `origin` before R 4.3, and without `tz` the result would follow the
# machine's local time zone instead of agreeing with `datetime` below.
photo$time <- as.POSIXct(photo$Epoch, origin = "1970-01-01", tz = "UTC")
photo <- photo %>%
  mutate(
    datetime = as.POSIXct(Epoch, origin = "1970-01-01", tz = "UTC"),
    jour = as.Date(datetime),
    # Day of the year, computed two ways; both columns are kept.
    num_jour = as.numeric(format(datetime, "%j")),
    alterative_num_jour = yday(datetime),
    # Position of the day on a 365-day cycle.
    sin_day = sin(alterative_num_jour * (2 * pi / 365)),
    # Hour of day as a decimal number, then mapped onto the 24 h circle so
    # that times just before and just after midnight end up close together.
    decimal_hour = hour(datetime) + minute(datetime) / 60 +
      second(datetime) / 3600,
    rad_hour = decimal_hour * (2 * pi / 24),
    sin_hour = sin(rad_hour),
    cos_hour = cos(rad_hour)
  )
# Add the sine of the azimut column as an extra, bounded representation of
# the target for the training phase.
# NOTE(review): sin() works in radians - confirm the unit of `azimut`.
solar <- solar %>%
  mutate(sin_azimut = sin(azimut))
# Rescale every Photo_sensor* column: take the square root (to emphasise
# small light variations), flip the value around max_val_sensor, then
# standardise with scale(). as.numeric() drops the matrix wrapper that
# scale() returns so each column stays a plain numeric vector.
max_val_sensor <- 254
photo <- photo %>%
  mutate(
    across(
      starts_with("Photo_sensor"),
      function(raw) as.numeric(scale(max_val_sensor - sqrt(raw)))
    )
  )
# Drop every column that holds nothing but NA/NaN. Such columns appear when
# scale() is applied to a column made of a single repeated value.
photo <- select(photo, where(function(column) any(!is.na(column))))
# Pair every photo measurement with the solar row closest to it in time.
#
# The previous version filtered the solar table to the photo time window and
# then glued both tables together by row position, which only works when both
# tables have exactly the same number of rows in exactly the same order.
# Here the match is made on the timestamps themselves (utime vs Epoch).

# Time window covered by the photo data, widened by one sampling step.
# The step is estimated from rows 3 and 4 of the photo table.
if (nrow(photo) < 4) {
  stop("photo needs at least 4 rows to estimate the sampling step",
       call. = FALSE)
}
max_timestamp <- as.integer(max(photo$Epoch))
min_timestamp <- as.integer(min(photo$Epoch))
elapsed_time <- photo$Epoch[4] - photo$Epoch[3]

# Keep the solar rows inside the window, sorted by time because
# findInterval() below needs a non-decreasing vector.
filtered_solar <- solar %>%
  filter(
    utime > (min_timestamp - elapsed_time),
    utime < (max_timestamp + elapsed_time)
  ) %>%
  arrange(utime)
if (nrow(filtered_solar) == 0) {
  stop("No solar row falls inside the photo time window", call. = FALSE)
}
remove(solar)

# findInterval() returns, for each Epoch, the last solar row at or before it
# (0 when there is none). The nearest row is that one or the one after it.
row_before <- pmax(findInterval(photo$Epoch, filtered_solar$utime), 1L)
row_after <- pmin(row_before + 1L, nrow(filtered_solar))
nearest_row <- ifelse(
  abs(filtered_solar$utime[row_after] - photo$Epoch) <
    abs(photo$Epoch - filtered_solar$utime[row_before]),
  row_after,
  row_before
)

# merge: solar columns first, then the photo columns, one row per photo row.
binded <- bind_cols(slice(filtered_solar, nearest_row), photo)
remove(filtered_solar, photo, row_before, row_after, nearest_row)

# Distance in time between each measurement and its matched solar row,
# kept as a column so the quality of the match can be inspected.
binded$gap_time <- abs(binded$utime - binded$Epoch)
# Random 80/20 train/test split; the fixed seed makes the draw reproducible.
set.seed(123)
# Row id used to define the test set as "every row that was not drawn".
# The id column stays in `binded`; it is only removed from the two subsets.
binded <- mutate(binded, id = row_number())
random_train_data <- sample_frac(binded, 0.80)
random_test_data <- binded %>%
  filter(!(id %in% random_train_data$id))
random_train_data <- select(random_train_data, -id)
random_test_data <- select(random_test_data, -id)
# Compare the azimut distribution of the two subsets.
summary(random_train_data$azimut)
summary(random_test_data$azimut)
# Chronological 80/20 split: the first 80 % of the rows form the training set
# and the remaining rows the test set. This relies on `binded` already being
# sorted by time.
n_rows <- nrow(binded)
seuil <- floor(0.80 * n_rows)
# A logical mask is used instead of 1:seuil and (seuil + 1):n_rows, because
# those ranges count backwards when seuil is 0 or equals n_rows and would then
# put the same row in both subsets (or select rows that do not exist).
in_train <- seq_len(n_rows) <= seuil
chrono_train_data <- binded[in_train, , drop = FALSE]
chrono_test_data <- binded[!in_train, , drop = FALSE]
# Compare the azimut distribution of the two subsets.
summary(chrono_train_data$azimut)
summary(chrono_test_data$azimut)
# Model creation: one random forest per split, both predicting `azimut` from
# the same set of predictors.
nb_tree <- 100

# Single definition of the predictor set so the two models cannot drift apart.
feature_cols <- c(
  "sin_day", "sin_hour", "cos_hour",
  "Photo_sensor0", "Photo_sensor1", "Photo_sensor2",
  "Photo_sensor4", "Photo_sensor5", "Temp_sensor0"
)

# The all-NA column filter applied to the photo data can drop sensor columns,
# so report a missing predictor by name instead of failing with
# "undefined columns selected".
missing_cols <- setdiff(
  feature_cols,
  intersect(names(random_train_data), names(chrono_train_data))
)
if (length(missing_cols) > 0) {
  stop(
    "Predictor column(s) missing from the training data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# The random-split model is fitted first, then the chronological one; keeping
# this order keeps the random number stream, and so the forests, reproducible.
random_model <- randomForest(
  x = random_train_data[, feature_cols],
  y = random_train_data$azimut,
  ntree = nb_tree
)
chrono_model <- randomForest(
  x = chrono_train_data[, feature_cols],
  y = chrono_train_data$azimut,
  ntree = nb_tree
)
# Predict azimut on each held-out set and store the prediction next to the
# measured value.
#
# Only the model's own predictor columns are passed as `newdata`.
# NOTE(review): predict.randomForest() appears to check all of `newdata` for
# missing values before selecting the model's variables, so an NA in an unused
# column would abort the prediction - confirm against the package source.
# The predictor names are read back from each fitted model.
random_vars <- rownames(random_model$importance)
chrono_vars <- rownames(chrono_model$importance)

test_random_predictions <- predict(
  random_model,
  newdata = random_test_data[, random_vars, drop = FALSE]
)
test_chrono_predictions <- predict(
  chrono_model,
  newdata = chrono_test_data[, chrono_vars, drop = FALSE]
)

# Result tables: the full test rows plus a `predicted_azimut` column.
test_random_results <- random_test_data
test_chrono_results <- chrono_test_data
test_random_results$predicted_azimut <- test_random_predictions
test_chrono_results$predicted_azimut <- test_chrono_predictions

# Quick look at measured vs predicted values.
head(test_random_results[, c("azimut", "predicted_azimut")])
head(test_chrono_results[, c("azimut", "predicted_azimut")])