# arduino-photometrics/exec/random_forest_predict.r

# install.packages('randomForest')

library(tidyverse)
library(ggplot2)
library(lubridate)
library(dplyr)
library(randomForest)

# The relative data paths below are resolved against this directory.
# NOTE(review): the path is specific to one machine - adjust before running
# the script elsewhere.
setwd("~/Documents/PlatformIO/Projects/Robot_Go_West/arduino-photometrics/exec")

# Load the sun position table and the merged photo sensor measurements
solar <- read.csv(
  file = "../data/solar_pos_data/solar_data_2026-01-05_to_2026-01-10.csv",
  header = TRUE
)
photo <- read.csv(
  file = "../data/photo_measures/merged_photo_data.csv",
  header = TRUE
)

# Time type changes
# Epoch is converted as a Unix timestamp. The origin and the time zone are
# given explicitly: as.POSIXct() on a bare number requires `origin` before
# R 4.3, and without `tz` the value would be shown in the local time zone
# instead of UTC like `datetime` below.
photo$time <- as.POSIXct(photo$Epoch, origin = "1970-01-01", tz = "UTC")

photo <- photo %>%
  mutate(
    datetime = as.POSIXct(Epoch, origin = "1970-01-01", tz = "UTC"),

    # Calendar date and day of the year (the latter computed two ways)
    jour = as.Date(datetime),
    num_jour = as.numeric(format(datetime, "%j")),
    alterative_num_jour = yday(datetime),
    # Day of the year mapped onto a yearly sine wave
    sin_day = sin(alterative_num_jour * (2 * pi / 365)),

    # Time of day as a fractional hour, then as an angle over a 24 h cycle;
    # sine and cosine together keep 23:59 and 00:00 close to each other
    decimal_hour = hour(datetime) + minute(datetime) / 60 +
      second(datetime) / 3600,
    rad_hour = decimal_hour * (2 * pi / 24),
    sin_hour = sin(rad_hour),
    cos_hour = cos(rad_hour)
  )

# Transform data to improve learning during the training phase
# NOTE(review): sin() expects radians - confirm the unit of `azimut`.
solar$sin_azimut <- sin(solar$azimut)

# Photo sensors: take the square root to highlight small light variations,
# invert the result against the sensor maximum, then standardise each column
# (zero mean, unit standard deviation).
max_val_sensor <- 254
photo <- mutate(
  photo,
  across(
    starts_with("Photo_sensor"),
    function(reading) {
      inverted <- max_val_sensor - sqrt(reading)
      as.numeric(scale(inverted, center = TRUE, scale = TRUE))
    }
  )
)

# Drop the columns that contain only NA/NaN: scale() produces NaN for a
# column made of a single repeated value.
photo <- select(photo, where(function(column) !all(is.na(column))))

# Keep only the sun positions recorded inside the photo time window,
# widened on both sides by one measurement step.
min_timestamp <- as.integer(min(photo$Epoch))
max_timestamp <- as.integer(max(photo$Epoch))
# Measurement step, taken as the gap between the 3rd and 4th photo rows
elapsed_time <- photo$Epoch[4] - photo$Epoch[3]

filtered_solar <- filter(
  solar,
  utime > (min_timestamp - elapsed_time),
  utime < (max_timestamp + elapsed_time)
)

# The unfiltered table is no longer needed
rm(solar)


# merge
# Pair every photo measurement with the sun position closest in time.
# Binding the two tables purely by row position only works when both have
# exactly the same number of rows in the same order; otherwise bind_cols()
# fails, so the rows are matched explicitly on their timestamps.
if (nrow(filtered_solar) == 0) {
  stop("no sun position falls inside the photo time window", call. = FALSE)
}

# findInterval() needs the reference timestamps in ascending order
filtered_solar <- filtered_solar[order(filtered_solar$utime), , drop = FALSE]

# Index of the sun position just before (lower) and just after (upper) each
# photo timestamp, clamped to the valid row range
lower_idx <- pmax(findInterval(photo$Epoch, filtered_solar$utime), 1L)
upper_idx <- pmin(lower_idx + 1L, nrow(filtered_solar))

# Keep whichever of the two neighbours is closer (ties go to the earlier one)
upper_is_closer <- abs(filtered_solar$utime[upper_idx] - photo$Epoch) <
  abs(photo$Epoch - filtered_solar$utime[lower_idx])
nearest_idx <- ifelse(upper_is_closer, upper_idx, lower_idx)

binded <- bind_cols(filtered_solar[nearest_idx, , drop = FALSE], photo)

remove(filtered_solar, photo)
remove(lower_idx, upper_idx, upper_is_closer, nearest_idx)

# Check elapsed time
# Absolute gap between each measurement and its matched sun position
binded$gap_time <- abs(binded$utime - binded$Epoch)


# Random split train and test dataset
# The seed makes the random draw reproducible.
set.seed(123)

# Row identifier used to find the rows left out of the training sample
binded <- mutate(binded, id = row_number())

# 80 % of the rows are drawn for training; the rest form the test set
random_train_data <- sample_frac(binded, 0.80)
random_test_data <- anti_join(binded, random_train_data, by = "id")

# The identifier is not a feature, so it is dropped from both subsets
random_train_data <- select(random_train_data, -id)
random_test_data <- select(random_test_data, -id)

# Compare the target distribution of the two subsets
summary(random_train_data$azimut)
summary(random_test_data$azimut)


# Chrono split train and test dataset
# Dataset already chrono sorted
# The first 80 % of the rows train the model and the remaining rows test it.
# The indices are built with seq_len() rather than 1:seuil: when seuil is 0,
# 1:seuil is c(1, 0) and would put row 1 in both subsets.

seuil <- floor(0.80 * nrow(binded))
chrono_train_data <- binded[seq_len(seuil), , drop = FALSE]
chrono_test_data <- binded[seuil + seq_len(nrow(binded) - seuil), , drop = FALSE]

# Compare the target distribution of the two subsets
summary(chrono_train_data$azimut)
summary(chrono_test_data$azimut)

# Model creation
# Both forests use the same number of trees and the same predictors; only the
# training subset differs.
nb_tree <- 100

predictor_cols <- c(
  "sin_day", "sin_hour", "cos_hour",
  "Photo_sensor0", "Photo_sensor1", "Photo_sensor2",
  "Photo_sensor4", "Photo_sensor5",
  "Temp_sensor0"
)

# Forest trained on the random split
random_model <- randomForest(
  x = random_train_data[, predictor_cols],
  y = random_train_data$azimut,
  ntree = nb_tree
)

# Forest trained on the chronological split
chrono_model <- randomForest(
  x = chrono_train_data[, predictor_cols],
  y = chrono_train_data$azimut,
  ntree = nb_tree
)

# Predict the azimuth of the held-out rows with each model
test_random_predictions <- predict(random_model, newdata = random_test_data)
test_chrono_predictions <- predict(chrono_model, newdata = chrono_test_data)

# Store each prediction next to the observed values of its test set
test_random_results <- random_test_data
test_random_results$predicted_azimut <- test_random_predictions

test_chrono_results <- chrono_test_data
test_chrono_results$predicted_azimut <- test_chrono_predictions

# Preview observed against predicted azimuth for the first rows
head(test_random_results[c("azimut", "predicted_azimut")])
head(test_chrono_results[c("azimut", "predicted_azimut")])