# Train random forest regressors that predict the sun azimuth (`azimut`)
# from Arduino photo-sensor readings and cyclic time features.
#
# Pipeline: load the two CSV files -> derive time features -> rescale the
# photo sensors -> align solar positions with sensor readings -> build a
# random and a chronological train/test split -> fit one model per split.

# install.packages('randomForest')
library(tidyverse)
library(ggplot2)
library(lubridate)
library(dplyr)
library(randomForest)

# NOTE(review): setwd() ties the script to one machine; the relative paths
# below depend on it, so it is kept as-is.
setwd("~/Documents/PlatformIO/Projects/Robot_Go_West/arduino-photometrics/exec")

# Load ----
solar <- read.csv(
  "../data/solar_pos_data/solar_data_2026-01-05_to_2026-01-06.csv",
  header = TRUE
)
photo <- read.csv(
  "../data/arduino_data_package_auto_20260105_151537.csv",
  header = TRUE
)

# Time features ----
# `Epoch` is treated as seconds since 1970-01-01. The origin is given
# explicitly because as.POSIXct() on a bare number fails without it on
# R < 4.3; UTC matches the `datetime` column built below.
photo$time <- as.POSIXct(photo$Epoch, origin = "1970-01-01", tz = "UTC")

photo <- photo %>%
  mutate(
    datetime = as.POSIXct(Epoch, origin = "1970-01-01", tz = "UTC"),
    jour = as.Date(datetime),                          # calendar day
    num_jour = as.numeric(format(datetime, "%j")),     # day of year
    alterative_num_jour = yday(datetime),              # same, via lubridate
    sin_day = sin(alterative_num_jour * (2 * pi / 365)),
    decimal_hour = hour(datetime) + minute(datetime) / 60 +
      second(datetime) / 3600,
    # Map the hour of day onto a circle so 23:59 and 00:00 end up close.
    rad_hour = decimal_hour * (2 * pi / 24),
    sin_hour = sin(rad_hour),
    cos_hour = cos(rad_hour)
  )

# Transform data to improve learning during the training phase ----
# NOTE(review): sin() expects radians - confirm the unit of `azimut`.
solar$sin_azimut <- sin(solar$azimut)

# Invert the photo-sensor readings around `max_val_sensor`, then
# standardise each sensor column (zero mean, unit variance).
max_val_sensor <- 254
photo <- photo %>%
  mutate(across(starts_with("Photo_sensor"), function(reading) {
    inverted <- max_val_sensor - reading
    as.numeric(scale(inverted, center = TRUE, scale = TRUE))
  }))

# Drop columns that are entirely NA: scale() yields NaN for a column whose
# values are all identical (zero standard deviation).
photo <- photo %>%
  select(where(function(column) !all(is.na(column))))

# Select the sun-position rows covering the sensor time range ----
max_timestamp <- as.integer(max(photo$Epoch))
min_timestamp <- as.integer(min(photo$Epoch))
# Spacing between two consecutive readings, used as a margin on both sides.
elapsed_time <- photo$Epoch[4] - photo$Epoch[3]

filtered_solar <- solar %>%
  filter(
    utime > (min_timestamp - elapsed_time) &
      utime < (max_timestamp + elapsed_time)
  )
remove(solar)

# Merge ----
# bind_cols() pairs rows purely by position, so both tables must have the
# same number of rows; fail with an explicit message otherwise.
if (nrow(filtered_solar) != nrow(photo)) {
  stop(
    "Cannot align solar and photo data: ", nrow(filtered_solar),
    " solar rows vs ", nrow(photo), " photo rows.",
    call. = FALSE
  )
}
binded <- bind_cols(filtered_solar, photo)
remove(filtered_solar, photo)

# Check the time gap between each paired solar/sensor row.
binded$gap_time <- abs(binded$utime - binded$Epoch)

# Random split into train and test datasets ----
set.seed(123)
binded <- binded %>%
  mutate(id = row_number())

random_train_data <- binded %>%
  sample_frac(0.80)
# The test set is every row that was not drawn for training.
random_test_data <- anti_join(binded, random_train_data, by = "id")

random_train_data$id <- NULL
random_test_data$id <- NULL

summary(random_train_data$azimut)
summary(random_test_data$azimut)

# Chronological split into train and test datasets ----
# The dataset is already sorted chronologically.
seuil <- floor(0.80 * nrow(binded))  # index of the last training row
# A logical mask avoids the `1:0` and `(n + 1):n` pitfalls on tiny inputs.
is_chrono_train <- seq_len(nrow(binded)) <= seuil
chrono_train_data <- binded[is_chrono_train, ]
chrono_test_data <- binded[!is_chrono_train, ]

summary(chrono_train_data$azimut)
summary(chrono_test_data$azimut)

# Model creation ----
nb_tree <- 100

# Predictor columns shared by both models.
# NOTE(review): Photo_sensor3 is not listed - presumably it is the constant
# column dropped above; verify against the data.
feature_cols <- c(
  "sin_day", "sin_hour", "cos_hour",
  "Photo_sensor0", "Photo_sensor1", "Photo_sensor2",
  "Photo_sensor4", "Photo_sensor5",
  "Temp_sensor0"
)

random_features <- random_train_data[, feature_cols, drop = FALSE]
# Previously this subset was also assigned to `random_features`, which
# overwrote the line above and left `chrono_features` undefined.
chrono_features <- chrono_train_data[, feature_cols, drop = FALSE]

# Use the x/y interface: a data frame of predictors cannot be used as a
# single term on the right-hand side of a formula.
random_model <- randomForest(
  x = random_features,
  y = random_train_data$azimut,
  ntree = nb_tree
)
chrono_model <- randomForest(
  x = chrono_features,
  y = chrono_train_data$azimut,
  ntree = nb_tree
)