arduino-photometrics/exec/random_forest_predict.r

154 lines
4.3 KiB
R

# install.packages('randomForest')
# install.packages(c('neuralnet','keras','tensorflow'),dependencies = T)
library(tidyverse)
library(ggplot2)
library(lubridate)
library(dplyr)
library(randomForest)
library(suntools)
library(neuralnet)
# NOTE(review): machine-specific working directory. The relative CSV path
# below is resolved against it, so the script only runs where this path exists.
setwd("~/Documents/PlatformIO/Projects/Robot_Go_West/arduino-photometrics/exec")

# Load the merged photo-sensor measurements.
photo <- read.csv("../data/photo_measures/merged_photo_data.csv", header = TRUE)

# Derive calendar and time-of-day features from the numeric `Epoch` column
# (interpreted as seconds since 1970-01-01 and displayed in Europe/Paris).
photo <- photo %>%
  mutate(
    datetime = as.POSIXct(Epoch, origin = "1970-01-01", tz = "Europe/Paris"),
    # as.Date() on a POSIXct defaults to tz = "UTC"; pass the zone explicitly
    # so the calendar day agrees with the local day-of-year computed below.
    jour = as.Date(datetime, tz = "Europe/Paris"),
    # Day of the year, computed two equivalent ways (both columns are kept).
    num_jour = as.numeric(format(datetime, "%j")),
    alterative_num_jour = yday(datetime),
    # Yearly cycle encoded as a sine (365-day period).
    sin_day = sin(alterative_num_jour * (2 * pi / 365)),
    # Local time of day as a fractional hour in [0, 24).
    decimal_hour = hour(datetime) + minute(datetime) / 60 +
      second(datetime) / 3600,
    # Daily cycle encoded as an angle plus its sine/cosine, so that 23:59 and
    # 00:00 end up close to each other.
    rad_hour = decimal_hour * (2 * pi / 24),
    sin_hour = sin(rad_hour),
    cos_hour = cos(rad_hour)
  )
# Normalise every `Photo_sensor*` column: take the square root (to emphasise
# small light variations), flip the sign around `max_val_sensor`, then
# standardise the result (centred and scaled).
max_val_sensor <- 254
photo <- mutate(
  photo,
  across(
    starts_with("Photo_sensor"),
    function(reading) {
      inverted <- max_val_sensor - sqrt(reading)
      as.numeric(scale(inverted, center = TRUE, scale = TRUE))
    }
  )
)

# Drop columns that contain nothing but NA/NaN. Standardising a column made
# of a single repeated value yields NaN everywhere, so such columns are
# removed here.
photo <- select(photo, where(function(column) !all(is.na(column))))
# Retrieve the solar position for every measurement timestamp at one fixed
# site, given as a single-row (longitude, latitude) matrix.
lat <- 44.7912
lon <- -0.6078
coords <- matrix(c(lon, lat), ncol = 2)

# Numeric form of the timestamps, stored next to the solar position so it can
# be compared with the raw `Epoch` column later on.
unix_time <- as.numeric(photo$datetime)

# The first column of the result is stored as azimuth, the second as elevation.
positions <- solarpos(coords, photo$datetime)

df_soleil <- data.frame(
  timestamp = photo$datetime,
  utime = unix_time,
  azimut = positions[, 1],
  elevation = positions[, 2]
)
# Transform the azimuth to help learning during the training phase.
# solarpos() reports the azimuth in degrees while sin() expects radians, so
# the angle is converted before taking the sine.
df_soleil$sin_azimut <- sin(df_soleil$azimut * pi / 180)

# Put the solar-position columns and the sensor measurements side by side
# (row i of one matches row i of the other), then drop the temporary frame.
binded <- bind_cols(df_soleil, photo)
remove(df_soleil)

# Sanity check on the elapsed time: absolute difference between the timestamp
# used for the solar position and the raw `Epoch` of the measurement.
binded$gap_time <- abs(binded$utime - binded$Epoch)
# Random 80/20 train/test split.
# A temporary row identifier is added to `binded` so that the test set can be
# defined as "every row that was not sampled for training"; the identifier is
# removed from both subsets afterwards (it stays on `binded`).
set.seed(123)
binded <- mutate(binded, id = row_number())
random_train_data <- sample_frac(binded, 0.80)
random_test_data <- binded %>%
  anti_join(random_train_data, by = "id") %>%
  select(-id)
random_train_data <- select(random_train_data, -id)

# Compare the azimuth distribution of the two subsets.
summary(random_train_data$azimut)
summary(random_test_data$azimut)
# Chronological 80/20 train/test split: the first 80 % of the rows are used
# for training, the remaining rows for testing.
# The split relies on the rows being in chronological order, so warn when the
# `Epoch` column shows that they are not.
if (isTRUE(is.unsorted(binded$Epoch))) {
  warning(
    "binded is not sorted by Epoch; the chronological split is unreliable",
    call. = FALSE
  )
}

seuil <- floor(0.80 * nrow(binded))

# A logical mask built from seq_len() stays correct when `seuil` is 0 or equal
# to nrow(binded), where `1:seuil` / `(seuil + 1):nrow(binded)` would count
# backwards and select the wrong rows.
is_chrono_train <- seq_len(nrow(binded)) <= seuil
chrono_train_data <- binded[is_chrono_train, ]
chrono_test_data <- binded[!is_chrono_train, ]

# Compare the azimuth distribution of the two subsets.
summary(chrono_train_data$azimut)
summary(chrono_test_data$azimut)
# Model creation: one random forest per split strategy, both regressing the
# solar azimuth on the same predictors.
nb_tree <- 100

# Single source of truth for the predictor columns, shared by training and
# prediction so the two cannot drift apart.
predictors <- c(
  "sin_day", "sin_hour", "cos_hour",
  "Photo_sensor0", "Photo_sensor1", "Photo_sensor2",
  "Photo_sensor4", "Photo_sensor5",
  "Temp_sensor0"
)

random_model <- randomForest(
  x = random_train_data[, predictors, drop = FALSE],
  y = random_train_data$azimut,
  ntree = nb_tree
)
chrono_model <- randomForest(
  x = chrono_train_data[, predictors, drop = FALSE],
  y = chrono_train_data$azimut,
  ntree = nb_tree
)

# Predict on the held-out rows. Only the predictor columns are handed to
# predict(): it rejects `newdata` containing any NA, so passing the full frame
# would make an NA in an unused column abort the prediction.
test_random_predictions <- predict(
  random_model,
  newdata = random_test_data[, predictors, drop = FALSE]
)
test_chrono_predictions <- predict(
  chrono_model,
  newdata = chrono_test_data[, predictors, drop = FALSE]
)

# Keep the predictions next to the original test rows for inspection.
test_random_results <- random_test_data
test_chrono_results <- chrono_test_data
test_chrono_results$predicted_azimut <- test_chrono_predictions
test_random_results$predicted_azimut <- test_random_predictions

head(test_random_results[, c("azimut", "predicted_azimut")])
head(test_chrono_results[, c("azimut", "predicted_azimut")])
# Evaluate the chronological model on the rows whose local time of day lies
# strictly between 07:00 and 20:00.
comparison_chrono <- test_chrono_results[
  , c("azimut", "predicted_azimut", "timestamp", "decimal_hour")
] %>%
  filter(decimal_hour > 7.0, decimal_hour < 20.0)

# Absolute prediction error per row, in the same unit as `azimut`.
comparison_chrono$comp <- abs(
  comparison_chrono$azimut - comparison_chrono$predicted_azimut
)

# Mean absolute error over the filtered rows (NaN when no row is left).
mean_abs_error <- mean(comparison_chrono$comp)
# Kept for backward compatibility: earlier versions stored the result in a
# variable named `mean`, which shadows base::mean(). Prefer `mean_abs_error`.
mean <- mean_abs_error

print(typeof(comparison_chrono))
print(mean_abs_error)
print(random_model)
print(chrono_model)