// piperun-bot/src/groupped_repport.rs

use std::fmt::Debug;

use chrono::Datelike;
use itertools::Itertools;
use polars::prelude::buffer::validate_utf8;
use polars::prelude::*;
use reqwest;
use walkdir;
use std::time::Duration;
use std::env;

use csv;

/// Minimal view of one row of the AI-sanitized, `;`-delimited CSV.
///
/// This file deserializes every row of the model's reply into this type to
/// check that the reply is well formed: a row only counts as valid when it
/// parses and its `PONTOS` column holds a value.
///
/// The field names are upper-case because they are matched against the CSV
/// header row by name during deserialization.
#[derive(Debug, serde::Deserialize)]
struct CsvHeader {
    // Category label of the row ("categoria" = category).
    CATEGORIA: String,
    // Score of the row ("pontos" = points); `None` when the cell is empty,
    // which the validation step treats as a malformed row.
    PONTOS: Option<u8>,
}

/// One evaluation record with a `u8` value per criterion plus a talk id.
///
/// NOTE(review): this type is not referenced anywhere in the visible part of
/// the file; it presumably models the per-talk row of the final grouped
/// report — confirm before relying on it.
///
/// Field names are upper-case Portuguese identifiers; as with any
/// `serde::Deserialize` derive they are matched against input keys by name.
#[derive(Debug, serde::Deserialize)]
struct CsvEvaluation {
    // "Apresentação" = introduction / presentation.
    APRESENTAÇÃO: u8,
    // "Confirmação de e-mail" = e-mail confirmation.
    CONFIRMAÇÃO_DE_EMAIL: u8,
    // "Confirmação de telefone" = phone confirmation.
    CONFIRMAÇÃO_DE_TELEFONE: u8,
    // "Protocolo" = protocol.
    PROTOCOLO: u8,
    // "Uso do português" = use of Portuguese.
    USO_DO_PORTUGUÊS: u8,
    // "Paciência e educação" = patience and politeness.
    PACIÊNCIA_E_EDUCAÇÃO: u8,
    // "Disponibilidade" = availability.
    DISPONIBILIDADE: u8,
    // "Conhecimento técnico" = technical knowledge.
    CONHECIMENTO_TÉCNICO: u8,
    // "Didatismo" = didactic ability.
    DIDATISMO: u8,
    // Identifier of the evaluated talk, kept as text.
    ID_TALK: String
}

fn main() {
    match dotenv::dotenv().ok() {
        Some(_) => println!("Environment variables loaded from .env file"),
        None => eprintln!("Failed to load .env file, using defaults"),
    }

    // Read environment variables
    let OLLAMA_URL = env::var("OLLAMA_URL").unwrap_or("localhost".to_string());
    let OLLAMA_PORT = env::var("OLLAMA_PORT")
        .unwrap_or("11432".to_string())
        .parse::<u16>()
        .unwrap_or(11432);
    let OLLAMA_AI_MODEL_DATA_SANITIZATION = env::var("OLLAMA_AI_MODEL_DATA_SANITIZATION").expect("Missing environment variable OLLAMA_AI_MODEL_DATA_SANITIZATION");

    let ip_address = ipaddress::IPAddress::parse(OLLAMA_URL.to_string());
    let OLLAMA_SANITIZED_IP = match ip_address {
        Ok(ip) => {
            if ip.is_ipv4() {
                OLLAMA_URL.clone()
            } else {
                format!("[{}]", OLLAMA_URL.clone())
            }
        }
        Err(e) => OLLAMA_URL.clone(),
    };

    // Get the current day in the format YYYY-MM-DD
    let current_date = chrono::Local::now();
    let formatted_date = current_date.format("%Y-%m-%d").to_string();

    let previous_week_folder_names = std::fs::read_dir(std::path::Path::new("./evaluations")).expect("Failed to read directory ./evaluations")
    .filter_map_ok(|entry| {
        if entry.metadata().unwrap().is_dir(){
            Some(entry.file_name())
        }
        else {
            None
        }
    })
    .filter_map_ok(|entry_string_name| {
        let regex_match_date = regex::Regex::new(r"(\d{4}-\d{2}-\d{2})").expect("Failed to build regex");

        let filename = entry_string_name.to_str().unwrap();
        let matches_find = regex_match_date.find(filename);

        match matches_find {
            Some(found) => {
                let date = chrono::NaiveDate::parse_from_str(found.as_str(), "%Y-%m-%d");
                return Some((date.unwrap().week(chrono::Weekday::Sun), entry_string_name));
            },
            None => {return None;}
        };
    })
    .filter_map_ok(|(week, directory_string)| {
        let current_date = chrono::Local::now();
        let first_day_of_current_week = current_date.date_naive().week(chrono::Weekday::Sun).first_day();
        let current_date_minus_one_week = first_day_of_current_week.checked_sub_days(chrono::Days::new(1)).expect("Failed to subtract one day");
        let first_day_of_last_week = current_date_minus_one_week.week(chrono::Weekday::Sun).first_day();
        let first_day_of_week_in_folder_name = week.first_day();

        if first_day_of_last_week == first_day_of_week_in_folder_name {
            return Some(directory_string);
        }
        return None;
    })
    .filter_map(|value| {
        if value.is_ok() {return Some(value.unwrap());}
        else {return None;}
})
    .sorted()
    .collect_vec();

    println!("{:?}", previous_week_folder_names);

    let prompt_data_sanitization = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read PROMPT_DATA_SANITIZATION.txt");
    let client = reqwest::blocking::Client::new();

    let a  = previous_week_folder_names.iter().map(|folder_name| {
            let folder_base_path = std::path::Path::new("./evaluations");
            let folder_date_path = folder_base_path.join(folder_name);
            std::fs::read_dir(folder_date_path)
        })
        .filter_map_ok(|files_inside_folder_on_date |
        {
            files_inside_folder_on_date.filter_ok(|entry| {
                let entry_file_name_as_str = entry.file_name().into_string().expect("Failed to get filename as a String");

                entry_file_name_as_str.ends_with(".csv") &&
                !entry_file_name_as_str.contains("response_time.csv")

            })
            .filter_map(|value|{
                if value.is_ok() {return Some(value.unwrap());}
                None
            })
            .take(1)
            .map(|file_name_csv| {
                println!("{:?}", file_name_csv.path());
                let file_contents = std::fs::read_to_string(file_name_csv.path()).expect("Failed to read CSV file");

                let ollama_api_request = client.post(format!("http://{OLLAMA_SANITIZED_IP}:{OLLAMA_PORT}/api/generate"))
                    .body(
                        serde_json::json!({
                            "model": OLLAMA_AI_MODEL_DATA_SANITIZATION,
                            "prompt": format!("{prompt_data_sanitization} \n{file_contents}"),
                            "temperature": 0.0, // Get predictable and reproducible output
                            "stream": false,
                        }).to_string()
                    );

                let result = ollama_api_request.timeout(Duration::from_secs(3600)).send();

                match result {
                    Ok(response) => {println!("Response: {:?}", response);
                    let response_json = response.json::<serde_json::Value>().expect("Failed to deserialize response to JSON");
                    let ai_response = response_json["response"]
                        .as_str()
                        .expect("Failed to get AI response as string");

                    let ai_response = ai_response.to_string();

                    let ai_response = if let Some(resp) = ai_response.strip_prefix(" ").unwrap_or(&ai_response).strip_prefix("```csv\n") { resp.to_string() } else { ai_response };
                    let ai_response = if let Some(resp) = ai_response.strip_suffix(" ").unwrap_or(&ai_response).strip_suffix("```") { resp.to_string() } else { ai_response };

                    return Ok((ai_response, file_name_csv));

                },
                    Err(error) => {println!("Error {error}"); return Err(error);}
                };

            })
            .filter_map_ok(|(ai_repsonse, file_path_csv)| {
                let mut reader = csv::ReaderBuilder::new()
                .has_headers(true)
                .delimiter(b';')
                .from_reader(ai_repsonse.as_bytes());

                let mut deserialized_iter = reader.deserialize::<CsvHeader>();

                let correctly_parsed = deserialized_iter.all(|value| {
                    value.is_ok() && value.unwrap().PONTOS.is_some()
                });

                if !correctly_parsed { return None; }

                // Parse id talk from file_path
                // filename example is: CC - Erraoander Quintana - 515578 - 20251020515578.csv
                // id talk is the last information, so in the example is: 20251020515578
                let regex_filename = regex::Regex::new(r"(CC - )((\w+\s*)+) - (\d+) - (\d+).csv").unwrap();

                let filename = file_path_csv
                    .file_name()
                    .into_string()
                    .expect("Failed to convert file name as Rust &str");
                let found_regex_groups_in_filename = regex_filename.captures(
                    filename.as_str()
                ).expect("Failed to do regex capture");

                let talk_id = found_regex_groups_in_filename.get(5).expect("Failed to get the id from regex maches").clone();

                println!("{:?}", talk_id);
                // a.for_each(|val| { println!("{:?}", val)});

                // println!("{:?}", a);

                // Do validation on CSV parsed
                // let records_parsed = reader.records();

                // if records_parsed.try_len().expect("Failed to obtain lenght") != 9 { return None; }

                // reader.records().for_each(|record| println!("{:?}", record));


                return Some(());

            })
            .for_each(|_value| {});
            // println!("{:?}", files_inside_folder_on_date);

            return Some(());
        })
        .collect_vec();

    // println!("{:?}", a);


    // Read CSV files inside folder

    // Use AI to sanitize the data

    // Save into a hashmap, with the user name as key, the date, evaluation

    // Final file should look like
/*
Header: Att1, att2, att3, ...
categoria1
categoria2
categoria3
...

*/


}