use std::fmt::Debug; use chrono::Datelike; use itertools::Itertools; use polars::prelude::buffer::validate_utf8; use polars::prelude::*; use reqwest; use walkdir; use std::time::Duration; use std::env; use csv; #[derive(Debug, serde::Deserialize)] struct CsvHeader { CATEGORIA: String, PONTOS: Option, } #[derive(Debug, serde::Deserialize)] struct CsvEvaluation { APRESENTAÇÃO: u8, CONFIRMAÇÃO_DE_EMAIL: u8, CONFIRMAÇÃO_DE_TELEFONE: u8, PROTOCOLO: u8, USO_DO_PORTUGUÊS: u8, PACIÊNCIA_E_EDUCAÇÃO: u8, DISPONIBILIDADE: u8, CONHECIMENTO_TÉCNICO: u8, DIDATISMO: u8, ID_TALK: String } fn main() { match dotenv::dotenv().ok() { Some(_) => println!("Environment variables loaded from .env file"), None => eprintln!("Failed to load .env file, using defaults"), } // Read environment variables let OLLAMA_URL = env::var("OLLAMA_URL").unwrap_or("localhost".to_string()); let OLLAMA_PORT = env::var("OLLAMA_PORT") .unwrap_or("11432".to_string()) .parse::() .unwrap_or(11432); let OLLAMA_AI_MODEL_DATA_SANITIZATION = env::var("OLLAMA_AI_MODEL_DATA_SANITIZATION").expect("Missing environment variable OLLAMA_AI_MODEL_DATA_SANITIZATION"); let ip_address = ipaddress::IPAddress::parse(OLLAMA_URL.to_string()); let OLLAMA_SANITIZED_IP = match ip_address { Ok(ip) => { if ip.is_ipv4() { OLLAMA_URL.clone() } else { format!("[{}]", OLLAMA_URL.clone()) } } Err(e) => OLLAMA_URL.clone(), }; // Get the current day in the format YYYY-MM-DD let current_date = chrono::Local::now(); let formatted_date = current_date.format("%Y-%m-%d").to_string(); let previous_week_folder_names = std::fs::read_dir(std::path::Path::new("./evaluations")).expect("Failed to read directory ./evaluations") .filter_map_ok(|entry| { if entry.metadata().unwrap().is_dir(){ Some(entry.file_name()) } else { None } }) .filter_map_ok(|entry_string_name| { let regex_match_date = regex::Regex::new(r"(\d{4}-\d{2}-\d{2})").expect("Failed to build regex"); let filename = entry_string_name.to_str().unwrap(); let matches_find = regex_match_date.find(filename); match matches_find { Some(found) => { let date = chrono::NaiveDate::parse_from_str(found.as_str(), "%Y-%m-%d"); return Some((date.unwrap().week(chrono::Weekday::Sun), entry_string_name)); }, None => {return None;} }; }) .filter_map_ok(|(week, directory_string)| { let current_date = chrono::Local::now(); let first_day_of_current_week = current_date.date_naive().week(chrono::Weekday::Sun).first_day(); let current_date_minus_one_week = first_day_of_current_week.checked_sub_days(chrono::Days::new(1)).expect("Failed to subtract one day"); let first_day_of_last_week = current_date_minus_one_week.week(chrono::Weekday::Sun).first_day(); let first_day_of_week_in_folder_name = week.first_day(); if first_day_of_last_week == first_day_of_week_in_folder_name { return Some(directory_string); } return None; }) .filter_map(|value| { if value.is_ok() {return Some(value.unwrap());} else {return None;} }) .sorted() .collect_vec(); println!("{:?}", previous_week_folder_names); let prompt_data_sanitization = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read PROMPT_DATA_SANITIZATION.txt"); let client = reqwest::blocking::Client::new(); let a = previous_week_folder_names.iter().map(|folder_name| { let folder_base_path = std::path::Path::new("./evaluations"); let folder_date_path = folder_base_path.join(folder_name); std::fs::read_dir(folder_date_path) }) .filter_map_ok(|files_inside_folder_on_date | { files_inside_folder_on_date.filter_ok(|entry| { let entry_file_name_as_str = entry.file_name().into_string().expect("Failed to get filename as a String"); entry_file_name_as_str.ends_with(".csv") && !entry_file_name_as_str.contains("response_time.csv") }) .filter_map(|value|{ if value.is_ok() {return Some(value.unwrap());} None }) .take(1) .map(|file_name_csv| { println!("{:?}", file_name_csv.path()); let file_contents = std::fs::read_to_string(file_name_csv.path()).expect("Failed to read CSV file"); let ollama_api_request = client.post(format!("http://{OLLAMA_SANITIZED_IP}:{OLLAMA_PORT}/api/generate")) .body( serde_json::json!({ "model": OLLAMA_AI_MODEL_DATA_SANITIZATION, "prompt": format!("{prompt_data_sanitization} \n{file_contents}"), "temperature": 0.0, // Get predictable and reproducible output "stream": false, }).to_string() ); let result = ollama_api_request.timeout(Duration::from_secs(3600)).send(); match result { Ok(response) => {println!("Response: {:?}", response); let response_json = response.json::().expect("Failed to deserialize response to JSON"); let ai_response = response_json["response"] .as_str() .expect("Failed to get AI response as string"); let ai_response = ai_response.to_string(); let ai_response = if let Some(resp) = ai_response.strip_prefix(" ").unwrap_or(&ai_response).strip_prefix("```csv\n") { resp.to_string() } else { ai_response }; let ai_response = if let Some(resp) = ai_response.strip_suffix(" ").unwrap_or(&ai_response).strip_suffix("```") { resp.to_string() } else { ai_response }; return Ok((ai_response, file_name_csv)); }, Err(error) => {println!("Error {error}"); return Err(error);} }; }) .filter_map_ok(|(ai_repsonse, file_path_csv)| { let mut reader = csv::ReaderBuilder::new() .has_headers(true) .delimiter(b';') .from_reader(ai_repsonse.as_bytes()); let mut deserialized_iter = reader.deserialize::(); let correctly_parsed = deserialized_iter.all(|value| { value.is_ok() && value.unwrap().PONTOS.is_some() }); if !correctly_parsed { return None; } // Parse id talk from file_path // filename example is: CC - Erraoander Quintana - 515578 - 20251020515578.csv // id talk is the last information, so in the example is: 20251020515578 let regex_filename = regex::Regex::new(r"(CC - )((\w+\s*)+) - (\d+) - (\d+).csv").unwrap(); let filename = file_path_csv .file_name() .into_string() .expect("Failed to convert file name as Rust &str"); let found_regex_groups_in_filename = regex_filename.captures( filename.as_str() ).expect("Failed to do regex capture"); let talk_id = found_regex_groups_in_filename.get(5).expect("Failed to get the id from regex maches").clone(); println!("{:?}", talk_id); // a.for_each(|val| { println!("{:?}", val)}); // println!("{:?}", a); // Do validation on CSV parsed // let records_parsed = reader.records(); // if records_parsed.try_len().expect("Failed to obtain lenght") != 9 { return None; } // reader.records().for_each(|record| println!("{:?}", record)); return Some(()); }) .for_each(|_value| {}); // println!("{:?}", files_inside_folder_on_date); return Some(()); }) .collect_vec(); // println!("{:?}", a); // Read CSV files inside folder // Use AI to sanitize the data // Save into a hashmap, with the user name as key, the date, evaluation // Final file should look like /* Header: Att1, att2, att3, ... categoria1 categoria2 categoria3 ... */ }