diff --git a/src/groupped_repport.rs b/src/groupped_repport.rs index d338eac..9ca95dd 100644 --- a/src/groupped_repport.rs +++ b/src/groupped_repport.rs @@ -2,12 +2,35 @@ use std::fmt::Debug; use chrono::Datelike; use itertools::Itertools; +use polars::prelude::buffer::validate_utf8; use polars::prelude::*; use reqwest; use walkdir; use std::time::Duration; use std::env; +use csv; + +#[derive(Debug, serde::Deserialize)] +struct CsvHeader { + CATEGORIA: String, + PONTOS: Option, +} + +#[derive(Debug, serde::Deserialize)] +struct CsvEvaluation { + APRESENTAÇÃO: u8, + CONFIRMAÇÃO_DE_EMAIL: u8, + CONFIRMAÇÃO_DE_TELEFONE: u8, + PROTOCOLO: u8, + USO_DO_PORTUGUÊS: u8, + PACIÊNCIA_E_EDUCAÇÃO: u8, + DISPONIBILIDADE: u8, + CONHECIMENTO_TÉCNICO: u8, + DIDATISMO: u8, + ID_TALK: String +} + fn main() { match dotenv::dotenv().ok() { Some(_) => println!("Environment variables loaded from .env file"), @@ -105,14 +128,15 @@ fn main() { }) .take(1) .map(|file_name_csv| { + println!("{:?}", file_name_csv.path()); let file_contents = std::fs::read_to_string(file_name_csv.path()).expect("Failed to read CSV file"); - println!("{}", format!("{prompt_data_sanitization} \n{file_contents}")); let ollama_api_request = client.post(format!("http://{OLLAMA_SANITIZED_IP}:{OLLAMA_PORT}/api/generate")) .body( serde_json::json!({ "model": OLLAMA_AI_MODEL_DATA_SANITIZATION, "prompt": format!("{prompt_data_sanitization} \n{file_contents}"), + "temperature": 0.0, // Get predictable and reproducible output "stream": false, }).to_string() ); @@ -125,13 +149,65 @@ fn main() { let ai_response = response_json["response"] .as_str() .expect("Failed to get AI response as string"); - println!("AI Response: {}", ai_response); + + let ai_response = ai_response.to_string(); + + let ai_response = if let Some(resp) = ai_response.strip_prefix(" ").unwrap_or(&ai_response).strip_prefix("```csv\n") { resp.to_string() } else { ai_response }; + let ai_response = if let Some(resp) = ai_response.strip_suffix(" ").unwrap_or(&ai_response).strip_suffix("```") { resp.to_string() } else { ai_response }; + + return Ok((ai_response, file_name_csv)); + }, - Err(error) => {println!("Error {error}");} + Err(error) => {println!("Error {error}"); return Err(error);} }; }) - .for_each(|value| println!("{:?}", value)); + .filter_map_ok(|(ai_repsonse, file_path_csv)| { + let mut reader = csv::ReaderBuilder::new() + .has_headers(true) + .delimiter(b';') + .from_reader(ai_repsonse.as_bytes()); + + let mut deserialized_iter = reader.deserialize::(); + + let correctly_parsed = deserialized_iter.all(|value| { + value.is_ok() && value.unwrap().PONTOS.is_some() + }); + + if !correctly_parsed { return None; } + + // Parse id talk from file_path + // filename example is: CC - Erraoander Quintana - 515578 - 20251020515578.csv + // id talk is the last information, so in the example is: 20251020515578 + let regex_filename = regex::Regex::new(r"(CC - )((\w+\s*)+) - (\d+) - (\d+).csv").unwrap(); + + let filename = file_path_csv + .file_name() + .into_string() + .expect("Failed to convert file name as Rust &str"); + let found_regex_groups_in_filename = regex_filename.captures( + filename.as_str() + ).expect("Failed to do regex capture"); + + let talk_id = found_regex_groups_in_filename.get(5).expect("Failed to get the id from regex maches").clone(); + + println!("{:?}", talk_id); + // a.for_each(|val| { println!("{:?}", val)}); + + // println!("{:?}", a); + + // Do validation on CSV parsed + // let records_parsed = reader.records(); + + // if records_parsed.try_len().expect("Failed to obtain lenght") != 9 { return None; } + + // reader.records().for_each(|record| println!("{:?}", record)); + + + return Some(()); + + }) + .for_each(|_value| {}); // println!("{:?}", files_inside_folder_on_date); return Some(());