From 7fb6021e64178e0ccb39ad72fa58ecc330fb98f3 Mon Sep 17 00:00:00 2001 From: Jelson Rodrigues Date: Sat, 25 Oct 2025 12:09:28 -0300 Subject: [PATCH] chore: sanitize data with ai model --- src/groupped_repport.rs | 85 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/src/groupped_repport.rs b/src/groupped_repport.rs index 9408415..d338eac 100644 --- a/src/groupped_repport.rs +++ b/src/groupped_repport.rs @@ -5,10 +5,34 @@ use itertools::Itertools; use polars::prelude::*; use reqwest; use walkdir; +use std::time::Duration; +use std::env; fn main() { - let PROMPT = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read the promp for data sanitization"); + match dotenv::dotenv().ok() { + Some(_) => println!("Environment variables loaded from .env file"), + None => eprintln!("Failed to load .env file, using defaults"), + } + // Read environment variables + let OLLAMA_URL = env::var("OLLAMA_URL").unwrap_or("localhost".to_string()); + let OLLAMA_PORT = env::var("OLLAMA_PORT") + .unwrap_or("11432".to_string()) + .parse::() + .unwrap_or(11432); + let OLLAMA_AI_MODEL_DATA_SANITIZATION = env::var("OLLAMA_AI_MODEL_DATA_SANITIZATION").expect("Missing environment variable OLLAMA_AI_MODEL_DATA_SANITIZATION"); + + let ip_address = ipaddress::IPAddress::parse(OLLAMA_URL.to_string()); + let OLLAMA_SANITIZED_IP = match ip_address { + Ok(ip) => { + if ip.is_ipv4() { + OLLAMA_URL.clone() + } else { + format!("[{}]", OLLAMA_URL.clone()) + } + } + Err(e) => OLLAMA_URL.clone(), + }; // Get the current day in the format YYYY-MM-DD let current_date = chrono::Local::now(); @@ -58,6 +82,65 @@ fn main() { println!("{:?}", previous_week_folder_names); + let prompt_data_sanitization = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read PROMPT_DATA_SANITIZATION.txt"); + let client = reqwest::blocking::Client::new(); + + let a = previous_week_folder_names.iter().map(|folder_name| { + let folder_base_path = std::path::Path::new("./evaluations"); + let folder_date_path = folder_base_path.join(folder_name); + std::fs::read_dir(folder_date_path) + }) + .filter_map_ok(|files_inside_folder_on_date | + { + files_inside_folder_on_date.filter_ok(|entry| { + let entry_file_name_as_str = entry.file_name().into_string().expect("Failed to get filename as a String"); + + entry_file_name_as_str.ends_with(".csv") && + !entry_file_name_as_str.contains("response_time.csv") + + }) + .filter_map(|value|{ + if value.is_ok() {return Some(value.unwrap());} + None + }) + .take(1) + .map(|file_name_csv| { + let file_contents = std::fs::read_to_string(file_name_csv.path()).expect("Failed to read CSV file"); + + println!("{}", format!("{prompt_data_sanitization} \n{file_contents}")); + let ollama_api_request = client.post(format!("http://{OLLAMA_SANITIZED_IP}:{OLLAMA_PORT}/api/generate")) + .body( + serde_json::json!({ + "model": OLLAMA_AI_MODEL_DATA_SANITIZATION, + "prompt": format!("{prompt_data_sanitization} \n{file_contents}"), + "stream": false, + }).to_string() + ); + + let result = ollama_api_request.timeout(Duration::from_secs(3600)).send(); + + match result { + Ok(response) => {println!("Response: {:?}", response); + let response_json = response.json::().expect("Failed to deserialize response to JSON"); + let ai_response = response_json["response"] + .as_str() + .expect("Failed to get AI response as string"); + println!("AI Response: {}", ai_response); + }, + Err(error) => {println!("Error {error}");} + }; + + }) + .for_each(|value| println!("{:?}", value)); + // println!("{:?}", files_inside_folder_on_date); + + return Some(()); + }) + .collect_vec(); + + // println!("{:?}", a); + + // Read CSV files inside folder // Use AI to sanitize the data