chore: sanitize data with ai model
This commit is contained in:
@@ -5,10 +5,34 @@ use itertools::Itertools;
|
|||||||
use polars::prelude::*;
|
use polars::prelude::*;
|
||||||
use reqwest;
|
use reqwest;
|
||||||
use walkdir;
|
use walkdir;
|
||||||
|
use std::time::Duration;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let PROMPT = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read the promp for data sanitization");
|
match dotenv::dotenv().ok() {
|
||||||
|
Some(_) => println!("Environment variables loaded from .env file"),
|
||||||
|
None => eprintln!("Failed to load .env file, using defaults"),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read environment variables
|
||||||
|
let OLLAMA_URL = env::var("OLLAMA_URL").unwrap_or("localhost".to_string());
|
||||||
|
let OLLAMA_PORT = env::var("OLLAMA_PORT")
|
||||||
|
.unwrap_or("11432".to_string())
|
||||||
|
.parse::<u16>()
|
||||||
|
.unwrap_or(11432);
|
||||||
|
let OLLAMA_AI_MODEL_DATA_SANITIZATION = env::var("OLLAMA_AI_MODEL_DATA_SANITIZATION").expect("Missing environment variable OLLAMA_AI_MODEL_DATA_SANITIZATION");
|
||||||
|
|
||||||
|
let ip_address = ipaddress::IPAddress::parse(OLLAMA_URL.to_string());
|
||||||
|
let OLLAMA_SANITIZED_IP = match ip_address {
|
||||||
|
Ok(ip) => {
|
||||||
|
if ip.is_ipv4() {
|
||||||
|
OLLAMA_URL.clone()
|
||||||
|
} else {
|
||||||
|
format!("[{}]", OLLAMA_URL.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => OLLAMA_URL.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
// Get the current day in the format YYYY-MM-DD
|
// Get the current day in the format YYYY-MM-DD
|
||||||
let current_date = chrono::Local::now();
|
let current_date = chrono::Local::now();
|
||||||
@@ -58,6 +82,65 @@ fn main() {
|
|||||||
|
|
||||||
println!("{:?}", previous_week_folder_names);
|
println!("{:?}", previous_week_folder_names);
|
||||||
|
|
||||||
|
let prompt_data_sanitization = std::fs::read_to_string("./PROMPT_DATA_SANITIZATION.txt").expect("Failed to read PROMPT_DATA_SANITIZATION.txt");
|
||||||
|
let client = reqwest::blocking::Client::new();
|
||||||
|
|
||||||
|
let a = previous_week_folder_names.iter().map(|folder_name| {
|
||||||
|
let folder_base_path = std::path::Path::new("./evaluations");
|
||||||
|
let folder_date_path = folder_base_path.join(folder_name);
|
||||||
|
std::fs::read_dir(folder_date_path)
|
||||||
|
})
|
||||||
|
.filter_map_ok(|files_inside_folder_on_date |
|
||||||
|
{
|
||||||
|
files_inside_folder_on_date.filter_ok(|entry| {
|
||||||
|
let entry_file_name_as_str = entry.file_name().into_string().expect("Failed to get filename as a String");
|
||||||
|
|
||||||
|
entry_file_name_as_str.ends_with(".csv") &&
|
||||||
|
!entry_file_name_as_str.contains("response_time.csv")
|
||||||
|
|
||||||
|
})
|
||||||
|
.filter_map(|value|{
|
||||||
|
if value.is_ok() {return Some(value.unwrap());}
|
||||||
|
None
|
||||||
|
})
|
||||||
|
.take(1)
|
||||||
|
.map(|file_name_csv| {
|
||||||
|
let file_contents = std::fs::read_to_string(file_name_csv.path()).expect("Failed to read CSV file");
|
||||||
|
|
||||||
|
println!("{}", format!("{prompt_data_sanitization} \n{file_contents}"));
|
||||||
|
let ollama_api_request = client.post(format!("http://{OLLAMA_SANITIZED_IP}:{OLLAMA_PORT}/api/generate"))
|
||||||
|
.body(
|
||||||
|
serde_json::json!({
|
||||||
|
"model": OLLAMA_AI_MODEL_DATA_SANITIZATION,
|
||||||
|
"prompt": format!("{prompt_data_sanitization} \n{file_contents}"),
|
||||||
|
"stream": false,
|
||||||
|
}).to_string()
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = ollama_api_request.timeout(Duration::from_secs(3600)).send();
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(response) => {println!("Response: {:?}", response);
|
||||||
|
let response_json = response.json::<serde_json::Value>().expect("Failed to deserialize response to JSON");
|
||||||
|
let ai_response = response_json["response"]
|
||||||
|
.as_str()
|
||||||
|
.expect("Failed to get AI response as string");
|
||||||
|
println!("AI Response: {}", ai_response);
|
||||||
|
},
|
||||||
|
Err(error) => {println!("Error {error}");}
|
||||||
|
};
|
||||||
|
|
||||||
|
})
|
||||||
|
.for_each(|value| println!("{:?}", value));
|
||||||
|
// println!("{:?}", files_inside_folder_on_date);
|
||||||
|
|
||||||
|
return Some(());
|
||||||
|
})
|
||||||
|
.collect_vec();
|
||||||
|
|
||||||
|
// println!("{:?}", a);
|
||||||
|
|
||||||
|
|
||||||
// Read CSV files inside folder
|
// Read CSV files inside folder
|
||||||
|
|
||||||
// Use AI to sanitize the data
|
// Use AI to sanitize the data
|
||||||
|
|||||||
Reference in New Issue
Block a user