Skip to content

Instantly share code, notes, and snippets.

@MANTENN
Created January 26, 2025 01:54
Show Gist options
  • Save MANTENN/0a89cb85a7b30aaef26599ab6339e202 to your computer and use it in GitHub Desktop.
Save MANTENN/0a89cb85a7b30aaef26599ab6339e202 to your computer and use it in GitHub Desktop.
#![allow(non_snake_case)]
use curl::easy::{Easy, List};
use scraper::{Html, Selector};
use std::time::{ Instant};
use warp::{Filter, http::Response};
use serde::{Deserialize, Serialize, de::Error};
/// One contractor-license search result, serialized to JSON by the
/// license-search endpoint.
///
/// Field names double as the JSON keys (hence the camelCase `nameType`), so
/// renaming or reordering fields changes the response format.
#[derive(Serialize, Deserialize)]
struct License {
/// Inner HTML of the row's `MainContent_dlMain_lblName_<n>` element; empty when absent.
business: String,
/// Inner HTML of the row's `MainContent_dlMain_lblType_<n>` element; empty when absent.
nameType: String,
/// Number parsed from the row's `MainContent_dlMain_hlLicense_<n>` link; `0` when absent.
license: i32,
/// Inner HTML of the row's `MainContent_dlMain_lblCity_<n>` element; empty when absent.
city: String,
/// Inner HTML of the row's `MainContent_dlMain_lblLicenseStatus_<n>` element; empty when absent.
status: String
}
#[tokio::main]
async fn main() {
let cors = warp::cors()
.allow_origin("http://localhost:8000")
.allow_origin("https://www.localtier.com")
.allow_origin("https://localtier.com")
.allow_methods(vec!["GET"]);
#[cfg(target = "debug")]
cors.allow_any_origin();
let index = warp::path!().map(|| {
Response::builder()
.header("Content-Type", "application/json")
.body(format!("Hello from index"))
});
let staticPath = warp::path("static").and(warp::fs::dir("www/static"));
let contractorsRootPath = warp::path!("contractors").map(|| {
Response::builder()
.header("Content-Type", "application/json")
.body(format!("Contractors path"))
});
let contractorLicenses = warp::path!("contractors"/"license"/ "search" / String).map(|name| {
let total = Instant::now();
let now = Instant::now();
let fetchedIn = Instant::now();
let mut data = Vec::new();
let mut handle = Easy::new();
handle.verbose(false).map_err(|err| println!("verbose(false): {:?}", err)).ok();
println!("\n\n------------------------------------\n\n");
let mut headers = List::new();
headers.append("Referer: https://t.co/").map_err(|err| println!("{:?}", err)).ok();
handle.http_headers(headers).map_err(|err| println!("http_headers(headers): {:?}", err)).ok();
let url = format!("https://www.cslb.ca.gov/OnlineServices/CheckLicenseII/NameSearch.aspx?NextName={}&NextLicNum=50", name);
println!("REQUESTING: {}", url);
handle.url(&url).unwrap();
{
let mut transfer = handle.transfer();
transfer
.write_function(|new_data| {
data.extend_from_slice(new_data);
// print!("\n\n---\nData dump: {}\n\n---\n", std::str::from_utf8(&new_data).expect("expecting valid utf8 content; recieved invalid utf8 content"));
Ok(new_data.len())
})
.unwrap();
transfer.perform().unwrap();
}
let responseCode = handle.response_code().map_err(|e| print!("error grabbing response code: {}", e)).unwrap();
println!("response: {}", responseCode);
// let redirectUrl = handle.redirect_url().unwrap().unwrap_or("");
// println!("redirectUrl: {}", redirectUrl);
println!("[FETCHED IN]: {}ms", fetchedIn.elapsed().as_millis());
let parseTimeInitialied = Instant::now();
let dataAsString = String::from_utf8(data).unwrap();
// println!("html: {}", dataAsString);
let document = Html::parse_document(&dataAsString);
let mut index = 0;
let mut licenses: Vec<License>= vec![];
let selector = Selector::parse("#MainContent_dlMain > tbody > tr").unwrap();
for element in document.select(&selector) {
// assert_eq!("span", element.value().name());
// println!("{}", element.inner_html());
let mut license = License {
business: "".to_string(),
nameType: "".to_string(),
license: 0,
city: "".to_string(),
status: "".to_string(),
};
let mut fieldCount = 0;
let mut totalFields = 0;
let mut titleStringSelector = "#MainContent_dlMain_lblName_".to_owned();
titleStringSelector.push_str(&index.to_string());
let titleSelector = Selector::parse(&titleStringSelector).unwrap();
for titleElement in element.select(&titleSelector) {
totalFields+=1;
assert_eq!("span", titleElement.value().name());
let innerHtml = titleElement.inner_html();
// println!("{}", innerHtml);
if innerHtml.is_empty() {
fieldCount +=1;
continue;
}
license.business = titleElement.inner_html()
}
let mut typeStringSelector = "#MainContent_dlMain_lblType_".to_owned();
typeStringSelector.push_str(&index.to_string());
// println!("{}", titleStringSelector);
let typeSelector = Selector::parse(&typeStringSelector).unwrap();
for typeElement in element.select(&typeSelector) {
totalFields +=1;
assert_eq!("span", typeElement.value().name());
let innerHtml = typeElement.inner_html();
// println!("{}", innerHtml);
if innerHtml.is_empty() {
fieldCount +=1;
continue;
}
license.nameType = typeElement.inner_html()
}
let mut licenseStringSelector = "#MainContent_dlMain_hlLicense_".to_owned();
licenseStringSelector.push_str(&index.to_string());
// println!("{}", titleStringSelector);
let licenseSelector = Selector::parse(&licenseStringSelector).unwrap();
for licenseElement in element.select(&licenseSelector) {
totalFields +=1;
assert_eq!("a", licenseElement.value().name());
let innerHtml = licenseElement.inner_html();
// println!("{}", innerHtml);
if innerHtml.is_empty() {
fieldCount +=1;
continue;
}
license.license = licenseElement.inner_html().parse::<i32>().unwrap()
}
let mut cityStringSelector = "#MainContent_dlMain_lblCity_".to_owned();
cityStringSelector.push_str(&index.to_string());
// println!("{}", titleStringSelector);
let citySelector = Selector::parse(&cityStringSelector).unwrap();
for cityElement in element.select(&citySelector) {
totalFields +=1;
assert_eq!("span", cityElement.value().name());
let innerHtml = cityElement.inner_html();
// println!("{}", innerHtml);
if innerHtml.is_empty() {
fieldCount +=1;
continue;
}
license.city = cityElement.inner_html()
}
let mut licenseStringSelector = "#MainContent_dlMain_lblLicenseStatus_".to_owned();
licenseStringSelector.push_str(&index.to_string());
// println!("{}", titleStringSelector);
let licenseStatusSelector = Selector::parse(&licenseStringSelector).unwrap();
for licenseStatusElement in element.select(&licenseStatusSelector) {
totalFields +=1;
assert_eq!("span", licenseStatusElement.value().name());
let innerHtml = licenseStatusElement.inner_html();
// println!("{}", innerHtml);
if innerHtml.is_empty() {
fieldCount +=1;
continue;
} license.status = licenseStatusElement.inner_html()
}
index += 1;
if fieldCount == totalFields { continue;}
licenses.push(license);
}
println!("[DONE IN]: {}ms", now.elapsed().as_millis());
println!("[HTML]: {}ms", parseTimeInitialied.elapsed().as_millis());
// println!("Hello, world!\n\n\n{}", selector.);
Response::builder()
.header("Content-Type", "application/json")
.body(convertLicensesToJson(&licenses, &total))
});
// GET /hello/warp => 200 OK with body "Hello, warp!"
let notFound = warp::any()
.map(|| {
print!("ROUTE");
Response::builder().status(404).body("Not Found")
});
let routes = warp::path::end()
.and(index)
.or(staticPath)
.or(contractorsRootPath)
.or(contractorLicenses)
.or(notFound).with(cors);
let warpServer = warp::serve(routes).run(([127, 0, 0, 1], 3030)).await;
warpServer
}
fn convertLicensesToJson(licenses: &Vec<License>, total: &Instant) -> String {
let parsedJsonIn = Instant::now();
let json = serde_json::to_string(&licenses);
println!("[PARSED JSON]: {}ms", parsedJsonIn.elapsed().as_millis());
println!("[TOTAL]: {}ms", total.elapsed().as_millis());
// println!("{:?}", json);
match json{
Ok(data) => data,
Err(error) => format!("{{\"error\": \"error converting json\", \"context\": \"{}\"}}", error)
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment