Created
January 26, 2025 01:54
-
-
Save MANTENN/0a89cb85a7b30aaef26599ab6339e202 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(non_snake_case)]
use curl::easy::{Easy, List};
use scraper::{ElementRef, Html, Selector};
use serde::{de::Error, Deserialize, Serialize};
use std::time::Instant;
use warp::{
    http::{Response, StatusCode},
    Filter,
};
#[derive(Serialize, Deserialize)] | |
struct License { | |
business: String, | |
nameType: String, | |
license: i32, | |
city: String, | |
status: String | |
} | |
#[tokio::main] | |
async fn main() { | |
let cors = warp::cors() | |
.allow_origin("http://localhost:8000") | |
.allow_origin("https://www.localtier.com") | |
.allow_origin("https://localtier.com") | |
.allow_methods(vec!["GET"]); | |
#[cfg(target = "debug")] | |
cors.allow_any_origin(); | |
let index = warp::path!().map(|| { | |
Response::builder() | |
.header("Content-Type", "application/json") | |
.body(format!("Hello from index")) | |
}); | |
let staticPath = warp::path("static").and(warp::fs::dir("www/static")); | |
let contractorsRootPath = warp::path!("contractors").map(|| { | |
Response::builder() | |
.header("Content-Type", "application/json") | |
.body(format!("Contractors path")) | |
}); | |
let contractorLicenses = warp::path!("contractors"/"license"/ "search" / String).map(|name| { | |
let total = Instant::now(); | |
let now = Instant::now(); | |
let fetchedIn = Instant::now(); | |
let mut data = Vec::new(); | |
let mut handle = Easy::new(); | |
handle.verbose(false).map_err(|err| println!("verbose(false): {:?}", err)).ok(); | |
println!("\n\n------------------------------------\n\n"); | |
let mut headers = List::new(); | |
headers.append("Referer: https://t.co/").map_err(|err| println!("{:?}", err)).ok(); | |
handle.http_headers(headers).map_err(|err| println!("http_headers(headers): {:?}", err)).ok(); | |
let url = format!("https://www.cslb.ca.gov/OnlineServices/CheckLicenseII/NameSearch.aspx?NextName={}&NextLicNum=50", name); | |
println!("REQUESTING: {}", url); | |
handle.url(&url).unwrap(); | |
{ | |
let mut transfer = handle.transfer(); | |
transfer | |
.write_function(|new_data| { | |
data.extend_from_slice(new_data); | |
// print!("\n\n---\nData dump: {}\n\n---\n", std::str::from_utf8(&new_data).expect("expecting valid utf8 content; recieved invalid utf8 content")); | |
Ok(new_data.len()) | |
}) | |
.unwrap(); | |
transfer.perform().unwrap(); | |
} | |
let responseCode = handle.response_code().map_err(|e| print!("error grabbing response code: {}", e)).unwrap(); | |
println!("response: {}", responseCode); | |
// let redirectUrl = handle.redirect_url().unwrap().unwrap_or(""); | |
// println!("redirectUrl: {}", redirectUrl); | |
println!("[FETCHED IN]: {}ms", fetchedIn.elapsed().as_millis()); | |
let parseTimeInitialied = Instant::now(); | |
let dataAsString = String::from_utf8(data).unwrap(); | |
// println!("html: {}", dataAsString); | |
let document = Html::parse_document(&dataAsString); | |
let mut index = 0; | |
let mut licenses: Vec<License>= vec![]; | |
let selector = Selector::parse("#MainContent_dlMain > tbody > tr").unwrap(); | |
for element in document.select(&selector) { | |
// assert_eq!("span", element.value().name()); | |
// println!("{}", element.inner_html()); | |
let mut license = License { | |
business: "".to_string(), | |
nameType: "".to_string(), | |
license: 0, | |
city: "".to_string(), | |
status: "".to_string(), | |
}; | |
let mut fieldCount = 0; | |
let mut totalFields = 0; | |
let mut titleStringSelector = "#MainContent_dlMain_lblName_".to_owned(); | |
titleStringSelector.push_str(&index.to_string()); | |
let titleSelector = Selector::parse(&titleStringSelector).unwrap(); | |
for titleElement in element.select(&titleSelector) { | |
totalFields+=1; | |
assert_eq!("span", titleElement.value().name()); | |
let innerHtml = titleElement.inner_html(); | |
// println!("{}", innerHtml); | |
if innerHtml.is_empty() { | |
fieldCount +=1; | |
continue; | |
} | |
license.business = titleElement.inner_html() | |
} | |
let mut typeStringSelector = "#MainContent_dlMain_lblType_".to_owned(); | |
typeStringSelector.push_str(&index.to_string()); | |
// println!("{}", titleStringSelector); | |
let typeSelector = Selector::parse(&typeStringSelector).unwrap(); | |
for typeElement in element.select(&typeSelector) { | |
totalFields +=1; | |
assert_eq!("span", typeElement.value().name()); | |
let innerHtml = typeElement.inner_html(); | |
// println!("{}", innerHtml); | |
if innerHtml.is_empty() { | |
fieldCount +=1; | |
continue; | |
} | |
license.nameType = typeElement.inner_html() | |
} | |
let mut licenseStringSelector = "#MainContent_dlMain_hlLicense_".to_owned(); | |
licenseStringSelector.push_str(&index.to_string()); | |
// println!("{}", titleStringSelector); | |
let licenseSelector = Selector::parse(&licenseStringSelector).unwrap(); | |
for licenseElement in element.select(&licenseSelector) { | |
totalFields +=1; | |
assert_eq!("a", licenseElement.value().name()); | |
let innerHtml = licenseElement.inner_html(); | |
// println!("{}", innerHtml); | |
if innerHtml.is_empty() { | |
fieldCount +=1; | |
continue; | |
} | |
license.license = licenseElement.inner_html().parse::<i32>().unwrap() | |
} | |
let mut cityStringSelector = "#MainContent_dlMain_lblCity_".to_owned(); | |
cityStringSelector.push_str(&index.to_string()); | |
// println!("{}", titleStringSelector); | |
let citySelector = Selector::parse(&cityStringSelector).unwrap(); | |
for cityElement in element.select(&citySelector) { | |
totalFields +=1; | |
assert_eq!("span", cityElement.value().name()); | |
let innerHtml = cityElement.inner_html(); | |
// println!("{}", innerHtml); | |
if innerHtml.is_empty() { | |
fieldCount +=1; | |
continue; | |
} | |
license.city = cityElement.inner_html() | |
} | |
let mut licenseStringSelector = "#MainContent_dlMain_lblLicenseStatus_".to_owned(); | |
licenseStringSelector.push_str(&index.to_string()); | |
// println!("{}", titleStringSelector); | |
let licenseStatusSelector = Selector::parse(&licenseStringSelector).unwrap(); | |
for licenseStatusElement in element.select(&licenseStatusSelector) { | |
totalFields +=1; | |
assert_eq!("span", licenseStatusElement.value().name()); | |
let innerHtml = licenseStatusElement.inner_html(); | |
// println!("{}", innerHtml); | |
if innerHtml.is_empty() { | |
fieldCount +=1; | |
continue; | |
} license.status = licenseStatusElement.inner_html() | |
} | |
index += 1; | |
if fieldCount == totalFields { continue;} | |
licenses.push(license); | |
} | |
println!("[DONE IN]: {}ms", now.elapsed().as_millis()); | |
println!("[HTML]: {}ms", parseTimeInitialied.elapsed().as_millis()); | |
// println!("Hello, world!\n\n\n{}", selector.); | |
Response::builder() | |
.header("Content-Type", "application/json") | |
.body(convertLicensesToJson(&licenses, &total)) | |
}); | |
// GET /hello/warp => 200 OK with body "Hello, warp!" | |
let notFound = warp::any() | |
.map(|| { | |
print!("ROUTE"); | |
Response::builder().status(404).body("Not Found") | |
}); | |
let routes = warp::path::end() | |
.and(index) | |
.or(staticPath) | |
.or(contractorsRootPath) | |
.or(contractorLicenses) | |
.or(notFound).with(cors); | |
let warpServer = warp::serve(routes).run(([127, 0, 0, 1], 3030)).await; | |
warpServer | |
} | |
fn convertLicensesToJson(licenses: &Vec<License>, total: &Instant) -> String { | |
let parsedJsonIn = Instant::now(); | |
let json = serde_json::to_string(&licenses); | |
println!("[PARSED JSON]: {}ms", parsedJsonIn.elapsed().as_millis()); | |
println!("[TOTAL]: {}ms", total.elapsed().as_millis()); | |
// println!("{:?}", json); | |
match json{ | |
Ok(data) => data, | |
Err(error) => format!("{{\"error\": \"error converting json\", \"context\": \"{}\"}}", error) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment