Last active
February 23, 2024 22:21
-
-
Save wheelerlaw/5063fa83e845d4214977e207514689bb to your computer and use it in GitHub Desktop.
wines of argentina
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as fs from 'fs';
import { pipeline } from 'node:stream/promises';

import axios, {AxiosResponse} from "axios";
import * as csv from 'csv-stringify'
import { JSDOM } from 'jsdom'
/************************************************************
 * WINERY LIST
 *
 * Request/response shapes used with the `wineries/listFront` endpoint.
 */

/** Body posted by `getWineriesPage` to fetch one page of the winery listing. */
export type WineryListRequest = {
  /** Columns the listing should expose, optionally with per-column filters. */
  columns: Column[];
  /** Page size (filled from `pageLength` by `createWineryListRequest`). */
  length: number;
  /** Sort instructions. */
  order: Order[];
  /** Global search term. */
  search: WineryListRequestSearch;
  /** Offset of the first record of the requested page. */
  start: number;
}

/** One column descriptor of a list request (shared by winery and wine requests). */
export type Column = {
  data: string;
  name: string;
  searchable?: boolean;
  /** Optional per-column filter. */
  search?: ColumnSearch;
}

/** Sort instruction: column name plus direction (the request builders send "asc"). */
export type Order = {
  column: string;
  dir: string;
}

/** Global search term of a winery list request. */
export type WineryListRequestSearch = {
  value: string;
}

/** One page of the winery listing together with the record counters. */
export type WineryListResponse = {
  recordsFiltered: number;
  /** Counter `getWineries` uses to decide when paging is complete. */
  recordsTotal: number;
  data: WineryListItem[];
  error: null;
  pagination: null;
}

/** Summary row of the winery listing. */
export type WineryListItem = {
  id: string;
  name: string;
  /** Code passed to `getWineryInfo` to fetch the full winery record. */
  code: string;
  logo: Logo[];
  winery_debt: string;
  province: null | string;
  region: null | string;
  province_code: null | string;
  region_code: null | string;
}

/** Logo entry attached to a winery list row. */
export type Logo = {
  id: string;
  obj: string;
  id_obj: string;
  attr: string;
  attachment: Attachment;
}

/** File metadata for an uploaded asset. */
export type Attachment = {
  id: string;
  name: string;
  filename: string;
  type: string;
  url: string;
}
/************************************************************
 * WINERY DETAILS
 *
 * Shapes used with the `wineries/code/{code}` endpoint, plus the flattened
 * CSV row derived from them.
 */

/** Full winery record as fetched by `getWineryInfo`. */
export type Winery = {
  id: string;
  code: string;
  name: string;
  business_name: string;
  cuit: string;
  associate: string;
  size: Size;
  enabled: string;
  /** Classifications of the winery; `null` when none are assigned. */
  winery_type: WineryType[] | null;
  winery_certifier: null;
  /** Description; `listWineries` parses it as HTML and keeps only the text. */
  description: string;
  main_export_markets: MainExportMarket[] | null;
  total_production: string;
  logo: Gallery[];
  gallery: Gallery[];
  addresses: AddressElement[];
  social: Social[];
  services: Service[] | null;
  winemakers: Winemaker[] | null;
  client_code: string;
  email: string;
  phone: string;
  created: null;
  winery_debt: string;
  province: string;
  region: string;
  province_code: string;
  region_code: string;
}

/** Minimal winery identity used as the `winery` column filter of a wine list request. */
export type WinerySearch = Pick<Winery, 'id' | 'code' | 'name'>

/** Link between a winery and one of its addresses, including the address type. */
export type AddressElement = {
  id: string;
  id_winery: string;
  id_address: string;
  id_winery_address_type: string;
  address: AddressAddress;
  type: Size;
}

/** Postal address with coordinates. */
export type AddressAddress = {
  id: string;
  address: string;
  address_extra: null;
  latitude: string;
  longitude: string;
  department: null;
  province: string;
  country: string;
  postal_code: string;
  id_province: string;
  id_country: string;
}

/**
 * Generic id/name pair. Reused in this file for the winery size and for the
 * `type` of addresses, social links, winemakers, winery types and services,
 * as well as for `Prize.prize`.
 */
export type Size = {
  id: string;
  name: string;
}

/** Image attached to a winery (used for both `logo` and `gallery`). */
export type Gallery = {
  id: string;
  obj: Obj;
  id_obj: string;
  attr: Attr;
  attachment: Attachment;
}

/** Link between a winery and one of its main export markets. */
export type MainExportMarket = {
  id: string;
  id_winery: string;
  id_country: string;
  market: Market;
}

/** Country record referenced by an export market. */
export type Market = {
  id: string;
  name: string;
  iso_num: string;
  iso2: string;
  iso3: string;
  code: null;
}

/** Social/contact link; `listWineries` picks the entries whose `type.name` is "Website". */
export type Social = {
  id: string;
  value: string;
  id_winery: string;
  id_social_network: string;
  type: Size;
}

/** Winemaker associated with a winery. */
export type Winemaker = {
  id: string;
  name: string;
  id_winery: string;
  id_type: string;
  type: Size;
}

/** Link between a winery and one of its classifications. */
export type WineryType = {
  id: string;
  id_winery: string;
  id_winery_type: string;
  type: Size;
}

/** Flattened, text-only winery row that `listWineries` writes to the CSV file. */
export type WineryRecord = {
  name: string;
  description: string;
  type: string;
  varietals: string;
  email: string;
  phone: string;
  website: string;
  address: string;
}
/************************************************************
 * WINE LIST
 *
 * Request/response shapes used with the `wines/listFront` endpoint.
 */

/** Body posted by `getWinesPage` to fetch one page of wines. */
export type WineListRequest = {
  columns: Column[];
  /** Page size (filled from `pageLength` by `createWineListRequest`). */
  length: number;
  order: Order[];
  search: WineListRequestSearch;
  /** Offset of the first record of the requested page. */
  start: number;
}

/** Per-column filter: either a list of entities or a numeric flag. */
export type ColumnSearch = {
  value: ValueElement[] | number;
}

/** Entity reference usable inside a column filter (id, name and code). */
export type ValueElement = {
  id: string;
  name: string;
  code: string;
}

/** Global search term of a wine list request. */
export type WineListRequestSearch = {
  value: string;
}

/** One page of the wine listing together with the record counters. */
export type WineListResponse = {
  /** Counter `getWines` uses to decide when paging is complete. */
  recordsFiltered: number;
  recordsTotal: number;
  data: Wine[];
  error: null;
  pagination: null;
}

/** One wine row of the wine listing. */
export type Wine = {
  id: string;
  code: string;
  name: string;
  image: Image[] | null;
  prizes: Prize[] | null;
  price: Price;
  /** Grape varieties of the wine; `listWineries` collects their names per winery. */
  varieties: VarietyElement[];
  alcohol: string;
  ph_level: null | string;
  acidity: null | string;
  residual_sugar: null | string;
  show_in_web: string;
  winery: Winery;
  harvest: string;
}

/** Image attached to a wine. */
export type Image = {
  id: string;
  obj: Obj;
  id_obj: string;
  attr: Attr;
  attachment: Attachment;
}

/** Known image file types. */
export type Type = "jpeg" | "jpg";
/** Role of an attached asset. */
export type Attr = "image" | "gallery" | "logo";
/** Kind of entity an asset is attached to. */
export type Obj = "wine" | "winery";

/** Price entry of a wine. */
export type Price = {
  id: string;
  name: string;
  order: string;
}

/** Award given to a wine. */
export type Prize = {
  id: string;
  id_wine: string;
  id_prize: string;
  prize: Size;
  points: string;
  year: string;
}

/** Link between a wine and one grape variety, with its share of the blend. */
export type VarietyElement = {
  id: string;
  id_wine: string;
  id_wine_variety: string;
  variety: ColorClass;
  percentage: string;
}

/** Grape variety; may carry a nested `color` entry of the same shape. */
export type ColorClass = {
  id: string;
  name: string;
  color?: ColorClass;
  code: string;
}

/** Service offered by a winery (referenced from `Winery.services`). */
export type Service = {
  id: string;
  id_winery: string;
  id_service_type?: string;
  type: Size;
  id_winery_type?: string;
}
/**
 * Builds the request body for one page of the winery listing.
 *
 * @param index - Offset of the first record to return (sent as `start`).
 * @param pageLength - Number of records requested per page (sent as `length`).
 * @returns A request sorted by name ascending, with an empty global search
 *          and a `province` column filter of `0`.
 */
function createWineryListRequest(index: number, pageLength: number = 50): WineryListRequest {
  return {
    length: pageLength,
    order: [{ column: "name", dir: "asc" }],
    search: { value: "" },
    start: index,
    columns: [
      { name: "id", data: "id" },
      { name: "name", data: "name", searchable: true },
      { name: "code", data: "code" },
      { name: "province", data: "province", search: { value: 0 } },
    ],
  };
}
/**
 * Awaits an in-flight axios request and unwraps its payload.
 *
 * @param req - Pending axios call (anything that resolves to an `AxiosResponse`).
 * @returns The response body (`data`).
 * @throws Error when the response status is anything other than 200; the
 *         message carries the status code and status text.
 */
async function doRequest<T>(req: PromiseLike<AxiosResponse<T>>): Promise<T> {
  const res = await req;
  if (res.status !== 200) {
    throw new Error(`Something happened! ${res.status} ${res.statusText}`);
  }
  return res.data;
}
/**
 * Fetches one page of the winery listing.
 *
 * @param req - Paging/sorting/filter description of the page to fetch.
 * @returns The decoded list response.
 * @throws Whatever `doRequest` or axios throws on a failed request.
 */
async function getWineriesPage(req: WineryListRequest): Promise<WineryListResponse> {
  const url = 'https://api.winesofargentina.org/index.php/wineries/listFront';
  console.log(`Requesting: ${url}`);
  // Typing the axios call keeps the payload from silently degrading to `any`.
  return doRequest(axios.post<WineryListResponse>(url, req, { headers: { 'Accept-Language': 'en' } }));
}
/**
 * Fetches the full record of a single winery.
 *
 * @param code - Winery code as found in `WineryListItem.code`.
 * @returns The decoded winery record.
 * @throws Whatever `doRequest` or axios throws on a failed request.
 */
async function getWineryInfo(code: string): Promise<Winery> {
  // Encode the code so characters such as "/", "?" or spaces cannot alter the
  // request path; plain alphanumeric codes are left unchanged.
  const url = `https://api.winesofargentina.org/index.php/wineries/code/${encodeURIComponent(code)}`;
  console.log(`Requesting: ${url}`);
  return doRequest(axios.get<Winery>(url, { headers: { 'Accept-Language': 'en' } }));
}
/**
 * Lazily walks the whole winery listing, fetching page after page.
 *
 * Paging continues until the number of rows received reaches the
 * `recordsTotal` reported by the first response, or until a page comes back
 * empty.
 *
 * @returns An async generator yielding every winery list row in listing order.
 */
async function* getWineries(): AsyncGenerator<WineryListItem, void, undefined> {
  let totalRecords = -1;
  let index = 0;
  while (totalRecords === -1 || index < totalRecords) {
    const response = await getWineriesPage(createWineryListRequest(index));
    if (totalRecords === -1) {
      totalRecords = response.recordsTotal;
    }
    // An empty page cannot advance `index`; without this guard a reported
    // total larger than the rows actually served would spin forever.
    if (response.data.length === 0) {
      return;
    }
    index += response.data.length;
    yield* response.data;
  }
}
/**
 * Builds the request body for one page of the wines of a single winery.
 *
 * @param winery - Identity (id, code, name) used as the `winery` column filter.
 * @param index - Offset of the first record to return (sent as `start`).
 * @param pageLength - Sent as `length`. Defaults to 0.
 *        NOTE(review): presumably the API treats 0 as "no limit" — confirm.
 * @returns A request sorted by `wine.name` ascending, restricted to the given
 *          winery and to `show_in_web` = 1.
 */
function createWineListRequest(winery: WinerySearch, index: number, pageLength: number = 0): WineListRequest {
  return {
    length: pageLength,
    order: [{ column: "wine.name", dir: "asc" }],
    search: { value: "" },
    start: index,
    columns: [
      { name: "id", data: "id" },
      { name: "code", data: "code" },
      { name: "name", data: "name", searchable: true },
      { name: "prizes", data: "prizes" },
      { name: "price", data: "price" },
      { name: "pts_st", data: "pts_st" },
      { name: "pts_we", data: "pts_we" },
      { name: "varieties", data: "varieties" },
      { name: "ids_varieties", data: "ids_varieties", search: { value: [] } },
      { name: "alcohol", data: "alcohol" },
      { name: "ph_level", data: "ph_level" },
      { name: "acidity", data: "acidity" },
      { name: "residual_sugar", data: "residual_sugar" },
      { name: "show_in_web", data: "show_in_web", search: { value: 1 } },
      { name: "winery", data: "winery", search: { value: [winery] } },
      { name: "harvest", data: "harvest", search: { value: [] } },
    ],
  };
}
/**
 * Lazily walks every wine of one winery, fetching page after page.
 *
 * Paging continues until the number of rows received reaches the
 * `recordsFiltered` reported by the first response, or until a page comes
 * back empty.
 *
 * @param winery - Identity of the winery whose wines are listed.
 * @returns An async generator yielding each wine in listing order.
 */
async function* getWines(winery: WinerySearch): AsyncGenerator<Wine, void, undefined> {
  let recordCount = -1;
  let index = 0;
  while (recordCount === -1 || index < recordCount) {
    const response = await getWinesPage(createWineListRequest(winery, index));
    if (recordCount === -1) {
      recordCount = response.recordsFiltered;
    }
    // An empty page cannot advance `index`; stop instead of re-requesting the
    // same offset forever.
    if (response.data.length === 0) {
      return;
    }
    index += response.data.length;
    yield* response.data;
  }
}
/**
 * Fetches one page of the wine listing.
 *
 * @param req - Paging/sorting/filter description of the page to fetch.
 * @returns The decoded list response.
 * @throws Whatever `doRequest` or axios throws on a failed request.
 */
async function getWinesPage(req: WineListRequest): Promise<WineListResponse> {
  const url = 'https://api.winesofargentina.org/index.php/wines/listFront';
  console.log(`Requesting: ${url}`);
  // Typing the axios call keeps the payload from silently degrading to `any`.
  return doRequest(axios.post<WineListResponse>(url, req, { headers: { 'Accept-Language': 'en' } }));
}
/** Strips markup from an HTML fragment and returns its text with newlines replaced by spaces. */
function htmlToPlainText(html: string | null | undefined): string {
  const text = new JSDOM(html ?? "").window.document.body.textContent ?? "";
  return text.trim().replace(/\n/g, " ");
}

/** Fetches the wines of one winery and flattens winery + wines into a CSV row. */
async function buildWineryRecord(wineryDetails: Winery): Promise<WineryRecord> {
  const wineryType = wineryDetails.winery_type
    ? wineryDetails.winery_type.map(entry => entry.type.name).join(", ")
    : "unclassified";

  const { id, code, name } = wineryDetails;
  const wines: Wine[] = [];
  for await (const wine of getWines({ id, code, name })) {
    wines.push(wine);
  }
  if (wines.length === 0) {
    console.log(`EMPTY ${wineryDetails.name} ${wineryDetails.code}`);
  }

  // Distinct variety names across all wines of the winery.
  const varietalNames = wines
    .map(wine => new Set<string>(wine.varieties?.map(variety => variety.variety.name)))
    .reduce((accumulator, currentValue) => union(accumulator, currentValue), new Set<string>());

  const websites = (wineryDetails.social ?? [])
    .filter(social => social.type.name === "Website")
    .map(social => social.value);

  // One "street, extra, postal code, province" string per address, skipping
  // parts the API left null.
  const addresses = (wineryDetails.addresses ?? [])
    .map(entry => entry.address)
    .map(address =>
      [address.address, address.address_extra, address.postal_code, address.province]
        .filter(item => item != null)
        .join(', '));

  return {
    name: wineryDetails.name,
    description: htmlToPlainText(wineryDetails.description),
    type: wineryType,
    varietals: Array.from(varietalNames).join(", "),
    email: wineryDetails.email,
    phone: wineryDetails.phone,
    website: websites.join(", "),
    address: addresses.join("; "),
  };
}

/**
 * Downloads every winery, enriches each with its details and wines, and
 * writes the result to `./wineries.csv`.
 *
 * Winery details are fetched in batches of 10 concurrent requests. The
 * returned promise settles only after the CSV file has been fully written,
 * and rejects if any request or the file write fails.
 */
async function listWineries(): Promise<void> {
  const wineries: WineryListItem[] = [];
  for await (const winery of getWineries()) {
    wineries.push(winery);
    console.log(winery.name);
  }
  console.log(wineries.length);

  const batchSize = 10;
  const wineryRecords: WineryRecord[] = [];
  for (let start = 0; start < wineries.length; start += batchSize) {
    const batch = wineries.slice(start, start + batchSize);
    const detailsBatch = await Promise.all(batch.map(winery => getWineryInfo(winery.code)));
    const recordsBatch = await Promise.all(detailsBatch.map(buildWineryRecord));
    wineryRecords.push(...recordsBatch);
  }

  // `phone` is collected for every record, so it is exported as well.
  const columns = ["name", "description", "type", "varietals", "email", "phone", "website", "address"];
  // pipeline() waits for the file to be flushed and surfaces stream errors,
  // which a bare .pipe() would silently drop.
  await pipeline(
    csv.stringify(wineryRecords, { header: true, columns }),
    fs.createWriteStream('./wineries.csv'),
  );
}
// Script entry point: run the export and report any failure instead of
// leaving the rejection unhandled.
listWineries().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
/// Helpers | |
/**
 * Returns a new set holding every element of both inputs.
 *
 * Declared as a hoisted function (not a `const`) so it is callable from code
 * that is invoked earlier in the module than this definition appears.
 *
 * @param firstSet - Elements appended after those of `otherSet`.
 * @param otherSet - Elements that come first in the result's iteration order.
 * @returns A fresh set; neither input is modified.
 */
function union<T>(firstSet: Set<T>, otherSet: Set<T>): Set<T> {
  const merged = new Set<T>(otherSet);
  // Set#add ignores values already present, so no membership check is needed.
  for (const item of firstSet) {
    merged.add(item);
  }
  return merged;
}
/* tsconfig.json
{
  "compilerOptions": {
    "target": "es2021",
    "sourceMap": true,
    "moduleResolution": "NodeNext",
    "module": "NodeNext",
    "allowSyntheticDefaultImports": true
  },
  "exclude": [
    "node_modules",
    "dist"
  ]
}
*/
/* package.json
{
  "dependencies": {
    "@types/jsdom": "^21.1.6",
    "@types/node": "^20.11.20",
    "axios": "^1.6.7",
    "csv-stringify": "^6.4.5",
    "jsdom": "^24.0.0",
    "typescript": "^5.3.3"
  },
  "type": "module"
}
*/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment