Skip to content

Instantly share code, notes, and snippets.

@KrishGarg
Last active July 20, 2025 17:17
Show Gist options
  • Save KrishGarg/3e0b5aa33f89270be54f7e4a73f0f462 to your computer and use it in GitHub Desktop.
Save KrishGarg/3e0b5aa33f89270be54f7e4a73f0f462 to your computer and use it in GitHub Desktop.
// scrape.js
const puppeteer = require("puppeteer");
const fs = require("fs");
const path = require("path");
// Entry URL for the scraper. We start from the site's home page and follow the
// link to the allotment page from there, because opening the allotment page
// directly is sometimes denied.
const TARGET_URL = "https://tgeapcet.nic.in/default.aspx";
/**
 * Scrapes the allotment table for every college/branch combination offered by
 * the allotment page linked from TARGET_URL and writes the result to
 * `allotment_data.json` next to this script.
 *
 * Output shape: `{ [collegeName]: { [branchName]: Array<record> } }`, where
 * each record has the keys sno, hallticketno, rank, name, sex, caste, region
 * and seatcategory.
 *
 * The browser is always closed, even when scraping fails part-way through.
 *
 * @returns {Promise<void>} Resolves once the JSON file has been written.
 * @throws Propagates any Puppeteer or file-system error that is not handled by
 *   the per-branch recovery logic.
 */
async function scrapeAllotments() {
  console.log("🚀 Starting the scraper...");
  // headless runs the browser inside the program without opening a UI window
  const browser = await puppeteer.launch({ headless: true });
  try {
    // Mimic a regular mobile browser while running headless
    const ua =
      "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.3";
    const page = await browser.newPage();
    await page.setUserAgent(ua);
    await page.goto(TARGET_URL, { waitUntil: "networkidle2" });
    console.log(`On ${TARGET_URL}`);

    // Inspect the page to confirm these selectors if the site changes
    const collegeSelector = "#MainContent_DropDownList1";
    const branchSelector = "#MainContent_DropDownList2";
    const submitButtonSelector = "#MainContent_btn_allot";
    const resultsTableSelector = "table.sortable";

    // The allotment link opens in a new page/tab: start listening for the new
    // target before clicking so the event cannot be missed.
    const newPagePromise = new Promise((resolve) =>
      browser.once("targetcreated", (target) => resolve(target.page()))
    );
    await page.click("a[href$='college_allotment.aspx']");
    const newPage = await newPagePromise;
    await newPage.waitForNavigation(); // wait for the new tab to finish loading
    await newPage.setUserAgent(ua);

    // College dropdown: the default "--Select--" entry has an empty value
    const colleges = await readSelectOptions(newPage, collegeSelector, "");

    // Holds all scraped data, keyed by college name and then branch name
    const allRecords = {};

    // The site is a little bugged for the first college: you have to pick
    // another college and then come back to it. So we start at index 1 and
    // wrap around, which processes the first college last.
    for (let i = 1; i <= colleges.length; i++) {
      const college = colleges[i % colleges.length];

      // Selecting a college reloads the page with that college's branches
      await selectAndWaitForNavigation(newPage, collegeSelector, college.value);

      // Branch dropdown: the default "--Select--" entry has the value "0"
      const branches = await readSelectOptions(newPage, branchSelector, "0");

      allRecords[college.name] = {};
      for (const branch of branches) {
        try {
          console.log(`Fetching: ${college.name} -> ${branch.name}`);
          // Select college and branch
          await selectAndWaitForNavigation(
            newPage,
            collegeSelector,
            college.value
          );
          await newPage.select(branchSelector, branch.value);
          // Register the navigation wait BEFORE clicking so a fast page load
          // cannot slip past us and leave the wait hanging until it times out.
          await Promise.all([
            newPage.waitForNavigation(),
            newPage.click(submitButtonSelector),
          ]);

          allRecords[college.name][branch.name] = [];
          const tableData = await scrapeResultsTable(
            newPage,
            resultsTableSelector
          );
          if (tableData.length > 0) {
            allRecords[college.name][branch.name].push(...tableData);
          } else {
            // Edge case found during testing: some branches have no
            // allotments, so the table is empty. Go back and continue.
            // NOTE(review): goBack() already waits for a navigation; confirm
            // the extra waitForNavigation() is still needed on this site.
            await newPage.goBack();
            await newPage.waitForNavigation();
          }
        } catch (error) {
          console.error(
            `Could not fetch data for ${college.name} -> ${branch.name}. Skipping.`,
            error
          );
          // On any error, just go back and continue with the next branch
          await newPage.goBack();
          await newPage.waitForNavigation();
        }
      }
    }

    // Write everything to JSON
    const outputPath = path.resolve(__dirname, "allotment_data.json");
    fs.writeFileSync(outputPath, JSON.stringify(allRecords, null, 2));
    console.log(`✅ Scraping complete! Data saved to ${outputPath}`);
  } finally {
    // Always release the browser process, including when scraping throws
    await browser.close();
  }
}

/**
 * Reads the options of a <select> element on the page, dropping the
 * placeholder entry.
 *
 * @param {object} page - Puppeteer page to read from.
 * @param {string} selector - CSS selector of the <select> element.
 * @param {string} placeholderValue - `value` of the option to leave out.
 * @returns {Promise<Array<{name: string, value: string}>>} Remaining options.
 */
async function readSelectOptions(page, selector, placeholderValue) {
  // The callback runs inside the browser; extra arguments are how data is
  // passed from this program into the page.
  return page.evaluate(
    (sel, placeholder) =>
      Array.from(document.querySelector(sel).options)
        .map((option) => ({ name: option.text, value: option.value }))
        .filter((opt) => opt.value !== placeholder),
    selector,
    placeholderValue
  );
}

/**
 * Selects a dropdown value and waits for the page navigation it triggers.
 * The wait is registered before the selection is made so the navigation
 * cannot be missed.
 *
 * @param {object} page - Puppeteer page containing the dropdown.
 * @param {string} selector - CSS selector of the <select> element.
 * @param {string} value - Option value to select.
 * @returns {Promise<void>}
 */
async function selectAndWaitForNavigation(page, selector, value) {
  await Promise.all([page.waitForNavigation(), page.select(selector, value)]);
}

/**
 * Extracts the allotment records from the results table on the page.
 *
 * @param {object} page - Puppeteer page showing the results.
 * @param {string} tableSelector - CSS selector of the results table.
 * @returns {Promise<Array<object>>} One record per data row; empty when the
 *   table has no data rows.
 */
async function scrapeResultsTable(page, tableSelector) {
  return page.evaluate((selector) => {
    const rows = document.querySelectorAll(`${selector} tr`);
    const records = [];
    for (const row of rows) {
      const cells = row.querySelectorAll("td");
      // Rows with fewer than two <td> cells carry no data and are skipped
      // (presumably this is what drops the header row -- confirm on the page).
      if (cells.length > 1) {
        // The keys here should match the table's column order
        records.push({
          sno: Number(cells[0]?.innerText.trim()),
          hallticketno: cells[1]?.innerText.trim(),
          rank: Number(cells[2]?.innerText.trim()),
          name: cells[3]?.innerText.trim(),
          sex: cells[4]?.innerText.trim(),
          caste: cells[5]?.innerText.trim(),
          region: cells[6]?.innerText.trim(),
          seatcategory: cells[7]?.innerText.trim(),
        });
      }
    }
    return records;
  }, tableSelector);
}
// Run the scraper. On failure, log the error and set a non-zero exit code so
// shells and CI can tell the run did not succeed.
scrapeAllotments().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment