INVISIBLE5130 · April 5, 2025 17:02
diff --git a/candidates-scraper.js b/candidates-scraper.js
 // ==UserScript==
 // @name         Candidates Scraper for Djinni
 // @namespace    http://tampermonkey.net/
 // @version      2025-04-05
 // @description  Scrape candidates from Djinni
 // @author       Ihor Sheptiakov
 // @match        https://djinni.co/developers/*
 // @icon         https://www.google.com/s2/favicons?sz=64&domain=djinni.co
 // @grant        none
 // ==/UserScript==

 /**
  * Extract one candidate's details from a single candidate card element.
  *
  * Side effects: clicks the card's "show more" description button and the
  * expandable skills badge when they are present, and logs the collected
  * skills to the console.
  *
  * @param {Element} card - Root element of one candidate card.
  * @returns {Promise<Object>} Record with the string fields position, salary,
  *     country, city, experience, englishLevel, description, skills
  *     (comma-separated), profileUrl, views and timestamp (ISO 8601, taken at
  *     extraction time). A field whose element is missing is an empty string.
  */
 async function extractCandidateInfo(card) {
    // "<number> рік/роки/років досвіду" = "<number> year(s) of experience":
    // рік = year (singular), роки = years (2-4), років = years (5+ or decimals).
    const experiencePattern = /^(\d+|\d+\.\d+)\s+(рік|роки|років)\s+досвіду$/i;
    const englishLevelPattern = /^(Advanced\/Fluent|Upper-Intermediate|Intermediate|Pre-Intermediate|Beginner)$/i;
    // Status labels that must never be mistaken for a city.
    const statusPattern = /^(Опубліковано|У пасивному пошуку)$/i;

    // Trimmed text of the first element matching `selector`, or '' when absent.
    const getText = (selector) => {
        if (selector === '.text-card') {
            // Click "show more" (if any) before reading the description,
            // presumably so truncated text is expanded first.
            card.querySelector('.js-show-more-btn')?.click();
        }
        const element = card.querySelector(selector);
        return element ? element.textContent.trim() : '';
    };

    // Comma-separated text of every skill badge on the card.
    const getSkills = () => {
        // Click the expandable badge BEFORE collecting: querySelectorAll returns
        // a static snapshot, so badges added by the click would be missed if the
        // list were taken first. click() is synchronous, so nothing is awaited.
        card.querySelector('.badge.border.js-analytics-event')?.click();
        const skills = Array.from(
            card.querySelectorAll('.badge.border'),
            (el) => el.textContent.trim()
        ).join(', ');
        console.log('Skills:', skills);
        return skills;
    };

    // Collapse every run of whitespace (including newlines) to a single space.
    const cleanDescription = (text) => text.replace(/\s+/g, ' ').trim();

    // Spans of the location/experience line, without separators (.mx-1) and
    // without the publication / passive-search status labels.
    const infoSpans = Array.from(card.querySelectorAll('p.text-secondary span'))
        .filter((span) => {
            const text = span.textContent.trim();
            return !text.includes('Опубліковано') &&
                   !text.includes('У пасивному пошуку') &&
                   !span.classList.contains('mx-1');
        });

    let city = '';
    let experience = '';
    let englishLevel = '';

    infoSpans.forEach((span, index) => {
        const text = span.textContent.trim();
        const isExperience = experiencePattern.test(text);
        const isEnglishLevel = englishLevelPattern.test(text);

        // The city is taken only from the third remaining span, and only when
        // it contains no digit and matches none of the other known patterns.
        if (
            index === 2 &&
            !isExperience &&
            !isEnglishLevel &&
            !statusPattern.test(text) &&
            !/\d/.test(text)
        ) {
            city = text;
        }

        if (isExperience) {
            experience = text;
        } else if (isEnglishLevel) {
            englishLevel = text;
        }
    });

    // Skills are read first, matching the original order of DOM interactions.
    const skills = getSkills();

    return {
        position: getText('h2 a.profile'),
        salary: getText('.text-success'),
        country: getText('p.text-secondary span:first-child'),
        city,
        experience,
        englishLevel,
        description: cleanDescription(getText('.text-card')),
        skills,
        profileUrl: card.querySelector('h2 a.profile')?.href || '',
        views: getText('.bi-eye + span'),
        timestamp: new Date().toISOString()
    };
 }

 /**
  * Extract every candidate card found on the page currently in `document`.
  *
  * Cards are selected with '.card.mb-4' and processed strictly one after
  * another, in document order.
  *
  * @returns {Promise<Array<Object>>} One extractCandidateInfo() result per card.
  */
 async function scrapeCurrentPage() {
    const collected = [];
    const cardNodes = Array.from(document.querySelectorAll('.card.mb-4'));

    // Sequential on purpose: each card is fully extracted before the next starts.
    for (let idx = 0; idx < cardNodes.length; idx += 1) {
        collected.push(await extractCandidateInfo(cardNodes[idx]));
    }

    return collected;
 }

 /**
  * Build a CSV from the candidate records and trigger a browser download.
  *
  * The file is named `candidates_<YYYY-MM-DD>.csv` (UTC date). Rows are joined
  * with '\n'; values containing a comma, double quote, CR or LF are wrapped in
  * double quotes with embedded quotes doubled.
  *
  * @param {Array<Object>} candidates - Records as produced by
  *     extractCandidateInfo(); null/undefined fields become empty cells.
  * @returns {void}
  */
 function downloadCSV(candidates) {
    // Column title -> record field, kept together so the two cannot drift apart.
    const columns = [
        ['Position', 'position'],
        ['Salary', 'salary'],
        ['Country', 'country'],
        ['City', 'city'],
        ['Experience', 'experience'],
        ['English Level', 'englishLevel'],
        ['Description', 'description'],
        ['Skills', 'skills'],
        ['Profile URL', 'profileUrl'],
        ['Views', 'views'],
        ['Timestamp', 'timestamp']
    ];

    // Quote a value when it contains a delimiter, a quote, or any line break
    // (a bare '\r' also splits a row in many CSV readers).
    const escapeCSV = (value) => {
        if (value === null || value === undefined) return '';
        const stringValue = String(value);
        if (/[",\r\n]/.test(stringValue)) {
            return `"${stringValue.replace(/"/g, '""')}"`;
        }
        return stringValue;
    };

    const headerRow = columns.map(([title]) => escapeCSV(title)).join(',');
    const dataRows = candidates.map((candidate) =>
        columns.map(([, field]) => escapeCSV(candidate[field])).join(',')
    );
    const csvContent = [headerRow, ...dataRows].join('\n');

    const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
    const url = URL.createObjectURL(blob);

    // A temporary hidden link is the portable way to start a named download.
    const link = document.createElement('a');
    link.setAttribute('href', url);
    link.setAttribute('download', `candidates_${new Date().toISOString().split('T')[0]}.csv`);
    link.style.visibility = 'hidden';
    document.body.appendChild(link);
    try {
        link.click();
    } finally {
        document.body.removeChild(link);
        // Release the blob URL, deferred so the download has been started
        // before the URL becomes invalid.
        setTimeout(() => URL.revokeObjectURL(url), 0);
    }
 }

 /**
  * Scrape every results page, then export the combined list as a CSV.
  *
  * After each page the running list is written to localStorage under
  * 'djinni_candidates' and read back from there, so entries already stored
  * under that key are included in the export. The key is removed once the CSV
  * has been handed to downloadCSV().
  * NOTE(review): presumably the storage round-trip lets results survive a full
  * page navigation between pages - confirm.
  *
  * @returns {Promise<Array<Object>>} All candidates read from localStorage
  *     just before the key is removed.
  */
 async function scrapeAllPages() {
    const storageKey = 'djinni_candidates';
    // Persisted list, or an empty list when nothing has been saved yet.
    const loadStored = () => JSON.parse(localStorage.getItem(storageKey) || '[]');

    console.log('Starting to scrape candidates...');

    let pageNumber = 0;
    let morePages = true;
    while (morePages) {
        pageNumber += 1;
        console.log(`Scraping page ${pageNumber}...`);

        const found = await scrapeCurrentPage();

        // Append this page's candidates to whatever is already persisted.
        const merged = [...loadStored(), ...found];
        localStorage.setItem(storageKey, JSON.stringify(merged));

        console.log(`Found ${found.length} candidates on page ${pageNumber}`);
        console.log(`Total candidates so far: ${merged.length}`);

        // goToNextPage() reports whether it moved on; stop as soon as it did not.
        morePages = await goToNextPage();
        if (!morePages) {
            console.log('No more pages to scrape');
        }
    }

    const allCandidates = loadStored();
    console.log('Scraping completed!');
    console.log(`Total candidates found: ${allCandidates.length}`);

    downloadCSV(allCandidates);
    console.log('CSV file has been downloaded!');

    // The export is done, so the accumulated list is no longer needed.
    localStorage.removeItem(storageKey);

    return allCandidates;
 }

 /**
  * Click the pagination "next" link, if there is an enabled one.
  *
  * The link is the `a.page-link` that contains a `.bi-chevron-right` icon. It
  * counts as disabled when its `aria-disabled` attribute equals "true" in any
  * letter case: the original check only recognised "True", so a lower-case
  * "true" (the value the ARIA spec defines) would have been clicked forever on
  * the last page.
  *
  * @returns {Promise<boolean>} true after clicking and waiting 2 seconds for
  *     the next page; false when there is no link or it is disabled.
  */
 async function goToNextPage() {
    const nextButton = document.querySelector('a.page-link:has(.bi-chevron-right)');
    if (!nextButton) {
        return false;
    }

    const ariaDisabled = nextButton.getAttribute('aria-disabled') ?? '';
    if (ariaDisabled.trim().toLowerCase() === 'true') {
        return false;
    }

    nextButton.click();
    // Fixed delay to give the next page time to load.
    await new Promise((resolve) => setTimeout(resolve, 2000));
    return true;
 }

 // Start the scrape as soon as the script loads and report the outcome on the
 // console; any failure is logged rather than left as an unhandled rejection.
 void (async () => {
    try {
        await scrapeAllPages();
        console.log('Scraping process completed!');
    } catch (error) {
        console.error('Error during scraping:', error);
    }
 })();
	// ==UserScript==
	// @name Candidates Scraper for Djinni
	// @namespace http://tampermonkey.net/
	// @version 2025-04-05
	// @description Scrape candidates from Djinni
	// @author Ihor Sheptiakov
	// @match https://djinni.co/developers/*
	// @icon https://www.google.com/s2/favicons?sz=64&domain=djinni.co
	// @grant none
	// ==/UserScript==

	/**
	 * Extract one candidate's details from a single candidate card element.
	 *
	 * Side effects: clicks the card's "show more" description button and the
	 * expandable skills badge when they are present, and logs the collected
	 * skills to the console.
	 *
	 * @param {Element} card - Root element of one candidate card.
	 * @returns {Promise<Object>} Record with the string fields position, salary,
	 *     country, city, experience, englishLevel, description, skills
	 *     (comma-separated), profileUrl, views and timestamp (ISO 8601, taken at
	 *     extraction time). A field whose element is missing is an empty string.
	 */
	async function extractCandidateInfo(card) {
		// The alternations below must use a bare `|`; an escaped `\|` matches a
		// literal pipe character and would make these patterns never match.
		// "<number> рік/роки/років досвіду" = "<number> year(s) of experience":
		// рік = year (singular), роки = years (2-4), років = years (5+ or decimals).
		const experiencePattern = /^(\d+|\d+\.\d+)\s+(рік|роки|років)\s+досвіду$/i;
		const englishLevelPattern = /^(Advanced\/Fluent|Upper-Intermediate|Intermediate|Pre-Intermediate|Beginner)$/i;
		// Status labels that must never be mistaken for a city.
		const statusPattern = /^(Опубліковано|У пасивному пошуку)$/i;

		// Trimmed text of the first element matching `selector`, or '' when absent.
		const getText = (selector) => {
			if (selector === '.text-card') {
				// Click "show more" (if any) before reading the description,
				// presumably so truncated text is expanded first.
				card.querySelector('.js-show-more-btn')?.click();
			}
			const element = card.querySelector(selector);
			return element ? element.textContent.trim() : '';
		};

		// Comma-separated text of every skill badge on the card.
		const getSkills = () => {
			// Click the expandable badge BEFORE collecting: querySelectorAll returns
			// a static snapshot, so badges added by the click would be missed if the
			// list were taken first. click() is synchronous, so nothing is awaited.
			card.querySelector('.badge.border.js-analytics-event')?.click();
			const skills = Array.from(
				card.querySelectorAll('.badge.border'),
				(el) => el.textContent.trim()
			).join(', ');
			console.log('Skills:', skills);
			return skills;
		};

		// Collapse every run of whitespace (including newlines) to a single space.
		const cleanDescription = (text) => text.replace(/\s+/g, ' ').trim();

		// Spans of the location/experience line, without separators (.mx-1) and
		// without the publication / passive-search status labels.
		const infoSpans = Array.from(card.querySelectorAll('p.text-secondary span'))
			.filter((span) => {
				const text = span.textContent.trim();
				return !text.includes('Опубліковано') &&
					!text.includes('У пасивному пошуку') &&
					!span.classList.contains('mx-1');
			});

		let city = '';
		let experience = '';
		let englishLevel = '';

		infoSpans.forEach((span, index) => {
			const text = span.textContent.trim();
			const isExperience = experiencePattern.test(text);
			const isEnglishLevel = englishLevelPattern.test(text);

			// The city is taken only from the third remaining span, and only when
			// it contains no digit and matches none of the other known patterns.
			if (
				index === 2 &&
				!isExperience &&
				!isEnglishLevel &&
				!statusPattern.test(text) &&
				!/\d/.test(text)
			) {
				city = text;
			}

			if (isExperience) {
				experience = text;
			} else if (isEnglishLevel) {
				englishLevel = text;
			}
		});

		// Skills are read first, matching the original order of DOM interactions.
		const skills = getSkills();

		return {
			position: getText('h2 a.profile'),
			salary: getText('.text-success'),
			country: getText('p.text-secondary span:first-child'),
			city,
			experience,
			englishLevel,
			description: cleanDescription(getText('.text-card')),
			skills,
			profileUrl: card.querySelector('h2 a.profile')?.href || '',
			views: getText('.bi-eye + span'),
			timestamp: new Date().toISOString()
		};
	}

	/**
	 * Extract every candidate card found on the page currently in `document`.
	 *
	 * Cards are selected with '.card.mb-4' and processed strictly one after
	 * another, in document order.
	 *
	 * @returns {Promise<Array<Object>>} One extractCandidateInfo() result per card.
	 */
	async function scrapeCurrentPage() {
		const pending = Array.from(document.querySelectorAll('.card.mb-4'));
		const results = [];

		// Sequential on purpose: each card is fully extracted before the next starts.
		while (results.length < pending.length) {
			const nextCard = pending[results.length];
			results.push(await extractCandidateInfo(nextCard));
		}

		return results;
	}

	/**
	 * Build a CSV from the candidate records and trigger a browser download.
	 *
	 * The file is named `candidates_<YYYY-MM-DD>.csv` (UTC date). Rows are joined
	 * with '\n'; values containing a comma, double quote, CR or LF are wrapped in
	 * double quotes with embedded quotes doubled.
	 *
	 * @param {Array<Object>} candidates - Records as produced by
	 *     extractCandidateInfo(); null/undefined fields become empty cells.
	 * @returns {void}
	 */
	function downloadCSV(candidates) {
		// Column title -> record field, kept together so the two cannot drift apart.
		const columns = [
			['Position', 'position'],
			['Salary', 'salary'],
			['Country', 'country'],
			['City', 'city'],
			['Experience', 'experience'],
			['English Level', 'englishLevel'],
			['Description', 'description'],
			['Skills', 'skills'],
			['Profile URL', 'profileUrl'],
			['Views', 'views'],
			['Timestamp', 'timestamp']
		];

		// Quote a value when it contains a delimiter, a quote, or any line break
		// (a bare '\r' also splits a row in many CSV readers).
		const escapeCSV = (value) => {
			if (value === null || value === undefined) return '';
			const stringValue = String(value);
			if (/[",\r\n]/.test(stringValue)) {
				return `"${stringValue.replace(/"/g, '""')}"`;
			}
			return stringValue;
		};

		const headerRow = columns.map(([title]) => escapeCSV(title)).join(',');
		const dataRows = candidates.map((candidate) =>
			columns.map(([, field]) => escapeCSV(candidate[field])).join(',')
		);
		const csvContent = [headerRow, ...dataRows].join('\n');

		const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
		const url = URL.createObjectURL(blob);

		// A temporary hidden link is the portable way to start a named download.
		const link = document.createElement('a');
		link.setAttribute('href', url);
		link.setAttribute('download', `candidates_${new Date().toISOString().split('T')[0]}.csv`);
		link.style.visibility = 'hidden';
		document.body.appendChild(link);
		try {
			link.click();
		} finally {
			document.body.removeChild(link);
			// Release the blob URL, deferred so the download has been started
			// before the URL becomes invalid.
			setTimeout(() => URL.revokeObjectURL(url), 0);
		}
	}

	/**
	 * Scrape every results page, then export the combined list as a CSV.
	 *
	 * After each page the running list is written to localStorage under
	 * 'djinni_candidates' and read back from there, so entries already stored
	 * under that key are included in the export. The key is removed once the CSV
	 * has been handed to downloadCSV().
	 * NOTE(review): presumably the storage round-trip lets results survive a full
	 * page navigation between pages - confirm.
	 *
	 * @returns {Promise<Array<Object>>} All candidates read from localStorage
	 *     just before the key is removed.
	 */
	async function scrapeAllPages() {
		let currentPage = 1;
		const storageKey = 'djinni_candidates';

		console.log('Starting to scrape candidates...');

		while (true) {
			console.log(`Scraping page ${currentPage}...`);

			const pageCandidates = await scrapeCurrentPage();

			// getItem() yields null when the key is absent; fall back to an empty
			// list. (The fallback operator is a plain `||`, not an escaped `\|\|`,
			// which is a syntax error.)
			const existingCandidates = JSON.parse(localStorage.getItem(storageKey) || '[]');

			// Append this page's candidates and persist the combined list.
			const updatedCandidates = [...existingCandidates, ...pageCandidates];
			localStorage.setItem(storageKey, JSON.stringify(updatedCandidates));

			console.log(`Found ${pageCandidates.length} candidates on page ${currentPage}`);
			console.log(`Total candidates so far: ${updatedCandidates.length}`);

			// goToNextPage() reports whether it moved on; stop when it did not.
			const hasNextPage = await goToNextPage();
			if (!hasNextPage) {
				console.log('No more pages to scrape');
				break;
			}
			currentPage++;
		}

		const allCandidates = JSON.parse(localStorage.getItem(storageKey) || '[]');
		console.log('Scraping completed!');
		console.log(`Total candidates found: ${allCandidates.length}`);

		downloadCSV(allCandidates);
		console.log('CSV file has been downloaded!');

		// The export is done, so the accumulated list is no longer needed.
		localStorage.removeItem(storageKey);

		return allCandidates;
	}

	/**
	 * Click the pagination "next" link, if there is an enabled one.
	 *
	 * The link is the `a.page-link` that contains a `.bi-chevron-right` icon. It
	 * counts as disabled when its `aria-disabled` attribute equals "true" in any
	 * letter case: the original check only recognised "True", so a lower-case
	 * "true" (the value the ARIA spec defines) would have been clicked forever on
	 * the last page.
	 *
	 * @returns {Promise<boolean>} true after clicking and waiting 2 seconds for
	 *     the next page; false when there is no link or it is disabled.
	 */
	async function goToNextPage() {
		const nextButton = document.querySelector('a.page-link:has(.bi-chevron-right)');
		if (!nextButton) {
			return false;
		}

		const ariaDisabled = nextButton.getAttribute('aria-disabled') ?? '';
		if (ariaDisabled.trim().toLowerCase() === 'true') {
			return false;
		}

		nextButton.click();
		// Fixed delay to give the next page time to load.
		await new Promise((resolve) => setTimeout(resolve, 2000));
		return true;
	}

	// Start the scrape as soon as the script loads and report the outcome on the
	// console; any failure is logged rather than left as an unhandled rejection.
	void (async () => {
		try {
			await scrapeAllPages();
			console.log('Scraping process completed!');
		} catch (error) {
			console.error('Error during scraping:', error);
		}
	})();