Last active
November 27, 2023 07:33
-
-
Save J-Broadway/de61cc0f2be876a1036667b2bced3e6b to your computer and use it in GitHub Desktop.
YouTube videos page scraper (JS to paste into dev tools console)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/////////////////////////////////////////////////////////////////////////////////////////////////// | |
// WAT DO | |
//------------------------------------------------------------------------------------------------- | |
// Scrapes each YouTube video's thumbnail, title (with embedded hyperlink), view count and
// estimated upload date from the current page, and saves the result as an .html file
/////////////////////////////////////////////////////////////////////////////////////////////////// | |
// USAGE | |
//------------------------------------------------------------------------------------------------- | |
// - Open up your browser's dev tools (CTRL + SHIFT + C [works for me]) | |
// - Navigate to 'Console' tab | |
// - Paste this code and hit 'ENTER' | |
// - Save .html to your computer | |
/////////////////////////////////////////////////////////////////////////////////////////////////// | |
// WORKING AS OF 11/27/2023 | |
/////////////////////////////////////////////////////////////////////////////////////////////////// | |
(function() { | |
/**
 * Estimate an absolute calendar date from a relative description such as
 * "3 weeks ago" or "Streamed 2 years ago".
 *
 * The first "<number> <unit>" pair found in the text is used; the unit may be
 * year, month, week, day, hour, minute or second (singular or plural, any case).
 *
 * @param {string} relativeDateStr - Relative date text to interpret.
 * @param {Date} [now=new Date()] - Reference point the offset is subtracted
 *   from. It is copied, never mutated.
 * @returns {string} The estimated date formatted with `toLocaleDateString()`,
 *   or 'Unknown date' when the text cannot be interpreted.
 */
function estimateDate(relativeDateStr, now = new Date()) {
  if (typeof relativeDateStr !== 'string') {
    return 'Unknown date';
  }

  const match = /(\d+)\s+(year|month|week|day|hour|minute|second)s?\b/i.exec(relativeDateStr);
  if (!match) {
    return 'Unknown date';
  }

  const amount = Number.parseInt(match[1], 10);
  const unit = match[2].toLowerCase();
  const result = new Date(now.getTime());

  // Subtract whole months while clamping the day of month, so that e.g.
  // "1 month ago" on March 31 gives the end of February instead of rolling
  // over into early March.
  const subtractMonths = (months) => {
    const dayOfMonth = result.getDate();
    result.setDate(1);
    result.setMonth(result.getMonth() - months);
    const lastDayOfMonth = new Date(result.getFullYear(), result.getMonth() + 1, 0).getDate();
    result.setDate(Math.min(dayOfMonth, lastDayOfMonth));
  };

  switch (unit) {
    case 'year':
      subtractMonths(amount * 12);
      break;
    case 'month':
      subtractMonths(amount);
      break;
    case 'week':
      result.setDate(result.getDate() - amount * 7);
      break;
    case 'day':
      result.setDate(result.getDate() - amount);
      break;
    case 'hour':
      result.setHours(result.getHours() - amount);
      break;
    case 'minute':
      result.setMinutes(result.getMinutes() - amount);
      break;
    case 'second':
      result.setSeconds(result.getSeconds() - amount);
      break;
    default:
      return 'Unknown date';
  }

  // An invalid reference date or an out-of-range offset must not leak
  // the string "Invalid Date" into the generated page.
  if (Number.isNaN(result.getTime())) {
    return 'Unknown date';
  }
  return result.toLocaleDateString();
}
// Escape text scraped from the page before embedding it in generated HTML,
// so characters such as <, & or " in a title cannot break the markup or
// inject script into the saved file.
const escapeHtml = (value) => String(value)
  .replace(/&/g, '&amp;')
  .replace(/</g, '&lt;')
  .replace(/>/g, '&gt;')
  .replace(/"/g, '&quot;')
  .replace(/'/g, '&#39;');

// Pieces of the HTML document, joined once at the end. The charset is
// declared explicitly because the Blob is UTF-8 encoded and a locally opened
// file has no HTTP header to say so.
const htmlParts = ['<html><head><meta charset="utf-8"><title>YouTube Content</title></head><body>'];

// The selector below matches several nested nodes belonging to the same video,
// so each video is remembered by its link (falling back to its title) to
// avoid emitting duplicates. Keying on the link rather than the title keeps
// distinct videos that happen to share a title.
const processedVideos = new Set();

// Get all elements with the 'ytd-rich-item-renderer' class
const elements = document.querySelectorAll('.ytd-rich-item-renderer');
elements.forEach((el) => {
  const titleElement = el.querySelector('#video-title-link');
  if (!titleElement) {
    return;
  }

  const title = titleElement.textContent.trim();
  const videoLink = titleElement.href;
  const videoKey = videoLink || title;
  if (processedVideos.has(videoKey)) {
    return;
  }
  processedVideos.add(videoKey);

  // First metadata item is read as the view count, second as the relative date.
  const metadataItems = el.querySelectorAll('#metadata-line .inline-metadata-item');
  const viewCount = metadataItems.length > 0 ? metadataItems[0].textContent.trim() : 'No views';
  const dateText = metadataItems.length > 1 ? metadataItems[1].textContent.trim() : 'No date';
  const estimatedDate = dateText !== 'No date' ? estimateDate(dateText) : 'Unknown date';

  const thumbnailElement = el.querySelector('ytd-thumbnail img');
  const thumbnail = thumbnailElement ? thumbnailElement.src : '';

  htmlParts.push(`
<div>
<h3><a href="${escapeHtml(videoLink)}">${escapeHtml(title)}</a></h3>
<p>Views: ${escapeHtml(viewCount)}</p>
<p>Date: ${escapeHtml(estimatedDate)}</p>
<img src="${escapeHtml(thumbnail)}" alt="Thumbnail">
</div>
`);
});
htmlParts.push('</body></html>');

// Create a Blob from the content
const blob = new Blob([htmlParts.join('')], { type: 'text/html' });

// Create a temporary link element that downloads the Blob when clicked
const downloadUrl = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = downloadUrl;
link.download = 'youtube_content.html';

// Append the link to the document, trigger the download, and then remove the link
document.body.appendChild(link);
link.click();
document.body.removeChild(link);

// Release the object URL once the browser has had time to start the download;
// otherwise the Blob stays alive for the lifetime of the page.
setTimeout(() => URL.revokeObjectURL(downloadUrl), 1000);
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment