Last active
August 10, 2024 08:56
-
-
Save fennecinspace/9af07a9fe64074c7c317f0a8c2422c54 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# Configure Chrome. Headless mode is left commented out so the browser
# window stays visible while debugging; re-enable it for unattended runs.
chrome_options = Options()
# chrome_options.add_argument("--headless")  # run without a browser UI
chrome_options.add_argument("--disable-gpu")

# Launch Chrome, letting webdriver_manager download a matching chromedriver.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

# Open the job-search page and block (up to 10s) until at least one
# listing card has rendered.
driver.get("https://cmrsurgical.com/job-search")
wait = WebDriverWait(driver, 10)
listing_selector = (By.CSS_SELECTOR, '.search_result_list .list-item a.search-item')
wait.until(EC.presence_of_element_located(listing_selector))

# Pull title / location / discipline / link out of every listing card.
jobs = driver.find_elements(*listing_selector)
job_data = []
for card in jobs:
    record = {
        'Job Title': card.find_element(By.CLASS_NAME, 'search-item__title').text,
        'Location': card.find_element(By.CLASS_NAME, 'search-item__location').text,
        'Job Type': card.find_element(By.CLASS_NAME, 'search-item__discipline').text,
        'URL': card.get_attribute('href'),
    }
    job_data.append(record)
# Visit each job's detail page and enrich its record with the application
# URL. The page's "Responsibilities" / "About you" sections are scraped too
# but currently left out of the output (see commented keys below).
for idx, job in enumerate(job_data):
    try:
        driver.get(job['URL'])
        # The apply link can render late; fall back to a placeholder if it
        # never appears within the wait's timeout.
        try:
            application_url = wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'div.apply-cta-container a'))).get_attribute('href')
        except TimeoutException:
            application_url = 'Not available'
        # These sections are optional on some postings, so a missing
        # element is expected — not an error.
        try:
            responsibilities = driver.find_element(
                By.XPATH, "//h3[text()='Responsibilities']/following-sibling::ul").text
        except NoSuchElementException:
            responsibilities = 'Not available'
        try:
            about_you = driver.find_element(
                By.XPATH, "//h3[text()='About you']/following-sibling::ul").text
        except NoSuchElementException:
            about_you = 'Not available'
        # Merge the detail fields into the existing record.
        job_data[idx] = {
            **job,
            'Application': application_url,
            # 'Responsibilities': responsibilities,
            # 'About You': about_you,
        }
    except Exception as e:
        # Best-effort: one broken posting should not abort the whole run.
        print(e)
# Persist the scraped records to CSV. The try/finally guarantees the
# browser is shut down even if the DataFrame build or file write fails,
# so no orphaned Chrome process is left behind.
try:
    df = pd.DataFrame(job_data)
    df.to_csv('jobs_detailed.csv', index=False)
finally:
    driver.quit()
print("Job details including application URL, responsibilities, and about you have been saved to jobs_detailed.csv.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment