Skip to content

Instantly share code, notes, and snippets.

@GorgeousOne
Last active July 10, 2025 12:19
Show Gist options
  • Save GorgeousOne/89fda2203740d231487b92e80fe22949 to your computer and use it in GitHub Desktop.
Save GorgeousOne/89fda2203740d231487b92e80fe22949 to your computer and use it in GitHub Desktop.
Summaery 2023 calendar event scraper
'''Scraper that downloads the Summaery events into an .ics calendar file that can be imported into a calendar app.'''
import os
import re
import uuid
from datetime import datetime
from urllib.parse import urljoin

import pytz
import requests
from bs4 import BeautifulSoup
from icalendar import Calendar, Event
# Clock time such as "18.00". The lookarounds keep it from matching the
# day.month part of a full date like "11.07.2025".
_TIME_RE = re.compile(r'(?<![\d.])(\d{1,2}\.\d{2})(?!\.?\d)')
# Full date such as "11.07.2025".
_DATE_RE = re.compile(r'(?<!\d)\d{1,2}\.\d{1,2}\.\d{4}(?!\d)')


def _parse_event_datetime(text, default_date):
    '''Return the naive datetime named in text, falling back to default_date when text holds no date.'''
    time_match = _TIME_RE.search(text)
    if time_match is None:
        raise ValueError(f'no time of day found in {text!r}')
    date_match = _DATE_RE.search(text)
    date_string = date_match.group(0) if date_match else default_date
    return datetime.strptime(f'{date_string} {time_match.group(1)}', '%d.%m.%Y %H.%M')


def get_event_start_end_place(event_entry):
    '''Read the start, end time and possible place from the same calendar info element.

    The first two <li> items inside the 'cal_maininfo' element are read as
    start and end, an optional third one as the place (with its 'Ort:' label
    removed). An item that carries its own full date (dd.mm.yyyy) keeps that
    date; otherwise the date returned by get_event_date() is used.

    Returns a tuple (start, end, place) where start and end are datetimes
    localized to Europe/Berlin and place is a string or None.

    Raises ValueError if fewer than two <li> items exist or an item
    contains no time of day.
    '''
    date_string = get_event_date(event_entry)
    infos = event_entry.find(class_='cal_maininfo').find_all('li')
    if len(infos) < 2:
        raise ValueError(f'expected start and end entries in cal_maininfo, found {len(infos)}')

    start = _parse_event_datetime(infos[0].text, date_string)
    end = _parse_event_datetime(infos[1].text, date_string)
    place = re.sub(r'Ort:', '', infos[2].text).strip() if len(infos) > 2 else None

    germany = pytz.timezone('Europe/Berlin')
    return germany.localize(start), germany.localize(end), place
def get_event_date(event_entry):
    '''Build a 'dd.mm.yyyy' date string for an event entry.

    Day and month are the stripped texts of the 'day_of_month' and 'month'
    elements, each zero-padded to two characters; the year comes from the
    module-level `year` variable.
    '''
    month_text, day_text = (
        event_entry.find(class_=css_class).text.strip()
        for css_class in ('month', 'day_of_month')
    )
    return f'{day_text.zfill(2)}.{month_text.zfill(2)}.{year}'
def get_event_title_url(event_entry):
    '''Read the event title and the url for more info from the description to the right.

    The title is the stripped text of the link inside the entry's <h2>.
    The link's href is resolved against the university site root, so
    root-relative, relative and already-absolute hrefs all produce a
    usable url.

    Returns a tuple (title, url).
    '''
    title_elem = event_entry.find('h2').find('a')
    title = title_elem.text.strip()
    # urljoin leaves absolute hrefs untouched and inserts the missing slash
    # for relative ones, which plain string concatenation would get wrong
    url = urljoin('https://www.uni-weimar.de/', title_elem['href'])
    return title, url
def get_event_teaser(event_entry):
    '''Return the text of the entry's 'teasertext' element without a trailing "mehr".

    The text is stripped, a final standalone word "mehr" is removed, and
    the result is stripped again.
    '''
    raw_text = event_entry.find(class_='teasertext').text
    without_more = re.sub(r'\bmehr\b$', '', raw_text.strip())
    return without_more.strip()
def add_event_to_cal(cal, start, end, place, title, url, teaser):
    '''Create a calendar event with all the info and add it to the calendar.

    cal: calendar object the new event is added to via add_component().
    start, end: values stored as DTSTART / DTEND.
    place: stored as LOCATION; skipped when empty or None.
    title, url, teaser: stored as SUMMARY, URL and DESCRIPTION.
    '''
    event = Event()
    # RFC 5545 requires UID and DTSTAMP on every VEVENT. The UID is derived
    # from url and start so that re-running the scraper yields the same id
    # for the same event.
    event.add('uid', str(uuid.uuid5(uuid.NAMESPACE_URL, f'{url}#{start}')))
    event.add('dtstamp', datetime.now(pytz.utc))
    event.add('summary', title)
    event.add('dtstart', start)
    event.add('dtend', end)
    if place:
        event.add('location', place)
    event.add('url', url)
    event.add('description', teaser)
    cal.add_component(event)
# Year of the Summaery edition to scrape; also used for the cache and output file names.
year = 2025
page_url = 'https://www.uni-weimar.de/de/universitaet/aktuell/veranstaltungskalender/highlights-des-jahres/2025/summaery/veranstaltungen/'


def main():
    '''Download the event page (or reuse the cached copy) and write all events to an .ics file.'''
    filepath = f'website_{year}.html'
    if not os.path.exists(filepath):
        print('downloading website')
        response = requests.get(page_url, timeout=30)
        # fail here instead of caching an HTTP error page that every later
        # run would silently parse as "0 events"
        response.raise_for_status()
        with open(filepath, 'wb') as f:
            f.write(response.content)
        print('saved website')

    print('loading html')
    with open(filepath, 'r', encoding='utf-8') as f:
        html_string = f.read()
    # collapse runs of whitespace into a single space before parsing
    html_string = re.sub(r'\s\s+', ' ', html_string)
    dom = BeautifulSoup(html_string, 'html.parser')

    # find all events listed on the page
    event_entries = dom.find_all(class_='summaeryLiveEvent')
    print(f'Found {len(event_entries)} events')

    cal = Calendar()
    # PRODID and VERSION are required calendar properties in RFC 5545
    cal.add('prodid', '-//Summaery event scraper//uni-weimar.de//')
    cal.add('version', '2.0')

    for i, entry in enumerate(event_entries, start=1):
        title, url = get_event_title_url(entry)
        add_event_to_cal(
            cal,
            *get_event_start_end_place(entry),
            title,
            url,
            get_event_teaser(entry))
        print(i, '/', len(event_entries), title)

    # write them into one big icalendar file
    with open(f'summaery_{year}.ics', 'wb') as f:
        f.write(cal.to_ical())


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment