gchristian · May 24, 2021 18:24
diff --git a/parsepdf.py b/parsepdf.py
 # Split a PDF into separate files: a new file starts at every page that contains the break-point phrase, and each file is named after the text that follows the phrase on that line.

 import PyPDF2
 import pdfplumber

 if __name__ == '__main__':
 	# Script entry point: walk the source PDF page by page and start a new
 	# output file every time a page contains the break-point phrase.
 	#
 	# Behaviour worth knowing:
 	#   * Pages that come before the first break-point page are kept and end
 	#     up at the front of the first output file.
 	#   * If no page contains the phrase, nothing is written.
 	#   * Output files are opened with 'wb', so two sections that yield the
 	#     same name overwrite each other.
 	#   * NOTE(review): the output name is taken verbatim from the PDF text
 	#     and is not checked for path separators or emptiness.
 	pdf_path = 'MBA Report Creator.pdf'
 	pdf_break_point = 'Student_Number '
 	# NOTE(review): PdfFileReader/PdfFileWriter/addPage/getPage/getNumPages
 	# are the legacy PyPDF2 names; confirm the installed version still
 	# provides them before upgrading the library.
 	base_pdf = PyPDF2.PdfFileReader(pdf_path)
 	new_pdf = PyPDF2.PdfFileWriter()
 	next_student_file = None

 	# pdfplumber supplies the page text, PyPDF2 supplies the page objects
 	# that are copied; the same index is used for both, so the code relies
 	# on the two libraries enumerating pages in the same order.
 	with pdfplumber.open(pdf_path) as pdf:
 		for page_index, page in enumerate(pdf.pages):
 			# extract_text() may give None for a page without a text
 			# layer; treat that as "no break point here" instead of
 			# crashing on the membership test below.
 			page_text = page.extract_text() or ''

 			if pdf_break_point in page_text:
 				# A new section starts on this page: flush what has been
 				# collected so far, but only once a name is known (pages
 				# before the first break point have no name yet and stay
 				# in the current writer).
 				if new_pdf.getNumPages() > 0 and next_student_file is not None:
 					with open(next_student_file + ".pdf", 'wb') as fh:
 						new_pdf.write(fh)
 					new_pdf = PyPDF2.PdfFileWriter()

 				# Name for the section that begins here: the rest of the
 				# line following the first occurrence of the phrase.
 				next_student_file = page_text.split(pdf_break_point)[1].split("\n")[0].strip()

 			# Every page, break point or not, belongs to the current section.
 			new_pdf.addPage(base_pdf.getPage(page_index))

 	# Flush the final section, which has no following break point to trigger it.
 	if new_pdf.getNumPages() > 0 and next_student_file is not None:
 		with open(next_student_file + ".pdf", 'wb') as fh:
 			new_pdf.write(fh)
	# Split a PDF into separate files: a new file starts at every page that contains the break-point phrase, and each file is named after the text that follows the phrase on that line.

	import PyPDF2
	import pdfplumber

	if __name__ == '__main__':
		# Script entry point: walk the source PDF page by page and start a new
		# output file every time a page contains the break-point phrase.
		#
		# Behaviour worth knowing:
		#   * Pages that come before the first break-point page are kept and end
		#     up at the front of the first output file.
		#   * If no page contains the phrase, nothing is written.
		#   * Output files are opened with 'wb', so two sections that yield the
		#     same name overwrite each other.
		#   * NOTE(review): the output name is taken verbatim from the PDF text
		#     and is not checked for path separators or emptiness.
		pdf_path = 'MBA Report Creator.pdf'
		pdf_break_point = 'Student_Number '
		# NOTE(review): PdfFileReader/PdfFileWriter/addPage/getPage/getNumPages
		# are the legacy PyPDF2 names; confirm the installed version still
		# provides them before upgrading the library.
		base_pdf = PyPDF2.PdfFileReader(pdf_path)
		new_pdf = PyPDF2.PdfFileWriter()
		next_student_file = None

		# pdfplumber supplies the page text, PyPDF2 supplies the page objects
		# that are copied; the same index is used for both, so the code relies
		# on the two libraries enumerating pages in the same order.
		with pdfplumber.open(pdf_path) as pdf:
			for page_index, page in enumerate(pdf.pages):
				# extract_text() may give None for a page without a text
				# layer; treat that as "no break point here" instead of
				# crashing on the membership test below.
				page_text = page.extract_text() or ''

				if pdf_break_point in page_text:
					# A new section starts on this page: flush what has been
					# collected so far, but only once a name is known (pages
					# before the first break point have no name yet and stay
					# in the current writer).
					if new_pdf.getNumPages() > 0 and next_student_file is not None:
						with open(next_student_file + ".pdf", 'wb') as fh:
							new_pdf.write(fh)
						new_pdf = PyPDF2.PdfFileWriter()

					# Name for the section that begins here: the rest of the
					# line following the first occurrence of the phrase.
					next_student_file = page_text.split(pdf_break_point)[1].split("\n")[0].strip()

				# Every page, break point or not, belongs to the current section.
				new_pdf.addPage(base_pdf.getPage(page_index))

		# Flush the final section, which has no following break point to trigger it.
		if new_pdf.getNumPages() > 0 and next_student_file is not None:
			with open(next_student_file + ".pdf", 'wb') as fh:
				new_pdf.write(fh)