Skip to content

Instantly share code, notes, and snippets.

@ZachWatkins
Last active June 26, 2025 19:45
Show Gist options
  • Save ZachWatkins/ee49177cdd4b5f0825c06816d6e4a5a1 to your computer and use it in GitHub Desktop.
Save ZachWatkins/ee49177cdd4b5f0825c06816d6e4a5a1 to your computer and use it in GitHub Desktop.
Service class for using PHPSpreadsheet to read workbooks with low memory use
<?php
namespace App\Services;
use PhpOffice\PhpSpreadsheet\IOFactory;
use PhpOffice\PhpSpreadsheet\Reader\IReader;
use PhpOffice\PhpSpreadsheet\Reader\IReadFilter;
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
class WorkbookReadService
{
protected ?string $filePath = null;
protected ?string $fileType = null;
protected ?array $worksheetInfo = null;
public int $chunkSize = 400;
protected ?IReader $reader = null;
protected ?array $headerRow = null;
protected ?string $highestHeaderColumn = null;
protected $headerFormatter;
/**
* Set the file path and determine the file type for processing.
* @param string $filePath
*/
public function setFilePath(string $filePath): void
{
if ($filePath === $this->filePath) {
return;
}
$this->filePath = $filePath;
$this->fileType = ucfirst(pathinfo($filePath, PATHINFO_EXTENSION));
$this->headerRow = null;
$this->worksheetInfo = null;
$this->reader = null;
$this->highestHeaderColumn = null;
}
/**
* Get the header row from the workbook. Filters out empty header cells.
* @return array
* @throws \RuntimeException If the file path is not set.
*/
public function getHeaderRow(?Worksheet $sheet = null): array
{
if (!$this->filePath) {
throw new \RuntimeException('File path is not set.');
}
if ($this->headerRow) {
return $this->headerRow;
}
if (!$sheet) {
$reader = $this->getReader();
$chunkFilter = new ChunkReadFilter(1, 1);
$reader->setReadFilter($chunkFilter);
$spreadsheet = $reader->load($this->filePath);
$sheet = $spreadsheet->getActiveSheet();
}
$maxDataColumn = $sheet->getHighestColumn();
$rowIterator = $sheet->getRowIterator(1, 1);
$this->headerRow = [];
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $maxDataColumn);
foreach ($columnIterator as $cell) {
if ($cell->getValue() !== null && $cell->getValue() !== '') {
$column = $cell->getColumn();
$this->headerRow[$column] = $cell->getValue();
$this->highestHeaderColumn = $column;
}
}
}
return $this->headerRow;
}
/**
* Get the worksheet info.
* @return array
*/
public function getWorksheetInfo(): array
{
if (!$this->filePath) {
throw new \RuntimeException('File path is not set.');
}
if (!$this->worksheetInfo) {
$reader = $this->getReader();
$worksheetInfo = $reader->listWorksheetInfo($this->filePath)[0] ?? [];
$this->worksheetInfo = [
'worksheetName' => $worksheetInfo['worksheetName'] ?? '',
'totalRows' => $worksheetInfo['totalRows'] ?? 0,
'totalColumns' => $worksheetInfo['totalColumns'] ?? 0,
'lastColumnLetter' => $worksheetInfo['lastColumnLetter'] ?? '',
'lastColumnIndex' => $worksheetInfo['lastColumnIndex'] ?? 0,
'sheetState' => $worksheetInfo['sheetState'] ?? '',
];
}
return $this->worksheetInfo;
}
/**
* Yield every row from the workbook.
* @return \Generator
*/
public function yieldRow(): \Generator
{
if (!$this->filePath) {
throw new \RuntimeException('File path is not set.');
}
$rawRows = $this->getWorksheetInfo()['totalRows'];
for ($startRow = 1; $startRow <= $rawRows; $startRow += $this->chunkSize) {
$reader = $this->getReader();
$chunkFilter = new ChunkReadFilter($startRow, $this->chunkSize);
$reader->setReadFilter($chunkFilter);
$spreadsheet = $reader->load($this->filePath);
$activeSheet = $spreadsheet->getActiveSheet();
$endColumn = $this->getHighestHeaderColumn($activeSheet);
$endRow = min($startRow + $this->chunkSize - 1, $rawRows);
$rowIterator = $activeSheet->getRowIterator($startRow, $endRow);
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $endColumn);
$values = [];
foreach ($columnIterator as $cell) {
$values[$cell->getColumn()] = $cell->getValue();
}
yield $values;
}
// Release memory.
$spreadsheet->disconnectWorksheets();
$spreadsheet->__destruct();
$spreadsheet = null;
unset($spreadsheet);
$reader = null;
unset($reader);
$chunkFilter = null;
unset($chunkFilter);
}
}
/**
* Yield a chunk of rows from the workbook in a single read operation.
* @param int $startRow The first row to read.
* @param int|null $endRow The last row to read. If null, it will read the next chunk size rows.
* @return \Generator
* @throws \RuntimeException If the file path is not set.
* @throws \InvalidArgumentException If the start row is less than 1.
*/
public function yieldRowRange(int $startRow, int $endRow): \Generator
{
if (!$this->filePath) {
throw new \RuntimeException('File path is not set.');
}
if ($startRow < 1) {
throw new \InvalidArgumentException('Start row must be greater than or equal to 1.');
}
if ($endRow < $startRow) {
throw new \InvalidArgumentException('End row must be greater than or equal to start row. Found startRow: ' . $startRow . ', endRow: ' . $endRow);
}
$reader = $this->getReader();
$chunkFilter = new ChunkReadFilter($startRow, $this->chunkSize);
$reader->setReadFilter($chunkFilter);
$spreadsheet = $reader->load($this->filePath);
$activeSheet = $spreadsheet->getActiveSheet();
$endColumn = $this->getHighestHeaderColumn($activeSheet);
$rowIterator = $activeSheet->getRowIterator($startRow, $endRow);
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $endColumn);
$values = [];
foreach ($columnIterator as $cell) {
$values[$cell->getColumn()] = $cell->getValue();
}
yield $values;
}
// Release memory.
$spreadsheet->disconnectWorksheets();
$spreadsheet->__destruct();
$spreadsheet = null;
unset($spreadsheet);
$reader = null;
unset($reader);
$chunkFilter = null;
unset($chunkFilter);
}
/**
* Yield a chunk of rows from the workbook in a single read operation with the header row's cell values as row cell keys.
* @param int $startRow The first row to read.
* @param int|null $endRow The last row to read. If null, it will read the next chunk size rows.
* @return \Generator
* @throws \RuntimeException If the file path is not set.
* @throws \InvalidArgumentException If the start row is less than 1.
*/
public function yieldRowRangeWithHeaders(int $startRow, int $endRow): \Generator
{
if (!$this->filePath) {
throw new \RuntimeException('File path is not set.');
}
if ($startRow < 1) {
throw new \InvalidArgumentException('Start row must be greater than or equal to 1.');
}
if ($endRow < $startRow) {
throw new \InvalidArgumentException('End row must be greater than or equal to start row. Found startRow: ' . $startRow . ', endRow: ' . $endRow);
}
$reader = $this->getReader();
$chunkFilter = new ChunkReadFilter($startRow, $this->chunkSize);
$reader->setReadFilter($chunkFilter);
$spreadsheet = $reader->load($this->filePath);
$activeSheet = $spreadsheet->getActiveSheet();
$rowIterator = $activeSheet->getRowIterator(1, 1);
$header = [];
$endColumn = $activeSheet->getHighestDataColumn();
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $activeSheet->getHighestDataColumn());
foreach ($columnIterator as $cell) {
if ($cell->getValue() === null || $cell->getValue() === '') {
break; // Stop at the first empty cell in the header row.
}
$columnLetter = $cell->getColumn();
if ($this->headerFormatter) {
$header[$columnLetter] = call_user_func($this->headerFormatter, $cell->getValue());
} else {
$header[$columnLetter] = $cell->getValue();
}
$endColumn = $columnLetter;
}
}
$rowIterator = $activeSheet->getRowIterator($startRow, $endRow);
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $endColumn);
$values = [];
foreach ($columnIterator as $cell) {
$values[$header[$cell->getColumn()]] = $cell->getValue();
}
yield $values;
}
// Release memory.
$spreadsheet->disconnectWorksheets();
$spreadsheet->__destruct();
$spreadsheet = null;
unset($spreadsheet);
$reader = null;
unset($reader);
$chunkFilter = null;
unset($chunkFilter);
}
/**
* Set the file reader object, or create a new one based on the file type.
* @return IReader
*/
protected function getReader(): IReader
{
if (!$this->fileType) {
throw new \RuntimeException('File type is not set.');
}
if (!$this->reader) {
$this->reader = IOFactory::createReader($this->fileType);
}
return $this->reader;
}
/**
* Get the highest header column letter from the worksheet.
* This is useful for determining the last column with data in the header row.
* @param Worksheet $sheet
* @return string
*/
protected function getHighestHeaderColumn(Worksheet $sheet): string
{
if ($this->highestHeaderColumn) {
return $this->highestHeaderColumn;
}
$maxColumn = $sheet->getHighestColumn();
$rowIterator = $sheet->getRowIterator(1, 1);
foreach ($rowIterator as $row) {
$columnIterator = $row->getCellIterator('A', $maxColumn);
foreach ($columnIterator as $cell) {
if ($cell->getValue() !== null && $cell->getValue() !== '') {
$maxColumn = $cell->getColumn();
}
}
}
$this->highestHeaderColumn = $maxColumn;
return $maxColumn;
}
public function setHeaderFormatter(callable $formatter): void
{
$this->headerFormatter = $formatter;
}
}
/** Define a Read Filter class implementing IReadFilter */
class ChunkReadFilter implements IReadFilter
{
private int $startRow;
private int $endRow;
/**
* We expect a list of the rows that we want to read to be passed into the constructor.
* @param int $startRow The first row to read.
* @param int $chunkSize The number of rows to read in this chunk.
*/
public function __construct(int $startRow, int $chunkSize)
{
$this->startRow = $startRow;
$this->endRow = $startRow + $chunkSize;
}
public function readCell(string $columnAddress, int $row, string $worksheetName = ''): bool
{
// Only read the heading row, and the rows that were configured in the constructor
if (($row == 1) || ($row >= $this->startRow && $row < $this->endRow)) {
return true;
}
return false;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment