Created
February 2, 2024 10:21
-
-
Save walkermatt/2c706c431c40610c9da78fafebcf5fb9 to your computer and use it in GitHub Desktop.
Parse a directory of nginx access logs to get a list of referer values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Parser } from '@robojones/nginx-log-parser'; | |
import fs from 'fs'; | |
import { createReadStream } from 'fs'; | |
import zlib from 'zlib'; | |
import readline from 'readline'; | |
import path from 'path'; | |
/**
 * Lazily yield the lines of a log file, decompressing gzip on the fly.
 *
 * A file whose extension is exactly `.gz` is piped through gunzip; any other
 * file is read as plain text. The underlying streams are released when the
 * iteration finishes, fails, or is abandoned early by the consumer.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {*} [filter] - Currently unused; retained so callers that pass a
 *   second argument keep working.
 * @yields {string} Each line of the file, as produced by `readline`.
 * @throws {Error} If the file cannot be read or the gzip data is invalid.
 */
async function* processFile(filePath, filter) {
  const source = createReadStream(filePath);
  let input = source;

  if (path.extname(filePath) === '.gz') {
    const gunzip = zlib.createGunzip();
    // pipe() does not forward errors: without this, a failed read would be an
    // unhandled 'error' event on `source` and the gunzip stream would never end.
    source.on('error', (error) => gunzip.destroy(error));
    input = source.pipe(gunzip);
  }

  const rl = readline.createInterface({
    input,
    crlfDelay: Infinity,
  });

  // Remember a stream failure and stop the line iteration so the error can be
  // surfaced to the consumer even if readline itself does not propagate it.
  let streamError = null;
  input.on('error', (error) => {
    streamError = error;
    rl.close();
  });

  try {
    for await (const line of rl) {
      yield line;
    }
    if (streamError !== null) {
      throw streamError;
    }
  } finally {
    // Runs on completion, on error, and when the consumer breaks out early.
    rl.close();
    input.destroy();
    source.destroy();
  }
}
/**
 * Print every distinct referer found in the access logs of a directory.
 *
 * Each non-directory entry of `directoryPath` is read line by line with
 * `processFile`, every line is parsed against the nginx log schema below, and
 * the set of referer values (excluding the `-` placeholder) is written to
 * stdout, one per line. Any failure is reported on stderr; the returned
 * promise does not reject.
 *
 * @param {string} directoryPath - Directory containing the access log files.
 * @returns {Promise<void>}
 */
async function readDirectory(directoryPath) {
  // The schema from the nginx config
  const schema =
    '$remote_addr - $remote_user [$time_local] "$request" $status $bytes_sent "$http_referer" "$http_user_agent"';
  // Create a parser that can read our log schema.
  const parser = new Parser(schema);

  try {
    const entries = await fs.promises.readdir(directoryPath, {
      withFileTypes: true,
    });
    const referers = new Set();

    for (const entry of entries) {
      // Sub-directories cannot be opened as files (EISDIR), so skip them.
      if (entry.isDirectory()) {
        continue;
      }

      const filePath = path.join(directoryPath, entry.name);
      for await (const line of processFile(filePath)) {
        const referer = parser.parseLine(line).http_referer;
        // '-' marks "no referer" in the log. A missing field is skipped too so
        // "undefined" is never printed — NOTE(review): presumably happens for
        // lines that do not match the schema; confirm against the parser.
        if (referer == null || referer === '-') {
          continue;
        }
        referers.add(referer);
      }
    }

    for (const referer of referers) {
      console.log(referer);
    }
  } catch (error) {
    console.error('Error reading directory:', error);
  }
}
// Start reading the directory named by the first command-line argument,
// falling back to the default location when no argument is supplied.
readDirectory(process.argv[2] || '/tmp/access_logs/');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "name": "parse-nginx-logs",
  "version": "1.0.0",
  "description": "",
  "type": "module",
  "main": "main.js",
  "scripts": {},
  "keywords": [],
  "author": "Matt Walker (http://longwayaround.org.uk)",
  "license": "ISC",
  "dependencies": {
    "@robojones/nginx-log-parser": "^0.0.6"
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment