Last active
March 26, 2017 22:12
-
-
Save cbsmith/a2ed58260d7435d16274d4aafa65efa1 to your computer and use it in GitHub Desktop.
First pass at doing a live "cat" in python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Live cat of file. Uses mmap to try to be efficient with memory,
though reads a byte at a time, so it burns through CPU by hitting the
python interpreter loop N times. We could add buffering, but that
would just make the code more complex, and who wants that?

stdout gets the default buffering, so consider invoking with the
-u flag/PYTHONUNBUFFERED environment variable if not writing to
a file.
Requires fsmonitor: https://github.com/shaurz/fsmonitor
...which unfortunately lacks a decent monitor for non-Windows/Linux
systems, so burns up CPU unnecessarily on those platforms. Despite its
reputation, python makes it surprisingly difficult to write clean and
correct code.
No, this is *NOT* tail. Tail looks at the end of a file, and just
happens to support live updates to the end of a file.
'''
# all the python things we need | |
from contextlib import closing | |
from itertools import chain | |
from mmap import mmap, ACCESS_READ, ALLOCATIONGRANULARITY | |
from os import fstat | |
from sys import argv, exit, stdout | |
# the magic | |
from fsmonitor import FSMonitor | |
def dump_contents(fd, offset=0, output=stdout, chunk_size=65536):
    '''
    Copy the bytes of a file from offset up to its current end into output.

    fd - a file descriptor, needs to support mmap
    offset - offset into the file object to start at
    output - writable stream receiving the bytes; when it exposes a
             ``buffer`` attribute (as a text-mode stdout does on
             Python 3) the bytes go to that underlying binary stream
    chunk_size - maximum number of bytes passed to a single write call

    Returns the file size measured at the start of the call, which is the
    offset to pass on the next call. Calls exit(-1) when the file is
    shorter than offset.
    '''
    filesize = fstat(fd).st_size
    gap = filesize - offset
    if gap < 0:
        exit(-1)  # file shrank, can't do much useful
    if gap > 0:
        # The mapping yields bytes, which a text stream will not accept;
        # fall back to the stream itself when it has no binary layer.
        sink = getattr(output, 'buffer', output)
        step = max(1, chunk_size)
        # mmap offsets must be a multiple of ALLOCATIONGRANULARITY, so map
        # from the aligned boundary at or below offset and seek forward.
        pageoffset = offset % ALLOCATIONGRANULARITY
        mmapoffset = offset - pageoffset
        with closing(mmap(fd, filesize - mmapoffset, access=ACCESS_READ, offset=mmapoffset)) as mapped:
            mapped.seek(pageoffset)
            remaining = gap
            while remaining > 0:
                chunk = mapped.read(min(step, remaining))
                if not chunk:
                    break  # mapping ran out early; don't spin forever
                sink.write(chunk)
                remaining -= len(chunk)
    return filesize
def cat(infile=__file__, outfile=None):
    '''
    Copy infile to outfile (or stdout) and keep copying as it grows.

    infile - path of the file to follow; defaults to this source file
    outfile - path of the file to write; when falsy, stdout is used

    Runs until interrupted: there is no normal return. The read offset is
    carried across iterations so only newly appended bytes are written.
    '''
    m = FSMonitor()
    offset = 0
    m.add_file_watch(infile)
    # arbitrary buffering size set to 4096 bytes
    output = open(outfile, 'wb', 4096) if outfile else stdout
    try:
        while True:
            with open(infile, 'rb') as f:
                fd = f.fileno()
                # Dump what is already there *before* polling for events.
                # Function arguments are evaluated eagerly, so wrapping
                # m.read_events() in chain() would run the poll first and,
                # if that call blocks, delay the initial dump until the
                # file next changes.
                offset = dump_contents(fd, offset, output)
                for _ in m.read_events():
                    offset = dump_contents(fd, offset, output)
    finally:
        # Close only what this function opened; stdout is not ours to close.
        if output is not stdout:
            output.close()
if __name__ == '__main__':
    # Command-line arguments are forwarded positionally: [infile [outfile]]
    cat(*argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment