Skip to content

Instantly share code, notes, and snippets.

@hatappi
Last active July 21, 2017 07:15
Show Gist options
  • Save hatappi/46a6d483049eecc7c86f0ce2c84d03f8 to your computer and use it in GitHub Desktop.
Save hatappi/46a6d483049eecc7c86f0ce2c84d03f8 to your computer and use it in GitHub Desktop.
require 'arrow'
# Memory-map the Arrow file and wrap it in a record-batch file reader.
input = Arrow::MemoryMappedInputStream.open("/dev/shm/arrays.arrow")
reader = Arrow::RecordBatchFileReader.new(input)

# Take the first record batch once; every column below is read from it.
batch = reader.first

# Each entry of a column is converted to a plain Ruby array.
batch.find_column('x').entries.map { |entry| entry.to_a }
# => [[1, 2], [3, 4]]
batch.find_column('y').entries.map { |entry| entry.to_a }
# => [[5, 6], [7, 8]]
batch.find_column('z').entries.map { |entry| entry.to_a }
# => [[19, 22], [43, 50]]
import pyarrow as pa
import numpy as np
# Two 2x2 integer inputs and their matrix product.  Plain ndarrays with
# np.dot are used instead of np.matrix (which NumPy no longer recommends);
# the nested lists produced by .tolist() are the same.
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])
z = np.dot(x, y)

# One list-typed Arrow column per matrix; each matrix row becomes one entry.
batch = pa.RecordBatch.from_arrays(
    [
        pa.array(x.tolist()),
        pa.array(y.tolist()),
        pa.array(z.tolist()),
    ],
    ['x', 'y', 'z'],
)

# Write the batch in the Arrow file format so another process can read it.
with pa.OSFile("/dev/shm/arrays.arrow", "wb") as f:
    schema = batch.schema
    writer = pa.RecordBatchFileWriter(f, schema)
    try:
        writer.write_batch(batch)
    finally:
        # Always close the writer, even if write_batch raises, so the file
        # is finalized before the underlying OSFile is closed.
        writer.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment