Last active
January 15, 2020 10:53
-
-
Save utrack/3629bcf8106ed2c3a4c4289cc976b803 to your computer and use it in GitHub Desktop.
bcp data slurper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"encoding/csv" | |
"os" | |
) | |
func main() { | |
bufSize := 2 ^ 15 | |
filename := os.Args[1] | |
f, err := os.Open(filename) | |
if err != nil { | |
panic(err) | |
} | |
rdr := bufio.NewReaderSize(f, bufSize) | |
w := csv.NewWriter(bufio.NewWriter(os.Stdout)) | |
scanner := bufio.NewScanner(rdr) | |
scanner.Split(scanTok) | |
var record []string | |
append := func(v string) { | |
if len(v) == 0 { | |
record = append(record, "\\N") | |
return | |
} | |
if v == "\000" { | |
record = append(record, "") | |
} else { | |
record = append(record, v) | |
} | |
} | |
for scanner.Scan() { | |
text := scanner.Text() | |
l := len(text) | |
if l == 0 { | |
continue | |
} | |
sig := text[l-1] | |
text = text[:l-1] | |
switch sig { | |
case '\001': | |
append(text) | |
case '\002': | |
append(text) | |
w.Write(record) | |
record = record[:0] | |
case '\000': | |
append("\000") | |
} | |
} | |
w.Flush() | |
} | |
// scanTok is a split function for bufio.Scanner. A token is everything up to
// and including the first '\000', '\001' or '\002' byte in data, so the
// caller can tell from the token's last byte which delimiter ended it.
//
// When no delimiter is present and more input may follow, it asks for more
// data. At end of input the remaining bytes (possibly none) are handed back
// as the last token together with bufio.ErrFinalToken, which stops the scan
// without making Scanner.Err report an error.
func scanTok(data []byte, atEOF bool) (advance int, token []byte, err error) {
	for pos, b := range data {
		if b == '\000' || b == '\001' || b == '\002' {
			end := pos + 1
			return end, data[:end], nil
		}
	}
	if atEOF {
		// Nothing left to read: deliver whatever remains and end the scan.
		return 0, data, bufio.ErrFinalToken
	}
	// No delimiter yet; request a larger window.
	return 0, nil, nil
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment