Skip to content

Commit

Permalink
Fix JSONL data when using a directory as training data
Browse files: browse the repository at this point in the history
  • Loading branch information
josStorer committed Jul 9, 2023
1 parent 07d89e3 commit 5b1a944
Showing 1 changed file with 8 additions and 20 deletions.
28 changes: 8 additions & 20 deletions backend-golang/rwkv.go
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
package backend_golang

import (
"encoding/json"
"errors"
"os"
"os/exec"
Expand Down Expand Up @@ -59,30 +60,17 @@ func (a *App) ConvertData(python string, input string, outputPrefix string, voca
if file.IsDir() || !strings.HasSuffix(file.Name(), ".txt") {
continue
}
txtFile, err := os.Open(input + "/" + file.Name())
textContent, err := os.ReadFile(input + "/" + file.Name())
if err != nil {
return "", err
}
defer txtFile.Close()
jsonlFile.WriteString("{\"text\": \"")
buf := make([]byte, 1024)
for {
n, err := txtFile.Read(buf)
if err != nil {
break
}
// regex replace \r\n \n \r with \\n
jsonlFile.WriteString(
strings.ReplaceAll(
strings.ReplaceAll(
strings.ReplaceAll(
strings.ReplaceAll(string(buf[:n]),
"\r\n", "\\n"),
"\n", "\\n"),
"\r", "\\n"),
"\n\n", "\\n"))
textJson, err := json.Marshal(map[string]string{"text": string(textContent)})
if err != nil {
return "", err
}
if _, err := jsonlFile.WriteString(string(textJson) + "\n"); err != nil {
return "", err
}
jsonlFile.WriteString("\"}\n")
}
input = outputPrefix + ".jsonl"
} else if err != nil {
Expand Down

0 comments on commit 5b1a944

Please sign in to comment.