diff --git a/cmd/registry.go b/cmd/registry.go
index b493749..47ad8f0 100644
--- a/cmd/registry.go
+++ b/cmd/registry.go
@@ -27,7 +27,7 @@ var registryCmd = &cobra.Command{
 		tw := new(tabwriter.Writer)
 		tw.Init(os.Stdout, 0, 8, 0, '\t', 0)
 		for v, k := range tr.Map {
-			fmt.Fprintf(tw, "value: %s \t key: %b \n", v, k)
+			fmt.Fprintf(tw, "value: \"%s\" \t key: %b \n", v, k)
 		}
 		tw.Flush()
 	},
diff --git a/internal/registry/registry.go b/internal/registry/registry.go
index 3fb1bfb..be18d7d 100644
--- a/internal/registry/registry.go
+++ b/internal/registry/registry.go
@@ -4,8 +4,19 @@ type TokenRegistry struct{
 	Map map[string]int64
 }
 
-func (tr TokenRegistry) DistributeTokens(list []string) {
-	for _, v := range list {
-		tr.Map[v] = int64(len(tr.Map))
+// DistributeTokens assigns an ID to every token in list, where list holds one
+// token slice per file. A token that is not yet in tr.Map gets the current map
+// size as its ID; a token that is already registered keeps its existing ID.
+// Writing to the map panics if tr.Map is nil.
+func (tr TokenRegistry) DistributeTokens(list [][]string) {
+	for _, file := range list {
+		for _, value := range file {
+			// Skip known tokens: re-assigning len(tr.Map) to an existing key
+			// would hand the same ID to the next new token.
+			if _, seen := tr.Map[value]; seen {
+				continue
+			}
+			tr.Map[value] = int64(len(tr.Map))
+		}
 	}
 }
diff --git a/internal/splitter/splitter.go b/internal/splitter/splitter.go
index bbfc72d..50d35bd 100644
--- a/internal/splitter/splitter.go
+++ b/internal/splitter/splitter.go
@@ -6,11 +6,34 @@ import (
 	"strings"
 )
 
-func SplitFile(path string) []string {
-	fileContent := getFileContent(path)
-	return strings.Fields(fileContent)
+// SplitFile tokenizes the file at path, or every file below path when path is
+// a directory, and returns one token slice per file.
+//
+// An empty path yields an empty, non-nil result. Subdirectories are handled by
+// recursion. SplitFile panics if path cannot be stat'ed or listed.
+func SplitFile(path string) [][]string {
+	if path == "" {
+		return make([][]string, 0)
+	}
+	info, err := os.Stat(path)
+	if err != nil {
+		panic(err)
+	}
+
+	if !info.IsDir() {
+		return [][]string{tokenize(getFileContent(path))}
+	}
+
+	paths := getFilePathsFromDir(path)
+	// Length 0 with capacity only: rows are appended below, so a non-zero
+	// length would leave empty rows at the front of the result.
+	matrix := make([][]string, 0, len(paths))
+	for _, filePath := range paths {
+		matrix = addToMatrix(matrix, SplitFile(filePath))
+	}
+	return matrix
 }
 
 func getFileContent(path string) string{
 	data, err := os.ReadFile(path)
 	if err != nil {
@@ -18,3 +41,32 @@ func getFileContent(path string) string{
 	}
 	return string(data)
 }
+
+// getFilePathsFromDir returns the path of every entry directly inside the
+// directory at path, each built as path + "/" + entry name. It panics if the
+// directory cannot be read.
+func getFilePathsFromDir(path string) []string {
+	files, err := os.ReadDir(path)
+	if err != nil {
+		panic(fmt.Errorf("directory could not be read %s: %w", path, err))
+	}
+
+	// Length 0 with capacity only: entries are appended below, so a non-zero
+	// length would put empty strings in front of the real paths.
+	paths := make([]string, 0, len(files))
+	for _, file := range files {
+		paths = append(paths, fmt.Sprintf("%s/%s", path, file.Name()))
+	}
+	return paths
+}
+
+// tokenize splits s into whitespace-separated tokens.
+func tokenize(s string) []string {
+	return strings.Fields(s)
+}
+
+// addToMatrix appends every row of rows to matrix and returns the extended
+// matrix.
+func addToMatrix(matrix [][]string, rows [][]string) [][]string {
+	return append(matrix, rows...)
+}