// maxFileNameLength is the largest size, in bytes, allowed for the name of an
// article file.
//
// Based on the table in
// https://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits the majority
// of filesystems have a limit of 255. Some of them count bytes and others
// count UTF-8 characters; a name that fits in 255 bytes also fits in 255
// characters, so the limit is applied to bytes.
const maxFileNameLength = 255

// truncateString returns s shortened to at most n bytes without cutting a
// multi-byte UTF-8 sequence in half.
//
// If s already fits in n bytes it is returned unchanged. Otherwise the cut
// point is moved back from n until the first dropped byte is the start of a
// rune, so the kept prefix always ends on a character boundary. A limit of
// zero or less yields the empty string instead of panicking on a negative
// slice bound.
//
// Only the bytes around the cut are inspected: malformed bytes elsewhere in s
// do not cause the whole string to be discarded.
func truncateString(s string, n int) string {
	if n <= 0 {
		return ""
	}
	if len(s) <= n {
		return s
	}
	// len(s) > n here, so s[n] is always in range. It is the first byte that
	// would be dropped; if it is a continuation byte the cut is mid-rune.
	for n > 0 && !utf8.RuneStart(s[n]) {
		n--
	}
	return s[:n]
}
wo3 (我) + "我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我我", // 32 x Chinese wo3 (我) + "short"} // should not get truncated + + for _, name := range names { + shortened := truncateString(name, maxFileNameLength) + if len(name) < maxFileNameLength { + if name != shortened { + t.Errorf("Filename should not be altered, but it was. Original was %s", name) + } + } else { + if len(shortened) > maxFileNameLength { + t.Errorf("Filename was too long - should have been truncated. Length was %d", len(name)) + } + } + } +}