From 2d9ed73d996f157faa5690a0afe23f98aaaec456 Mon Sep 17 00:00:00 2001 From: Googulator Date: Wed, 22 Sep 2021 14:41:03 +0200 Subject: [PATCH] Fix handling of zero-length label written last in the lexicon There is no restriction in place disallowing zero-length labels, and in some use cases, it makes sense to have them. However, when the lexicon file happens to include a zero-length label as the last label in the list, split(" ") swallows it. This can later lead to ArrayIndexOutOfBoundsExceptions when something gets classified with the zero-length label. --- src/main/java/com/digitalpebble/classification/Lexicon.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/digitalpebble/classification/Lexicon.java b/src/main/java/com/digitalpebble/classification/Lexicon.java index 3a59a75..d608265 100755 --- a/src/main/java/com/digitalpebble/classification/Lexicon.java +++ b/src/main/java/com/digitalpebble/classification/Lexicon.java @@ -303,7 +303,11 @@ private void loadFromFile(String filename) throws IOException { .readLine()); this.normalizeVector = Boolean.parseBoolean(reader.readLine()); this.classifierType = reader.readLine(); - this.labels = Arrays.asList(reader.readLine().split(" ")); + this.labels = new ArrayList(); + // Need -1 to handle the case where the last label is zero length + this.labels.addAll(Arrays.asList(reader.readLine().split(" ", -1))); + // Remove the extra entry created by the terminating space + this.labels.remove(labels.size() - 1); String[] tmp = reader.readLine().split(" "); for (String f : tmp) { // see if there is a custom weight for it