// threadlocalutils.cpp

/*
This file is part of FlashMQ (https://www.flashmq.org)
Copyright (C) 2021-2023 Wiebe Cazemier

FlashMQ is free software: you can redistribute it and/or modify
it under the terms of The Open Software License 3.0 (OSL-3.0).

See LICENSE for license details.
*/

#ifdef __SSE4_2__

#include "threadlocalutils.h"

#include <algorithm>
#include <cstring>
#include <cassert>
#include <cstdint>
#include <stdexcept>

std::vector<std::string> SimdUtils::splitTopic(const std::string &topic)
{
    const unsigned s = topic.size();

    if (s > 65535)
        throw std::runtime_error("Trying to split a string longer than the maximum MQTT topic length.");

    // Prefill the last 16 byte "line" with zeros
    _mm_store_si128((__m128i *)(topicCopy.begin() + (s & ~15u)), _mm_setzero_si128());

    std::copy_n(topic.begin(), s, topicCopy.begin());
    /* Add a trailing '/'
     * The reason is that we then always find a last / so a special case to handle the last subtopic is not necessary.
     * We can just stop searching when the location is of this trailing /
     * */
    topicCopy[s] = '/';

    std::vector<std::string> output;
    output.reserve(16);

    const char * b = topicCopy.data();
    const char * i = topicCopy.data();
    const char * const e = topicCopy.data() + s;
    while (true)
    {
        __m128i loaded = _mm_loadu_si128((const __m128i *)i);
        unsigned index = _mm_cmpestri(slashes, 1, loaded, 16, 0);
        i += index;
        if (index < 16)
        {
            // This means that a '/' was found
            // i will point at the position where '/' was found
            output.emplace_back(b, i);
            if (i == e)
                break;
            ++i; // advance over the separator
            b = i;
        }
    }

    return output;
}

/**
 * @brief SimdUtils::isValidUtf8 checks UTF-8 validity 16 bytes at a time, using SSE 4.2.
 *
 * ASCII bytes are range checked with SIMD compares against lowerBound and lastAsciiChar. Multi-byte sequences are
 * decoded one by one and rejected when they are truncated, overlong, a surrogate, in the range U+007F-U+009F, a
 * noncharacter, or beyond U+10FFFF.
 *
 * @param s is the string to validate. When its length plus 16 exceeds TOPIC_MEMORY_LENGTH, it is reported as invalid.
 * @param alsoCheckInvalidPublishChars is for checking the presence of '#' and '+' which is not allowed in publishes.
 * @return true when the string passed all checks, false otherwise.
 */
bool SimdUtils::isValidUtf8(const std::string &s, bool alsoCheckInvalidPublishChars)
{
    const int len = s.size();

    if (len + 16 > TOPIC_MEMORY_LENGTH)
        return false;

    std::memcpy(topicCopy.data(), s.c_str(), len);
    std::memset(&topicCopy.data()[len], 0x20, 16); // I fill out with spaces, as valid chars

    int n = 0;
    const char *i = topicCopy.data();
    while (n < len)
    {
        const int len_left = len - n;
        assert(len_left > 0);
        __m128i loaded = _mm_loadu_si128((__m128i*)&i[n]);
        __m128i loaded_AND_non_ascii = _mm_and_si128(loaded, non_ascii_mask);

        // Each movemask is reduced to an int first, and only then are the two results OR-ed together.
        if (alsoCheckInvalidPublishChars && (_mm_movemask_epi8(_mm_cmpeq_epi8(loaded, pound)) || _mm_movemask_epi8(_mm_cmpeq_epi8(loaded, plus))))
            return false;

        // Index of the first byte with the high bit set within the remaining length, or 16 when there is none.
        int index = _mm_cmpestri(non_ascii_mask, 1, loaded_AND_non_ascii, len_left, 0);
        n += index;

        // Checking multi-byte chars one by one. With some effort, this may be done using SIMD too, but the majority of uses will
        // have a minimum of multi byte chars.
        if (index < 16)
        {
            // The ASCII bytes in front of the multi-byte char are skipped over by 'n += index', so they must be range
            // checked here. The compares are signed and would also flag bytes with the high bit set, hence only the
            // bits of the bytes before 'index' are looked at.
            const unsigned ascii_prefix_bits = (1u << index) - 1;
            const unsigned out_of_range_bits = _mm_movemask_epi8(_mm_cmplt_epi8(loaded, lowerBound))
                                             | _mm_movemask_epi8(_mm_cmpgt_epi8(loaded, lastAsciiChar));
            if (out_of_range_bits & ascii_prefix_bits)
                return false;

            uint8_t x = i[n++];
            int8_t char_len_left = 0;
            int8_t total_char_len = 0;
            uint32_t cur_code_point = 0;

            if((x & 0b11100000) == 0b11000000) // 2 byte char
            {
                char_len_left = 1;
                cur_code_point += ((x & 0b00011111) << 6);
            }
            else if((x & 0b11110000) == 0b11100000) // 3 byte char
            {
                char_len_left = 2;
                cur_code_point += ((x & 0b00001111) << 12);
            }
            else if((x & 0b11111000) == 0b11110000) // 4 byte char
            {
                char_len_left = 3;
                cur_code_point += ((x & 0b00000111) << 18);
            }
            else
                return false; // A stray continuation byte, or a lead byte for a sequence longer than 4 bytes.

            total_char_len = char_len_left + 1;

            while (char_len_left > 0)
            {
                // The string ended in the middle of a multi-byte char.
                if (n >= len)
                    return false;

                x = i[n++];

                if((x & 0b11000000) != 0b10000000) // All remaining bytes of this code point need to start with 10
                    return false;
                char_len_left--;
                cur_code_point += ((x & 0b00111111) << (6*char_len_left));
            }

            // Check overlong values, to avoid having multiple representations of the same value.
            if (total_char_len == 2 && cur_code_point < 0x80)
                return false;
            else if (total_char_len == 3 && cur_code_point < 0x800)
                return false;
            else if (total_char_len == 4 && cur_code_point < 0x10000)
                return false;

            // A 4 byte sequence can encode up to 0x1FFFFF, but UTF-8 ends at U+10FFFF.
            if (cur_code_point > 0x10FFFF)
                return false;

            if (cur_code_point >= 0xD800 && cur_code_point <= 0xDFFF) // Dec 55296-57343
                return false;

            if (cur_code_point >= 0x7F && cur_code_point <= 0x009F)
                return false;

            // Unicode spec: "Which code points are noncharacters?".
            if (cur_code_point >= 0xFDD0 && cur_code_point <= 0xFDEF)
                return false;
            // The last two code points of each of the 17 planes are the remaining 34 non-chars.
            const uint32_t plane = (cur_code_point & 0x1F0000) >> 16;
            const uint32_t last_16_bit = cur_code_point & 0xFFFF;
            if (plane <= 16 && (last_16_bit == 0xFFFE || last_16_bit == 0xFFFF))
                return false;
        }
        else
        {
            // No byte with the high bit set in this window, so all 16 bytes (string plus space padding) can be
            // range checked in one go.
            if (_mm_movemask_epi8(_mm_cmplt_epi8(loaded, lowerBound)))
                return false;

            if (_mm_movemask_epi8(_mm_cmpgt_epi8(loaded, lastAsciiChar)))
                return false;
        }

    }

    return true;
}

#endif