-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy paththreadlocalutils.cpp
174 lines (142 loc) · 5.64 KB
/
threadlocalutils.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/*
This file is part of FlashMQ (https://www.flashmq.org)
Copyright (C) 2021-2023 Wiebe Cazemier
FlashMQ is free software: you can redistribute it and/or modify
it under the terms of The Open Software License 3.0 (OSL-3.0).
See LICENSE for license details.
*/
#ifdef __SSE4_2__
#include "threadlocalutils.h"
#include <algorithm>
#include <cstring>
#include <cassert>
#include <cstdint>
#include <stdexcept>
std::vector<std::string> SimdUtils::splitTopic(const std::string &topic)
{
const unsigned s = topic.size();
if (s > 65535)
throw std::runtime_error("Trying to split a string longer than the maximum MQTT topic length.");
// Prefill the last 16 byte "line" with zeros
_mm_store_si128((__m128i *)(topicCopy.begin() + (s & ~15u)), _mm_setzero_si128());
std::copy_n(topic.begin(), s, topicCopy.begin());
/* Add a trailing '/'
* The reason is that we then always find a last / so a special case to handle the last subtopic is not necessary.
* We can just stop searching when the location is of this trailing /
* */
topicCopy[s] = '/';
std::vector<std::string> output;
output.reserve(16);
const char * b = topicCopy.data();
const char * i = topicCopy.data();
const char * const e = topicCopy.data() + s;
while (true)
{
__m128i loaded = _mm_loadu_si128((const __m128i *)i);
unsigned index = _mm_cmpestri(slashes, 1, loaded, 16, 0);
i += index;
if (index < 16)
{
// This means that a '/' was found
// i will point at the position where '/' was found
output.emplace_back(b, i);
if (i == e)
break;
++i; // advance over the separator
b = i;
}
}
return output;
}
/**
* @brief SimdUtils::isValidUtf8 checks UTF-8 validity 16 bytes at a time, using SSE 4.2.
* @param s
* @param alsoCheckInvalidPublishChars is for checking the presence of '#' and '+' which is not allowed in publishes.
* @return
*/
bool SimdUtils::isValidUtf8(const std::string &s, bool alsoCheckInvalidPublishChars)
{
const int len = s.size();
if (len + 16 > TOPIC_MEMORY_LENGTH)
return false;
std::memcpy(topicCopy.data(), s.c_str(), len);
std::memset(&topicCopy.data()[len], 0x20, 16); // I fill out with spaces, as valid chars
int n = 0;
const char *i = topicCopy.data();
while (n < len)
{
const int len_left = len - n;
assert(len_left > 0);
__m128i loaded = _mm_loadu_si128((__m128i*)&i[n]);
__m128i loaded_AND_non_ascii = _mm_and_si128(loaded, non_ascii_mask);
if (alsoCheckInvalidPublishChars && (_mm_movemask_epi8(_mm_cmpeq_epi8(loaded, pound) || _mm_movemask_epi8(_mm_cmpeq_epi8(loaded, plus)))))
return false;
int index = _mm_cmpestri(non_ascii_mask, 1, loaded_AND_non_ascii, len_left, 0);
n += index;
// Checking multi-byte chars one by one. With some effort, this may be done using SIMD too, but the majority of uses will
// have a minimum of multi byte chars.
if (index < 16)
{
uint8_t x = i[n++];
int8_t char_len_left = 0;
int8_t total_char_len = 0;
uint32_t cur_code_point = 0;
if((x & 0b11100000) == 0b11000000) // 2 byte char
{
char_len_left = 1;
cur_code_point += ((x & 0b00011111) << 6);
}
else if((x & 0b11110000) == 0b11100000) // 3 byte char
{
char_len_left = 2;
cur_code_point += ((x & 0b00001111) << 12);
}
else if((x & 0b11111000) == 0b11110000) // 4 byte char
{
char_len_left = 3;
cur_code_point += ((x & 0b00000111) << 18);
}
else
return false;
total_char_len = char_len_left + 1;
while (char_len_left > 0)
{
if (n >= len)
return false;
x = i[n++];
if((x & 0b11000000) != 0b10000000) // All remainer bytes of this code point needs to start with 10
return false;
char_len_left--;
cur_code_point += ((x & 0b00111111) << (6*char_len_left));
}
// Check overlong values, to avoid having mulitiple representations of the same value.
if (total_char_len == 2 && cur_code_point < 0x80)
return false;
else if (total_char_len == 3 && cur_code_point < 0x800)
return false;
else if (total_char_len == 4 && cur_code_point < 0x10000)
return false;
if (cur_code_point >= 0xD800 && cur_code_point <= 0xDFFF) // Dec 55296-57343
return false;
if (cur_code_point >= 0x7F && cur_code_point <= 0x009F)
return false;
// Unicode spec: "Which code points are noncharacters?".
if (cur_code_point >= 0xFDD0 && cur_code_point <= 0xFDEF)
return false;
// The last two code points of each of the 17 planes are the remaining 34 non-chars.
const uint32_t plane = (cur_code_point & 0x1F0000) >> 16;
const uint32_t last_16_bit = cur_code_point & 0xFFFF;
if (plane <= 16 && (last_16_bit == 0xFFFE || last_16_bit == 0xFFFF))
return false;
}
else
{
if (_mm_movemask_epi8(_mm_cmplt_epi8(loaded, lowerBound)))
return false;
if (_mm_movemask_epi8(_mm_cmpgt_epi8(loaded, lastAsciiChar)))
return false;
}
}
return true;
}
#endif