-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathalphabet.hpp
89 lines (78 loc) · 2.84 KB
/
alphabet.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#pragma once
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <sstream>
#include <tuple>
#include <utility>
#include <vector>
// static_for helper
//
// num<N> carries the index N in its type, so a callback declared as
// `[](auto i) { ... }` can use `i.value` as a constant expression
// (for instance as a template argument).
template <std::size_t N>
struct num {
  static constexpr std::size_t value = N;
};

// Invokes func(num<I>{}) once for every index I of the given sequence, in
// pack order (a comma fold evaluates its operands left to right).
template <class Callback, std::size_t... Indices>
constexpr void static_for(Callback func, std::index_sequence<Indices...>) {
  (func(num<Indices>()), ...);
}

// Invokes func(num<0>{}), func(num<1>{}), ..., func(num<N - 1>{}).
// With N == 0 the callback is never invoked.
template <std::size_t N, typename Callback>
constexpr void static_for(Callback func) {
  using Indices = std::make_index_sequence<N>;
  static_for(func, Indices{});
}
// Streams the elements of a vector back to back, without any separator, and
// returns the stream so that calls can be chained.
template <typename T>
std::ostream& operator<<(std::ostream& output, std::vector<T> const& values) {
  for (auto it = values.begin(); it != values.end(); ++it) {
    output << *it;
  }
  return output;
}
// Streams the elements of a fixed-size array, writing a single space after
// every element (including the last one), and returns the stream.
template <typename T, size_t N>
std::ostream& operator<<(std::ostream& output, std::array<T, N> const& values) {
  for (size_t idx = 0; idx < N; ++idx) {
    output << values[idx] << " ";
  }
  return output;
}
// Streams a vector of int8_t codes as decimal numbers, each followed by a
// single space. The conversion to int keeps the stream from printing the
// values as characters.
inline std::ostream& operator<<(std::ostream& output, std::vector<int8_t> const& values) {
  for (const int8_t code : values) {
    output << static_cast<int>(code) << " ";
  }
  return output;
}
// Alphabet stuff
// Identifiers of the supported alphabets. The values are consecutive and start
// at 0; `size` is not an alphabet itself but the number of alphabets listed
// before it.
enum Alphabet {
  dna = 0,
  rna = 1,
  protein = 2,
  size = 3
};
// Lookup tables that translate sequence characters into small integer codes
// and back, for every alphabet named by the Alphabet enum.
//
// The code of a symbol is its position in the alphabet's symbol list.
// Characters that do not belong to an alphabet have the code -1.
struct Alpha {
  // list of alphabets, in the same order as the Alphabet enumerators
  static const constexpr std::tuple<std::array<char, 6>, std::array<char, 6>,
                                    std::array<char, 22>>
      alphabets = {{'A', 'C', 'G', 'T', ']', '['},
                   {'A', 'C', 'G', 'U', ']', '['},
                   {'A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L',
                    'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', ']', '['}};
  // number of symbols in each entry of `alphabets`
  static const constexpr size_t alphabetSizes[] = {6, 6, 22};
  // array containing encoding char -> int, for each alphabet;
  // indexed as a[alphabet][character value], -1 marks "not in this alphabet"
  std::array<std::array<int8_t, 128>, Alphabet::size> a;

  // Returns a copy of the symbols of the requested alphabet, in code order.
  // Any value that does not name an alphabet (such as Alphabet::size) yields
  // an empty vector.
  static std::vector<char> getAlphabet(Alphabet alpha) {
    switch (alpha) {
      case Alphabet::dna: {
        const auto& symbols = std::get<Alphabet::dna>(alphabets);
        return std::vector<char>(symbols.begin(), symbols.end());
      }
      case Alphabet::rna: {
        const auto& symbols = std::get<Alphabet::rna>(alphabets);
        return std::vector<char>(symbols.begin(), symbols.end());
      }
      case Alphabet::protein: {
        const auto& symbols = std::get<Alphabet::protein>(alphabets);
        return std::vector<char>(symbols.begin(), symbols.end());
      }
      default:
        return std::vector<char>{};
    }
  }

  // Builds the char -> code table of every alphabet: every entry starts as -1,
  // then each symbol is mapped to its position in the alphabet. Letters are
  // additionally mapped through their lowercase form.
  constexpr Alpha() : a() {
    static_for<Alphabet::size>([&](auto alpha) {
      const auto& symbols = std::get<alpha.value>(alphabets);
      auto& table = a[alpha.value];
      for (std::size_t i = 0; i < table.size(); ++i) table[i] = -1;
      for (std::size_t code = 0; code < symbols.size(); ++code) {
        const char symbol = symbols[code];
        table[static_cast<unsigned char>(symbol)] = static_cast<int8_t>(code);
        // lowercase: only letters get an alias. Adding the case offset to
        // ']' or '[' would make '}' and '{' encode as those symbols.
        if (symbol >= 'A' && symbol <= 'Z') {
          table[static_cast<unsigned char>(symbol - 'A' + 'a')] =
              static_cast<int8_t>(code);
        }
      }
    });
  }

  // Returns the code of character `c` in alphabet `alpha`, or -1 when `c` is
  // not part of that alphabet.
  //
  // The character is converted through unsigned char and range-checked before
  // it is used as an index: a plain char may be signed, so a byte >= 0x80
  // would otherwise turn into an out-of-range index. A value of `alpha` that
  // does not name an alphabet also yields -1.
  constexpr inline int8_t encode(const char& c, const Alphabet alpha) const {
    const auto table = static_cast<std::size_t>(alpha);
    const auto index = static_cast<std::size_t>(static_cast<unsigned char>(c));
    if (table >= a.size() || index >= a[table].size()) return -1;
    return a[table][index];
  }

  // Returns the symbol that has code `c` in alphabet `alpha`, or '*' when `c`
  // is negative or not smaller than the number of symbols of that alphabet.
  template <Alphabet alpha> constexpr inline char decode(const int8_t& c) const {
    const auto& symbols = std::get<alpha>(alphabets);
    if (c < 0 || static_cast<std::size_t>(c) >= symbols.size()) return '*';
    return symbols[static_cast<std::size_t>(c)];
  }
};