epee: support unicode in parsed strings

This commit is contained in:
moneromooo-monero 2019-04-30 19:16:11 +00:00
parent 310c26824d
commit eeca5ca0c8
No known key found for this signature in database
GPG key ID: 686F07454D6CEFC3
3 changed files with 78 additions and 22 deletions

View file

@ -31,6 +31,9 @@
#include <algorithm>
#include <boost/utility/string_ref.hpp>
#undef MONERO_DEFAULT_LOG_CATEGORY
#define MONERO_DEFAULT_LOG_CATEGORY "serialization"
namespace epee
{
namespace misc_utils
@ -62,6 +65,26 @@ namespace misc_utils
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static const constexpr unsigned char isx[256] =
{
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
inline bool isspace(char c)
{
return lut[(uint8_t)c] & 8;
@ -162,6 +185,42 @@ namespace misc_utils
val.push_back('\\');break;
case '/': //Slash character
val.push_back('/');break;
case 'u': //Unicode code point
if (buf_end - it < 4)
{
ASSERT_MES_AND_THROW("Invalid Unicode escape sequence");
}
else
{
uint32_t dst = 0;
for (int i = 0; i < 4; ++i)
{
const unsigned char tmp = isx[(int)*++it];
CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding");
dst = dst << 4 | tmp;
}
// encode as UTF-8
if (dst <= 0x7f)
{
val.push_back(dst);
}
else if (dst <= 0x7ff)
{
val.push_back(0xc0 | (dst >> 6));
val.push_back(0x80 | (dst & 0x3f));
}
else if (dst <= 0xffff)
{
val.push_back(0xe0 | (dst >> 12));
val.push_back(0x80 | ((dst >> 6) & 0x3f));
val.push_back(0x80 | (dst & 0x3f));
}
else
{
ASSERT_MES_AND_THROW("Unicode code point is out or range");
}
}
break;
default:
val.push_back(*it);
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");