Improve files links generation parsing and errors

Implement URL safe base64 for file links
Implement sneaking file data into URL fragment
Deprecate Radix64 in favor of RsBase64, which also supports URL-safe encoding
This commit is contained in:
Gioacchino Mazzurco 2020-03-12 18:57:07 +01:00
parent 55d466f79b
commit d203f31d0c
No known key found for this signature in database
GPG key ID: A1FBCA3872E87051
8 changed files with 403 additions and 49 deletions

View file

@ -26,7 +26,10 @@
#include <vector>
#include <stdint.h>
class Radix64
#include "util/rsdeprecate.h"
/** @deprecated use RsBase64 instead which supports also URL safe encoding */
class RS_DEPRECATED_FOR(RsBase64) Radix64
{
public:
static std::vector<uint8_t> decode(const std::string& buffer)
@ -195,5 +198,3 @@ again:
return true ;
}
};

View file

@ -0,0 +1,191 @@
/*******************************************************************************
* *
* libretroshare base64 encoding utilities *
* *
* Copyright (C) 2020 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2020 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation, either version 3 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#include <cmath>
#include "util/rsbase64.h"
#include "util/rsdebug.h"
#if __cplusplus < 201703L
/* Before C++17 a static constexpr data member initialised inside the class
 * body is only a declaration. As soon as it is ODR-used (for example when a
 * pointer or reference to it is formed) the linker needs exactly one
 * out-of-class definition, otherwise it fails with "undefined reference".
 * Since C++17 such members are implicitly inline, so these definitions are
 * compiled only for older standards. See:
 * https://stackoverflow.com/questions/8016780/undefined-reference-to-static-constexpr-char
 */
/*static*/ decltype(RsBase64::bDict) constexpr RsBase64::bDict;
/*static*/ decltype(RsBase64::uDict) constexpr RsBase64::uDict;
/*static*/ decltype(RsBase64::rDict) constexpr RsBase64::rDict;
/*static*/ decltype(RsBase64::sPad) constexpr RsBase64::sPad;
#endif
/**
 * Encode a raw byte buffer to base64 or base64url text.
 *
 * @param[in] data pointer to the bytes to encode
 * @param[in] len number of bytes available at @p data
 * @param[out] outString receives the encoded text, previous content is
 *	replaced. It may be the very string whose storage @p data points to.
 * @param[in] padding true to complete the last group of 4 characters with '='
 * @param[in] urlSafe true to use the base64url alphabet ('-' and '_'), false
 *	for the classic one ('+' and '/')
 */
/*static*/ void RsBase64::encode(
        rs_view_ptr<const uint8_t> data, size_t len, std::string& outString,
        bool padding, bool urlSafe )
{
    const char* sDict = urlSafe ? uDict : bDict;

    /* If the caller passed the storage of outString as input, encode into a
     * temporary so the input is not overwritten while it is still being read */
    const bool inplace =
            (outString.data() == reinterpret_cast<const char*>(data));
    std::string tBuff;
    std::string& outStr = inplace ? tBuff : outString;

    outStr.resize(encodedSize(len, padding));
    char* const begin = &outStr[0];
    char* p = begin;

    // Full groups: 3 input bytes become 4 output characters
    for (; len >= 3; len -= 3, data += 3)
    {
        *p++ = sDict[ (data[0] >> 2) & 077 ];
        *p++ = sDict[
                (((data[0] << 4) & 060) | ((data[1] >> 4) & 017)) & 077 ];
        *p++ = sDict[
                (((data[1] << 2) & 074) | ((data[2] >> 6) & 03)) & 077 ];
        *p++ = sDict[ data[2] & 077 ];
    }

    if (len == 2)
    {
        // 2 trailing bytes become 3 characters (+1 optional pad)
        *p++ = sDict[ (data[0] >> 2) & 077 ];
        *p++ = sDict[
                (((data[0] << 4) & 060) | ((data[1] >> 4) & 017)) & 077 ];
        *p++ = sDict[ ((data[1] << 2) & 074) ];
        if(padding) *p++ = sPad;
    }
    else if (len == 1)
    {
        // 1 trailing byte becomes 2 characters (+2 optional pads)
        *p++ = sDict[ (data[0] >> 2) & 077 ];
        *p++ = sDict[ (data[0] << 4) & 060 ];
        if(padding) { *p++ = sPad; *p++ = sPad; }
    }

    /* Trim to the characters actually written, so the result never carries
     * trailing NUL characters even if the reserved size was an upper bound */
    outStr.resize(static_cast<size_t>(p - begin));

    // Hand the temporary over without copying it
    if(inplace) outString.swap(tBuff);
}
/**
 * Decode base64 or base64url text, with or without trailing '=' padding.
 *
 * @param[in] encoded text to decode
 * @param[out] decoded receives the decoded bytes. It is emptied when
 *	@p encoded is empty and left untouched when the size computation fails.
 *	If an error is detected while decoding its content is unspecified.
 * @return a default constructed (falsy) std::error_condition on success,
 *	otherwise:
 *	- the error reported by decodedSize()
 *	- std::errc::argument_out_of_domain if a character belongs to neither
 *	  alphabet
 *	- std::errc::illegal_byte_sequence if padding is misplaced
 *	- std::errc::invalid_argument if the last group carries non-zero bits
 *	  beyond the decoded data
 */
/*static*/ std::error_condition RsBase64::decode(
        const std::string& encoded, std::vector<uint8_t>& decoded )
{
    size_t decSize; std::error_condition ec;
    std::tie(decSize, ec) = decodedSize(encoded);
    if(ec) return ec;

    /* Size the output before the early return, so that an empty input yields
     * an empty output instead of leaving stale content in the caller vector */
    decoded.resize(decSize);
    if(!decSize) return ec;

    const size_t encSize = encoded.size();

    for (size_t i = 0, o = 0; i < encSize; i += 4, o += 3)
    {
        const char input0 = encoded[i + 0];

        /* i + 1 may equal encSize, in that case the const operator[] returns
         * the NUL terminator which is rejected below as invalid character */
        const char input1 = encoded[i + 1];

        /* At the end of the string, missing bytes 2 and 3 are considered
         * padding '=' */
        const char input2 = i + 2 < encSize ? encoded[i + 2] : sPad;
        const char input3 = i + 3 < encSize ? encoded[i + 3] : sPad;

        // If any unknown characters appear, it's an error.
        if(!( isBase64Char(input0) && isBase64Char(input1) &&
              isBase64Char(input2) && isBase64Char(input3) ))
            return std::errc::argument_out_of_domain;

        /* Padding is legal only as the last one or two characters of the
         * last group, anything else is an error. */
        const bool at_end = (i + 4 >= encSize);
        if ( (input0 == sPad) || (input1 == sPad) ||
             ( input2 == sPad && !at_end ) ||
             ( input2 == sPad && input3 != sPad ) ||
             ( input3 == sPad && !at_end) )
            return std::errc::illegal_byte_sequence;

        // Rebuild the 24 bit group out of four 6 bit values ('=' counts as 0)
        const uint32_t b0 = rDict[static_cast<uint8_t>(input0)] & 0x3f;
        const uint32_t b1 = rDict[static_cast<uint8_t>(input1)] & 0x3f;
        const uint32_t b2 = rDict[static_cast<uint8_t>(input2)] & 0x3f;
        const uint32_t b3 = rDict[static_cast<uint8_t>(input3)] & 0x3f;
        const uint32_t stream = (b0 << 18) | (b1 << 12) | (b2 << 6) | b3;

        decoded[o + 0] = (stream >> 16) & 0xFF;

        if (input2 != sPad) decoded[o + 1] = (stream >> 8) & 0xFF;
        /* If there are any stale bits in this from input1, the text is
         * malformed. */
        else if (((stream >> 8) & 0xFF) != 0)
            return std::errc::invalid_argument;

        if (input3 != sPad) decoded[o + 2] = (stream >> 0) & 0xFF;
        /* If there are any stale bits in this from input2, the text is
         * malformed. */
        else if (((stream >> 0) & 0xFF) != 0)
            return std::errc::invalid_argument;
    }

    return std::error_condition();
}
/**
 * Calculate how many characters the base64 encoding of some data takes.
 *
 * Every complete group of 3 bytes takes 4 characters. A trailing group of 1
 * or 2 bytes takes 2 or 3 characters respectively, or a full 4 characters when
 * padding is enabled.
 *
 * @param decodedSize size in bytes of the data to encode
 * @param padding true if the output is completed with '=' to a multiple of 4
 * @return exact length of the encoded string
 */
/*static*/ size_t RsBase64::encodedSize(size_t decodedSize, bool padding)
{
    /* Pure integer arithmetic on quotient and remainder: exact for every
     * size and free from the intermediate overflow of 4 * decodedSize */
    const size_t fullGroups = decodedSize / 3;
    const size_t tailBytes = decodedSize % 3;

    size_t tailChars = 0;
    if(tailBytes) tailChars = padding ? 4 : tailBytes + 1;

    return 4 * fullGroups + tailChars;
}
/**
 * Calculate how many bytes the decoded version of a base64 string takes.
 *
 * Only the length of the input and its last two characters are inspected, the
 * rest of the content is not validated here.
 *
 * @param input base64 or base64url text, with or without trailing '=' padding
 * @return the decoded size paired with a falsy std::error_condition on
 *	success, or 0 paired with std::errc::invalid_argument if the length of
 *	the input is impossible for base64 text (size % 4 == 1)
 */
/*static*/ std::tuple<size_t, std::error_condition> RsBase64::decodedSize(
        const std::string& input )
{
    const auto success = [](size_t val)
    { return std::make_tuple(val, std::error_condition()); };

    if(input.empty()) return success(0);

    const size_t inSize = input.size();
    const size_t mod = inSize % 4;

    /* A single leftover character carries only 6 bits, not enough for a
     * byte, so no valid encoding has this length */
    if(mod == 1)
        return std::make_tuple(
                    static_cast<size_t>(0),
                    std::make_error_condition(std::errc::invalid_argument) );

    // Size as if every group of 4 characters, including the last, were full
    const size_t padded_size = ((inSize + 3) / 4) * 3;

    if (mod >= 2 || (mod == 0 && input[inSize - 1] == sPad))
    {
        /* If the last byte is '=', or the input size % 4 is 2 or 3 (thus
         * there are implied '='s), then the actual size is 1-2 bytes
         * smaller. */
        if ( mod == 2 || (mod == 0 && input[inSize - 2] == sPad) )
        {
            /* If the second-to-last byte is also '=', or the input
             * size % 4 is 2 (implying a second '='), then the actual size
             * is 2 bytes smaller. */
            return success(padded_size - 2);
        }

        /* Otherwise it's just the last character and the actual size is
         * 1 byte smaller. */
        return success(padded_size - 1);
    }

    return success(padded_size);
}
/**
 * Copy into @p out only the characters of @p in accepted by isBase64Char(),
 * keeping their order.
 *
 * The write position never gets ahead of the read position, so passing the
 * same string as @p in and @p out is safe.
 *
 * @param in text to clean up
 * @param out receives the cleaned text
 * @return number of characters that were dropped
 */
/*static*/ size_t RsBase64::stripInvalid(
        const std::string& in, std::string& out )
{
    const size_t total = in.size();
    out.resize(total);

    size_t kept = 0;
    for(size_t rd = 0; rd < total; ++rd)
    {
        const char c = in[rd];
        if(!isBase64Char(c)) continue;
        out[kept++] = c;
    }

    out.resize(kept);
    return total - kept;
}

View file

@ -0,0 +1,139 @@
/*******************************************************************************
* *
* libretroshare base64 encoding utilities *
* *
* Copyright (C) 2020 Gioacchino Mazzurco <gio@eigenlab.org> *
* Copyright (C) 2020 Asociación Civil Altermundi <info@altermundi.net> *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as *
* published by the Free Software Foundation, either version 3 of the *
* License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <https://www.gnu.org/licenses/>. *
* *
*******************************************************************************/
#pragma once
#include <string>
#include <vector>
#include <cstdint>
#include <system_error>
#include <tuple>
#include "util/rsmemory.h"
/**
 * Implement methods to encode and decode to base64 format as per RFC 4648.
 * This implementation also supports the file name and URL safe base64url
 * format.
 * All members are static, the class is used as a namespace and is never
 * instantiated by the code in this file.
 * @see https://tools.ietf.org/html/rfc4648#section-5
 */
class RsBase64
{
public:
/// Enable base64url by default
static constexpr bool DEFAULT_URL_SAFE = true;
/// Disable padding by default
static constexpr bool DEFAULT_PADDING = false;
/**
 * @brief Encode arbitrary data to base64
 * @param[in] data pointer to the input data buffer
 * @param[in] len length of the input buffer
 * @param[out] outString storage for the resulting base64 encoded string
 * @param[in] padding set to true to complete the last group of 4 characters
 *	with '='
 * @param[in] urlSafe pass true for base64url format, false for base64 format
 */
static void encode(
rs_view_ptr<const uint8_t> data, size_t len,
std::string& outString,
bool padding = DEFAULT_PADDING, bool urlSafe = DEFAULT_URL_SAFE );
/**
 * @brief Decode data from a base64 encoded string
 * Both the base64 and the base64url alphabets are accepted.
 * @param[in] encoded encoded string
 * @param[out] decoded storage for decoded data
 * @return success or error details
 */
static std::error_condition decode(
const std::string& encoded, std::vector<uint8_t>& decoded );
/**
 * Remove invalid characters from base64 encoded string.
 * Often when copying and pasting long base64 strings from one program to
 * another, new lines, spaces or other characters end up polluting the
 * original text. This function is useful to clean up the pollution before
 * attempting to decode the message.
 * @param in input string
 * @param out storage for cleaned string. In-place operation is supported so
 * the same input string may be passed.
 * @return count of stripped invalid characters
 */
static size_t stripInvalid(const std::string& in, std::string& out);
/**
 * Calculate how many bytes are needed to store the base64 encoded version
 * of some data.
 * @param decodedSize size of the original decoded data
 * @param padding true to enable base64 padding
 * @return how many bytes it would take to store the encoded version
 */
static size_t encodedSize(
size_t decodedSize, bool padding = DEFAULT_PADDING );
/**
 * @brief Calculate how much space is needed to store the decoded version of
 * a base64 encoded string
 * @param input encoded string
 * @return decoded size, plus error information on failure
 */
static std::tuple<size_t, std::error_condition> decodedSize(
const std::string& input );
private:
/// base64 conversion table, maps a 6 bit value to its character
static constexpr char bDict[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// base64url conversion table, differs from bDict only in the last two entries
static constexpr char uDict[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
/** This reverse table supports both base64 and base64url.
 * It is indexed by the character code and yields the 6 bit value, or -1 for
 * characters belonging to neither alphabet. '+' and '-' both map to 62,
 * '/' and '_' both map to 63. The padding character '=' (code 61) maps to 0,
 * so it passes isBase64Char() and contributes no bits when decoding. */
static constexpr int8_t rDict[256] = {
/* index +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 */
/* 0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 16 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 32 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
/* 48 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, 0, -1, -1,
/* 64 */ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
/* 80 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
/* 96 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
/* 112 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
/* 128 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 144 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 160 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 176 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 192 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 208 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 224 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 240 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/// base64 padding character
static constexpr char sPad = '=';
/** Check if given character is valid either for base64 or for base64url.
 * The padding character '=' is reported as valid too, see rDict.
 * @param c character to check
 * @return true if valid false otherwise
 */
static inline bool isBase64Char(char c)
{ return rDict[static_cast<uint8_t>(c)] >= 0; }
};