RetroShare/libbitdht/src/bitdht/bencode.c

603 lines
12 KiB
C
Raw Normal View History

/*
* C implementation of a bencode decoder.
* This is the format defined by BitTorrent:
* http://wiki.theory.org/BitTorrentSpecification#bencoding
*
* The only external requirements are a few [standard] function calls and
* the long long type. Any sane system should provide all of these things.
*
* See the bencode.h header file for usage information.
*
* This is released into the public domain.
* Written by Mike Frysinger <vapier@gmail.com>.
*/
/*
* This implementation isn't optimized at all as I wrote it to support
* a bogus system. I have no real interest in this format. Feel free
* to send me patches (so long as you don't copyright them and you release
* your changes into the public domain as well).
*/
#include <stdio.h>
#include <stdlib.h> /* malloc() realloc() free() strtoll() */
#include <string.h> /* memset() */
#include "bitdht/bencode.h"
/***
* #define BE_DEBUG_DECODE 1
* #define BE_DEBUG 1 // controlled from Makefile too.
***/
#ifdef BE_DEBUG_DECODE
#include <stdio.h> /* debug */
#endif
/*
 * Allocate a node of the requested type.
 * Every byte of the node other than the type field starts out as zero.
 * Returns NULL when the allocation fails.
 */
static be_node *be_alloc(be_type type)
{
	be_node *node = (be_node *) calloc(1, sizeof(*node));

	if (node == NULL)
		return NULL;

	node->type = type;
	return node;
}
/*
 * Parse a base-10 integer from the front of the buffer.
 *
 * The accepted syntax mirrors what strtoll() takes in the "C" locale
 * (optional leading whitespace, optional sign, then digits), but the scan
 * never looks at more than *data_len bytes, so the buffer does not need to
 * be NUL terminated.
 *
 * On return *data points at the first unconsumed byte and *data_len has
 * been reduced by the number of bytes consumed.  When no digits are found
 * nothing is consumed and 0 is returned.  Values that do not fit in a
 * long long saturate at the largest / smallest representable value.
 */
static long long _be_decode_int(const char **data, long long *data_len)
{
	/* largest positive long long, computed here so no extra header is needed */
	const unsigned long long max_pos = ~0ULL >> 1;
	const char *p = *data;
	long long left = *data_len;
	unsigned long long magnitude = 0;
	unsigned long long limit;
	int negative = 0;
	int saturated = 0;
	int have_digits = 0;
	long long ret = 0;

	/* leading whitespace, as strtoll() would skip it */
	while (left > 0 && (*p == ' ' || *p == '\t' || *p == '\n' ||
	                    *p == '\v' || *p == '\f' || *p == '\r')) {
		++p;
		--left;
	}

	if (left > 0 && (*p == '-' || *p == '+')) {
		negative = (*p == '-');
		++p;
		--left;
	}

	/* the negative range reaches one further than the positive range */
	limit = negative ? max_pos + 1 : max_pos;

	while (left > 0 && *p >= '0' && *p <= '9') {
		unsigned long long digit = (unsigned long long) (*p - '0');

		if (saturated || magnitude > (limit - digit) / 10) {
			/* keep consuming digits, but pin the value at the limit */
			saturated = 1;
			magnitude = limit;
		} else {
			magnitude = magnitude * 10 + digit;
		}
		have_digits = 1;
		++p;
		--left;
	}

	if (have_digits) {
		if (!negative)
			ret = (long long) magnitude;
		else if (magnitude > max_pos)
			ret = -(long long) max_pos - 1;
		else
			ret = -(long long) magnitude;

		*data_len -= (p - *data);
		*data = p;
	}

#ifdef BE_DEBUG_DECODE
	fprintf(stderr, "bencode::_be_decode_int(pnt: %p, rem: %lld) = %lld\n", *data, *data_len, ret);
#endif
	return ret;
}
/*
 * Return the length recorded for a string node.
 *
 * The length is read from the sizeof(long long) bytes that sit directly in
 * front of node->val.s.  A node whose val.s is NULL reports a length of 0.
 */
long long be_str_len(be_node *node)
{
	long long length = 0;
	const char *payload = node->val.s;

	if (payload == NULL)
		return 0;

	memcpy(&length, payload - sizeof(length), sizeof(length));
	return length;
}
/*
 * Decode a "<length>:<bytes>" byte string from the front of the buffer.
 *
 * The returned pointer addresses a NUL-terminated copy of the payload.
 * The allocation actually starts sizeof(long long) bytes earlier, where the
 * payload length is stored; the pointer must therefore be released with
 * free(ptr - sizeof(long long)), never with free(ptr).
 *
 * Returns NULL when the length is negative or too large, when fewer bytes
 * than announced remain, when the ':' separator is missing, or when memory
 * cannot be allocated.  On success *data / *data_len are advanced past the
 * string; on failure only the length prefix may have been consumed.
 */
static char *_be_decode_str(const char **data, long long *data_len)
{
#ifdef BE_DEBUG_DECODE
	fprintf(stderr, "bencode::_be_decode_str(pnt: %p, rem: %lld)\n", *data, *data_len);
#endif
	long long sllen = _be_decode_int(data, data_len);
	long slen = sllen;
	unsigned long len;
	char *block;
	char *ret;

	/* negative lengths are never valid */
	if (sllen < 0) {
#ifdef BE_DEBUG_DECODE
		fprintf(stderr, "bencode::_be_decode_str() reject bad length\n");
#endif
		return NULL;
	}

	/* reject lengths that do not survive the narrowing to long, since the
	 * value ends up in the size passed to malloc()
	 */
	if (sizeof(long long) != sizeof(long) && sllen != slen) {
#ifdef BE_DEBUG_DECODE
		fprintf(stderr, "bencode::_be_decode_str() reject large_values\n");
#endif
		return NULL;
	}

	/* the ':' plus sllen payload bytes must still be available; this also
	 * guarantees at least one readable byte for the ':' test below
	 */
	if (sllen > *data_len - 1) {
#ifdef BE_DEBUG_DECODE
		fprintf(stderr, "bencode::_be_decode_str() reject large_values\n");
#endif
		return NULL;
	}

	if (**data != ':') {
#ifdef BE_DEBUG_DECODE
		fprintf(stderr, "bencode::_be_decode_str() reject missing :\n");
#endif
		return NULL;
	}

	/* switch from signed to unsigned for the size arithmetic */
	len = slen;

	block = (char *) malloc(sizeof(sllen) + len + 1);
	if (block == NULL)
		return NULL;

	/* layout: [long long length][payload bytes]['\0'] */
	memcpy(block, &sllen, sizeof(sllen));
	ret = block + sizeof(sllen);
	memcpy(ret, *data + 1, len);
	ret[len] = '\0';

	*data += len + 1;
	*data_len -= len + 1;
#ifdef BE_DEBUG_DECODE
	fprintf(stderr, "bencode::_be_decode_str() read %lu bytes\n", len+1);
#endif
	return ret;
}
static be_node *_be_decode(const char **data, long long *data_len)
{
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode(pnt: %p, rem: %lld)\n", *data, *data_len);
#endif
be_node *ret = NULL;
if (!*data_len)
{
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() reject invalid datalen\n");
#endif
return ret;
}
switch (**data) {
/* lists */
case 'l': {
unsigned int i = 0;
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() found list\n");
#endif
ret = be_alloc(BE_LIST);
--(*data_len);
++(*data);
while (**data != 'e') {
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() list get item (%d)\n", i);
#endif
ret->val.l = (be_node **) realloc(ret->val.l, (i + 2) * sizeof(*ret->val.l));
ret->val.l[i] = _be_decode(data, data_len);
if (ret->val.l[i] == NULL)
{
/* failed decode - kill decode */
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() failed list decode - kill\n");
#endif
be_free(ret);
return NULL;
}
++i;
}
--(*data_len);
++(*data);
/* empty list case. */
if (i == 0)
{
ret->val.l = (be_node **) realloc(ret->val.l, 1 * sizeof(*ret->val.l));
}
ret->val.l[i] = NULL;
return ret;
}
/* dictionaries */
case 'd': {
unsigned int i = 0;
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() found dictionary\n");
#endif
ret = be_alloc(BE_DICT);
--(*data_len);
++(*data);
while (**data != 'e') {
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() dictionary get key (%d)\n", i);
#endif
ret->val.d = (be_dict *) realloc(ret->val.d, (i + 2) * sizeof(*ret->val.d));
ret->val.d[i].key = _be_decode_str(data, data_len);
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() dictionary get val\n");
#endif
ret->val.d[i].val = _be_decode(data, data_len);
if ((ret->val.d[i].key == NULL) || (ret->val.d[i].val == NULL))
{
/* failed decode - kill decode */
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() failed dict decode - kill\n");
#endif
be_free(ret);
return NULL;
}
++i;
}
--(*data_len);
++(*data);
/* empty dictionary case. */
if (i == 0)
{
ret->val.d = (be_dict *) realloc(ret->val.d, 1 * sizeof(*ret->val.d));
}
ret->val.d[i].val = NULL;
return ret;
}
/* integers */
case 'i': {
ret = be_alloc(BE_INT);
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() found int\n");
#endif
--(*data_len);
++(*data);
ret->val.i = _be_decode_int(data, data_len);
if (**data != 'e')
{
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() reject data != e - kill\n");
#endif
be_free(ret);
return NULL;
}
--(*data_len);
++(*data);
return ret;
}
/* byte strings */
case '0'...'9': {
ret = be_alloc(BE_STR);
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() found string\n");
#endif
ret->val.s = _be_decode_str(data, data_len);
return ret;
}
/* invalid */
default:
#ifdef BE_DEBUG_DECODE
fprintf(stderr, "bencode::_be_decode() found invalid - kill\n");
#endif
return NULL;
break;
}
return ret;
}
/*
 * Decode a bencoded value from a buffer of `len` bytes.
 *
 * Returns the decoded node tree (release it with be_free()), or NULL when
 * `data` is NULL, `len` is not positive, or decoding fails.
 */
be_node *be_decoden(const char *data, long long len)
{
	if (data == NULL || len <= 0)
		return NULL;

	return _be_decode(&data, &len);
}
/*
 * Decode a bencoded value from a NUL-terminated string.
 *
 * The input length is taken with strlen(), so data containing embedded
 * NUL bytes has to go through be_decoden() instead.
 * Returns NULL when `data` is NULL or decoding fails.
 */
be_node *be_decode(const char *data)
{
	if (data == NULL)
		return NULL;

	return be_decoden(data, (long long) strlen(data));
}
/*
 * Release a string payload.
 *
 * The pointer handed in addresses the payload bytes; the allocation itself
 * begins sizeof(long long) bytes earlier, so that is the address passed to
 * free().  A NULL pointer is ignored.
 */
static inline void _be_free_str(char *str)
{
	char *block_start;

	if (str == NULL)
		return;

	block_start = str - sizeof(long long);
	free(block_start);
}
/*
 * Release a node and everything reachable from it.
 *
 * Lists are walked up to their NULL terminator and dictionaries up to the
 * first entry whose val is NULL; every child found that way is released
 * recursively together with the dictionary keys.  A NULL node, a list
 * without an item array and a dictionary without an entry array are all
 * accepted.
 */
void be_free(be_node *node)
{
	unsigned int i;

	if (node == NULL)
		return;

	switch (node->type) {
	case BE_STR:
		_be_free_str(node->val.s);
		break;

	case BE_INT:
		break;

	case BE_LIST:
		if (node->val.l != NULL) {
			for (i = 0; node->val.l[i]; ++i)
				be_free(node->val.l[i]);
			free(node->val.l);
		}
		break;

	case BE_DICT:
		if (node->val.d != NULL) {
			for (i = 0; node->val.d[i].val; ++i) {
				_be_free_str(node->val.d[i].key);
				be_free(node->val.d[i].val);
			}
			free(node->val.d);
		}
		break;

	default:
		/* unknown type: nothing owned besides the node itself */
		break;
	}

	free(node);
}
#ifdef BE_DEBUG
#include <stdio.h>
#include <stdint.h>
/*
 * Print one indentation unit per level on stdout.
 * Zero and negative values print nothing.
 */
static void _be_dump_indent(ssize_t indent)
{
	ssize_t remaining;

	for (remaining = indent; remaining > 0; --remaining)
		printf(" ");
}
/*
 * Print a node tree on stdout.
 *
 * `indent` is the nesting level.  A negative value means the caller has
 * already printed the indentation for this line (dictionary values follow
 * their "key => " prefix), so no leading indentation is emitted here, but
 * children are still indented relative to the absolute level.
 */
static void _be_dump(be_node *node, ssize_t indent)
{
	size_t idx = 0;
	ssize_t depth;

	_be_dump_indent(indent);
	depth = abs(indent);

	if (node->type == BE_STR) {
		be_dump_str(node);
	} else if (node->type == BE_INT) {
		printf("int = %lli\n", node->val.i);
	} else if (node->type == BE_LIST) {
		puts("list [");
		while (node->val.l[idx]) {
			_be_dump(node->val.l[idx], depth + 1);
			++idx;
		}
		_be_dump_indent(depth);
		puts("]");
	} else if (node->type == BE_DICT) {
		puts("dict {");
		while (node->val.d[idx].val) {
			_be_dump_indent(depth + 1);
			printf("%s => ", node->val.d[idx].key);
			/* negative level: the value continues the current line */
			_be_dump(node->val.d[idx].val, -(depth + 1));
			++idx;
		}
		_be_dump_indent(depth);
		puts("}");
	}
}
/*
 * Print a node tree on stdout, starting at nesting level 0.
 */
void be_dump(be_node *node)
{
	const ssize_t top_level = 0;

	_be_dump(node, top_level);
}
/*
 * Print a string node on stdout as "str[<len>] = <bytes>".
 *
 * Bytes in the range 32..126 are printed as characters; every other byte
 * is printed as its numeric value in square brackets.  Nodes that are not
 * strings produce an error line instead.
 *
 * The length is kept as long long (the type be_str_len() returns) so it is
 * not truncated on its way to the loop bound and the format string.
 */
void be_dump_str(be_node *node)
{
	long long len;
	long long i;

	if (node->type != BE_STR) {
		printf("be_dump_str(): error not a string\n");
		return;
	}

	len = be_str_len(node);
	printf("str[%lli] = ", len);
	for (i = 0; i < len; i++) {
		/* sensible chars */
		if ((node->val.s[i] > 31) && (node->val.s[i] < 127))
			printf("%c", node->val.s[i]);
		else
			printf("[%d]", node->val.s[i]);
	}
	printf("\n");
}
#endif
/******************** New Functions added by drBob *************
* Output bencode
*
*/
/*
 * Append `count` raw bytes from `src` at str[*loc] and advance *loc.
 * Returns 0 on success, -1 when the bytes do not fit into the `len`-byte
 * buffer (nothing is written in that case).
 */
static int _be_encode_put(char *str, int len, int *loc, const void *src, long long count)
{
	if (count < 0 || count > (long long) len - *loc)
		return -1;

	if (count > 0) {
		memcpy(str + *loc, src, (size_t) count);
		*loc += (int) count;
	}
	return 0;
}

/*
 * Encode one node (recursively) at str[*loc].
 * Returns 0 on success, -1 when the output does not fit or a node that is
 * required for the encoding is missing.
 */
static int _be_encode_node(be_node *node, char *str, int len, int *loc)
{
	char num[32]; /* large enough for 'i' + any long long + 'e' + NUL */
	int n;
	size_t i;

	if (node == NULL)
		return -1;

	switch (node->type) {
	case BE_STR: {
		long long slen = be_str_len(node);

		n = snprintf(num, sizeof(num), "%lli:", slen);
		if (n < 0 || (size_t) n >= sizeof(num))
			return -1;
		if (_be_encode_put(str, len, loc, num, n) < 0)
			return -1;
		return _be_encode_put(str, len, loc, node->val.s, slen);
	}

	case BE_INT:
		n = snprintf(num, sizeof(num), "i%llie", node->val.i);
		if (n < 0 || (size_t) n >= sizeof(num))
			return -1;
		return _be_encode_put(str, len, loc, num, n);

	case BE_LIST:
		if (_be_encode_put(str, len, loc, "l", 1) < 0)
			return -1;
		for (i = 0; node->val.l != NULL && node->val.l[i]; ++i) {
			if (_be_encode_node(node->val.l[i], str, len, loc) < 0)
				return -1;
		}
		return _be_encode_put(str, len, loc, "e", 1);

	case BE_DICT:
		if (_be_encode_put(str, len, loc, "d", 1) < 0)
			return -1;
		for (i = 0; node->val.d != NULL && node->val.d[i].val; ++i) {
			const char *key = node->val.d[i].key;
			long long klen = 0;

			if (key == NULL)
				return -1;
			/* keys carry their length in the long long stored right in
			 * front of the bytes, so keys containing NUL bytes are
			 * written out in full
			 */
			memcpy(&klen, key - sizeof(klen), sizeof(klen));
			n = snprintf(num, sizeof(num), "%lli:", klen);
			if (n < 0 || (size_t) n >= sizeof(num))
				return -1;
			if (_be_encode_put(str, len, loc, num, n) < 0)
				return -1;
			if (_be_encode_put(str, len, loc, key, klen) < 0)
				return -1;
			if (_be_encode_node(node->val.d[i].val, str, len, loc) < 0)
				return -1;
		}
		return _be_encode_put(str, len, loc, "e", 1);

	default:
		/* unknown node types contribute no bytes */
		break;
	}
	return 0;
}

/*
 * Write the bencoded form of `node` into `str`, which has room for `len`
 * bytes.
 *
 * Returns the number of bytes written.  No byte is ever written at or
 * beyond str[len].  When the encoding does not fit, or when `node` or
 * `str` is NULL or `len` is not positive, 0 is returned; a real encoding
 * of a string, integer, list or dictionary is never empty, so 0 always
 * means failure and the buffer contents must then be ignored.
 *
 * A NUL byte is appended after the encoding when there is room for it; it
 * is not included in the returned count.
 */
int be_encode(be_node *node, char *str, int len)
{
	int loc = 0;

	if (node == NULL || str == NULL || len <= 0)
		return 0;

	if (_be_encode_node(node, str, len, &loc) < 0)
		return 0;

	if (loc < len)
		str[loc] = '\0';
	return loc;
}
/* hackish way to create nodes! */
/*
 * Create an empty dictionary node by decoding the literal "de".
 * Returns whatever be_decode() returns for that input.
 */
be_node *be_create_dict()
{
	return be_decode("de");
}
/*
 * Create an empty list node by decoding the literal "le".
 * Returns whatever be_decode() returns for that input.
 */
be_node *be_create_list()
{
	return be_decode("le");
}
/*
 * Create a string node holding a copy of the NUL-terminated string `str`.
 *
 * The copy uses the same layout as decoded strings: a long long length
 * stored directly in front of the bytes, and a trailing NUL behind them.
 * Returns NULL when `str` is NULL or memory cannot be allocated.
 */
be_node *be_create_str(const char *str)
{
	be_node *n;
	size_t len;
	long long sllen;
	char *block;

	if (str == NULL)
		return NULL;

	len = strlen(str);
	sllen = (long long) len;

	block = (char *) malloc(sizeof(sllen) + len + 1);
	if (block == NULL)
		return NULL;

	n = be_alloc(BE_STR);
	if (n == NULL) {
		free(block);
		return NULL;
	}

	/* layout: [long long length][bytes]['\0'] */
	memcpy(block, &sllen, sizeof(sllen));
	memcpy(block + sizeof(sllen), str, len);
	block[sizeof(sllen) + len] = '\0';

	n->val.s = block + sizeof(sllen);
	return n;
}
/*
 * Create a string node holding a copy of the first `len` bytes of `str`
 * (`len` does not include any terminating NUL; the bytes may contain NULs).
 *
 * The copy uses the same layout as decoded strings: a long long length
 * stored directly in front of the bytes, and a trailing NUL behind them.
 * Returns NULL when `len` is negative, when `str` is NULL while `len` is
 * positive, or when memory cannot be allocated.
 */
be_node *be_create_str_wlen(const char *str, int len) /* not including \0 */
{
	be_node *n;
	long long sllen = len;
	size_t nbytes;
	char *block;

	/* a negative length would wrap around in the allocation size */
	if (len < 0 || (str == NULL && len > 0))
		return NULL;
	nbytes = (size_t) len;

	block = (char *) malloc(sizeof(sllen) + nbytes + 1);
	if (block == NULL)
		return NULL;

	n = be_alloc(BE_STR);
	if (n == NULL) {
		free(block);
		return NULL;
	}

	/* layout: [long long length][bytes]['\0'] */
	memcpy(block, &sllen, sizeof(sllen));
	if (nbytes > 0)
		memcpy(block + sizeof(sllen), str, nbytes);
	block[sizeof(sllen) + nbytes] = '\0';

	n->val.s = block + sizeof(sllen);
	return n;
}
/*
 * Create an integer node holding `num`.
 * Returns NULL when memory cannot be allocated.
 */
be_node *be_create_int(long long int num)
{
	be_node *n = be_alloc(BE_INT);

	if (n == NULL)
		return NULL;

	n->val.i = num;
	return n;
}
/*
 * Append the pair (`str`, `node`) to the dictionary `dict`.
 *
 * The key is copied; `node` itself is stored in the dictionary, which from
 * then on releases it through be_free().  Returns 1 on success.  Returns 0,
 * leaving the dictionary unchanged and `node` still owned by the caller,
 * when an argument is NULL, `dict` is not a dictionary, or memory cannot
 * be allocated.
 *
 * A NULL `node` is refused because an entry with a NULL value is what
 * marks the end of the entry array.
 */
int be_add_keypair(be_node *dict, const char *str, be_node *node)
{
	size_t count = 0;
	size_t len;
	long long sllen;
	char *block;
	be_dict *grown;

	if (dict == NULL || str == NULL || node == NULL)
		return 0;

	/* only if dict type */
	if (dict->type != BE_DICT)
		return 0;

	/* build the key first so a failure leaves the dictionary untouched;
	 * keys use the string layout [long long length][bytes]['\0']
	 */
	len = strlen(str);
	sllen = (long long) len;
	block = (char *) malloc(sizeof(sllen) + len + 1);
	if (block == NULL)
		return 0;
	memcpy(block, &sllen, sizeof(sllen));
	memcpy(block + sizeof(sllen), str, len);
	block[sizeof(sllen) + len] = '\0';

	/* get to end of dict */
	if (dict->val.d != NULL) {
		while (dict->val.d[count].val)
			++count;
	}

	/* room for the new entry plus the terminator */
	grown = (be_dict *) realloc(dict->val.d, (count + 2) * sizeof(*dict->val.d));
	if (grown == NULL) {
		free(block);
		return 0;
	}
	dict->val.d = grown;

	dict->val.d[count].key = block + sizeof(sllen);
	dict->val.d[count].val = node;
	dict->val.d[count + 1].key = NULL;
	dict->val.d[count + 1].val = NULL;
	return 1;
}
/*
 * Append `node` to the list `list`.
 *
 * `node` itself is stored in the list, which from then on releases it
 * through be_free().  Returns 1 on success.  Returns 0, leaving the list
 * unchanged and `node` still owned by the caller, when an argument is
 * NULL, `list` is not a list, or memory cannot be allocated.
 *
 * A NULL `node` is refused because a NULL item is what marks the end of
 * the item array.
 */
int be_add_list(be_node *list, be_node *node)
{
	size_t count = 0;
	be_node **grown;

	if (list == NULL || node == NULL)
		return 0;

	/* only if list type */
	if (list->type != BE_LIST)
		return 0;

	/* get to end of list */
	if (list->val.l != NULL) {
		while (list->val.l[count])
			++count;
	}

	/* room for the new item plus the terminator */
	grown = (be_node **) realloc(list->val.l, (count + 2) * sizeof(*list->val.l));
	if (grown == NULL)
		return 0;
	list->val.l = grown;

	list->val.l[count] = node;
	list->val.l[count + 1] = NULL;
	return 1;
}