Newer
Older
/*-------------------------------------------------------------------------
*
* encode.c
* Various data encoding/decoding things.
*
* Copyright (c) 2001-2005, PostgreSQL Global Development Group
* $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.16 2005/10/15 02:49:28 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "utils/builtins.h"
/*
 * Method table for one encoding.
 *
 * encode_len / decode_len return the size estimate the callers use to
 * allocate the result buffer; encode / decode write the converted data
 * into "res" and return the number of bytes actually produced.
 */
struct pg_encoding
{
    unsigned    (*encode_len) (const char *data, unsigned dlen);
    unsigned    (*decode_len) (const char *data, unsigned dlen);
    unsigned    (*encode) (const char *data, unsigned dlen, char *res);
    unsigned    (*decode) (const char *data, unsigned dlen, char *res);
};
static struct pg_encoding *pg_find_encoding(const char *name);
/*
* SQL functions.
*/
/*
 * binary_encode
 *      Convert a bytea value (argument 0) to text using the encoding whose
 *      name is given as argument 1.
 *
 * Raises ERROR if the encoding name is not recognized.  Raises FATAL if the
 * encoder produced more bytes than its length estimate promised, because in
 * that case memory beyond the palloc'd result has already been overwritten.
 */
Datum
binary_encode(PG_FUNCTION_ARGS)
{
    bytea      *data = PG_GETARG_BYTEA_P(0);
    Datum       name = PG_GETARG_DATUM(1);
    text       *result;
    char       *namebuf;
    int         datalen,
                resultlen,
                res;
    struct pg_encoding *enc;

    datalen = VARSIZE(data) - VARHDRSZ;

    namebuf = DatumGetCString(DirectFunctionCall1(textout, name));

    enc = pg_find_encoding(namebuf);
    if (enc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("unrecognized encoding: \"%s\"", namebuf)));

    /* size the output buffer from the encoder's own estimate */
    resultlen = enc->encode_len(VARDATA(data), datalen);
    result = palloc(VARHDRSZ + resultlen);

    res = enc->encode(VARDATA(data), datalen, VARDATA(result));

    /* Make this FATAL 'cause we've trodden on memory ... */
    if (res > resultlen)
        elog(FATAL, "overflow - encode estimate too small");

    /* the estimate may be generous; record the real length */
    VARATT_SIZEP(result) = VARHDRSZ + res;

    PG_RETURN_TEXT_P(result);
}
/*
 * binary_decode
 *      Convert a text value (argument 0) to bytea using the encoding whose
 *      name is given as argument 1.
 *
 * Raises ERROR if the encoding name is not recognized.  Raises FATAL if the
 * decoder produced more bytes than its length estimate promised, because in
 * that case memory beyond the palloc'd result has already been overwritten.
 */
Datum
binary_decode(PG_FUNCTION_ARGS)
{
    text       *data = PG_GETARG_TEXT_P(0);
    Datum       name = PG_GETARG_DATUM(1);
    bytea      *result;
    char       *namebuf;
    int         datalen,
                resultlen,
                res;
    struct pg_encoding *enc;

    datalen = VARSIZE(data) - VARHDRSZ;

    namebuf = DatumGetCString(DirectFunctionCall1(textout, name));

    enc = pg_find_encoding(namebuf);
    if (enc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("unrecognized encoding: \"%s\"", namebuf)));

    /* size the output buffer from the decoder's own estimate */
    resultlen = enc->decode_len(VARDATA(data), datalen);
    result = palloc(VARHDRSZ + resultlen);

    res = enc->decode(VARDATA(data), datalen, VARDATA(result));

    /* Make this FATAL 'cause we've trodden on memory ... */
    if (res > resultlen)
        elog(FATAL, "overflow - decode estimate too small");

    /* the estimate may be generous; record the real length */
    VARATT_SIZEP(result) = VARHDRSZ + res;

    PG_RETURN_BYTEA_P(result);
}
/*
* HEX
*/
/* Output alphabet for hex encoding: digit value (0..15) -> lower-case character */
static const char *hextbl = "0123456789abcdef";
/*
 * Reverse table for hex decoding, indexed by 7-bit character code.
 * '0'..'9' map to 0..9, 'A'..'F' and 'a'..'f' both map to 10..15, and
 * every other entry is -1.  The table has only 128 entries, so it must
 * not be indexed with a character code of 128 or above.
 */
static const int8 hexlookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
static unsigned
hex_encode(const char *src, unsigned len, char *dst)
while (src < end)
{
*dst++ = hextbl[(*src >> 4) & 0xF];
*dst++ = hextbl[*src & 0xF];
src++;
}
return len * 2;
}
int res = -1;
res = hexlookup[(unsigned char) c];
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
Peter Eisentraut
committed
errmsg("invalid hexadecimal digit: \"%c\"", c)));
hex_decode(const char *src, unsigned len, char *dst)
v2,
*p = dst;
srcend = src + len;
s = src;
p = dst;
while (s < srcend)
{
if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
{
s++;
continue;
}
v1 = get_hex(*s++) << 4;
if (s >= srcend)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal data: odd number of digits")));
v2 = get_hex(*s++);
*p++ = v1 | v2;
}
return p - dst;
}
/*
 * hex_enc_len
 *      Size of the hex encoding of srclen bytes: two characters per byte.
 *      The data pointer is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
    const unsigned chars_per_byte = 2;

    return srclen * chars_per_byte;
}
/*
 * hex_dec_len
 *      Size estimate for decoding srclen characters of hex text: one byte
 *      per two characters, rounded down.  The data pointer is not examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
    const unsigned chars_per_byte = 2;

    return srclen / chars_per_byte;
}
/*
* BASE64
*/
/* Output alphabet for base64 encoding: 6-bit value (0..63) -> character */
static const char _base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/*
 * Reverse table for base64 decoding, indexed by 7-bit character code.
 * 'A'..'Z' map to 0..25, 'a'..'z' to 26..51, '0'..'9' to 52..61,
 * '+' to 62 and '/' to 63; every other entry is -1.  The table has only
 * 128 entries, so it must not be indexed with a code of 128 or above.
 */
static const int8 b64lookup[128] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
};
static unsigned
b64_encode(const char *src, unsigned len, char *dst)
*end = src + len;
int pos = 2;
uint32 buf = 0;
s = src;
p = dst;
while (s < end)
{
buf |= (unsigned char) *s << (pos << 3);
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
pos--;
s++;
/* write it out */
if (pos < 0)
{
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = _base64[(buf >> 6) & 0x3f];
*p++ = _base64[buf & 0x3f];
pos = 2;
buf = 0;
}
if (p >= lend)
{
*p++ = '\n';
lend = p + 76;
}
}
if (pos != 2)
{
*p++ = _base64[(buf >> 18) & 0x3f];
*p++ = _base64[(buf >> 12) & 0x3f];
*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
*p++ = '=';
}
return p - dst;
}
static unsigned
b64_decode(const char *src, unsigned len, char *dst)
const char *srcend = src + len,
int b = 0;
uint32 buf = 0;
int pos = 0,
end = 0;
while (s < srcend)
{
c = *s++;
if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
continue;
if (c == '=')
{
/* end sequence */
if (!end)
{
if (pos == 2)
end = 1;
else if (pos == 3)
end = 2;
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unexpected \"=\"")));
else
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid symbol")));
}
/* add it to buffer */
buf = (buf << 6) + b;
pos++;
if (pos == 4)
{
*p++ = (buf >> 16) & 255;
if (end == 0 || end > 1)
*p++ = (buf >> 8) & 255;
if (end == 0 || end > 2)
*p++ = buf & 255;
buf = 0;
pos = 0;
}
}
if (pos != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid end sequence")));
return p - dst;
}
/*
 * b64_enc_len
 *      Size estimate for the base64 encoding of srclen bytes.  Every 3
 *      input bytes become 4 characters, and one linefeed is counted for
 *      each full 76-character output line.  The data pointer is not
 *      examined.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
    /* number of input bytes that fill one 76-character output line */
    const unsigned bytes_per_line = 76 * 3 / 4;
    unsigned    encoded_chars = (srclen + 2) * 4 / 3;
    unsigned    linefeeds = srclen / bytes_per_line;

    return encoded_chars + linefeeds;
}
/*
 * b64_dec_len
 *      Size estimate for decoding srclen characters of base64 text: at
 *      most 3 output bytes per 4 input characters.  The data pointer is
 *      not examined.
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
    return (srclen * 3) >> 2;
}
/*
* Escape
* Minimally escape bytea to text.
* De-escape text to bytea.
* Only two characters are escaped:
* \0 (null) and \\ (backslash)
* De-escapes \\ and any \### octal
*/
/* VAL: numeric value of a digit character (subtracts '0') */
#define VAL(CH) ((CH) - '0')
/* DIG: digit character for a numeric value (adds '0') */
#define DIG(VAL) ((VAL) + '0')
/*
 * esc_encode
 *      Copy srclen bytes from src to dst, escaping the two special bytes:
 *      a zero byte becomes the four characters \000 and a backslash
 *      becomes two backslashes.  All other bytes are copied unchanged.
 *
 * Returns the number of bytes written to dst.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
    const char *end = src + srclen;
    char       *rp = dst;
    int         len = 0;

    while (src < end)
    {
        if (*src == '\0')
        {
            rp[0] = '\\';
            rp[1] = '0';
            rp[2] = '0';
            rp[3] = '0';
            rp += 4;
            len += 4;
        }
        else if (*src == '\\')
        {
            rp[0] = '\\';
            rp[1] = '\\';
            rp += 2;
            len += 2;
        }
        else
        {
            *rp++ = *src;
            len++;
        }

        src++;
    }

    return len;
}
static unsigned
esc_decode(const char *src, unsigned srclen, char *dst)
const char *end = src + srclen;
char *rp = dst;
int len = 0;
while (src < end)
{
if (src[0] != '\\')
*rp++ = *src++;
else if (src + 3 < end &&
(src[1] >= '0' && src[1] <= '3') &&
(src[2] >= '0' && src[2] <= '7') &&
(src[3] >= '0' && src[3] <= '7'))
int val;
val = VAL(src[1]);
val <<= 3;
val += VAL(src[2]);
val <<= 3;
*rp++ = val + VAL(src[3]);
src += 4;
}
else if (src + 1 < end &&
(src[1] == '\\'))
{
*rp++ = '\\';
src += 2;
}
else
{
/*
* One backslash, not followed by ### valid octal. Should never
* get here, since esc_dec_len does same check.
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
Peter Eisentraut
committed
errmsg("invalid input syntax for type bytea")));
return len;
}
/*
 * esc_enc_len
 *      Exact size of the escape encoding of the srclen bytes at src:
 *      4 for each zero byte, 2 for each backslash, 1 for anything else.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
    const char *end = src + srclen;
    int         len = 0;

    while (src < end)
    {
        if (*src == '\0')
            len += 4;
        else if (*src == '\\')
            len += 2;
        else
            len++;

        src++;
    }

    return len;
}
static unsigned
esc_dec_len(const char *src, unsigned srclen)
int len = 0;
while (src < end)
{
if (src[0] != '\\')
src++;
else if (src + 3 < end &&
(src[1] >= '0' && src[1] <= '3') &&
(src[2] >= '0' && src[2] <= '7') &&
(src[3] >= '0' && src[3] <= '7'))
{
/*
* backslash + valid octal
*/
src += 4;
}
else if (src + 1 < end &&
(src[1] == '\\'))
{
/*
* two backslashes = backslash
*/
src += 2;
}
else
{
/*
* one backslash, not followed by ### valid octal
*/
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
Peter Eisentraut
committed
errmsg("invalid input syntax for type bytea")));
}
len++;
}
return len;
}
static struct
{
} enclist[] =
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
{
{
"hex",
{
hex_enc_len, hex_dec_len, hex_encode, hex_decode
}
},
{
"base64",
{
b64_enc_len, b64_dec_len, b64_encode, b64_decode
}
},
{
"escape",
{
esc_enc_len, esc_dec_len, esc_encode, esc_decode
}
},
{
NULL,
{
NULL, NULL, NULL, NULL
}
}
};
/*
 * pg_find_encoding
 *      Look up an encoding by name (case-insensitively) in enclist.
 *
 * Returns a pointer to the matching method table, or NULL if no entry has
 * that name.
 */
static struct pg_encoding *
pg_find_encoding(const char *name)
{
    int         i;

    /* the list ends at the entry whose name is NULL */
    for (i = 0; enclist[i].name; i++)
        if (pg_strcasecmp(enclist[i].name, name) == 0)
            return &enclist[i].enc;

    return NULL;
}