Hi! I'm working on an implementation for a new data type (PostgreSQL
version 9.1 beta 3 on Windows 7 32 bits), according to the following rules:
- 1. NULL values are stored as is;
- 2. character strings (up to 16 bytes) are stored without leading or
trailing spaces;
- 3. empty character strings are stored as NULL values.
Using the extension support for new data types, the following source
code in C was written.
cs_type.h
--------------
#include "postgres.h"
#include "fmgr.h"
///
/// Export DLL functions.
///
#if defined(_WIN32) #define DLLEXPORT __declspec(dllexport)
#else #define DLLEXPORT
#endif
///
/// PostgreSQL magic block.
///
#ifdef PG_MODULE_MAGIC PG_MODULE_MAGIC;
#endif
///
/// Function prototypes.
///
DLLEXPORT Datum cs_type_in(PG_FUNCTION_ARGS);
DLLEXPORT Datum cs_type_out(PG_FUNCTION_ARGS);
cs_type.c
--------------
#include "cs_type.h"
///
/// Version-1 calling convention.
/// Input function.
///
PG_FUNCTION_INFO_V1(cs_type_in);
DLLEXPORT Datum cs_type_in(PG_FUNCTION_ARGS) { char *cp1; // Trimming routine - for parsing the whole string.
char*cp2; // Trimming routine - for shifting & padding. VarChar *v; // Return value.
char *c1 = PG_GETARG_CSTRING(0);
char *c2 = (char *)palloc(strlen(c1) + 1); strcpy(c2, c1);
/// /// Trimming routine.
(http://stackoverflow.com/questions/656542/trim-a-string-in-c) /// // skip leading spaces, shift remaining chars
for(cp1 = c2;isspace(*cp1);cp1++ ) // skip leading spaces, via cp1 ; for(cp2 = c2;*cp1;cp1++,cp2++) // shift
leftremaining chars, via cp2 *cp2 = *cp1; *cp2-- = 0; // mark new end of string for str // replace
trailingspaces with '\0' while(cp2 > c2 && isspace(*cp2)) *cp2-- = 0; // pad with '\0's
if(strlen(c2) == 0) { // Empty string: return NULL. PG_RETURN_NULL(); } else if(strlen(c2) > 16) { //
Valuetoo long: error. ereport(ERROR, (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
errmsg("value too long for type cs_type"), errhint("type cs_type
supports up to 16 bytes"))); }
/// /// Result as varchar. /// v = (VarChar *)palloc(VARHDRSZ + strlen(c2) + 1); SET_VARSIZE(v,
VARHDRSZ+ strlen(c2) + 1); strcpy(VARDATA(v), c2); PG_RETURN_VARCHAR_P(v);
}
///
/// Version-1 calling convention.
/// Output function.
///
PG_FUNCTION_INFO_V1(cs_type_out);
DLLEXPORT Datum cs_type_out(PG_FUNCTION_ARGS) {
VarChar *v = PG_GETARG_VARCHAR_P(0);
/// /// Result as cstring. /// char *c = (char *)palloc(VARSIZE(v) - VARHDRSZ + 1); strcpy(c,
VARDATA(v)); PG_RETURN_CSTRING(c);
}
On the PostgreSQL side, the following objects were created.
CREATE OR REPLACE FUNCTION cs_type_in(cstring) RETURNS cs_type AS
'$libdir/cs_type', 'cs_type_in' LANGUAGE c STRICT COST 1;
ALTER FUNCTION cs_type_in(cstring) OWNER TO postgres;
CREATE OR REPLACE FUNCTION cs_type_out(cs_type) RETURNS cstring AS
'$libdir/cs_type', 'cs_type_out' LANGUAGE c STRICT COST 1;
ALTER FUNCTION cs_type_out(cs_type) OWNER TO postgres;
CREATE TYPE cs_type ( INPUT = cs_type_in(cstring), OUTPUT = cs_type_out(cs_type), LIKE = varchar
);
ALTER TYPE cs_type OWNER TO postgres;
CREATE TABLE test_cs_type
( cs_value cs_type
)
WITH ( OIDS=FALSE
);
ALTER TABLE test_cs_type OWNER TO postgres;
When called directly, the function cs_type_in(cstring) works as
expected, attending the three rules described above. For example:
SELECT cs_type_in('TEST'); -- returns 'TEST'
SELECT cs_type_in(NULL); -- returns NULL
SELECT cs_type_in(''); -- returns NULL
SELECT cs_type_in(' '); -- returns NULL
However, on INSERT clauses, only the rules 1 and 2 work; an attempt to
insert an empty string (or a string with white spaces) generates an
error. For example:
INSERT INTO test_cs_type VALUES (NULL); -- works fine
INSERT INTO test_cs_type VALUES ('TEST'); -- works fine
INSERT INTO test_cs_type VALUES (''); -- error!
INSERT INTO test_cs_type VALUES (' '); -- error!
The error message displayed is:
ERRO: input function 49344 returned NULL
LINE 1: INSERT INTO dicom_data.test_cs_type VALUES (' '); ^
********** Error **********
ERRO: input function 49344 returned NULL
SQL state: XX000
Character: 45
It seems like the call to PG_RETURN_NULL() on the input function is
causing the error. Is this the correct behaviour? There's another way to
return a NULL value as the result of a data type's input function?
Best regards,
Alexandre