diff --git a/convert.c b/convert.c index 84cc1d8..2168994 100644 --- a/convert.c +++ b/convert.c @@ -1270,7 +1270,7 @@ inolog("2stime fr=%d\n", std_time.fr); #ifdef UNICODE_SUPPORT if (fCType == SQL_C_WCHAR) { - len = utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, NULL, 0); + len = utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, NULL, 0, FALSE); len *= WCLEN; changed = TRUE; } @@ -1330,7 +1330,7 @@ inolog("2stime fr=%d\n", std_time.fr); len = pg_bin2whex(pgdc->ttlbuf, (SQLWCHAR *) pgdc->ttlbuf, len); } else - utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, (SQLWCHAR *) pgdc->ttlbuf, len / WCLEN); + utf8_to_ucs2_lf(neut_str, SQL_NTS, lf_conv, (SQLWCHAR *) pgdc->ttlbuf, len / WCLEN, FALSE); } else #endif /* UNICODE_SUPPORT */ diff --git a/psqlodbc.h b/psqlodbc.h index f63f53c..f452f43 100644 --- a/psqlodbc.h +++ b/psqlodbc.h @@ -590,11 +590,9 @@ const pthread_mutexattr_t *getMutexAttr(void); #define WCLEN sizeof(SQLWCHAR) SQLULEN ucs2strlen(const SQLWCHAR *ucs2str); char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL tolower); -SQLULEN utf8_to_ucs2_lf0(const char * utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN buflen); -SQLULEN utf8_to_ucs2_lf1(const char * utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN buflen); +SQLULEN utf8_to_ucs2_lf(const char * utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN buflen, BOOL errcheck); int msgtowstr(const char *, const char *, int, LPWSTR, int); int wstrtomsg(const char *, const LPWSTR, int, char *, int); -#define utf8_to_ucs2_lf(utf8str, ilen, lfconv, ucs2str, buflen) utf8_to_ucs2_lf0(utf8str, ilen, lfconv, ucs2str, buflen) -#define utf8_to_ucs2(utf8str, ilen, ucs2str, buflen) utf8_to_ucs2_lf0(utf8str, ilen, FALSE, ucs2str, buflen) +#define utf8_to_ucs2(utf8str, ilen, ucs2str, buflen) utf8_to_ucs2_lf(utf8str, ilen, FALSE, ucs2str, buflen, FALSE) #endif /* UNICODE_SUPPORT */ diff --git a/test/Makefile b/test/Makefile index cbeb134..a819095 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ TESTS = connect stmthandles select commands multistmt getresult prepare \ params notice
arraybinding insertreturning dataatexecution \ boolsaschar cvtnulldate alter quotes cursors positioned-update \ - catalogfunctions bindcol + catalogfunctions bindcol lfconversion TESTBINS = $(patsubst %,src/%-test, $(TESTS)) TESTSQLS = $(patsubst %,sql/%.sql, $(TESTS)) diff --git a/test/expected/lfconversion.out b/test/expected/lfconversion.out new file mode 100644 index 0000000..ca4776c --- /dev/null +++ b/test/expected/lfconversion.out @@ -0,0 +1,18 @@ +\! ./src/lfconversion-test +connected +reading to char buffer... +strlen 22, SQLGetData claims 22 + +reading to char buffer, with truncation... +strlen 9, SQLGetData claims 22 + +reading to SQLWCHAR buffer... +len 22 chars, SQLGetData claims 44 bytes + +reading to SQLWCHAR buffer, with truncation... +len 4 chars, SQLGetData claims 44 bytes + +reading to SQLWCHAR buffer, with LF->CR+LF conversion causing truncation... +len 20 chars, SQLGetData claims 44 bytes + +disconnecting diff --git a/test/src/lfconversion-test.c b/test/src/lfconversion-test.c new file mode 100644 index 0000000..20f4b7f --- /dev/null +++ b/test/src/lfconversion-test.c @@ -0,0 +1,111 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#if 0 +#include <iconv.h> +#endif + +#include "common.h" + +int main(int argc, char **argv) +{ + SQLRETURN rc; + SQLLEN sqlLen; + SQLLEN ccharlen; + SQLLEN wcharlen; + HSTMT hstmt = SQL_NULL_HSTMT; + char *sql; + int i; + char buf[1000]; + SQLWCHAR wbuf[1000]; + + /* Enable LF -> CR+LF conversion */ + test_connect_ext("CX=1"); + + rc = SQLAllocStmt(conn, &hstmt); + if (!SQL_SUCCEEDED(rc)) + { + print_diag("failed to allocate stmt handle", SQL_HANDLE_DBC, conn); + exit(1); + } + + /* + * Return several columns that all contain the same string, with newlines. + * We want to try getting the column contents with several different + * options, and the driver doesn't let you fetch the same column more than + * once.
+ */ + sql = "SELECT E'string\nwith\nnewlines', E'string\nwith\nnewlines', " + "E'string\nwith\nnewlines', E'string\nwith\nnewlines', " + "E'string\nwith\nnewlines'"; + rc = SQLExecDirect(hstmt, (SQLCHAR *) sql, SQL_NTS); + CHECK_STMT_RESULT(rc, "SQLExecDirect failed", hstmt); + + rc = SQLFetch(hstmt); + CHECK_STMT_RESULT(rc, "SQLFetch failed", hstmt); + + printf("reading to char buffer...\n"); + rc = SQLGetData(hstmt, 1, SQL_C_CHAR, buf, sizeof(buf), &ccharlen); + CHECK_STMT_RESULT(rc, "SQLGetData failed", hstmt); + printf("strlen %d, SQLGetData claims %d\n\n", (int) strlen(buf), (int) ccharlen); + + printf("reading to char buffer, with truncation...\n"); + rc = SQLGetData(hstmt, 2, SQL_C_CHAR, buf, 10, &ccharlen); + CHECK_STMT_RESULT(rc, "SQLGetData failed", hstmt); + printf("strlen %d, SQLGetData claims %d\n\n", (int) strlen(buf), (int) ccharlen); + + printf("reading to SQLWCHAR buffer...\n"); + rc = SQLGetData(hstmt, 3, SQL_C_WCHAR, wbuf, sizeof(wbuf), &wcharlen); + CHECK_STMT_RESULT(rc, "SQLGetData failed", hstmt); + + /* On some platforms, SQLWCHAR != wchar_t, so we cannot use wcslen here */ + for (i = 0; i < sizeof(wbuf) / sizeof(wbuf[0]) && wbuf[i] != 0; i++); + printf("len %d chars, SQLGetData claims %d bytes\n\n", i, (int) wcharlen); + + printf("reading to SQLWCHAR buffer, with truncation...\n"); + rc = SQLGetData(hstmt, 4, SQL_C_WCHAR, wbuf, 10, &wcharlen); + CHECK_STMT_RESULT(rc, "SQLGetData failed", hstmt); + + for (i = 0; i < sizeof(wbuf) / sizeof(wbuf[0]) && wbuf[i] != 0; i++); + printf("len %d chars, SQLGetData claims %d bytes\n\n", i, (int) wcharlen); + + /* + * Read into a buffer that's slightly too small, so that it would fit + * if it wasn't for the LF->CR+LF conversion.
+ */ + printf("reading to SQLWCHAR buffer, with LF->CR+LF conversion causing truncation...\n"); + rc = SQLGetData(hstmt, 5, SQL_C_WCHAR, wbuf, 42, &wcharlen); + CHECK_STMT_RESULT(rc, "SQLGetData failed", hstmt); + + for (i = 0; i < sizeof(wbuf) / sizeof(wbuf[0]) && wbuf[i] != 0; i++); + printf("len %d chars, SQLGetData claims %d bytes\n\n", i, (int) wcharlen); + + /* + * Print out the string, but on Unix we have to convert it to UTF-8 first. + * On Windows we could just use wprintf. + */ +#if 0 + { + iconv_t cd = iconv_open("UTF-8", "UCS-2"); + char utf8buf[1000]; + size_t l; + size_t inbytes = wcharlen; + size_t outbytes = sizeof(utf8buf); + char *obuf = utf8buf; + char *ibuf = (char *) wbuf; + + l = iconv(cd, &ibuf, &inbytes, &obuf, &outbytes); + *obuf = 0; + printf("inremains %d outremains %d l: %d s: %s\n", inbytes, outbytes, l, utf8buf); + } +#endif + + rc = SQLFreeStmt(hstmt, SQL_CLOSE); + CHECK_STMT_RESULT(rc, "SQLFreeStmt failed", hstmt); + + /* Clean up */ + test_disconnect(); + + return 0; +} diff --git a/win_unicode.c b/win_unicode.c index 145a332..7b13e9e 100644 --- a/win_unicode.c +++ b/win_unicode.c @@ -164,154 +164,159 @@ char *ucs2_to_utf8(const SQLWCHAR *ucs2str, SQLLEN ilen, SQLLEN *olen, BOOL lowe #define byte4_m32 0x0f #define byte4_m4 0x3f -#define def_utf2ucs(errcheck) \ -SQLULEN utf8_to_ucs2_lf##errcheck(const char *utf8str, SQLLEN ilen, BOOL lfconv, SQLWCHAR *ucs2str, SQLULEN bufcount) \ -{ \ - int i, lfcount = 0; \ - SQLULEN rtn, ocount, wcode; \ - const UCHAR *str; \ -\ -/*mylog("utf8_to_ucs2 ilen=%d bufcount=%d", ilen, bufcount);*/ \ - if (!utf8str) \ - return 0; \ -/*mylog(" string=%s\n", utf8str);*/ \ - if (little_endian < 0) \ - { \ - int crt = 1; \ - little_endian = (0 != ((char *) &crt)[0]); \ - } \ - if (!bufcount) \ - ucs2str = NULL; \ - else if (!ucs2str) \ - bufcount = 0; \ - if (ilen < 0) \ - ilen = strlen(utf8str); \ - for (i = 0, ocount = 0, str = utf8str; i < ilen && *str;) \ - { \ - /* if (iswascii(*str)) */ \ - if (isascii(*str)) \ - { \ - if
(lfconv && PG_LINEFEED == *str && \ - (i == 0 || PG_CARRIAGE_RETURN != str[-1])) \ - { \ - if (ocount < bufcount) \ - ucs2str[ocount] = PG_CARRIAGE_RETURN; \ - ocount++; \ - lfcount++; \ - } \ - if (ocount < bufcount) \ - ucs2str[ocount] = *str; \ - ocount++; \ - i++; \ - str++; \ - } \ - else if (0xf8 == (*str & 0xf8)) /* more than 5 byte code */ \ - { \ - ocount = (SQLULEN) -1; \ - goto cleanup; \ - } \ - else if (0xf0 == (*str & 0xf8)) /* 4 byte code */ \ - { \ - if (01 == 0##errcheck) \ - { \ - if (i + 4 > ilen || \ - 0 == (str[1] & 0x80) || \ - 0 == (str[2] & 0x80) || \ - 0 == (str[3] & 0x80)) \ - { \ - ocount = (SQLULEN) -1; \ - goto cleanup; \ - } \ - } \ - if (ocount < bufcount) \ - { \ - wcode = (surrog1_bits | \ - ((((UInt4) *str) & byte4_m1) << 8) | \ - ((((UInt4) str[1]) & byte4_m2) << 2) | \ - ((((UInt4) str[2]) & byte4_m31) >> 4)) \ - - surrogate_adjust; \ - ucs2str[ocount] = (SQLWCHAR) wcode; \ - } \ - ocount++; \ - if (ocount < bufcount) \ - { \ - wcode = surrog2_bits | \ - ((((UInt4) str[2]) & byte4_m32) << 6) | \ - (((UInt4) str[3]) & byte4_m4); \ - ucs2str[ocount] = (SQLWCHAR) wcode; \ - } \ - ocount++; \ - i += 4; \ - str += 4; \ - } \ - else if (0xe0 == (*str & 0xf0)) /* 3 byte code */ \ - { \ - if (01 == 0##errcheck) \ - { \ - if (i + 3 > ilen || \ - 0 == (str[1] & 0x80) || \ - 0 == (str[2] & 0x80)) \ - { \ - ocount = (SQLULEN) -1; \ - goto cleanup; \ - } \ - } \ - if (ocount < bufcount) \ - { \ - wcode = ((((UInt4) *str) & byte3_m1) << 12) | \ - ((((UInt4) str[1]) & byte3_m2) << 6) | \ - (((UInt4) str[2]) & byte3_m3); \ - ucs2str[ocount] = (SQLWCHAR) wcode; \ - } \ - ocount++; \ - i += 3; \ - str += 3; \ - } \ - else if (0xc0 == (*str & 0xe0)) /* 2 byte code */ \ - { \ - if (01 == 0##errcheck) \ - { \ - if (i + 2 > ilen || \ - 0 == (str[1] & 0x80)) \ - { \ - ocount = (SQLULEN) -1; \ - goto cleanup; \ - } \ - } \ - if (ocount < bufcount) \ - { \ - wcode = ((((UInt4) *str) & byte2_m1) << 6) | \ - (((UInt4) str[1]) & byte2_m2); \ - 
ucs2str[ocount] = (SQLWCHAR) wcode; \ - } \ - ocount++; \ - i += 2; \ - str += 2; \ - } \ - else \ - { \ - ocount = (SQLULEN) -1; \ - goto cleanup; \ - } \ - } \ -cleanup: \ - rtn = ocount; \ - if (ocount == (SQLULEN) -1) \ - { \ - if (00 == 0##errcheck) \ - rtn = 0; \ - ocount = 0; \ - } \ - if (ocount >= bufcount && ocount < bufcount + lfcount) \ - return utf8_to_ucs2_lf##errcheck(utf8str, ilen, FALSE, ucs2str, bufcount); \ - if (ocount < bufcount && ucs2str) \ - ucs2str[ocount] = 0; \ -/*mylog(" ocount=%d\n", ocount);*/ \ - return rtn; \ -} +/* + * Convert a string from UTF-8 encoding to UCS-2. + * + * utf8str - input string in UTF-8 + * ilen - length of input string in bytes (or SQL_NTS) + * lfconv - TRUE if line feeds (LF) should be converted to CR + LF + * ucs2str - output buffer (NULL to only compute the required length) + * bufcount - size of output buffer, in SQLWCHARs + * errcheck - if TRUE, check for invalidly encoded input characters + * + * Returns the number of SQLWCHARs the whole converted string needs, which can + * exceed bufcount; output beyond bufcount is dropped, and a NUL terminator is + * stored only if room remains. Invalid input: (SQLULEN) -1 if errcheck, else 0.
+ */ +SQLULEN +utf8_to_ucs2_lf(const char *utf8str, SQLLEN ilen, BOOL lfconv, + SQLWCHAR *ucs2str, SQLULEN bufcount, BOOL errcheck) +{ + SQLLEN i; + SQLULEN rtn, ocount, wcode; + const UCHAR *str; -def_utf2ucs(0) -def_utf2ucs(1) +/*mylog("utf8_to_ucs2 ilen=%d bufcount=%d", ilen, bufcount);*/ + if (!utf8str) + return 0; +/*mylog(" string=%s\n", utf8str);*/ + + if (!bufcount) + ucs2str = NULL; + else if (!ucs2str) + bufcount = 0; + if (ilen < 0) + ilen = strlen(utf8str); + for (i = 0, ocount = 0, str = (const UCHAR *) utf8str; i < ilen && *str;) + { + /* if (iswascii(*str)) */ + if (isascii(*str)) + { + if (lfconv && PG_LINEFEED == *str && + (i == 0 || PG_CARRIAGE_RETURN != str[-1])) + { + if (ocount < bufcount) + ucs2str[ocount] = PG_CARRIAGE_RETURN; + ocount++; + } + if (ocount < bufcount) + ucs2str[ocount] = *str; + ocount++; + i++; + str++; + } + else if (0xf8 == (*str & 0xf8)) /* more than 5 byte code */ + { + ocount = (SQLULEN) -1; + goto cleanup; + } + else if (0xf0 == (*str & 0xf8)) /* 4 byte code */ + { + if (errcheck) + { + if (i + 4 > ilen || + 0 == (str[1] & 0x80) || + 0 == (str[2] & 0x80) || + 0 == (str[3] & 0x80)) + { + ocount = (SQLULEN) -1; + goto cleanup; + } + } + if (ocount < bufcount) + { + wcode = (surrog1_bits | + ((((UInt4) *str) & byte4_m1) << 8) | + ((((UInt4) str[1]) & byte4_m2) << 2) | + ((((UInt4) str[2]) & byte4_m31) >> 4)) + - surrogate_adjust; + ucs2str[ocount] = (SQLWCHAR) wcode; + } + ocount++; + if (ocount < bufcount) + { + wcode = surrog2_bits | + ((((UInt4) str[2]) & byte4_m32) << 6) | + (((UInt4) str[3]) & byte4_m4); + ucs2str[ocount] = (SQLWCHAR) wcode; + } + ocount++; + i += 4; + str += 4; + } + else if (0xe0 == (*str & 0xf0)) /* 3 byte code */ + { + if (errcheck) + { + if (i + 3 > ilen || + 0 == (str[1] & 0x80) || + 0 == (str[2] & 0x80)) + { + ocount = (SQLULEN) -1; + goto cleanup; + } + } + if (ocount < bufcount) + { + wcode = ((((UInt4) *str) & byte3_m1) << 12) | + ((((UInt4) str[1]) & byte3_m2) << 6) | + (((UInt4) str[2]) & byte3_m3);
+ ucs2str[ocount] = (SQLWCHAR) wcode; + } + ocount++; + i += 3; + str += 3; + } + else if (0xc0 == (*str & 0xe0)) /* 2 byte code */ + { + if (errcheck) + { + if (i + 2 > ilen || + 0 == (str[1] & 0x80)) + { + ocount = (SQLULEN) -1; + goto cleanup; + } + } + if (ocount < bufcount) + { + wcode = ((((UInt4) *str) & byte2_m1) << 6) | + (((UInt4) str[1]) & byte2_m2); + ucs2str[ocount] = (SQLWCHAR) wcode; + } + ocount++; + i += 2; + str += 2; + } + else + { + ocount = (SQLULEN) -1; + goto cleanup; + } + } +cleanup: + rtn = ocount; + if (ocount == (SQLULEN) -1) + { + if (!errcheck) + rtn = 0; + ocount = 0; + } + if (ocount < bufcount && ucs2str) + ucs2str[ocount] = 0; +/*mylog(" ocount=%d\n", ocount);*/ + return rtn; +} int msgtowstr(const char *enc, const char *inmsg, int inlen, LPWSTR outmsg, int buflen) {