Обсуждение: libpq bug?
Hi, ALL,
Following code:
// Looks up the owner (pg_user.usename) of table tableName in schema schemaName
// and stores it in owner.
// Returns 0 when the query status is PGRES_COMMAND_OK or PGRES_TUPLES_OK;
// otherwise returns 1 and appends the libpq error text to errorMsg.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// $1 is compared with the schema name, $2 with the table name.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// NOTE(review): the buffers are sized from the wide-character count, while the
// strcpy() calls below copy the to_bytes() result. If the conversion produces
// more than one byte for any character, the copy runs past the allocation.
values[0] = new char[schemaName.length() + 1];
values[1] = new char[tableName.length() + 1];
memset( values[0], '\0', schemaName.length() + 1 );
memset( values[1], '\0', tableName.length() + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): len1/len2 are wide-character counts, yet they are passed to
// PQexecParams() as the parameter lengths. They equal the number of bytes
// stored in values[] only when every character converts to exactly one byte.
int len1 = (int) schemaName.length();
int len2 = (int) tableName.length();
int length[2] = { len1, len2 };
// Format code 1 for both parameters (binary, per the libpq documentation);
// the trailing 1 in the call below requests the same format for the result.
int formats[2] = { 1, 1 };
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without checking that a row was returned, and
// res is not passed to PQclear() on this path.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
// NOTE(review): values[0] and values[1] are never released with delete[] in
// this function.
return result;
}
when ran with the call of
GetTableOwner( "public", "abcß", owner, errorMsg );
returns:
ERROR: Invalid byte sequence for encoding UTF8.
Does this mean I found the bug in the library?
Any idea what I can do?
Thank you.
Hi, Patrick,
Here is my new code:
// Second version of the owner lookup: stores pg_user.usename for table
// tableName in schema schemaName into owner.
// Returns 0 when the query status is PGRES_COMMAND_OK or PGRES_TUPLES_OK;
// otherwise returns 1 and appends the libpq error text to errorMsg.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// $1 is compared with the schema name, $2 with the table name.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// Size of each name as stored in the wstring (characters times
// sizeof( wchar_t )); neither variable is used again in this function.
int charlength1 = schemaName.length() * sizeof( wchar_t ),
charlength2 = tableName.length() * sizeof( wchar_t );
// Buffers are sized from the wide storage size plus a terminator and zero-filled.
// NOTE(review): whether that always covers the to_bytes() output depends on
// sizeof( wchar_t ) and on the converter -- confirm.
values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1];
values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1];
memset( values[0], '\0', schemaName.length() * sizeof( wchar_t ) + 1 );
memset( values[1], '\0', tableName.length() * sizeof( wchar_t ) + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): len1/len2 are the wide storage sizes, not the number of bytes
// strcpy() wrote. When the converted text is shorter, the parameter passed to
// PQexecParams() also covers the zero bytes left behind by memset().
// The (int) cast applies to length() only; the product is converted back to int.
int len1 = (int) schemaName.length() * sizeof( wchar_t );
int len2 = (int) tableName.length() * sizeof( wchar_t );
int length[2] = { len1, len2 };
// Format code 1 for both parameters (binary, per the libpq documentation);
// the trailing 1 in the call below requests the same format for the result.
int formats[2] = { 1, 1 };
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without checking that a row was returned, and
// res is not passed to PQclear() on this path.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
// NOTE(review): values[0] and values[1] are never released with delete[] in
// this function.
return result;
}
The charlength2 variable contains the value of 8 and I'm still getting
the same error.
Any idea?
Thank you.
On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
> On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi,
>>
>> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>> >
>> >
>> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>> >>
>> >> Hi, ALL,
>> >> Following code:
>> >>
>> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> >> const std::wstring &tableName, std::wstring &owner,
>> >> std::vector<std::wstring> &errorMsg)
>> >> {
>> >> int result = 0;
>> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> >> c.relowner AND n.nspname = $1 AND relname = $2";
>> >> char *values[2];
>> >> values[0] = NULL, values[1] = NULL;
>> >> values[0] = new char[schemaName.length() + 1];
>> >> values[1] = new char[tableName.length() + 1];
>> >> memset( values[0], '\0', schemaName.length() + 1 );
>> >> memset( values[1], '\0', tableName.length() + 1 );
>> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> >> ).c_str() );
>> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> >> ).c_str() );
>> >> int len1 = (int) schemaName.length();
>> >> int len2 = (int) tableName.length();
>> >> int length[2] = { len1, len2 };
>> >> int formats[2] = { 1, 1 };
>> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> >> ExecStatusType status = PQresultStatus( res );
>> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> >> {
>> >> result = 1;
>> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> >> PQerrorMessage( m_db ) );
>> >> errorMsg.push_back( L"Error executing query: " + err );
>> >> PQclear( res );
>> >> }
>> >> else
>> >> {
>> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> >> }
>> >> return result;
>> >> }
>> >>
>> >> when ran with the call of
>> >>
>> >> GetTableOwner( "public", "abcß", owner, errorMsg );
>> >>
>> >> returns:
>> >>
>> >> ERROR: Invalid byte sequence for encoding UTF8.
>> >>
>> >> Does this mean I found the bug in the library?
>> >
>> >
>> > The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
>> > Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>> >
>> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. So you will end up with buffer-overflows.
>>
>> So I should use
>> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
>> in both places?
>
>
> size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with the maximum buffer size necessary to hold the string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution.
>
> Sorry,
> --patrick
>
>
>>
>> Thank you.
>>
>> >
>> > HTH,
>> > --patrick
>> >
>> >
>> >>
>> >> Any idea what I can do?
>> >>
>> >> Thank you.
>> >>
Hi, ALL,
Following code:
// Looks up the owner (pg_user.usename) of table tableName in schema schemaName
// and stores it in owner.
// Returns 0 when the query status is PGRES_COMMAND_OK or PGRES_TUPLES_OK;
// otherwise returns 1 and appends the libpq error text to errorMsg.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// $1 is compared with the schema name, $2 with the table name.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// NOTE(review): the buffers are sized from the wide-character count, while the
// strcpy() calls below copy the to_bytes() result. If the conversion produces
// more than one byte for any character, the copy runs past the allocation.
values[0] = new char[schemaName.length() + 1];
values[1] = new char[tableName.length() + 1];
memset( values[0], '\0', schemaName.length() + 1 );
memset( values[1], '\0', tableName.length() + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): len1/len2 are wide-character counts, yet they are passed to
// PQexecParams() as the parameter lengths. They equal the number of bytes
// stored in values[] only when every character converts to exactly one byte.
int len1 = (int) schemaName.length();
int len2 = (int) tableName.length();
int length[2] = { len1, len2 };
// Format code 1 for both parameters (binary, per the libpq documentation);
// the trailing 1 in the call below requests the same format for the result.
int formats[2] = { 1, 1 };
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without checking that a row was returned, and
// res is not passed to PQclear() on this path.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
// NOTE(review): values[0] and values[1] are never released with delete[] in
// this function.
return result;
}
when ran with the call of
GetTableOwner( "public", "abcß", owner, errorMsg );
returns:
ERROR: Invalid byte sequence for encoding UTF8.
Does this mean I found the bug in the library?
Any idea what I can do?
Thank you.
Hi,
On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
>
> On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi, ALL,
>> Following code:
>>
>> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> const std::wstring &tableName, std::wstring &owner,
>> std::vector<std::wstring> &errorMsg)
>> {
>> int result = 0;
>> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> c.relowner AND n.nspname = $1 AND relname = $2";
>> char *values[2];
>> values[0] = NULL, values[1] = NULL;
>> values[0] = new char[schemaName.length() + 1];
>> values[1] = new char[tableName.length() + 1];
>> memset( values[0], '\0', schemaName.length() + 1 );
>> memset( values[1], '\0', tableName.length() + 1 );
>> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> ).c_str() );
>> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> ).c_str() );
>> int len1 = (int) schemaName.length();
>> int len2 = (int) tableName.length();
>> int length[2] = { len1, len2 };
>> int formats[2] = { 1, 1 };
>> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> ExecStatusType status = PQresultStatus( res );
>> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> {
>> result = 1;
>> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> PQerrorMessage( m_db ) );
>> errorMsg.push_back( L"Error executing query: " + err );
>> PQclear( res );
>> }
>> else
>> {
>> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> }
>> return result;
>> }
>>
>> when ran with the call of
>>
>> GetTableOwner( "public", "abcß", owner, errorMsg );
>>
>> returns:
>>
>> ERROR: Invalid byte sequence for encoding UTF8.
>>
>> Does this mean I found the bug in the library?
>
>
> The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
> Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>
> Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. So you will end up with buffer-overflows.
So I should use
https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
in both places?
Thank you.
>
> HTH,
> --patrick
>
>
>>
>> Any idea what I can do?
>>
>> Thank you.
>>
Hi,
On Fri, Dec 28, 2018 at 5:07 PM Igor Korot <ikorot01@gmail.com> wrote:
>
> Hi,
>
> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
> >
> >
> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
> >>
> >> Hi, ALL,
> >> Following code:
> >>
> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
> >> const std::wstring &tableName, std::wstring &owner,
> >> std::vector<std::wstring> &errorMsg)
> >> {
> >> int result = 0;
> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
> >> c.relowner AND n.nspname = $1 AND relname = $2";
> >> char *values[2];
> >> values[0] = NULL, values[1] = NULL;
> >> values[0] = new char[schemaName.length() + 1];
> >> values[1] = new char[tableName.length() + 1];
> >> memset( values[0], '\0', schemaName.length() + 1 );
> >> memset( values[1], '\0', tableName.length() + 1 );
> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
> >> ).c_str() );
> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
> >> ).c_str() );
> >> int len1 = (int) schemaName.length();
> >> int len2 = (int) tableName.length();
> >> int length[2] = { len1, len2 };
> >> int formats[2] = { 1, 1 };
> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
> >> ExecStatusType status = PQresultStatus( res );
> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
> >> {
> >> result = 1;
> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
> >> PQerrorMessage( m_db ) );
> >> errorMsg.push_back( L"Error executing query: " + err );
> >> PQclear( res );
> >> }
> >> else
> >> {
> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
> >> }
> >> return result;
> >> }
> >>
> >> when ran with the call of
> >>
> >> GetTableOwner( "public", "abcß", owner, errorMsg );
> >>
> >> returns:
> >>
> >> ERROR: Invalid byte sequence for encoding UTF8.
> >>
> >> Does this mean I found the bug in the library?
> >
> >
> > The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
> > Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
> >
> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. So you will end up with buffer-overflows.
>
> So I should use https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring in both places?
And this solution will work cross-platform, right?
Thank you.
>
> Thank you.
>
> >
> > HTH,
> > --patrick
> >
> >
> >>
> >> Any idea what I can do?
> >>
> >> Thank you.
> >>
Hi,
On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
>
> On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi, ALL,
>> Following code:
>>
>> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> const std::wstring &tableName, std::wstring &owner,
>> std::vector<std::wstring> &errorMsg)
>> {
>> int result = 0;
>> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> c.relowner AND n.nspname = $1 AND relname = $2";
>> char *values[2];
>> values[0] = NULL, values[1] = NULL;
>> values[0] = new char[schemaName.length() + 1];
>> values[1] = new char[tableName.length() + 1];
>> memset( values[0], '\0', schemaName.length() + 1 );
>> memset( values[1], '\0', tableName.length() + 1 );
>> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> ).c_str() );
>> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> ).c_str() );
>> int len1 = (int) schemaName.length();
>> int len2 = (int) tableName.length();
>> int length[2] = { len1, len2 };
>> int formats[2] = { 1, 1 };
>> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> ExecStatusType status = PQresultStatus( res );
>> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> {
>> result = 1;
>> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> PQerrorMessage( m_db ) );
>> errorMsg.push_back( L"Error executing query: " + err );
>> PQclear( res );
>> }
>> else
>> {
>> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> }
>> return result;
>> }
>>
>> when ran with the call of
>>
>> GetTableOwner( "public", "abcß", owner, errorMsg );
>>
>> returns:
>>
>> ERROR: Invalid byte sequence for encoding UTF8.
>>
>> Does this mean I found the bug in the library?
>
>
> The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
> Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>
> Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. so you will end up with buffer-overflows.
So I should use
https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
in both places?
Thank you.
>
> HTH,
> --patrick
>
>
>>
>> Any idea what I can do?
>>
>> Thank you.
>>
Hi,
On Sat, Dec 29, 2018 at 1:37 AM patrick keshishian <pkeshish@gmail.com> wrote:
>
> On Fri, Dec 28, 2018 at 5:40 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi, Patrick,
>>
>> Here is my new code:
>>
>> int PostgresDatabase::GetTableOwner (const std::wstring
&schemaName,
>> const std::wstring &tableName, std::wstring &owner,
>> std::vector<std::wstring> &errorMsg)
>> {
>> int result = 0;
>> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> c.relowner AND n.nspname = $1 AND relname = $2";
>> char *values[2];
>> values[0] = NULL, values[1] = NULL;
>> int charlength1 = schemaName.length() * sizeof( wchar_t ),
>> charlength2 = tableName.length() * sizeof( wchar_t );
>> values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1];
>> values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1];
>> memset( values[0], '\0', schemaName.length() * sizeof(
wchar_t ) + 1 );
>> memset( values[1], '\0', tableName.length() * sizeof(
wchar_t ) + 1 );
>> strcpy( values[0], m_pimpl->m_myconv.to_bytes(
schemaName.c_str()
>> ).c_str() );
>> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> ).c_str() );
>> int len1 = (int) schemaName.length() * sizeof( wchar_t );
>> int len2 = (int) tableName.length() * sizeof( wchar_t );
>> int length[2] = { len1, len2 };
>> int formats[2] = { 1, 1 };
>> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> ExecStatusType status = PQresultStatus( res );
>> if( status != PGRES_COMMAND_OK && status !=
PGRES_TUPLES_OK )
>> {
>> result = 1;
>> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> PQerrorMessage( m_db ) );
>> errorMsg.push_back( L"Error executing query: " + err );
>> PQclear( res );
>> }
>> else
>> {
>> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue(
res, 0, 0 ) );
>> }
>> return result;
>> }
>>
>> The charlength2 variable contains the value of 8 and I'm still getting
>> the same error.
>
>
> I was hoping someone more versed in C++ would jump in to answer your question. I haven't used C++ in at least a decade.
> You need to convert the wchar_t data that wstring stores into UTF-8. Personally, I would use iconv (common enough).
But that is very weird.
When I check what is stored in the values[1] array, I see the same
byte sequence as what I got from the database
information_schema.tables..
Maybe I should just upgrade the libpq and try the latest release?
>
> I assume the PostgresDatabase class is your own (?) I would add a helper function to do the conversion. Here is a very rough template for you to adapt if you think it helps you.
Yes, PostgresDatabase is my class.
I will look at that later today, but it would definitely be
interesting to get someone with the current C++
experience (especially with C++11), because I believe that I am doing
a conversion into UTF8.
The m_myconv variable is declared as:
std::wstring_convert<std::codecvt_utf8<wchar_t> > m_myconv;
and so I think it is converting to the UTF8.
Thank you.
>
>
> #include <err.h>
>
> #include <stdlib.h>
>
> #include <string.h>
>
> #include <iostream>
>
> #include <string>
>
>
> #include <iconv.h>
>
>
> class PGDB {
>
> public:
>
> // your stuff ...
>
> iconv_t ic;
>
>
> PGDB(void) {
>
> setlocale(LC_CTYPE, "");
>
> ic = iconv_open("UTF-8", "wchar_t");
>
> if ((iconv_t)-1 == ic)
>
> errx(1, "iconv_open");
>
> }
>
> ~PGDB() {
>
> iconv_close(ic);
>
> }
>
> // caller should free()
>
> char *wchar2utf8(std::wstring const &ws) {
>
> char *in, *buf, *out;
>
> size_t bufsz, inbytes, outbytes;
>
>
> in = (char *)ws.data();
>
> inbytes = ws.length() * sizeof(wchar_t);
>
> outbytes = inbytes;
>
> bufsz = inbytes + 1; // XXX check for overflow
>
>
> buf = (char *)calloc(bufsz, 1);
>
> if (NULL == buf)
>
> err(1, NULL); // or throw something
>
>
> out = buf;
>
> if ((size_t)-1 == iconv(ic, &in, &inbytes, &out, &outbytes))
>
> errx(1, "iconv"); // or throw ...
>
>
> // TODO ensure inbytes is 0 (meaning all input consumed)
>
> return buf;
>
> }
>
> };
>
>
> // demo using above PGDB class/code
>
> int main(int argc, char *argv[])
>
> {
>
> char *str;
>
> size_t i, n;
>
> std::wstring tab;
>
> PGDB pg;
>
>
> tab = L"ºabcß";
>
> str = pg.wchar2utf8(tab);
>
>
> n = strlen(str);
>
> for (i = 0; i < n; ++i) {
>
> printf("%02hhx ", str[i]);
>
> }
>
> printf("\n");
>
> printf("->%s<-\n", str);
>
>
> free(str);
>
> return 0;
>
> }
>
>
>
> So in GetTableOwner() you'd call wchar2utf8() for the input wstring data and you don't do the new/memset/strcpy. Just remember to free() the returned utf8 string pointers after the call to PQexecParams().
>
> cheers,
> --patrick
>
>
>
>>
>> Any idea?
>>
>> Thank you.
>>
>> On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote:
>> >
>> > On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote:
>> >>
>> >> Hi,
>> >>
>> >> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>> >> >
>> >> >
>> >> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>> >> >>
>> >> >> Hi, ALL,
>> >> >> Following code:
>> >> >>
>> >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> >> >> const std::wstring &tableName, std::wstring &owner,
>> >> >> std::vector<std::wstring> &errorMsg)
>> >> >> {
>> >> >> int result = 0;
>> >> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> >> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> >> >> c.relowner AND n.nspname = $1 AND relname = $2";
>> >> >> char *values[2];
>> >> >> values[0] = NULL, values[1] = NULL;
>> >> >> values[0] = new char[schemaName.length() + 1];
>> >> >> values[1] = new char[tableName.length() + 1];
>> >> >> memset( values[0], '\0', schemaName.length() + 1 );
>> >> >> memset( values[1], '\0', tableName.length() + 1 );
>> >> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> >> >> ).c_str() );
>> >> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> >> >> ).c_str() );
>> >> >> int len1 = (int) schemaName.length();
>> >> >> int len2 = (int) tableName.length();
>> >> >> int length[2] = { len1, len2 };
>> >> >> int formats[2] = { 1, 1 };
>> >> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> >> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> >> >> ExecStatusType status = PQresultStatus( res );
>> >> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> >> >> {
>> >> >> result = 1;
>> >> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> >> >> PQerrorMessage( m_db ) );
>> >> >> errorMsg.push_back( L"Error executing query: " + err );
>> >> >> PQclear( res );
>> >> >> }
>> >> >> else
>> >> >> {
>> >> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> >> >> }
>> >> >> return result;
>> >> >> }
>> >> >>
>> >> >> when ran with the call of
>> >> >>
>> >> >> GetTableOwner( "public", "abcß", owner, errorMsg );
>> >> >>
>> >> >> returns:
>> >> >>
>> >> >> ERROR: Invalid byte sequence for encoding UTF8.
>> >> >>
>> >> >> Does this mean I found the bug in the library?
>> >> >
>> >> >
>> >> > The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
>> >> > Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>> >> >
>> >> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. So you will end up with buffer-overflows.
>> >>
>> >> So I should use
>> >> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
>> >> in both places?
>> >
>> >
>> > size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with the maximum buffer size necessary to hold the string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution.
>> >
>> > Sorry,
>> > --patrick
>> >
>> >
>> >>
>> >> Thank you.
>> >>
>> >> >
>> >> > HTH,
>> >> > --patrick
>> >> >
>> >> >
>> >> >>
>> >> >> Any idea what I can do?
>> >> >>
>> >> >> Thank you.
>> >> >>
Hi, Patrick,
Here is my new code:
// Second version of the owner lookup: stores pg_user.usename for table
// tableName in schema schemaName into owner.
// Returns 0 when the query status is PGRES_COMMAND_OK or PGRES_TUPLES_OK;
// otherwise returns 1 and appends the libpq error text to errorMsg.
int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
const std::wstring &tableName, std::wstring &owner,
std::vector<std::wstring> &errorMsg)
{
int result = 0;
// $1 is compared with the schema name, $2 with the table name.
std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
c.relowner AND n.nspname = $1 AND relname = $2";
char *values[2];
values[0] = NULL, values[1] = NULL;
// Size of each name as stored in the wstring (characters times
// sizeof( wchar_t )); neither variable is used again in this function.
int charlength1 = schemaName.length() * sizeof( wchar_t ),
charlength2 = tableName.length() * sizeof( wchar_t );
// Buffers are sized from the wide storage size plus a terminator and zero-filled.
// NOTE(review): whether that always covers the to_bytes() output depends on
// sizeof( wchar_t ) and on the converter -- confirm.
values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1];
values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1];
memset( values[0], '\0', schemaName.length() * sizeof( wchar_t ) + 1 );
memset( values[1], '\0', tableName.length() * sizeof( wchar_t ) + 1 );
strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
).c_str() );
strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
).c_str() );
// NOTE(review): len1/len2 are the wide storage sizes, not the number of bytes
// strcpy() wrote. When the converted text is shorter, the parameter passed to
// PQexecParams() also covers the zero bytes left behind by memset().
// The (int) cast applies to length() only; the product is converted back to int.
int len1 = (int) schemaName.length() * sizeof( wchar_t );
int len2 = (int) tableName.length() * sizeof( wchar_t );
int length[2] = { len1, len2 };
// Format code 1 for both parameters (binary, per the libpq documentation);
// the trailing 1 in the call below requests the same format for the result.
int formats[2] = { 1, 1 };
PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
ExecStatusType status = PQresultStatus( res );
if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
{
result = 1;
std::wstring err = m_pimpl->m_myconv.from_bytes(
PQerrorMessage( m_db ) );
errorMsg.push_back( L"Error executing query: " + err );
PQclear( res );
}
else
{
// NOTE(review): row 0 is read without checking that a row was returned, and
// res is not passed to PQclear() on this path.
owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
}
// NOTE(review): values[0] and values[1] are never released with delete[] in
// this function.
return result;
}
The charlength2 variable contains the value of 8 and I'm still getting
the same error.
#include <err.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
#include <iconv.h>
// Demo class that owns an iconv conversion descriptor and uses it to turn
// std::wstring data into a UTF-8 encoded C string.
// NOTE(review): no copy constructor or copy assignment is declared, so a copy
// of a PGDB object would hand the same descriptor to iconv_close() twice --
// confirm objects are never copied.
class PGDB {
public:
// your stuff ...
// Conversion descriptor: opened in the constructor, closed in the destructor.
iconv_t ic;
// Sets LC_CTYPE from the environment and opens a converter from the
// "wchar_t" encoding to "UTF-8"; errx() reports the failure and exits if
// the converter cannot be opened.
PGDB(void) {
setlocale(LC_CTYPE, "");
ic = iconv_open("UTF-8", "wchar_t");
if ((iconv_t)-1 == ic)
errx(1, "iconv_open");
}
// Releases the conversion descriptor.
~PGDB() {
iconv_close(ic);
}
// Converts ws to UTF-8 and returns it in a zero-filled calloc() buffer one
// byte larger than the output capacity given to iconv(), so the result is
// always NUL-terminated. Exits through err()/errx() when allocation or
// conversion fails.
// caller should free()
char *wchar2utf8(std::wstring const &ws) {
char *in, *buf, *out;
size_t bufsz, inbytes, outbytes;
// The const is cast away because iconv() is called with a char ** input.
in = (char *)ws.data();
inbytes = ws.length() * sizeof(wchar_t);
// NOTE(review): the output capacity equals the input size in bytes; confirm
// that is enough for every input where wchar_t is 2 bytes wide.
outbytes = inbytes;
bufsz = inbytes + 1; // XXX check for overflow
buf = (char *)calloc(bufsz, 1);
if (NULL == buf)
err(1, NULL); // or throw something
// iconv() advances out as it writes, so buf keeps the start of the result.
out = buf;
if ((size_t)-1 == iconv(ic, &in, &inbytes, &out, &outbytes))
errx(1, "iconv"); // or throw ...
// TODO ensure inbytes is 0 (meaning all input consumed)
return buf;
}
};
// demo using above PGDB class/code
// Demo driver: converts a fixed wide string with PGDB::wchar2utf8(), prints
// every byte of the result in hex, then prints the string itself between
// "->" and "<-". argc and argv are not used. Always returns 0.
// NOTE(review): printf() is called here, but <stdio.h> is not among the
// headers included ahead of the class -- confirm it arrives transitively.
int main(int argc, char *argv[])
{
char *str;
size_t i, n;
std::wstring tab;
PGDB pg;
tab = L"ºabcß";
str = pg.wchar2utf8(tab);
// strlen() counts bytes, so the loop walks the encoded bytes, not characters.
n = strlen(str);
for (i = 0; i < n; ++i) {
printf("%02hhx ", str[i]);
}
printf("\n");
printf("->%s<-\n", str);
// The buffer comes from calloc() inside wchar2utf8(), hence free().
free(str);
return 0;
}
Any idea?
Thank you.
On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote:
>
> On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote:
>>
>> Hi,
>>
>> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
>> >
>> >
>> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
>> >>
>> >> Hi, ALL,
>> >> Following code:
>> >>
>> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
>> >> const std::wstring &tableName, std::wstring &owner,
>> >> std::vector<std::wstring> &errorMsg)
>> >> {
>> >> int result = 0;
>> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
>> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
>> >> c.relowner AND n.nspname = $1 AND relname = $2";
>> >> char *values[2];
>> >> values[0] = NULL, values[1] = NULL;
>> >> values[0] = new char[schemaName.length() + 1];
>> >> values[1] = new char[tableName.length() + 1];
>> >> memset( values[0], '\0', schemaName.length() + 1 );
>> >> memset( values[1], '\0', tableName.length() + 1 );
>> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
>> >> ).c_str() );
>> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
>> >> ).c_str() );
>> >> int len1 = (int) schemaName.length();
>> >> int len2 = (int) tableName.length();
>> >> int length[2] = { len1, len2 };
>> >> int formats[2] = { 1, 1 };
>> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
>> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
>> >> ExecStatusType status = PQresultStatus( res );
>> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
>> >> {
>> >> result = 1;
>> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
>> >> PQerrorMessage( m_db ) );
>> >> errorMsg.push_back( L"Error executing query: " + err );
>> >> PQclear( res );
>> >> }
>> >> else
>> >> {
>> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
>> >> }
>> >> return result;
>> >> }
>> >>
>> >> when ran with the call of
>> >>
>> >> GetTableOwner( "public", "abcß", owner, errorMsg );
>> >>
>> >> returns:
>> >>
>> >> ERROR: Invalid byte sequence for encoding UTF8.
>> >>
>> >> Does this mean I found the bug in the library?
>> >
>> >
>> > The bug is in your C++ code. "abcß" as tableName.length() (wstring) returns 4 (as in four characters), not the number of bytes required to represent the intended string: 61 62 63 c3 9f
>> > Since the last character is 2 bytes in length. Therefore, your call to PQexecParams() specifies a shorter length and hence an invalid UTF-8 sequence.
>> >
>> > Furthermore, your value[] array allocation is in error since wstring::length returns number of characters, not number of bytes. so you will end up with buffer-overflows.
>>
>> So I should use
>> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
>> in both places?
>
>
> size() also returns 4. If you multiply it with sizeof(wchar_t) you will end up with maximum buffers size necessary to hold the string (minus terminating \0), but not the correct length you are after. I am unsure of the "correct" C++ solution.
>
> Sorry,
> --patrick
>
>
>>
>> Thank you.
>>
>> >
>> > HTH,
>> > --patrick
>> >
>> >
>> >>
>> >> Any idea what I can do?
>> >>
>> >> Thank you.
>> >>
Hi,
So, does anybody have an idea?
I do have following code at the beginning of the cpp file:
#ifdef WIN32
#include <windows.h>
#pragma execution_character_set("utf-8")
#endif
but even running it on OSX, I am getting this same error.
Thank you.
On Fri, Dec 28, 2018 at 11:30 PM Igor Korot <ikorot01@gmail.com> wrote:
>
> Hi,
>
> On Sat, Dec 29, 2018 at 1:37 AM patrick keshishian <pkeshish@gmail.com> wrote:
> >
> > On Fri, Dec 28, 2018 at 5:40 PM Igor Korot <ikorot01@gmail.com> wrote:
> >>
> >> Hi, Patrick,
> >>
> >> Here is my new code:
> >>
> >> int PostgresDatabase::GetTableOwner (const std::wstring
> &schemaName,
> >> const std::wstring &tableName, std::wstring &owner,
> >> std::vector<std::wstring> &errorMsg)
> >> {
> >> int result = 0;
> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
> >> c.relowner AND n.nspname = $1 AND relname = $2";
> >> char *values[2];
> >> values[0] = NULL, values[1] = NULL;
> >> int charlength1 = schemaName.length() * sizeof( wchar_t ),
> >> charlength2 = tableName.length() * sizeof( wchar_t );
> >> values[0] = new char[schemaName.length() * sizeof( wchar_t ) + 1];
> >> values[1] = new char[tableName.length() * sizeof( wchar_t ) + 1];
> >> memset( values[0], '\0', schemaName.length() * sizeof(
> wchar_t ) + 1 );
> >> memset( values[1], '\0', tableName.length() * sizeof(
> wchar_t ) + 1 );
> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes(
> schemaName.c_str()
> >> ).c_str() );
> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
> >> ).c_str() );
> >> int len1 = (int) schemaName.length() * sizeof( wchar_t );
> >> int len2 = (int) tableName.length() * sizeof( wchar_t );
> >> int length[2] = { len1, len2 };
> >> int formats[2] = { 1, 1 };
> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
> >> ExecStatusType status = PQresultStatus( res );
> >> if( status != PGRES_COMMAND_OK && status !=
> PGRES_TUPLES_OK )
> >> {
> >> result = 1;
> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
> >> PQerrorMessage( m_db ) );
> >> errorMsg.push_back( L"Error executing query: " + err );
> >> PQclear( res );
> >> }
> >> else
> >> {
> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue(
> res, 0, 0 ) );
> >> }
> >> return result;
> >> }
> >>
> >> The charlength2 variable contains the value of 8 and I'm still getting
> >> the same error.
> >
> >
> > I was hoping someone more versed in C++ would jump in to answer
> your question. I haven't used C++ in at least a decade.
> > You need to convert the wchar_t data that wstring stores into
> UTF-8. Personally, I would use iconv (common enough).
>
> But that is very weird.
> When I check what is stored in the values[1] array, I see the same
> byte sequence as what I got from the database
> information_schema.tables..
> Maybe I should just upgrade the libpq and try the latest release?
>
> >
> > I assume the PostgresDatabase class is your own (?) I would add a
> helper function to do the conversion. Here is a very rough template
> for you to adapt if you think it helps you.
>
> Yes, PostgresDatabase is my class.
> I will look at that later today, but it would definitely be
> interesting to get someone with the current C++
> experience (especially with C++11), because I believe that I am doing
> a conversion into UTF8.
>
> The m_myconv variable is declared as:
>
> std::wstring_convert<std::codecvt_utf8<wchar_t> > m_myconv;
>
> and so I think it is converting to the UTF8.
>
> Thank you.
>
> >
> >
> > #include <err.h>
> >
> > #include <stdlib.h>
> >
> > #include <string.h>
> >
> > #include <iostream>
> >
> > #include <string>
> >
> >
> > #include <iconv.h>
> >
> >
> > class PGDB {
> >
> > public:
> >
> > // your stuff ...
> >
> > iconv_t ic;
> >
> >
> > PGDB(void) {
> >
> > setlocale(LC_CTYPE, "");
> >
> > ic = iconv_open("UTF-8", "wchar_t");
> >
> > if ((iconv_t)-1 == ic)
> >
> > errx(1, "iconv_open");
> >
> > }
> >
> > ~PGDB() {
> >
> > iconv_close(ic);
> >
> > }
> >
> > // caller should free()
> >
> > char *wchar2utf8(std::wstring const &ws) {
> >
> > char *in, *buf, *out;
> >
> > size_t bufsz, inbytes, outbytes;
> >
> >
> > in = (char *)ws.data();
> >
> > inbytes = ws.length() * sizeof(wchar_t);
> >
> > outbytes = inbytes;
> >
> > bufsz = inbytes + 1; // XXX check for overflow
> >
> >
> > buf = (char *)calloc(bufsz, 1);
> >
> > if (NULL == buf)
> >
> > err(1, NULL); // or throw something
> >
> >
> > out = buf;
> >
> > if ((size_t)-1 == iconv(ic, &in, &inbytes, &out, &outbytes))
> >
> > errx(1, "iconv"); // or throw ...
> >
> >
> > // TODO ensure inbytes is 0 (meaning all input consumed)
> >
> > return buf;
> >
> > }
> >
> > };
> >
> >
> > // demo using above PGDB class/code
> >
> > int main(int argc, char *argv[])
> >
> > {
> >
> > char *str;
> >
> > size_t i, n;
> >
> > std::wstring tab;
> >
> > PGDB pg;
> >
> >
> > tab = L"ºabcß";
> >
> > str = pg.wchar2utf8(tab);
> >
> >
> > n = strlen(str);
> >
> > for (i = 0; i < n; ++i) {
> >
> > printf("%02hhx ", str[i]);
> >
> > }
> >
> > printf("\n");
> >
> > printf("->%s<-\n", str);
> >
> >
> > free(str);
> >
> > return 0;
> >
> > }
> >
> >
> >
> > So in GetTableOwner() you'd call wchar2utf8() for the input wstring data, and you don't do the new/memset/strcpy.
Just remember to free() the returned utf8 string pointers after the call to PQexecParams().
> >
> > cheers,
> > --patrick
> >
> >
> >
> >>
> >> Any idea?
> >>
> >> Thank you.
> >>
> >> On Fri, Dec 28, 2018 at 5:40 PM patrick keshishian <pkeshish@gmail.com> wrote:
> >> >
> >> > On Fri, Dec 28, 2018 at 3:07 PM Igor Korot <ikorot01@gmail.com> wrote:
> >> >>
> >> >> Hi,
> >> >>
> >> >> On Fri, Dec 28, 2018 at 4:51 PM patrick keshishian <pkeshish@gmail.com> wrote:
> >> >> >
> >> >> >
> >> >> > On Fri, Dec 28, 2018 at 2:00 PM Igor Korot <ikorot01@gmail.com> wrote:
> >> >> >>
> >> >> >> Hi, ALL,
> >> >> >> Following code:
> >> >> >>
> >> >> >> int PostgresDatabase::GetTableOwner (const std::wstring &schemaName,
> >> >> >> const std::wstring &tableName, std::wstring &owner,
> >> >> >> std::vector<std::wstring> &errorMsg)
> >> >> >> {
> >> >> >> int result = 0;
> >> >> >> std::wstring query = L"SELECT u.usename FROM pg_class c, pg_user
> >> >> >> u, pg_namespace n WHERE n.oid = c.relnamespace AND u.usesysid =
> >> >> >> c.relowner AND n.nspname = $1 AND relname = $2";
> >> >> >> char *values[2];
> >> >> >> values[0] = NULL, values[1] = NULL;
> >> >> >> values[0] = new char[schemaName.length() + 1];
> >> >> >> values[1] = new char[tableName.length() + 1];
> >> >> >> memset( values[0], '\0', schemaName.length() + 1 );
> >> >> >> memset( values[1], '\0', tableName.length() + 1 );
> >> >> >> strcpy( values[0], m_pimpl->m_myconv.to_bytes( schemaName.c_str()
> >> >> >> ).c_str() );
> >> >> >> strcpy( values[1], m_pimpl->m_myconv.to_bytes( tableName.c_str()
> >> >> >> ).c_str() );
> >> >> >> int len1 = (int) schemaName.length();
> >> >> >> int len2 = (int) tableName.length();
> >> >> >> int length[2] = { len1, len2 };
> >> >> >> int formats[2] = { 1, 1 };
> >> >> >> PGresult *res = PQexecParams( m_db, m_pimpl->m_myconv.to_bytes(
> >> >> >> query.c_str() ).c_str(), 2, NULL, values, length, formats, 1 );
> >> >> >> ExecStatusType status = PQresultStatus( res );
> >> >> >> if( status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK )
> >> >> >> {
> >> >> >> result = 1;
> >> >> >> std::wstring err = m_pimpl->m_myconv.from_bytes(
> >> >> >> PQerrorMessage( m_db ) );
> >> >> >> errorMsg.push_back( L"Error executing query: " + err );
> >> >> >> PQclear( res );
> >> >> >> }
> >> >> >> else
> >> >> >> {
> >> >> >> owner = m_pimpl->m_myconv.from_bytes( PQgetvalue( res, 0, 0 ) );
> >> >> >> }
> >> >> >> return result;
> >> >> >> }
> >> >> >>
> >> >> >> when ran with the call of
> >> >> >>
> >> >> >> GetTableOwner( "public", "abcß", owner, errorMsg );
> >> >> >>
> >> >> >> returns:
> >> >> >>
> >> >> >> ERROR: Invalid byte sequence for encoding UTF8.
> >> >> >>
> >> >> >> Does this mean I found the bug in the library?
> >> >> >
> >> >> >
> >> >> > The bug is in your C++ code. For "abcß", tableName.length() (wstring) returns 4 (as in four characters), not the
number of bytes required to represent the intended string: 61 62 63 c3 9f
> >> >> > The last character is 2 bytes in length, so your call to PQexecParams() specifies a
length that is too short, and hence an invalid UTF-8 sequence.
> >> >> >
> >> >> > Furthermore, your values[] array allocation is in error, since wstring::length() returns the number of characters,
not the number of bytes, so you will end up with buffer overflows.
> >> >>
> >> >> So I should use
> >> >> https://stackoverflow.com/questions/9278723/how-can-i-get-the-byte-size-of-stdwstring
> >> >> in both places?
> >> >
> >> >
> >> > size() also returns 4. If you multiply it by sizeof(wchar_t) you will end up with the maximum buffer size
necessary to hold the string (minus the terminating \0), but not the correct length you are after. I am unsure of the
"correct" C++ solution.
> >> >
> >> > Sorry,
> >> > --patrick
> >> >
> >> >
> >> >>
> >> >> Thank you.
> >> >>
> >> >> >
> >> >> > HTH,
> >> >> > --patrick
> >> >> >
> >> >> >
> >> >> >>
> >> >> >> Any idea what I can do?
> >> >> >>
> >> >> >> Thank you.
> >> >> >>