Обсуждение: Optimizing query: select ... where id = 4 and md5(...) = '...'

Поиск
Список
Период
Сортировка

Optimizing query: select ... where id = 4 and md5(...) = '...'

От
"Alexander Farber"
Дата:
Hello,

in my application I'm trying to authenticate users
against a table called "users". The integer column
"id" should match, but also an md5 hash of the
"password" column (salted with a string) should match.
My authentication function (written in C, using libpq)
should return a "username" (is a varchar(200) field).

I wonder, what is faster: fetching 2 columns - the
username and the md5-result and then comparing the
md5 string against the argument in my app, like here:

punbb=> select username, md5('deadbeef' || password) from users where id = 4;
 username |               md5
----------+----------------------------------
 Vasja    | dcde745cc304742e26d62e683a9ecb0a
(1 row)

punbb=> explain select username, md5('deadbeef' || password) from
users where id = 4;
                                QUERY PLAN
--------------------------------------------------------------------------
 Index Scan using users_pkey on users  (cost=0.00..5.95 rows=1 width=156)
   Index Cond: (id = 4)
(2 rows)

Or letting the database doing this comparison for me:

punbb=> select username from users where id = 4 and md5('deadbeef' ||
password) = 'dcde745cc304742e26d62e683a9ecb0a';
 username
----------
 Vasja
(1 row)

punbb=> explain select username from users where id = 4 and
md5('deadbeef' || password) = 'dcde745cc304742e26d62e683a9ecb0a';
                                              QUERY PLAN
------------------------------------------------------------------------------------------------------
 Index Scan using users_pkey on users  (cost=0.00..5.95 rows=1 width=118)
   Index Cond: (id = 4)
   Filter: (md5(('deadbeef'::text || ("password")::text)) =
'dcde745cc304742e26d62e683a9ecb0a'::text)
(3 rows)

I've prepared a test case with the code listed at the
botom and have run it 1000 times, but am still unsure:

$ time perl -e 'for (1..1000) {system("./fetch-user", "APP_QUERY") and die $!}'
....
username: Vasja
....
5.038u 5.734s 0:26.29 40.9%     0+0k 0+4io 0pf+0w

$ time perl -e 'for (1..1000) {system("./fetch-user", "DB_QUERY") and die $!}'
....
username: Vasja
....
4.757u 5.890s 0:26.52 40.1%     0+0k 0+8io 0pf+0w

How does one profile PostgreSQL-queries in general?

Thank you
Alex

PS: Using Postgresql 8.1.0 (from packages) on OpenBSD/386 -current

PPS: My test program, call with APP_QUERY or DB_QUERY:

#include <err.h>
#include <stdio.h>
#include <libpq-fe.h>

#define DB_CONN_STR     "host=/var/www/tmp user=punbb dbname=punbb"
#define APP_QUERY       "select username, md5('deadbeef' || password) " \
                        "from users where id = $1"
#define DB_QUERY        "select username from users where id = $1 and " \
                        "md5('deadbeef' || password) = $2"
int
main(int argc, char *argv[])
{
        PGconn          *conn;
        PGresult        *res;
        const char      *query;
        const char      *args[2];
        unsigned        nargs;
        char            username[201];

        if (! strcmp(argv[1], "APP_QUERY")) {
                query = APP_QUERY;
                nargs = 1;
        } else if (! strcmp(argv[1], "DB_QUERY")) {
                query = DB_QUERY;
                nargs = 2;
        } else
                errx(1, "wrong usage: supply APP_QUERY or DB_QUERY");

        if ((conn = PQconnectdb(DB_CONN_STR)) == NULL)
                err(1, "Connect failed: out of memory");

        if (PQstatus(conn) != CONNECTION_OK)
                err(1, "Connect failed: %s", PQerrorMessage(conn));

        if ((res = PQprepare(conn, "sql_fetch_username",
            query, nargs, NULL)) == NULL)
                err(1, "Preparing '%s' failed: out of memory", query);

        if (PQresultStatus(res) != PGRES_COMMAND_OK)
                err(1, "Preparing statement failed: %s", PQerrorMessage(conn));

        PQclear(res);

        args[0] = "4";
        args[1] = "dcde745cc304742e26d62e683a9ecb0a";

        if ((res = PQexecPrepared(conn, "sql_fetch_username",
            nargs, args, NULL, NULL, 0)) == NULL)
                err(1, "Executing statement '%s' failed: out of memory",
                    query);

        if (PQresultStatus(res) != PGRES_TUPLES_OK)
                err(1, "Executing statement '%s' failed: %s",
                    query, PQerrorMessage(conn));
                PQclear(res);

        if (nargs == 1)
                (void) strcmp(args[1], PQgetvalue(res, 0, 1));

        fprintf(stderr, "username: %s\n", PQgetvalue(res, 0, 0));

        PQfinish(conn);
        return 0;
}


--
http://preferans.de

Re: Optimizing query: select ... where id = 4 and md5(...) = '...'

От
Martijn van Oosterhout
Дата:
On Mon, Jul 03, 2006 at 03:13:15PM +0200, Alexander Farber wrote:
> Hello,
>
> in my application I'm trying to authenticate users
> against a table called "users". The integer column
> "id" should match, but also an md5 hash of the
> "password" column (salted with a string) should match.
> My authentication function (written in C, using libpq)
> should return a "username" (is a varchar(200) field).
>
> I wonder, what is faster: fetching 2 columns - the
> username and the md5-result and then comparing the
> md5 string against the argument in my app, like here:

I don't know about speed, but I think the choice should really be based
on whether you want to be able to tell the difference between unknown
user and bad password. You can still do the comparison in the database
by doing something like:

select username, md5('deadbeef' || password) = 'blah'
from users where id = 4;

So the second field will be true or false.

In any case, the testing you're doing is bogus, since you're probably
testing backend startup time as well, which is probably longer than the
query you're running anyway. Even then, 5ms for the whole process is
not to be sneezed at.

Have a nice day,
--
Martijn van Oosterhout   <kleptog@svana.org>   http://svana.org/kleptog/
> From each according to his ability. To each according to his ability to litigate.

Вложения

Re: Optimizing query: select ... where id = 4 and md5(...) = '...'

От
"Alexander Farber"
Дата:
Yes, you're probably right.

I'm just trying to ensure, that the slow md5() function isn't
called for every row in the table. If that's not the case, then
the other tiny speed differences are not that important for me.

Your query works too, thanks for the hint.

punbb=> select username, md5('deadbeef' || password) = 'blah' from
users where id = 4;
 username | ?column?
----------+----------
 Vasja    | f
(1 row)

punbb=> explain select username, md5('deadbeef' || password) = 'blah'
from users where id = 4;
                                QUERY PLAN
--------------------------------------------------------------------------
 Index Scan using users_pkey on users  (cost=0.00..5.95 rows=1 width=156)
   Index Cond: (id = 4)
(2 rows)

Regards
Alex

On 7/3/06, Martijn van Oosterhout <kleptog@svana.org> wrote:
> On Mon, Jul 03, 2006 at 03:13:15PM +0200, Alexander Farber wrote:
> > in my application I'm trying to authenticate users
> > against a table called "users". The integer column
> > "id" should match, but also an md5 hash of the
> > "password" column (salted with a string) should match.
> > My authentication function (written in C, using libpq)
> > should return a "username" (is a varchar(200) field).
> >
> > I wonder, what is faster: fetching 2 columns - the
> > username and the md5-result and then comparing the
> > md5 string against the argument in my app, like here:
>
> I don't know about speed, but I think the choice should really be based
> on whether you want to be able to tell the difference between unknown
> user and bad password. You can still do the comparison in the database
> by doing something like:
>
> select username, md5('deadbeef' || password) = 'blah'
> from users where id = 4;
>
> So the second field will be true or false.
>
> In any case, the testing you're doing is bogus, since you're probably
> testing backend startup time as well, which is probably longer than the
> query you're running anyway. Even then, 5ms for the whole process is
> not to be sneezed at.
> --
> Martijn van Oosterhout   <kleptog@svana.org>   http://svana.org/kleptog/
> > From each according to his ability. To each according to his ability to litigate.
>
>


--
http://preferans.de

Re: Optimizing query: select ... where id = 4 and md5(...)

От
Alban Hertroys
Дата:
Alexander Farber wrote:
> I wonder, what is faster: fetching 2 columns - the
> username and the md5-result and then comparing the
> md5 string against the argument in my app, like here:
>
> punbb=> select username, md5('deadbeef' || password) from users where id
> = 4;
> username |               md5
> ----------+----------------------------------
> Vasja    | dcde745cc304742e26d62e683a9ecb0a
> (1 row)

Why don't you store the hashed value instead?

--
Alban Hertroys
alban@magproductions.nl

magproductions b.v.

T: ++31(0)534346874
F: ++31(0)534346876
M:
I: www.magproductions.nl
A: Postbus 416
    7500 AK Enschede

// Integrate Your World //

Re: Optimizing query: select ... where id = 4 and md5(...) = '...'

От
"Alexander Farber"
Дата:
Hi Alban,

On 7/3/06, Alban Hertroys <alban@magproductions.nl> wrote:
> Alexander Farber wrote:
> > punbb=> select username, md5('deadbeef' || password) from users where id
> > = 4;
> > username |               md5
> > ----------+----------------------------------
> > Vasja    | dcde745cc304742e26d62e683a9ecb0a
> > (1 row)
>
> Why don't you store the hashed value instead?

actually it's not me - it's the punBB forum SW
( http://docs.punbb.org/dev.html#dbtables )
against which I want my libpq-program to authenticate.

And additional reason is that I could change my salt string
if I wanted and the users could still authenticate without
changing their passwords

Regards
Alex

--
http://preferans.de