Re: Emacs vs pg_indent's weird indentation for function declarations

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: Emacs vs pg_indent's weird indentation for function declarations
Дата
Msg-id 9170.1557955021@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Emacs vs pg_indent's weird indentation for function declarations  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: Emacs vs pg_indent's weird indentation for function declarations  (Thomas Munro <thomas.munro@gmail.com>)
Список pgsql-hackers
I wrote:
> I experimented with fixing this.  I was able to get pg_bsd_indent to
> distinguish multi-line function declarations from definitions, but it
> turns out that it doesn't help your concern about the lines being too
> long after re-indenting.  Contrary to what you imagine above, it seems
> pg_bsd_indent will not reflow argument lists, regardless of whether it
> thinks there needs to be more or less leading whitespace.

Actually, now that I think about it, pgindent seldom revisits line-break
decisions in code anyway; it's only aggressive about reflowing comments.
So maybe we shouldn't be expecting it to fix this.

Also, looking at sample results (attached), it seems like we don't have
such a large problem as one might guess.  It looks like people have
mostly formatted declarations to work with this indentation already,
either because their editors did it automatically, or because they were
thinking that this might get fixed someday.  (I know I've often had that
in the back of my mind.)  There are some places in the attached diff
that I might take the trouble to clean up manually, but not that many.

I found out that my initial draft of a multi-line lookahead function was
not nearly smart enough to survive contact with the PG source corpus,
but after rejiggering it to cope with comments and attributes, it seems
to do quite well.

A small problem with the "rejiggering" is that it now makes the wrong
choice for K&R-style function definitions, causing them to be weirdly
indented.  For our purposes, that's a non-problem so I'm not excited
about trying to make it smart enough to recognize those.  We do have
a couple of amazingly old and crufty K&R-style functions in src/port/,
though, so probably we'd wish to fix those.

Attached is working draft of pg_bsd_indent changes (still sans comments)
as well as a patch showing the difference between current pgindent
results on HEAD and the results of this version.  I think there's no
question that this is an improvement.

            regards, tom lane

diff --git a/indent.h b/indent.h
index 0fffd89..1708dbc 100644
--- a/indent.h
+++ b/indent.h
@@ -41,6 +41,8 @@ void    diag2(int, const char *);
 void    diag3(int, const char *, int);
 void    diag4(int, const char *, int, int);
 void    dump_line(void);
+int    lookahead(void);
+void    lookahead_reset(void);
 void    fill_buffer(void);
 void    parse(int);
 void    pr_comment(void);
diff --git a/io.c b/io.c
index df11094..8d13a52 100644
--- a/io.c
+++ b/io.c
@@ -51,6 +51,13 @@ static char sccsid[] = "@(#)io.c    8.1 (Berkeley) 6/6/93";
 
 int         comment_open;
 static int  paren_target;
+
+static char *lookahead_buf;    /* malloc'd buffer, or NULL initially */
+static char *lookahead_buf_end; /* end+1 of allocated space */
+static char *lookahead_start;    /* => next char for fill_buffer() to fetch */
+static char *lookahead_ptr;    /* => next char for lookahead() to fetch */
+static char *lookahead_end;    /* last+1 valid char in lookahead_buf */
+
 static int pad_output(int current, int target);
 
 void
@@ -252,6 +259,58 @@ compute_label_target(void)
     : ps.ind_size * (ps.ind_level - label_offset) + 1;
 }
 
+/*
+ * Read data ahead of what has been collected into in_buffer.
+ *
+ * Successive calls get further and further ahead, until we hit EOF.
+ * Call lookahead_reset to rescan from just beyond in_buffer.
+ */
+int
+lookahead(void)
+{
+    while (lookahead_ptr >= lookahead_end) {
+      int i = getc(input);
+
+      if (i == EOF)
+    return i;
+      if (i == '\0')
+    continue;        /* fill_buffer drops nulls, so do we */
+
+      if (lookahead_end >= lookahead_buf_end) {
+    /* Need to allocate or enlarge lookahead_buf */
+    char *new_buf;
+    size_t req;
+
+    if (lookahead_buf == NULL) {
+      req = 64;
+      new_buf = malloc(req);
+    } else {
+      req = (lookahead_buf_end - lookahead_buf) * 2;
+      new_buf = realloc(lookahead_buf, req);
+    }
+    if (new_buf == NULL)
+      errx(1, "too much lookahead required");
+    lookahead_start = new_buf + (lookahead_start - lookahead_buf);
+    lookahead_ptr = new_buf + (lookahead_ptr - lookahead_buf);
+    lookahead_end = new_buf + (lookahead_end - lookahead_buf);
+    lookahead_buf = new_buf;
+    lookahead_buf_end = new_buf + req;
+      }
+
+      *lookahead_end++ = i;
+    }
+    return (unsigned char) *lookahead_ptr++;
+}
+
+/*
+ * Reset so that lookahead() will again scan from just beyond what's in
+ * in_buffer.
+ */
+void
+lookahead_reset(void)
+{
+    lookahead_ptr = lookahead_start;
+}
 
 /*
  * Copyright (C) 1976 by the Board of Trustees of the University of Illinois
@@ -293,11 +352,16 @@ fill_buffer(void)
         p = in_buffer + offset;
         in_buffer_limit = in_buffer + size - 2;
     }
-    if ((i = getc(f)) == EOF) {
-        *p++ = ' ';
-        *p++ = '\n';
-        had_eof = true;
-        break;
+    if (lookahead_start < lookahead_end) {
+      i = (unsigned char) *lookahead_start++;
+    } else {
+      lookahead_start = lookahead_ptr = lookahead_end = lookahead_buf;
+      if ((i = getc(f)) == EOF) {
+        *p++ = ' ';
+        *p++ = '\n';
+        had_eof = true;
+        break;
+      }
     }
     if (i != '\0')
         *p++ = i;
diff --git a/lexi.c b/lexi.c
index 3c7bfef..df71a20 100644
--- a/lexi.c
+++ b/lexi.c
@@ -148,6 +148,54 @@ strcmp_type(const void *e1, const void *e2)
     return (strcmp(e1, *(const char * const *)e2));
 }
 
+/*
+ * Scan over a function argument declaration list, then see if it is
+ * followed by '{', indicating that it's a function definition.
+ * If it's followed by ';' or ',', it's a prototype.  Otherwise keep
+ * scanning, because there might be whitespace, comments, or attribute
+ * declarations before we get to the telltale punctuation.
+ *
+ * Note that this will make the wrong decision for a K&R-style function
+ * definition.  Too bad.
+ */
+static int
+is_func_definition(char *tp)
+{
+  int paren_depth = 0;
+  int in_comment = false;
+  int lastc = 0;
+
+  lookahead_reset();
+  for (;;) {
+    int c;
+
+    if (tp < buf_end)
+      c = *tp++;
+    else {
+    c = lookahead();
+    if (c == EOF)
+      break;
+    }
+    if (in_comment) {
+      if (lastc == '*' && c == '/')
+    in_comment = false;
+    } else if (lastc == '/' && c == '*')
+      in_comment = true;
+    else if (c == '(')
+      paren_depth++;
+    else if (c == ')') {
+      paren_depth--;
+      if (paren_depth < 0)
+    return false;
+    } else if (paren_depth == 0 && c == '{')
+      return true;
+    else if (paren_depth == 0 && (c == ';' || c == ','))
+      return false;
+    lastc = c;
+  }
+  return false;
+}
+
 int
 lexi(struct parser_state *state)
 {
@@ -348,15 +396,12 @@ lexi(struct parser_state *state)
     }            /* end of if (found_it) */
     if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 &&
         state->in_parameter_declaration == 0 && state->block_init == 0) {
-        char *tp = buf_ptr;
-        while (tp < buf_end)
-        if (*tp++ == ')' && (*tp == ';' || *tp == ','))
-            goto not_proc;
+      if (is_func_definition(buf_ptr)) {
         strncpy(state->procname, token, sizeof state->procname - 1);
         if (state->in_decl)
         state->in_parameter_declaration = 1;
         return (funcname);
-    not_proc:;
+      }
     }
     /*
      * The following hack attempts to guess whether or not the current
diff --git a/tests/declarations.0.stdout b/tests/declarations.0.stdout
index e97e447..ec63596 100644
--- a/tests/declarations.0.stdout
+++ b/tests/declarations.0.stdout
@@ -64,10 +64,10 @@ static int    ald_shutingdown = 0;
 struct thread  *ald_thread;
 
 static int
-do_execve(td, args, mac_p)
-    struct thread  *td;
-    struct image_args *args;
-    struct mac     *mac_p;
+        do_execve(td, args, mac_p)
+struct thread  *td;
+struct image_args *args;
+struct mac     *mac_p;
 {
 
 }
diff --git a/tests/list_head.0.stdout b/tests/list_head.0.stdout
index b6f0762..a8f985f 100644
--- a/tests/list_head.0.stdout
+++ b/tests/list_head.0.stdout
@@ -1,10 +1,10 @@
 /* $FreeBSD$ */
 /* See r309380 */
 static int
-do_execve(td, args, mac_p)
-    struct thread  *td;
-    struct image_args *args;
-    struct mac     *mac_p;
+        do_execve(td, args, mac_p)
+struct thread  *td;
+struct image_args *args;
+struct mac     *mac_p;
 {
 
 }

Вложения

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tomas Vondra
Дата:
Сообщение: Re: Replace hashtable growEnable flag
Следующее
От: Thomas Munro
Дата:
Сообщение: Re: Emacs vs pg_indent's weird indentation for function declarations