commit 79d0dc78b9f3b0bbc078876417b8f46970308e6e Author: Jacob Champion Date: Thu Jan 4 11:46:06 2024 -0800 WIP: try to speed up prediction diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index d875d57df7..9149d45a4b 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -165,39 +165,47 @@ static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_NT_KEYPAI * Any combination not specified here represents an error. */ -static char *td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] = +struct td_entry +{ + size_t len; + char *prod; +}; + +#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) } + +static struct td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] = { /* JSON */ - [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = JSON_PROD_SCALAR_STRING, - [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = JSON_PROD_SCALAR_NUMBER, - [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = JSON_PROD_SCALAR_TRUE, - [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = JSON_PROD_SCALAR_FALSE, - [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = JSON_PROD_SCALAR_NULL, - [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = JSON_PROD_ARRAY, - [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = JSON_PROD_OBJECT, + [OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING), + [OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER), + [OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE), + [OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE), + [OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL), + [OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY), + [OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT), /* ARRAY_ELEMENTS */ - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = JSON_PROD_ARRAY_ELEMENTS, - [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = JSON_PROD_EPSILON, + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS), + [OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON), /* MORE_ARRAY_ELEMENTS */ - [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = JSON_PROD_MORE_ARRAY_ELEMENTS, - [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = JSON_PROD_EPSILON, + [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS), + [OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON), /* * KEYPAIR - could expand but it's clearer not to. */ - [OFS(JSON_NT_KEYPAIR)][JSON_TOKEN_STRING] = JSON_PROD_KEYPAIR, + [OFS(JSON_NT_KEYPAIR)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEYPAIR), /* KEY_PAIRS */ - [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = JSON_PROD_KEY_PAIRS, - [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = JSON_PROD_EPSILON, + [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS), + [OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON), /* MORE_KEY_PAIRS */ - [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = JSON_PROD_MORE_KEY_PAIRS, - [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = JSON_PROD_EPSILON, + [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS), + [OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON), }; /* the GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */ @@ -419,11 +427,10 @@ dec_lex_level(JsonLexContext *lex) } static inline void -push_prediction(JsonParserStack *pstack, char *prod) +push_prediction(JsonParserStack *pstack, struct td_entry entry) { - int len = strlen(prod); - memcpy(pstack->prediction + pstack->pred_index, prod, len); - pstack->pred_index += len; + memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len); + pstack->pred_index += entry.len; } static inline char @@ -651,12 +658,15 @@ pg_parse_json_incremental(JsonLexContext *lex, /* use prediction stack for incremental parsing */ if (!have_prediction(pstack)) - push_prediction(pstack, JSON_PROD_GOAL); + { + struct td_entry goal = TD_ENTRY(JSON_PROD_GOAL); + push_prediction(pstack, goal); + } while (have_prediction(pstack)) { char top = pop_prediction(pstack); - char *prod; + struct td_entry entry; /* * these first two branches are the guts of the Table Driven @@ -676,14 +686,14 @@ pg_parse_json_incremental(JsonLexContext *lex, tok = lex_peek(lex); } } - else if (IS_NT(top) && (prod = td_parser_table[OFS(top)][tok]) != NULL) + else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL) { /* * the token is in the director set for a production of the * non-terminal at the top of the stack, so push the reversed * RHS of the production onto the stack. */ - push_prediction(pstack, prod); + push_prediction(pstack, entry); } else if (IS_SEM(top)) {