File 0001-1C_FULL.patch of Package postgresql18
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000000..932935ec109
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "pg_wait_sampling"]
+ path = contrib/pg_wait_sampling
+ url = ../pg_wait_sampling
diff --git a/contrib/Makefile b/contrib/Makefile
index 2f0a88d3f77..cffda2ff663 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = \
amcheck \
auth_delay \
+ auto_dump \
auto_explain \
basic_archive \
basebackup_to_shell \
@@ -19,17 +20,22 @@ SUBDIRS = \
dict_int \
dict_xsyn \
earthdistance \
+ fasttrun \
file_fdw \
fuzzystrmatch \
+ fulleq \
hstore \
intagg \
intarray \
isn \
lo \
ltree \
+ mchar \
oid2name \
+ online_analyze \
pageinspect \
passwordcheck \
+ plantuner \
pg_buffercache \
pg_freespacemap \
pg_logicalinspect \
@@ -41,6 +47,7 @@ SUBDIRS = \
pgrowlocks \
pgstattuple \
pg_visibility \
+ pg_wait_sampling \
pg_walinspect \
postgres_fdw \
seg \
@@ -51,7 +58,8 @@ SUBDIRS = \
tsm_system_rows \
tsm_system_time \
unaccent \
- vacuumlo
+ vacuumlo \
+ dbcopies_decoding
ifeq ($(with_ssl),openssl)
SUBDIRS += pgcrypto sslinfo
diff --git a/contrib/auto_dump/Makefile b/contrib/auto_dump/Makefile
new file mode 100644
index 00000000000..351f4cbcb98
--- /dev/null
+++ b/contrib/auto_dump/Makefile
@@ -0,0 +1,16 @@
+MODULES = auto_dump
+
+EXTENSION = auto_dump
+PGFILEDESC = "auto_dump"
+DATA = auto_dump--1.0.sql auto_dump--1.0--1.1.sql
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/auto_dump
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/auto_dump/auto_dump--1.0--1.1.sql b/contrib/auto_dump/auto_dump--1.0--1.1.sql
new file mode 100644
index 00000000000..57fecc9dcc0
--- /dev/null
+++ b/contrib/auto_dump/auto_dump--1.0--1.1.sql
@@ -0,0 +1,11 @@
+CREATE OR REPLACE FUNCTION auto_dump_immediate(pid int)
+RETURNS void AS 'MODULE_PATHNAME'
+LANGUAGE C RETURNS NULL ON NULL INPUT VOLATILE;
+
+CREATE OR REPLACE FUNCTION auto_dump_current(pid int)
+RETURNS void AS 'MODULE_PATHNAME'
+LANGUAGE C RETURNS NULL ON NULL INPUT VOLATILE;
+
+CREATE OR REPLACE FUNCTION auto_dump_next(pid int)
+RETURNS void AS 'MODULE_PATHNAME'
+LANGUAGE C RETURNS NULL ON NULL INPUT VOLATILE;
diff --git a/contrib/auto_dump/auto_dump--1.0.sql b/contrib/auto_dump/auto_dump--1.0.sql
new file mode 100644
index 00000000000..36da0901e90
--- /dev/null
+++ b/contrib/auto_dump/auto_dump--1.0.sql
@@ -0,0 +1 @@
+\echo Use "CREATE EXTENSION auto_dump" to load this file. \quit
diff --git a/contrib/auto_dump/auto_dump.c b/contrib/auto_dump/auto_dump.c
new file mode 100644
index 00000000000..6ef652782ee
--- /dev/null
+++ b/contrib/auto_dump/auto_dump.c
@@ -0,0 +1,1400 @@
+#include "c.h"
+#include "postgres.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+#include "access/heapam.h"
+#include "access/skey.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_class_d.h"
+#include "catalog/pg_proc_d.h"
+#include "commands/copy.h"
+#include "commands/explain.h"
+#include "common/file_perm.h"
+#include "common/keywords.h"
+#include "executor/instrument.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "storage/ipc.h"
+#include "storage/lockdefs.h"
+#include "storage/proc.h"
+#include "storage/procnumber.h"
+#include "tcop/pquery.h"
+#include "utils/backend_status.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/fmgrprotos.h"
+#include "utils/guc.h"
+#include "utils/lsyscache.h"
+#include "utils/regproc.h"
+#include "utils/snapmgr.h"
+#include "commands/explain_state.h"
+#include "commands/explain_format.h"
+#include "utils/syscache.h"
+#include "utils/acl.h"
+#include "catalog/pg_authid_d.h"
+
+
+enum{
+ MODE_OFF=0,
+ MODE_IMMEDIATE=1,
+ MODE_CURRENT=2,
+ MODE_NEXT=3,
+};
+
+enum{
+ DATA_FORMAT_INSERT=0,
+ DATA_FORMAT_COPY_FILE=1,
+ DATA_FORMAT_COPY_STDIN=2,
+};
+
+static const struct config_enum_entry data_format_options[] = {
+ {"insert", DATA_FORMAT_INSERT, false},
+ {"copy-file", DATA_FORMAT_COPY_FILE, false},
+ {"copy-stdin", DATA_FORMAT_COPY_STDIN, false},
+ {NULL, 0, false}
+};
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(auto_dump_immediate);
+Datum auto_dump_immediate(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(auto_dump_current);
+Datum auto_dump_current(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(auto_dump_next);
+Datum auto_dump_next(PG_FUNCTION_ARGS);
+
+static bool dump_enable = false;
+static bool dump_all_temp_tables = false;
+static bool dump_temporary_tables = true;
+static bool dump_persistent_tables = false;
+static int data_format = DATA_FORMAT_COPY_STDIN;
+static bool dump_data = true;
+static bool dump_indexes = true;
+static bool dump_query = true;
+static bool dump_create = true;
+static bool dump_plan = true;
+static bool dump_readme = true;
+static bool dump_on_cancel = false;
+static bool dump_on_bad_plan = false;
+static char* dump_on_query_string = "";
+static int dump_on_duration = -1;
+static int dump_plan_count_threshold = 0;
+static int dump_plan_percent_threshold = 0;
+static char* output_directory = "";
+static char* query_output_directory;
+static int nesting_level;
+static bool query_dumped;
+static bool plan_dumped;
+static bool plan_analysis_dumped;
+static char volatile* backend_dump_mode;
+
+static ExecutorStart_hook_type prev_ExecutorStart;
+static ProcessInterrupts_hook_type prev_ProcessInterrupts;
+static ExecutorEnd_hook_type prev_ExecutorEnd;
+static ExecutorRun_hook_type prev_ExecutorRun;
+static ExecutorFinish_hook_type prev_ExecutorFinish;
+static shmem_startup_hook_type prev_shmem_startup_hook;
+static shmem_request_hook_type prev_shmem_request_hook;
+
+typedef struct FieldInfo{
+
+ HeapTuple tuple;
+ Form_pg_attribute form;
+ AttrNumber attnum;
+ FmgrInfo outfunc;
+ bool first;
+ char *attname;
+} FieldInfo;
+
+#define IS_MODE_ACTIVE() (MyProcNumber != INVALID_PROC_NUMBER && backend_dump_mode && ((backend_dump_mode[MyProcNumber] == MODE_IMMEDIATE) || (backend_dump_mode[MyProcNumber] == MODE_CURRENT)))
+
+
+
+/*
+ * IsBadPlan - recursively check an executed plan tree for row mis-estimates.
+ *
+ * A node is "bad" when |actual - estimated| rows exceeds
+ * dump_plan_count_threshold and the relative error in percent exceeds
+ * dump_plan_percent_threshold (an estimate <= 0 always exceeds the latter).
+ * Nodes that never ran (nloops == 0) have no actual row count and are not
+ * judged.  Returns true as soon as one bad node is found; false for NULL.
+ */
+static bool
+IsBadPlan(PlanState *planstate)
+{
+	ListCell* lc;
+	int c;
+
+	if (!planstate)
+		return false;
+
+	/* Plan trees can be deep; error out cleanly rather than overflow the stack. */
+	check_stack_depth();
+
+	if (planstate->instrument)
+	{
+		/* Close the current instrumentation loop before reading counters. */
+		InstrEndLoop(planstate->instrument);
+
+		if (planstate->instrument->nloops > 0)
+		{
+			double expect_rows = planstate->plan->plan_rows;
+			double actual_rows = planstate->instrument->ntuples / planstate->instrument->nloops;
+			double diff = fabs(actual_rows - expect_rows);
+
+			if (diff > dump_plan_count_threshold &&
+				(expect_rows <= 0 || diff / expect_rows * 100 > dump_plan_percent_threshold))
+			{
+				ereport(DEBUG5,
+					(errmsg("auto_dump hit bad plan threshold: expected=%.0f actual=%.0f", expect_rows, actual_rows),
+					errhidestmt(true),
+					errhidecontext(true)));
+				return true;
+			}
+		}
+	}
+
+	if (IsBadPlan(outerPlanState(planstate)))
+		return true;
+
+	if (IsBadPlan(innerPlanState(planstate)))
+		return true;
+
+	foreach(lc, planstate->initPlan)
+	{
+		if(IsBadPlan(((SubPlanState*)lfirst(lc))->planstate))
+			return true;
+	}
+
+	foreach(lc, planstate->subPlan)
+	{
+		if(IsBadPlan(((SubPlanState*)lfirst(lc))->planstate))
+			return true;
+	}
+
+	switch (nodeTag(planstate->plan))
+	{
+		case T_Append:
+			for(c=0; c < ((AppendState *) planstate)->as_nplans; c++)
+			{
+				if (IsBadPlan(((AppendState *) planstate)->appendplans[c]))
+					return true;
+			}
+			break;
+		case T_MergeAppend:
+			for(c=0; c < ((MergeAppendState *) planstate)->ms_nplans; c++)
+			{
+				if (IsBadPlan(((MergeAppendState *) planstate)->mergeplans[c]))
+					return true;
+			}
+			break;
+		case T_BitmapAnd:
+			for(c=0; c < ((BitmapAndState *) planstate)->nplans; c++)
+			{
+				if (IsBadPlan(((BitmapAndState *) planstate)->bitmapplans[c]))
+					return true;
+			}
+			break;
+		case T_BitmapOr:
+			for(c=0; c < ((BitmapOrState *) planstate)->nplans ; c++)
+			{
+				if (IsBadPlan(((BitmapOrState *) planstate)->bitmapplans[c]))
+					return true;
+			}
+			break;
+		case T_SubqueryScan:
+			if(IsBadPlan(((SubqueryScanState *) planstate)->subplan))
+				return true;
+			break;
+		case T_CustomScan:
+			foreach(lc, ((CustomScanState *) planstate)->custom_ps)
+			{
+				if(IsBadPlan((PlanState*)lfirst(lc)))
+					return true;
+			}
+			break;
+		default:
+			break;
+	}
+
+	return false;
+}
+/*
+ * IsQueryMatch - true if pattern occurs somewhere in query.
+ *
+ * Characters must match exactly, except that a whitespace run in query that
+ * starts at a CR or LF matches any whitespace run in pattern.  An empty query
+ * never matches.
+ */
+static bool
+IsQueryMatch(const char* query, const char* pattern)
+{
+	while (*query)
+	{
+		char const* q = query;
+		char const* p = pattern;
+		bool match = true;
+		while (*p)
+		{
+			if (*q == '\r' || *q == '\n')
+			{
+				/* <ctype.h> needs unsigned char values; plain char may be negative. */
+				if (!isspace((unsigned char) *p))
+				{
+					match = false;
+					break;
+				}
+				while (isspace((unsigned char) *p))
+					p++;
+				while (isspace((unsigned char) *q))
+					q++;
+			}
+			else if (*q == *p)
+			{
+				q++;
+				p++;
+			}
+			else
+			{
+				match = false;
+				break;
+			}
+		}
+		if (match)
+			return true;
+		query++;
+	}
+	return false;
+}
+/* PrepareDump - create a new per-dump directory below output_directory and */
+/* remember it in query_output_directory; returns false if none is configured. */
+static bool
+PrepareDump(void)
+{
+	time_t t;
+	struct tm tm;
+	char* base;
+	static int counter = 0;
+
+	/* Do nothing if not output directory specified */
+	if (!*output_directory)
+		return false;
+	t = time(NULL);
+	tm = *localtime(&t);
+	base = make_absolute_path(output_directory);	/* malloc'd result, free()d below */
+	query_output_directory = psprintf("%s/%d-%04d_%02d_%02d_%02d_%02d_%02d_%02d/", base,
+		MyProcPid, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour,
+		tm.tm_min, tm.tm_sec, (counter++) % 100 );
+	free(base);
+	if (pg_mkdir_p(query_output_directory, PG_DIR_MODE_OWNER) != 0)
+		ereport(ERROR, (errcode_for_file_access(),
+			errmsg("could not create directory \"%s\": %m", query_output_directory)));
+	return true;
+}
+/* SavePlan - write EXPLAIN-style output for queryDesc to "<dump dir>/<name>.txt"; */
+/* analyze turns on the ANALYZE-related options.  ERROR if the file can't be created. */
+static void
+SavePlan(QueryDesc* queryDesc, char const* name, bool analyze)
+{
+	FILE* f;
+	mode_t oumask;
+	MemoryContext oldcxt;
+	struct ExplainState* es;
+	char* path;
+
+	oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+	es = NewExplainState();
+	es->analyze = analyze;
+	es->verbose = analyze;
+	es->buffers = analyze;
+	es->wal = analyze;
+	es->timing = analyze;
+	es->summary = analyze;
+	es->format = EXPLAIN_FORMAT_TEXT;
+	es->settings = analyze;
+	ExplainBeginOutput(es);
+	ExplainQueryText(es, queryDesc);
+	ExplainQueryParameters(es, queryDesc->params, -1);
+	ExplainPrintPlan(es, queryDesc);
+	if (es->analyze)
+		ExplainPrintTriggers(es, queryDesc);
+	if (es->costs)
+		ExplainPrintJITSummary(es, queryDesc);
+	ExplainEndOutput(es);
+	/* Remove last line break */
+	if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n')
+		es->str->data[--es->str->len] = '\0';
+	path = psprintf("%s/%s.txt", query_output_directory, name);	/* lives in es_query_cxt */
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+	f = fopen(path, "wb");
+	umask(oumask);	/* restore before a possible ERROR below */
+	if (!f)
+		ereport(ERROR, (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", path)));
+	fputs(es->str->data, f);
+	fclose(f);
+	MemoryContextSwitchTo(oldcxt);
+}
+/* FlushStringInfoToFile - write buf's current contents (if any) to f, then */
+/* reset buf to empty.  The fwrite() result is not checked. */
+static void
+FlushStringInfoToFile(StringInfo buf, FILE* f)
+{
+	if (buf->len)
+		fwrite(buf->data, buf->len, 1, f);
+	resetStringInfo(buf);
+}
+/* CreateDumpFile - open "<query_output_directory>/<name>" for writing; ERROR (with OS reason) on failure. */
+static FILE*
+CreateDumpFile(char const* name)
+{
+	char* filePath = psprintf("%s/%s", query_output_directory, name);
+	FILE* f = fopen(filePath, "wb");
+	if (!f)
+		ereport(ERROR, (errcode_for_file_access(), errmsg("unable to create file \"%s\": %m", filePath)));
+	pfree(filePath);
+	return f;
+}
+/*
+ * QuotedIdentifier - return a palloc'd copy of s, double-quoted unless it is a
+ * plain [a-z_][a-z0-9_]* name that is not a non-unreserved keyword.  Embedded
+ * '"' is doubled; invalid multibyte input gets pg_encoding_set_invalid()'s marker.
+ */
+static char*
+QuotedIdentifier(char const* s)
+{
+	bool need_quotes = false;
+	StringInfoData buf;
+	size_t remaining;
+	const char *cp;
+	int encoding = GetDatabaseEncoding();	/* names are stored in the database encoding, not always UTF-8 */
+
+	for (cp=s; *cp; cp++)
+	{
+		if (!((*cp >= 'a' && *cp <= 'z') || (*cp == '_') || (*cp >= '0' && *cp <= '9' && cp != s)))
+		{
+			need_quotes = true;
+			break;
+		}
+	}
+	if (!need_quotes)
+	{
+		int kwnum = ScanKeywordLookup(s, &ScanKeywords);
+		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
+			need_quotes = true;
+	}
+	if (!need_quotes)
+		return pstrdup(s);
+
+	initStringInfo(&buf);
+	appendStringInfoChar(&buf, '"');
+	remaining = strlen(s);
+	cp =s;
+	while (remaining > 0)
+	{
+		int charlen;
+
+		if (!IS_HIGHBIT_SET(*cp))
+		{
+			if (*cp == '"')
+				appendStringInfoChar(&buf, *cp);
+			appendStringInfoChar(&buf, *cp);
+			cp++;
+			remaining--;
+			continue;
+		}
+
+		charlen = pg_encoding_mblen(encoding, cp);
+
+		if (remaining >= charlen &&
+			pg_encoding_verifymbchar(encoding, cp, charlen) != -1)
+		{
+			for (int i = 0; i < charlen; i++)
+			{
+				appendStringInfoChar(&buf, *cp);
+				remaining--;
+				cp++;
+			}
+		}
+		else
+		{
+			enlargeStringInfo(&buf, 2);
+			pg_encoding_set_invalid(encoding, buf.data + buf.len);
+			buf.len += 2;
+			buf.data[buf.len] = '\0';
+			remaining--;
+			cp++;
+		}
+	}
+
+	appendStringInfoChar(&buf, '"');
+	return buf.data;
+}
+/* Destination stream for CopyDataDestCallback(); SaveTables assigns it before starting a COPY. */
+static FILE* CopyDataDestCallback_file;
+
+static
+void CopyDataDestCallback(void *data, int len)
+{
+	fwrite(data, len, 1, CopyDataDestCallback_file);	/* the len bytes handed over by the caller */
+	fputs("\n",CopyDataDestCallback_file);	/* followed by a newline */
+}
+/*
+ * SaveTables - dump DDL and/or data of the tables in queryDesc's plan (and of
+ * all temp tables if dump_all_temp_tables) into query_output_directory.
+ */
+static void
+SaveTables(QueryDesc* queryDesc)
+{
+	mode_t oumask;
+	FILE *fCreatePers = NULL, *fCreateTemp = NULL;
+	FILE *fDataPers = NULL, *fDataTemp = NULL;
+	ListCell* lc;
+	List* tableOids = NULL;
+	StringInfoData buf;
+	Oid tempNsp = InvalidOid;
+	/* Populate list of all temporary tables OIDs; find the temp schema without creating it */
+	if (dump_all_temp_tables)
+		tempNsp = LookupExplicitNamespace("pg_temp", true);
+	if (OidIsValid(tempNsp)) {
+		ScanKeyData key[1];
+		Relation pgclass;
+		TableScanDesc scan;
+		HeapTuple tuple;
+		ScanKeyInit(&key[0],
+			Anum_pg_class_relnamespace,
+			BTEqualStrategyNumber,
+			F_OIDEQ,
+			ObjectIdGetDatum(tempNsp));
+		pgclass = table_open(RelationRelationId, AccessShareLock);
+		scan = table_beginscan_catalog(pgclass, 1, key);
+
+		while ((tuple = heap_getnext(scan, ForwardScanDirection)))
+		{
+			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+			if (classForm->relkind == RELKIND_RELATION && classForm->relpersistence == RELPERSISTENCE_TEMP)
+				tableOids = list_append_unique_oid(tableOids, classForm->oid);
+		}
+
+		table_endscan(scan);
+		table_close(pgclass, AccessShareLock);
+	}
+
+	/* Populate list of query's tables OIDs */
+	foreach(lc, queryDesc->plannedstmt->relationOids) {
+		tableOids = list_append_unique_oid(tableOids, lfirst_oid(lc));
+	}
+
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+
+	if (dump_create || dump_indexes)
+	{
+		if (dump_persistent_tables)
+			fCreatePers = CreateDumpFile("create_persistent.sql");
+
+		if (dump_temporary_tables || dump_all_temp_tables)
+			fCreateTemp = CreateDumpFile("create_temporary.sql");
+	}
+
+	if (dump_data)
+	{
+		if (dump_persistent_tables)
+			fDataPers = CreateDumpFile("insert_persistent.sql");
+
+		if (dump_temporary_tables || dump_all_temp_tables)
+			fDataTemp = CreateDumpFile("insert_temporary.sql");
+
+		if (data_format == DATA_FORMAT_COPY_FILE)
+		{
+			char* dirname = palloc(strlen(query_output_directory)*2+1);
+			char* pos = dirname;
+			for (char* c=query_output_directory; *c; c++)
+			{
+				if (*c == '\'')
+					*(pos++) = '\\';
+				*(pos++) = *c;
+			}
+			*(pos++) = '\0';
+
+			if (fDataPers)
+				fprintf(fDataPers, "\\set dir '%s'\n", dirname);
+
+			if (fDataTemp)
+				fprintf(fDataTemp, "\\set dir '%s'\n", dirname);
+
+			pfree(dirname);
+		}
+	}
+
+	umask(oumask);
+
+	initStringInfo(&buf);
+
+	foreach(lc, tableOids) {
+		HeapTuple classTuple;
+		Form_pg_class classForm;
+		Oid relid = lfirst_oid(lc);
+		classTuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+		if (!HeapTupleIsValid(classTuple))
+			continue;	/* no pg_class row for this OID; nothing to dump */
+		classForm = (Form_pg_class) GETSTRUCT(classTuple);
+		if (classForm->relkind == RELKIND_RELATION &&
+			((classForm->relpersistence == RELPERSISTENCE_PERMANENT && dump_persistent_tables) ||
+			(classForm->relpersistence == RELPERSISTENCE_TEMP && (dump_temporary_tables || dump_all_temp_tables)))
+		){
+
+			FieldInfo *fields = (FieldInfo*)palloc(sizeof(FieldInfo) * classForm->relnatts);
+			int numFields = 0;
+			FieldInfo *field;
+			AttrNumber attno;
+			char *relname;
+
+			relname = QuotedIdentifier(classForm->relname.data);
+
+			for (attno = 1; attno <= classForm->relnatts; attno++)
+			{
+				bool isvarlena;
+				Oid outfuncid;
+
+				field = &fields[numFields];
+
+				field->tuple = SearchSysCache2(ATTNUM,
+					ObjectIdGetDatum(classForm->oid),
+					Int16GetDatum(attno));
+				if (!HeapTupleIsValid(field->tuple))
+					elog(ERROR, "cache lookup failed for attribute %d of relation %u", attno, classForm->oid);
+				field->form = (Form_pg_attribute) GETSTRUCT(field->tuple);
+				if (field->form->attisdropped)
+				{
+					ReleaseSysCache(field->tuple);
+					continue;
+				}
+
+				field->attnum = attno;
+				field->first = (numFields==0);
+				field->attname = QuotedIdentifier(field->form->attname.data);
+
+				getTypeOutputInfo(field->form->atttypid, &outfuncid, &isvarlena);
+				fmgr_info(outfuncid, &field->outfunc);
+
+				numFields++;
+			}
+
+			if (dump_create)
+			{
+				/* Write table create statement */
+				appendStringInfoString(&buf, "CREATE ");
+				if (classForm->relpersistence == RELPERSISTENCE_TEMP)
+					appendStringInfoString(&buf, "TEMPORARY ");
+				appendStringInfoString(&buf, "TABLE ");
+				appendStringInfoString(&buf, relname);
+				appendStringInfoString(&buf, " (");
+				for (field=&fields[0]; field < &fields[numFields]; field++)
+				{
+					if (!field->first)
+						appendStringInfoString(&buf, ",");
+					appendStringInfoString(&buf, "\n ");
+					appendStringInfoString(&buf, field->attname);
+					appendStringInfoString(&buf, " ");
+					appendStringInfoString(&buf, format_type_extended(field->form->atttypid, field->form->atttypmod, FORMAT_TYPE_TYPEMOD_GIVEN | ((field->form->atttypid >= FirstGenbkiObjectId) ? FORMAT_TYPE_FORCE_QUALIFY : 0)));
+				}
+				appendStringInfoString(&buf, "\n);\n\n");
+			}
+
+			/* Write indexes create statement */
+			if (dump_indexes)
+			{
+				ListCell* lcIndex;
+				Relation rel = table_open(classForm->oid, AccessShareLock);	/* table may not be locked by the query */
+
+				foreach(lcIndex, RelationGetIndexList(rel))
+				{
+					Oid indexOid = lfirst_oid(lcIndex);
+					appendStringInfoString(&buf, text_to_cstring(DatumGetTextP(DirectFunctionCall1(pg_get_indexdef, ObjectIdGetDatum(indexOid)))));
+					appendStringInfoString(&buf, ";\n\n");
+				}
+
+				table_close(rel, AccessShareLock);
+			}
+
+			if (classForm->relpersistence == RELPERSISTENCE_TEMP)
+			{
+				if (fCreateTemp)
+					FlushStringInfoToFile(&buf, fCreateTemp);
+			}
+			else
+			{
+				if (fCreatePers)
+					FlushStringInfoToFile(&buf, fCreatePers);
+			}
+
+
+			/* Write tables data */
+			if (dump_data)
+			{
+				FILE* dest = (classForm->relpersistence == RELPERSISTENCE_TEMP) ? fDataTemp : fDataPers;
+				if (dest)
+				{
+					if (data_format == DATA_FORMAT_INSERT)
+					{
+						Relation table;
+						TableScanDesc scan;
+						HeapTuple tuple;
+						bool first;
+
+						table = table_open(classForm->oid, AccessShareLock);
+						scan = table_beginscan(table, GetActiveSnapshot(), 0, NULL);
+						first = true;
+
+						while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+						{
+							if(first)
+							{
+								first = false;
+
+								appendStringInfoString(&buf, "INSERT INTO ");
+								appendStringInfoString(&buf, relname);
+								appendStringInfoString(&buf, " (");
+
+								for (field=&fields[0]; field < &fields[numFields]; field++)
+								{
+									if(!field->first)
+										appendStringInfoString(&buf, ", ");
+
+									appendStringInfoString(&buf, field->attname);
+								}
+
+								appendStringInfoString(&buf, ") VALUES\n (");
+							}
+							else
+								appendStringInfoString(&buf, ",\n (");
+
+							for (field=&fields[0]; field < &fields[numFields]; field++)
+							{
+								const char *s;
+								char *quoted;
+								bool isnull;
+								Datum datum;
+
+								if (!field->first)
+									appendStringInfoString(&buf, ",");
+
+								datum = heap_getattr(tuple, field->attnum, table->rd_att, &isnull);
+
+								if (isnull)
+								{
+									appendStringInfoString(&buf, "NULL");
+									continue;
+								}
+
+								s = OutputFunctionCall(&field->outfunc, datum);
+
+								switch (field->form->atttypid)
+								{
+									case INT2OID:
+									case INT4OID:
+									case INT8OID:
+									case OIDOID:
+									case FLOAT4OID:
+									case FLOAT8OID:
+									case NUMERICOID:
+										{
+											if (strspn(s, "0123456789 +-eE.") == strlen(s))
+												appendStringInfoString(&buf, s);
+											else
+											{
+												appendStringInfoString(&buf, "'");
+												appendStringInfoString(&buf, s);
+												appendStringInfoString(&buf, "'");
+											}
+										}
+										break;
+
+									case BITOID:
+									case VARBITOID:
+										appendStringInfoString(&buf, "B'");
+										appendStringInfoString(&buf, s);
+										appendStringInfoString(&buf, "'");
+										break;
+
+									case BOOLOID:
+										if (strcmp(s, "t") == 0)
+											appendStringInfoString(&buf, "true");
+										else
+											appendStringInfoString(&buf, "false");
+										break;
+
+									default:
+										quoted = quote_literal_cstr(s);
+										appendStringInfoString(&buf, quoted);
+										pfree(quoted);
+										break;
+								}
+							}
+							appendStringInfoString(&buf, ")");
+							FlushStringInfoToFile(&buf, dest);
+						}
+
+						table_endscan(scan);
+						table_close(table, AccessShareLock);
+
+						appendStringInfoString(&buf, ";\n\n");
+
+						FlushStringInfoToFile(&buf, dest);
+					}
+					else if (data_format == DATA_FORMAT_COPY_FILE)
+					{
+						char* filename;
+						char* basename;
+						ParseState* pstate;
+						CopyStmt* stmt;
+						uint64 processed;
+
+						basename = psprintf("table-%s.txt", relname);
+						filename = psprintf("%s/%s", query_output_directory, basename);
+
+						pstate = make_parsestate(NULL);
+						pstate->p_sourcetext = "auto_dump internal copy";
+						pstate->p_queryEnv = create_queryEnv();
+
+						stmt = makeNode(CopyStmt);
+						stmt->type = T_CopyStmt;
+						stmt->relation = makeRangeVarFromNameList(stringToQualifiedNameList(relname, NULL));
+						stmt->filename = filename;
+
+						DoCopy(pstate, stmt, 0, strlen(pstate->p_sourcetext), &processed);
+
+						fprintf(dest, "\\set command '\\\\copy %s from \\'' :dir '/%s\\''\n", relname, basename);
+						fprintf(dest, ":command\n");
+
+						pfree(basename);
+						pfree(filename);
+					}
+					else if (data_format == DATA_FORMAT_COPY_STDIN)
+					{
+						ParseState* pstate;
+						Relation rel;
+						CopyToState cstate;
+
+						pstate = make_parsestate(NULL);
+						pstate->p_sourcetext = "auto_dump internal copy";
+						pstate->p_queryEnv = create_queryEnv();
+
+						fprintf(dest, "COPY %s FROM stdin;\n", relname);
+
+						rel = table_open(relid, AccessShareLock);	/* table may not be locked by the query */
+						CopyDataDestCallback_file = dest;
+						cstate = BeginCopyTo(pstate, rel, NULL, relid,
+							NULL, false,
+							CopyDataDestCallback, NULL, NIL);
+						DoCopyTo(cstate);
+						EndCopyTo(cstate);
+						table_close(rel, AccessShareLock);
+						fprintf(dest, "\\.\n\n");
+					}
+				}
+			}
+
+			for (field=&fields[0]; field < &fields[numFields]; field++)
+			{
+				pfree(field->attname);
+				ReleaseSysCache(field->tuple);
+			}
+
+			pfree(fields);
+			pfree(relname);
+		}
+
+		ReleaseSysCache(classTuple);
+	}
+
+	pfree(buf.data);
+
+	if (fCreatePers)
+		fclose(fCreatePers);
+
+	if (fCreateTemp)
+		fclose(fCreateTemp);
+
+	if (fDataPers)
+		fclose(fDataPers);
+
+	if (fDataTemp)
+		fclose(fDataTemp);
+}
+/* SaveQuery - write the query's source text to query.sql in the dump directory, with */
+/* every two consecutive backslashes collapsed into one and ";\n" appended. */
+static void
+SaveQuery( QueryDesc* queryDesc )
+{
+	FILE* f;
+	mode_t oumask;
+	char* unescaped;
+	char* dst;
+	char const* src;
+
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+	f = fopen(psprintf("%s/query.sql", query_output_directory), "wb");
+	umask(oumask);	/* restore before a possible ERROR below */
+	if (!f)
+		ereport(ERROR, (errcode_for_file_access(), errmsg("could not create file \"%s/query.sql\": %m", query_output_directory)));
+	unescaped = palloc( strlen(queryDesc->sourceText) + 3 );	/* +3: ';', '\n', NUL */
+	dst = unescaped;
+	src = queryDesc->sourceText;
+	while(*src){
+		if( src[0] == '\\' && src[1] =='\\' ){
+			src++;
+		}
+		*(dst++) = *(src++);
+	}
+	*(dst++) = ';';
+	*(dst++) = '\n';
+	*(dst++) = 0;
+	fputs(unescaped, f);
+	pfree(unescaped);
+	fclose(f);
+}
+
+
+static void
+SaveReadme(void)
+{
+ FILE* fReadme = NULL;
+ fReadme = CreateDumpFile("readme.txt");
+ fputs(
+ "Дамп снят с использованием расширения auto_dump.\n"
+ "\n"
+ "Дамп содержит (в зависимости от настроек):\n"
+ " query.sql - текст запроса, для которого делается дамп\n"
+ " create_persistent.sql - команды по созданию постоянных таблиц и (или) индексов\n"
+ " create_temporary.sql - команды по созданию временных таблиц и (или) индексов\n"
+ " insert_persistent.sql - команды по заполнению постоянных таблиц\n"
+ " insert_temporary.sql - команды по заполнению временных таблиц\n"
+ " plan_explain.txt - план запроса в формате вывода команды EXPLAIN\n"
+ " plan_analyze.txt - план запроса в формате вывода команды EXPLAIN ANALYZE\n"
+ " table-<имя_таблица>.txt - файлы с данными таблиц (для auto_dump.data_format=\"copy-file\")\n"
+ "\n"
+ "\n"
+ "Примеры использования дампа:\n"
+ "\n"
+ "1) Использование при наличии постоянных таблиц в базе:\n"
+ "psql\n"
+ "\\i create_temporary.sql\n"
+ "\\i insert_temporary.sql\n"
+ "\\i query.sql\n"
+ "\n"
+ "2) Использование на чистой базе:\n"
+ "# Выполнить один раз:\n"
+ "psql\n"
+ "\\i create_persistent.sql\n"
+ "\\i insert_persistent.sql\n"
+ "# Выполнить много раз:\n"
+ "psql\n"
+ "\\i create_temporary.sql\n"
+ "\\i insert_temporary.sql\n"
+ "\\i query.sql\n", fReadme);
+ fclose(fReadme);
+}
+/* Dump - on the first call for a query, create the dump directory and save tables, query */
+/* text and readme as configured; then save each requested plan form at most once. */
+static void
+Dump(QueryDesc* queryDesc, bool havePlan, bool haveAnalysis)
+{
+	if (!query_dumped)
+	{
+		if(!PrepareDump())
+			return;	/* no output directory configured */
+
+		query_dumped = true;
+
+		if (dump_create || dump_data || dump_indexes)
+			SaveTables(queryDesc);
+
+		if (dump_query)
+			SaveQuery(queryDesc);
+
+		if (dump_readme)
+			SaveReadme();
+	}
+
+	if (havePlan && query_dumped && dump_plan && !plan_dumped)
+	{
+		plan_dumped = true;	/* set before saving so the plan is not retried */
+		SavePlan(queryDesc, "plan_explain", false);
+	}
+
+	if (haveAnalysis && query_dumped && dump_plan && !plan_analysis_dumped)
+	{
+		plan_analysis_dumped = true;	/* set before saving so the analysis is not retried */
+		SavePlan(queryDesc, "plan_analyze", true);
+	}
+}
+
+
+static void
+ExecutorStart_hook_auto_dump(QueryDesc *queryDesc, int eflags)
+{
+
+ if (nesting_level == 0 && MyProcNumber != INVALID_PROC_NUMBER && backend_dump_mode)
+ {
+ if (backend_dump_mode[MyProcNumber] == MODE_NEXT)
+ backend_dump_mode[MyProcNumber] = MODE_CURRENT;
+ else
+ backend_dump_mode[MyProcNumber] = MODE_OFF;
+ }
+
+ if (dump_enable && nesting_level == 0)
+ {
+ query_dumped = false;
+ plan_dumped = false;
+ plan_analysis_dumped = false;
+
+ if (dump_plan)
+ queryDesc->instrument_options |= INSTRUMENT_ALL;
+
+ if (dump_plan || dump_on_bad_plan)
+ queryDesc->instrument_options |= INSTRUMENT_ROWS;
+
+ if(queryDesc->operation == CMD_SELECT
+ || queryDesc->operation == CMD_DELETE
+ || queryDesc->operation == CMD_INSERT
+ || queryDesc->operation == CMD_UPDATE
+ ){
+ if (*dump_on_query_string && IsQueryMatch(queryDesc->sourceText, dump_on_query_string)) {
+ Dump( queryDesc, false, false );
+ }
+ }
+ }
+
+ if (prev_ExecutorStart)
+ prev_ExecutorStart(queryDesc, eflags);
+ else
+ standard_ExecutorStart(queryDesc, eflags);
+
+ if (dump_enable && nesting_level == 0 && dump_on_duration >= 0 && queryDesc->totaltime == NULL)
+ {
+ MemoryContext oldcxt;
+ oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+ queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ if (dump_enable && query_dumped)
+ Dump(queryDesc, true, false);
+}
+
+
+static void
+ExecutorRun_hook_auto_dump(QueryDesc *queryDesc, ScanDirection direction,
+ uint64 count)
+{
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorRun)
+ prev_ExecutorRun(queryDesc, direction, count);
+ else
+ standard_ExecutorRun(queryDesc, direction, count);
+ }
+ PG_FINALLY();
+ {
+ nesting_level--;
+ }
+ PG_END_TRY();
+
+ if(dump_enable && nesting_level==0 && ((dump_plan && query_dumped) || (dump_on_bad_plan && IsBadPlan(queryDesc->planstate))))
+ Dump(queryDesc, true, true);
+}
+
+static void
+ExecutorFinish_hook_auto_dump(QueryDesc *queryDesc)
+{
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorFinish)
+ prev_ExecutorFinish(queryDesc);
+ else
+ standard_ExecutorFinish(queryDesc);
+ }
+ PG_FINALLY();
+ {
+ nesting_level--;
+ }
+ PG_END_TRY();
+}
+
+
+static void
+ExecutorEnd_hook_auto_dump(QueryDesc *queryDesc)
+{
+ if (nesting_level == 0 && IS_MODE_ACTIVE())
+ {
+ backend_dump_mode[MyProcNumber] = MODE_OFF;
+ Dump(queryDesc, true, true);
+ }
+
+ if(dump_enable && nesting_level==0 && queryDesc->totaltime && dump_on_duration >= 0 && !query_dumped)
+ {
+ InstrEndLoop(queryDesc->totaltime);
+ if (queryDesc->totaltime->total * 1000.0 >= dump_on_duration)
+ Dump(queryDesc, true, true);
+ }
+
+ if (prev_ExecutorEnd)
+ prev_ExecutorEnd(queryDesc);
+ else
+ standard_ExecutorEnd(queryDesc);
+}
+
+
+
+static void
+ProcessInterrupts_hook_auto_dump(void)
+{
+ bool want_cancel = false;
+ bool want_signal = false;
+
+ if (dump_enable && dump_on_cancel)
+ want_cancel = true;
+
+ if (IS_MODE_ACTIVE())
+ want_signal = true;
+
+ if (want_cancel || want_signal)
+ {
+ PG_TRY();
+ if (likely(!prev_ProcessInterrupts))
+ standard_ProcessInterrupts();
+ else
+ prev_ProcessInterrupts();
+ PG_CATCH();
+ InterruptHoldoffCount++;
+
+ if (ActivePortal && ActivePortal->queryDesc)
+ Dump(ActivePortal->queryDesc, true, true);
+
+ if (want_signal)
+ backend_dump_mode[MyProcNumber] = MODE_OFF;
+
+ InterruptHoldoffCount--;
+ PG_RE_THROW();
+ PG_END_TRY();
+ }
+ else
+ {
+ if (likely(!prev_ProcessInterrupts))
+ standard_ProcessInterrupts();
+ else
+ prev_ProcessInterrupts();
+ }
+}
+/*
+ * request_auto_dump - set dump mode `mode` (a MODE_* value) for the backend
+ * with process id `pid`; MODE_IMMEDIATE also calls pg_cancel_backend() on it.
+ * Raises ERROR if auto_dump's shared state is missing, the caller lacks
+ * pg_write_server_files, or pid is not a suitable target.
+ */
+static void
+request_auto_dump(int pid, int mode) {
+	int num_backends = pgstat_fetch_stat_numbackends();
+	int curr_backend;
+	LocalPgBackendStatus* status = NULL;
+
+	if (!backend_dump_mode)
+		elog(ERROR,"auto_dump is not loaded via 'shared_preload_libraries'. Requesting of dump for other backends is not available.");
+	if (!has_privs_of_role(GetUserId(), ROLE_PG_WRITE_SERVER_FILES))
+		ereport(ERROR,
+			(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+			errmsg("permission denied to dump request"),
+			errdetail("Only roles with privileges of the \"%s\" role may request auto_dump.",
+			"pg_write_server_files")));
+	for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+	{
+		status = pgstat_get_local_beentry_by_index(curr_backend);
+		if (status->backendStatus.st_procpid == pid)
+			break;
+		else
+			status = NULL;
+	}
+	if(!status)
+		elog(ERROR,"No backend found for pid %d", pid);
+
+	if (status->proc_number == INVALID_PROC_NUMBER || status->proc_number < 0 || status->proc_number >= (MaxBackends + NUM_AUXILIARY_PROCS))
+		elog(ERROR,"Backend is not a valid target for dump");
+
+	if ((mode == MODE_IMMEDIATE || mode == MODE_CURRENT) && status->backendStatus.st_state != STATE_RUNNING && status->backendStatus.st_state != STATE_FASTPATH)
+		elog(ERROR,"Backend is not running a query");
+
+	backend_dump_mode[status->proc_number] = mode;
+	pg_memory_barrier();
+
+	if (mode == MODE_IMMEDIATE)
+		DirectFunctionCall1(pg_cancel_backend, Int32GetDatum(pid));
+}
+
+
+Datum
+auto_dump_immediate(PG_FUNCTION_ARGS) {
+ request_auto_dump(PG_GETARG_INT32(0), MODE_IMMEDIATE);
+ PG_RETURN_VOID();
+}
+
+
+Datum
+auto_dump_current(PG_FUNCTION_ARGS) {
+ request_auto_dump(PG_GETARG_INT32(0), MODE_CURRENT);
+ PG_RETURN_VOID();
+}
+
+
+Datum
+auto_dump_next(PG_FUNCTION_ARGS) {
+ request_auto_dump(PG_GETARG_INT32(0), MODE_NEXT);
+ PG_RETURN_VOID();
+}
+
+/* auto_dump_shmem_size - size in bytes of the shared backend_dump_mode array (MaxBackends + NUM_AUXILIARY_PROCS chars). */
+static Size
+auto_dump_shmem_size(void)
+{
+	return MaxBackends + NUM_AUXILIARY_PROCS;
+}
+
+static void
+auto_dump_shmem_request(void)
+{
+ if (prev_shmem_request_hook)
+ prev_shmem_request_hook();
+
+ RequestAddinShmemSpace(auto_dump_shmem_size());
+}
+/* auto_dump_shmem_startup - attach to (or create and clear) the shared backend_dump_mode array, then chain to the previous hook. */
+static void
+auto_dump_shmem_startup(void)
+{
+	bool found;
+
+	backend_dump_mode = (char volatile*)ShmemInitStruct("auto_dump", auto_dump_shmem_size(), &found);
+	if (!found)
+	{
+		/* Clear every allocated slot, not only the first MaxBackends + 1. */
+		for (Size c = 0; c < auto_dump_shmem_size(); c++)
+			backend_dump_mode[c] = MODE_OFF;
+	}
+
+	if (prev_shmem_startup_hook)
+		prev_shmem_startup_hook();
+}
+
+static void
+auto_dump_cancel_current_dump(int code, Datum arg)
+{
+ (void) code;
+ (void) arg;
+
+ if (MyProcNumber != INVALID_PROC_NUMBER && backend_dump_mode)
+ backend_dump_mode[MyProcNumber] = MODE_OFF;
+}
+
+
+void
+_PG_init(void)
+{
+ DefineCustomBoolVariable("auto_dump.enable",
+ "Enable auto-dump.",
+ NULL,
+ &dump_enable,
+ dump_enable,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomStringVariable("auto_dump.output_directory",
+ "Output directory for dumped tables",
+ NULL,
+ &output_directory,
+ output_directory,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomStringVariable("auto_dump.dump_on_query_string",
+ "Activation phrase for start dump query tables (more 10 characters).",
+ NULL,
+ &dump_on_query_string,
+ dump_on_query_string,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_on_cancel",
+ "Dump tables when query is cancelled.",
+ NULL,
+ &dump_on_cancel,
+ dump_on_cancel,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_on_bad_plan",
+ "Dump tables when query plan is considered bad.",
+ NULL,
+ &dump_on_bad_plan,
+ dump_on_bad_plan,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomIntVariable("auto_dump.dump_on_duration",
+ "Dump tables when query duration is more than specified time. 0 to dump all, -1 to to ignore duration",
+ NULL,
+ &dump_on_duration,
+ dump_on_duration,
+ -1,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+
+ DefineCustomBoolVariable("auto_dump.dump_all_temp_tables",
+ "Dump all temporary tables of backend.",
+ NULL,
+ &dump_all_temp_tables,
+ dump_all_temp_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_persistent_tables",
+ "Dump persistent tables.",
+ NULL,
+ &dump_persistent_tables,
+ dump_persistent_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_temporary_tables",
+ "Dump temporary tables.",
+ NULL,
+ &dump_temporary_tables,
+ dump_temporary_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_data",
+ "Dump tables data.",
+ NULL,
+ &dump_data,
+ dump_data,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_indexes",
+ "Dump indexes for tables.",
+ NULL,
+ &dump_indexes,
+ dump_indexes,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_query",
+ "Dump query itself.",
+ NULL,
+ &dump_query,
+ dump_query,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_create",
+ "Dump creation of tables.",
+ NULL,
+ &dump_create,
+ dump_create,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_plan",
+ "Dump execution plan of query.",
+ NULL,
+ &dump_plan,
+ dump_plan,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomIntVariable("auto_dump.bad_plan_percent_threshold",
+ "Sets the percent difference between estimated and actual row count to trigger plan dump.",
+ NULL,
+ &dump_plan_percent_threshold,
+ dump_plan_percent_threshold,
+ 0, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("auto_dump.bad_plan_count_threshold",
+ "Sets the row count difference between estimated and actual row count to trigger plan dump.",
+ NULL,
+ &dump_plan_count_threshold,
+ dump_plan_count_threshold,
+ 0, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomEnumVariable("auto_dump.data_format",
+ "Format of saved data",
+ NULL,
+ &data_format,
+ data_format,
+ data_format_options,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ MarkGUCPrefixReserved("auto_dump");
+
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = ExecutorStart_hook_auto_dump;
+
+ prev_ExecutorRun = ExecutorRun_hook;
+ ExecutorRun_hook = ExecutorRun_hook_auto_dump;
+
+ prev_ExecutorFinish = ExecutorFinish_hook;
+ ExecutorFinish_hook = ExecutorFinish_hook_auto_dump;
+
+ prev_ExecutorEnd = ExecutorEnd_hook;
+ ExecutorEnd_hook = ExecutorEnd_hook_auto_dump;
+
+ prev_ProcessInterrupts = ProcessInterrupts_hook;
+ ProcessInterrupts_hook = ProcessInterrupts_hook_auto_dump;
+
+ prev_shmem_startup_hook = shmem_startup_hook;
+ shmem_startup_hook = auto_dump_shmem_startup;
+
+ prev_shmem_request_hook = shmem_request_hook;
+ shmem_request_hook = auto_dump_shmem_request;
+
+ on_proc_exit(auto_dump_cancel_current_dump, 0);
+}
diff --git a/contrib/auto_dump/auto_dump.control b/contrib/auto_dump/auto_dump.control
new file mode 100644
index 00000000000..97d3294ea0e
--- /dev/null
+++ b/contrib/auto_dump/auto_dump.control
@@ -0,0 +1,4 @@
+comment = 'auto_dump'
+default_version = '1.1'
+module_pathname = '$libdir/auto_dump'
+relocatable = true
diff --git a/contrib/auto_dump/meson.build b/contrib/auto_dump/meson.build
new file mode 100644
index 00000000000..d2e561f6f19
--- /dev/null
+++ b/contrib/auto_dump/meson.build
@@ -0,0 +1,28 @@
+auto_dump_sources = files(
+ 'auto_dump.c',
+)
+# Loadable module built from the single C source listed above.
+auto_dump = shared_module('auto_dump',
+ auto_dump_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += auto_dump
+
+install_data(
+ 'auto_dump.control',
+ 'auto_dump--1.0.sql',
+ 'auto_dump--1.0--1.1.sql',
+ kwargs: contrib_data_args,
+)
+# Regression tests are currently disabled: the whole 'tests' entry below is commented out.
+# tests += {
+# 'name': 'auto_dump',
+# 'sd': meson.current_source_dir(),
+# 'bd': meson.current_build_dir(),
+# 'regress': {
+# 'sql': [
+# 'dump',
+# # 'bad_plan',
+# ],
+# },
+# }
diff --git a/contrib/dbcopies_decoding/Makefile b/contrib/dbcopies_decoding/Makefile
new file mode 100644
index 00000000000..aa6fbc538c6
--- /dev/null
+++ b/contrib/dbcopies_decoding/Makefile
@@ -0,0 +1,36 @@
+MODULE_big = dbcopies_decoding
+OBJS=dbcopies_decoding.o mchar_recode.o
+REGRESS = simple
+
+REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/dbcopies_decoding/logical.conf
+
+# Disabled because these tests require "wal_level=logical", which
+# typical installcheck users do not have (e.g. buildfarm clients).
+NO_INSTALLCHECK = 1
+
+PG_CPPFLAGS += -I/usr/local/include -I$(top_srcdir)/contrib/mchar
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dbcopies_decoding
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+ifeq ($(PORTNAME),win32)
+ICUNAME=icuin
+else
+ICUNAME=icui18n
+endif
+
+SHLIB_LINK += -L/usr/local/lib -licuuc -l$(ICUNAME) -Wl,-rpath,'$$ORIGIN'
+
+installcheck-force:
+ $(pg_regress_installcheck) $(REGRESS)
+
+mchar_recode.c: $(top_srcdir)/contrib/mchar/mchar_recode.c
+ cp -f $(top_srcdir)/contrib/mchar/mchar_recode.c ./
diff --git a/contrib/dbcopies_decoding/dbcopies_decoding.c b/contrib/dbcopies_decoding/dbcopies_decoding.c
new file mode 100644
index 00000000000..42305f2460e
--- /dev/null
+++ b/contrib/dbcopies_decoding/dbcopies_decoding.c
@@ -0,0 +1,898 @@
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+
+#include "replication/logical.h"
+#include "replication/origin.h"
+
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/catcache.h"
+#include "utils/timestamp.h"
+#include "utils/cash.h"
+#include "utils/pg_locale.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+
+#include "mchar.h"
+
+PG_MODULE_MAGIC;
+
+
+static Oid MCHAROID = InvalidOid;
+static Oid MVARCHAROID = InvalidOid;
+static const char cQuoteChar = '\'';
+static const char cContinueChar = '!';
+
+extern PGDLLEXPORT void _PG_init(void);
+extern PGDLLEXPORT void _PG_output_plugin_init(OutputPluginCallbacks* cb);
+
+typedef struct
+{
+ MemoryContext context; /* scratch context: decode_change/decode_truncate switch to it and reset it afterwards */
+ int record_buf_size;
+ // ^ record_buf_size: requested record (slice) size; defaults to ALLOCSET_DEFAULT_MAXSIZE / 4, set by the "slice_size" option
+ int prepare_header_size;
+ // ^ prepare_header_size: size of the header that OutputPluginPrepareWrite writes into ctx->out
+ bool include_xids;
+ // ^ include_xids: flag, write the transaction identifier
+ bool skip_change;
+ // ^ skip_change: flag, skip everything and output nothing
+ bool xact_wrote_changes;
+ // ^ xact_wrote_changes: set once the start of the transaction has already been emitted
+} DecodingData;
+
+static void decode_startup(LogicalDecodingContext* ctx,
+ OutputPluginOptions* opt, bool is_init);
+static void decode_shutdown(LogicalDecodingContext* ctx
+ );
+static void decode_begin_txn(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn);
+static void decode_commit_txn(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, XLogRecPtr commit_lsn);
+static void decode_change(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, Relation rel, ReorderBufferChange* change);
+static void decode_truncate(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, int nrelations, Relation relations[],
+ ReorderBufferChange* change);
+static bool filter_by_origin(LogicalDecodingContext *ctx,
+ RepOriginId origin_id);
+
+#ifndef U8_TRUNCATE_IF_INCOMPLETE
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+#define U8_LEAD4_T1_BITS \
+"\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
+(U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+#define U8_LEAD3_T1_BITS \
+"\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
+(U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
+ if((length)>(start)) { \
+ uint8_t __b1=s[(length)-1]; \
+ if(U8_IS_SINGLE(__b1)) { \
+ /* common ASCII character */ \
+ } else if(U8_IS_LEAD(__b1)) { \
+ --(length); \
+ } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+ uint8_t __b2=s[(length)-2]; \
+ if(0xe0<=__b2 && __b2<=0xf4) { \
+ if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+ U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+ (length)-=2; \
+ } \
+ } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+ uint8_t __b3=s[(length)-3]; \
+ if(0xf0<=__b3 && __b3<=0xf4 && \
+ U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+ (length)-=3; \
+ } \
+ } \
+ } \
+ } \
+}
+/* Fallback copy, compiled only when the included headers do not already define U8_TRUNCATE_IF_INCOMPLETE. U8_IS_SINGLE/U8_IS_LEAD/U8_IS_TRAIL are not defined here (presumably they come from the ICU headers pulled in via mchar.h - TODO confirm). */
+#endif /* U8_TRUNCATE_IF_INCOMPLETE */
+/* Library load hook; the body is empty, so no work is done at load time. */
+void _PG_init(void)
+{
+}
+/* Output plugin entry point: fills *cb with this module's logical decoding callbacks. */
+void _PG_output_plugin_init(OutputPluginCallbacks* cb)
+{
+    AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
+
+    cb->startup_cb = decode_startup;
+    cb->shutdown_cb = decode_shutdown;
+    cb->filter_by_origin_cb = filter_by_origin;
+    cb->begin_cb = decode_begin_txn;
+    cb->change_cb = decode_change;
+    cb->truncate_cb = decode_truncate;
+    cb->commit_cb = decode_commit_txn;
+}
+/* Matches elem against option `name`; when it matches and carries a value, parses that value as a boolean into *dest. */
+static bool tryExtractBoolOption(DefElem* elem, const char* name, bool* dest)
+{
+    /* A different option: report "not mine" so the caller can try the next one. */
+    if (strcmp(elem->defname, name) != 0)
+        return false;
+
+    /* A matching option without a value leaves *dest unchanged. */
+    if (elem->arg == NULL)
+        return true;
+
+    if (!parse_bool(strVal(elem->arg), dest))
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                 errmsg("could not parse value \"%s\" for parameter \"%s\"",
+                        strVal(elem->arg), elem->defname)));
+    return true;
+}
+/* Matches elem against option `name`; when it matches and carries a value, stores that value in *dest as a 32-bit integer. */
+static bool tryExtractIntOption(DefElem* elem, const char* name, int32* dest)
+{
+    if (strcmp(elem->defname, name) != 0)
+        return false;
+
+    /* The conversion is delegated to pg_strtoint32(); a missing value leaves *dest untouched. */
+    if (elem->arg != NULL)
+        *dest = pg_strtoint32(strVal(elem->arg));
+
+    return true;
+}
+/* Resolves the OID of the type called typeName into *typeOid, unless *typeOid already holds a valid OID. */
+static void readTypeOID(char* typeName, Oid* typeOid)
+{
+    CatCList* matches;
+
+    if (*typeOid != InvalidOid)
+        return;                     /* resolved by an earlier call */
+
+    matches = SearchSysCacheList(TYPENAMENSP, 1, CStringGetDatum(typeName), 0, 0);
+    if (matches->n_members == 1)    /* only a single match is accepted */
+        *typeOid = ((Form_pg_type) GETSTRUCT(&matches->members[0]->tuple))->oid;
+    ReleaseSysCacheList(matches);
+
+    if (*typeOid == InvalidOid)
+        elog(WARNING, "OID of type %s not defined!", typeName);
+}
+/* Startup callback: allocates the plugin state, applies the slot options and creates the scratch memory context. */
+static void decode_startup(LogicalDecodingContext* ctx,
+    OutputPluginOptions* opt, bool is_init)
+{
+    DecodingData* state = palloc0(sizeof(DecodingData));
+    ListCell* lc;
+
+    /* Defaults, used when the corresponding option is not supplied. */
+    state->include_xids = true;
+    state->skip_change = false;
+    state->record_buf_size = ALLOCSET_DEFAULT_MAXSIZE / 4;
+
+    foreach(lc, ctx->output_plugin_options)
+    {
+        DefElem* elem = lfirst(lc);
+        bool known;
+
+        Assert(elem->arg == NULL || IsA(elem->arg, String));
+
+        /* Short-circuit evaluation: the first helper that recognises the name wins. */
+        known = tryExtractBoolOption(elem, "include-xids", &state->include_xids) ||
+                tryExtractBoolOption(elem, "skip-change", &state->skip_change) ||
+                tryExtractIntOption(elem, "slice_size", &state->record_buf_size);
+        if (!known)
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("option \"%s\" = \"%s\" is unknown",
+                            elem->defname,
+                            elem->arg ? strVal(elem->arg) : "(null)")));
+    }
+
+    state->context = AllocSetContextCreate(ctx->context,
+                                           "text conversion context",
+                                           ALLOCSET_DEFAULT_SIZES);
+    ctx->output_plugin_private = state;
+
+    opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT;
+    opt->receive_rewrites = false;
+}
+/* Shutdown callback: deletes the scratch memory context kept in the plugin state. */
+static void decode_shutdown(LogicalDecodingContext* ctx)
+{
+    MemoryContext scratch = ((DecodingData*) ctx->output_plugin_private)->context;
+
+    MemoryContextDelete(scratch);
+}
+/* Origin filter callback: reports true (filter out) exactly when plugin state exists and skip_change is set; origin_id is not examined. */
+static bool filter_by_origin(LogicalDecodingContext *ctx,
+    RepOriginId origin_id)
+{
+    const DecodingData* state = ctx->output_plugin_private;
+    return (state != NULL) ? state->skip_change : false;
+}
+/* Begin callback: only clears the per-transaction "start record already written" flag. */
+static void decode_begin_txn(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn)
+{
+    /* Nothing is written here: the "B" record is emitted lazily, */
+    /* by printTransaction(), once the first change arrives. */
+    ((DecodingData*) ctx->output_plugin_private)->xact_wrote_changes = false;
+}
+/* Commit callback: emits the "C" record (with the xid when include_xids is set) for transactions that produced output. */
+static void decode_commit_txn(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, XLogRecPtr commit_lsn)
+{
+    const DecodingData* state = ctx->output_plugin_private;
+    /* No "B" record was written, or output is suppressed: then there is no "C" record either. */
+    if (state->xact_wrote_changes && !state->skip_change)
+    {
+        OutputPluginPrepareWrite(ctx, true);
+        if (state->include_xids)
+            appendStringInfo(ctx->out, "C %u", txn->xid);
+        else
+            appendStringInfo(ctx->out, "C");
+        OutputPluginWrite(ctx, true);
+    }
+}
+/* Returns the record (slice) size limit stored in the plugin's private state. */
+static int record_buf_size(LogicalDecodingContext* ctx) {
+ return ((DecodingData*)(ctx->output_plugin_private))->record_buf_size;
+}
+/* Starts a fresh output record and remembers how many header bytes OutputPluginPrepareWrite() put into ctx->out. */
+static void prepareFlushedCtx(LogicalDecodingContext* ctx)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    ctx->out->len = 0;
+    OutputPluginPrepareWrite(ctx, true);
+    state->prepare_header_size = ctx->out->len;
+}
+/* Checks that more than toWriteSize bytes are still free in the current record; if not, closes it with the continuation mark and starts a new one. */
+static int checkFlushCtx(LogicalDecodingContext* ctx, int toWriteSize)
+{ // returns the maximum number of bytes
+ // that can be written before the length limit is exceeded
+ int overflowRemain = record_buf_size(ctx) - ctx->out->len - 1;
+ if (overflowRemain <= toWriteSize)
+ {
+ appendStringInfoChar(ctx->out, cContinueChar);
+ switch (((DecodingData*)(ctx->output_plugin_private)
+ )->prepare_header_size)
+ {
+ case 0:
+ break;
+ case 1 + sizeof(int64) * 3:
+ memset(&ctx->out->data[1], 0, sizeof(int64) * 2); /* zero the two int64 fields after the first header byte (presumably the LSN fields of a walsender message - TODO confirm) */
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("Unsupported ctx->prepare_write function!")));
+ /* any other header size is rejected: only the two layouts above are handled */
+ }
+ OutputPluginWrite(ctx, false);
+ prepareFlushedCtx(ctx);
+ return record_buf_size(ctx) - ctx->out->len - 1;
+ }
+ else
+ return overflowRemain;
+}
+/* Emits a bytea value as '\x<hex digits>', continuing in a new record whenever the current one is full. */
+static void printByts(LogicalDecodingContext* ctx, Datum val)
+{
+ const char n[] = { "0123456789abcdef" };
+ const int cDig = 16;
+ char* bytsData = VARDATA(val);
+ int32 bytsLen = VARSIZE_ANY_EXHDR(val);
+ int resultSize = 3 + bytsLen * 2 + 1; // remaining amount that still has to be written
+ int overflowRemain = checkFlushCtx(ctx, 3);
+ if (resultSize > overflowRemain)
+ enlargeStringInfo(ctx->out, overflowRemain);
+ else
+ enlargeStringInfo(ctx->out, resultSize);
+ /* above: reserve the smaller of "what is left to write" and "what still fits into this record" */
+ appendStringInfoString(ctx->out, "\'\\x");
+ overflowRemain -= 3;
+ resultSize -= 3;
+ /* the hex digits are stored straight into the buffer, two per input byte */
+ {
+ int32 i;
+ for (i = 0; i < bytsLen; ++i)
+ {
+ int x;
+ if (overflowRemain < 2)
+ {
+ overflowRemain = checkFlushCtx(ctx, 2);
+ if (resultSize > overflowRemain)
+ enlargeStringInfo(ctx->out, overflowRemain);
+ else
+ enlargeStringInfo(ctx->out, resultSize);
+ }
+ x = bytsData[i] & 255;
+ ctx->out->data[ctx->out->len] = n[x / cDig];
+ ctx->out->data[ctx->out->len + 1] = n[x % cDig];
+ ctx->out->len += 2;
+ overflowRemain -= 2;
+ resultSize -= 2;
+ }
+ }
+ if (overflowRemain < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar);
+}
+/* Trims *len so that str[0..*len) does not end inside a multibyte character; returns false when no usable prefix remains. */
+static bool truncateIfIncmoplete(const int maxCharSize, const char* str, int* len)
+{
+    int dbEnc;
+    /* Single-byte encodings can be cut anywhere. */
+    if (maxCharSize == 1)
+        return true;
+    dbEnc = GetDatabaseEncoding();
+    if (dbEnc == PG_UTF8)
+    {
+        U8_TRUNCATE_IF_INCOMPLETE(str, 0, *len);
+        /* Test the trimmed length itself; comparing the pointer would always be true. */
+        return (*len > 0);
+    }
+    else
+    {   /* slow path for the other multibyte encodings */
+        int truncCount;
+        for (truncCount = 1; truncCount < maxCharSize; ++truncCount)
+        {
+            int charLen;
+            /* Accept the cut as soon as some suffix of the kept bytes verifies as valid text. */
+            for (charLen = 1;
+                 charLen <= maxCharSize && *len >= charLen;
+                 ++charLen)
+                if (pg_verify_mbstr(dbEnc, &str[*len - charLen],
+                                    charLen, true))
+                    return true;
+            --(*len);   /* drop one trailing byte and try again */
+        }
+    }
+
+    return false;
+}
+/* Emits a char/varchar/text value as a quoted literal with embedded quotes doubled, slicing the record when it fills up. */
+static void printCharVarchar(LogicalDecodingContext* ctx, Datum val)
+{
+    const int maxCharSize = pg_database_encoding_max_length();
+    char* bytsData = VARDATA(val);
+    int32 bytsLen = VARSIZE_ANY_EXHDR(val);
+    int overflowRemain = checkFlushCtx(ctx, 1 + maxCharSize);
+
+    appendStringInfoChar(ctx->out, cQuoteChar);
+    --overflowRemain;
+    {
+        char* pBegin = bytsData;    /* first byte not yet copied to the output */
+        int L = 0;                  /* bytes scanned since pBegin */
+        int i;
+        for (i = 0; i < bytsLen; ++i)
+        {
+            const bool atQuote = (bytsData[i] == cQuoteChar);
+            const bool atEnd = (i + 1 == bytsLen);
+            bool overflow;
+            ++L;
+            overflow = (L >= overflowRemain);
+            if (atQuote || overflow || atEnd)
+            {
+                if (overflow && !(atQuote || atEnd))
+                {
+                    int scanned = L;
+                    if (!truncateIfIncmoplete(maxCharSize, pBegin, &L) || L <= 0)
+                        ereport(ERROR,
+                                (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+                                 errmsg("invalid string value")));
+                    /* Bytes trimmed off this slice are still pending: step back so they are scanned again. */
+                    i -= scanned - L;
+                }
+                appendBinaryStringInfo(ctx->out, pBegin, L);
+                pBegin += L;
+                if (atQuote)
+                {
+                    overflowRemain = checkFlushCtx(ctx, maxCharSize+1);
+                    appendStringInfoChar(ctx->out, cQuoteChar);
+                    --overflowRemain;
+                }
+                else if (overflow) /* guaranteed buffer flush */
+                    overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+                else
+                    overflowRemain = checkFlushCtx(ctx, maxCharSize);
+                L = 0;
+            }
+        }
+    }
+    if (overflowRemain < 1)
+        checkFlushCtx(ctx, 1);
+    appendStringInfoChar(ctx->out, cQuoteChar);
+}
+/* Writes an opening quote followed by the UTF-16 text wordsData[0..wordsLen), converted with UChar2Char(); */
+/* embedded quotes are doubled and the record is sliced when it fills up. The closing quote is left to */
+/* the caller. Returns the space still available in the current record. */
+static int printM(const UChar* wordsData,
+    int wordsLen, LogicalDecodingContext* ctx)
+{
+    const int maxCharSize = pg_database_encoding_max_length();
+    const UChar cQuoteUChar = L'\'';
+    int overflowRemain = checkFlushCtx(ctx, 1 + maxCharSize);
+
+    appendStringInfoChar(ctx->out, cQuoteChar);
+    --overflowRemain;
+    {
+        const UChar* pBegin = wordsData;    /* first unit not yet converted */
+        int L = 0;                          /* units scanned since pBegin */
+        int i;
+        for (i = 0; i < wordsLen; ++i)
+        {
+            const bool atQuote = (wordsData[i] == cQuoteUChar);
+            const bool atEnd = (i + 1 == wordsLen);
+            bool overflow;
+            ++L;
+            overflow = (L*maxCharSize >= overflowRemain);
+            if (atQuote || overflow || atEnd)
+            {
+                if (overflow && !(atQuote || atEnd) && U16_IS_LEAD(wordsData[i]))
+                {
+                    /* Never split a surrogate pair: the lead unit is deferred to the next slice. */
+                    --L;
+                    if (L == 0 || (i > 0 && U16_IS_LEAD(wordsData[i - 1])))
+                        ereport(ERROR,
+                                (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+                                 errmsg("invalid utf16 string value")));
+                    --i;    /* scan the deferred unit again */
+                }
+                /* reserve maxCharSize bytes for every UTF-16 unit about to be converted */
+                enlargeStringInfo(ctx->out, L * maxCharSize);
+                ctx->out->len += UChar2Char(pBegin, L,
+                    &ctx->out->data[ctx->out->len]);
+                pBegin += L;
+
+                if (atQuote)
+                {
+                    overflowRemain = checkFlushCtx(ctx, maxCharSize+1);
+                    appendStringInfoChar(ctx->out, cQuoteChar);
+                    --overflowRemain;
+                }
+                else if (overflow)
+                    overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+                else
+                    overflowRemain = checkFlushCtx(ctx, maxCharSize);
+                L = 0;
+            }
+        }
+    }
+    return overflowRemain;
+}
+/* Emits an mvarchar value: printM() writes the opening quote and the text, the closing quote is appended here. */
+static void printMVarchar(LogicalDecodingContext* ctx, Datum val)
+{
+ const UChar* pBegin = (UChar*)(DatumGetPointer(val) + MVARCHARHDRSZ);
+ if (printM(pBegin, UVARCHARLENGTH(val), ctx) < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar);
+}
+/* Emits an mchar value: the stored text via printM(), then trailing blanks, then the closing quote. */
+static void printMChar(LogicalDecodingContext* ctx, Datum val)
+{
+ const UChar* pBegin = (UChar*)(DatumGetPointer(val) + MCHARHDRSZ);
+ int32 trailBlanksCount =
+ DatumGetMChar(val)->typmod - u_countChar32(pBegin, UCHARLENGTH(val)); /* typmod minus the count returned by u_countChar32() for the stored text */
+ int overflowRemain = printM(pBegin, UCHARLENGTH(val), ctx);
+ while (trailBlanksCount > 0)
+ {
+ /* write as many blanks as fit into this record, then continue in the next one */
+ if (trailBlanksCount > overflowRemain)
+ {
+ appendStringInfoSpaces(ctx->out, overflowRemain);
+ trailBlanksCount -= overflowRemain;
+ overflowRemain = checkFlushCtx(ctx, 1);
+ }
+ else
+ {
+ appendStringInfoSpaces(ctx->out, trailBlanksCount);
+ overflowRemain -= trailBlanksCount;
+ trailBlanksCount = 0;
+ }
+ }
+ if (overflowRemain < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar);
+}
+/* Appends the NUL-terminated string str to the current record, after checkFlushCtx() has been asked for strlen(str) bytes. */
+static void appendCtxString(LogicalDecodingContext* ctx, char* str)
+{
+    const int nbytes = (int) strlen(str);
+    checkFlushCtx(ctx, nbytes);
+    appendBinaryStringInfo(ctx->out, str, nbytes);
+}
+/* Emits a finite timestamp as YYYYMMDDhhmmss digits; anything else becomes the literal 'invalid timestamp'. */
+static void printTimestamp(LogicalDecodingContext* ctx, Datum val)
+{
+    Timestamp ts = DatumGetTimestamp(val);
+    if (!TIMESTAMP_NOT_FINITE(ts))
+    {
+        struct pg_tm tm;
+        fsec_t fsec;
+        if (timestamp2tm(ts, NULL, &tm, &fsec, NULL, NULL) == 0)
+        {   /* no tz pointer is passed, so the hours of a timestamp with time zone are converted: */
+            /* e.g. 10:23:54.123+02 comes out as 08:23:54 */
+            char buf[32];
+            char* end;
+            /* Format into a local buffer: a year beyond 9999 yields more than four digits, */
+            /* which would not fit a fixed 14-byte reservation made directly in ctx->out. */
+            end = pg_ultostr_zeropad(buf,
+                (tm.tm_year > 0) ? tm.tm_year : -(tm.tm_year - 1), 4);
+            end = pg_ultostr_zeropad(end, tm.tm_mon, 2);
+            end = pg_ultostr_zeropad(end, tm.tm_mday, 2);
+            end = pg_ultostr_zeropad(end, tm.tm_hour, 2);
+            end = pg_ultostr_zeropad(end, tm.tm_min, 2);
+            end = pg_ultostr_zeropad(end, abs(tm.tm_sec), 2);
+            checkFlushCtx(ctx, 14);
+            appendBinaryStringInfo(ctx->out, buf, (int) (end - buf));
+            return;
+        }
+    }
+    appendCtxString(ctx, "'invalid timestamp'");
+}
+/* Emits a finite date as YYYYMMDD digits; infinite dates become the literal 'invalid date'. */
+static void printDate(LogicalDecodingContext* ctx, Datum val)
+{
+    DateADT d = DatumGetDateADT(val);
+    if (!DATE_NOT_FINITE(d))
+    {
+        char buf[32];
+        char* end;
+        int year, mon, day;
+        j2date(d + POSTGRES_EPOCH_JDATE, &year, &mon, &day);
+        /* Local buffer: a year above 9999 needs more room than a fixed 8-byte reservation in ctx->out. */
+        end = pg_ultostr_zeropad(buf, (year > 0) ? year : -(year - 1), 4);
+        end = pg_ultostr_zeropad(end, mon, 2);
+        end = pg_ultostr_zeropad(end, day, 2);
+        checkFlushCtx(ctx, 8);
+        appendBinaryStringInfo(ctx->out, buf, (int) (end - buf));
+        return;
+    }
+    appendCtxString(ctx, "'invalid date'");
+}
+/* Emits a time value as 00000000hhmmss: an all-zero date part followed by hours, minutes and seconds. */
+static void printTime(LogicalDecodingContext* ctx, Datum val)
+{
+    TimeADT t = DatumGetTimeADT(val);
+    char buf[32];
+    char* end;
+    struct pg_tm tm;
+    fsec_t fsec;
+    time2tm(t, &tm, &fsec);
+    end = pg_ultostr_zeropad(buf, 0, 4);
+    end = pg_ultostr_zeropad(end, 0, 2);
+    end = pg_ultostr_zeropad(end, 0, 2);
+    end = pg_ultostr_zeropad(end, tm.tm_hour, 2);
+    end = pg_ultostr_zeropad(end, tm.tm_min, 2);
+    end = pg_ultostr_zeropad(end, abs(tm.tm_sec), 2);
+    checkFlushCtx(ctx, 14);
+    /* Appending through the StringInfo API keeps ctx->out NUL-terminated. */
+    appendBinaryStringInfo(ctx->out, buf, (int) (end - buf));
+}
+/* Emits a money value as a plain decimal number: optional '-', integer digits, '.', then the locale's fraction digits. */
+static void printMoney(LogicalDecodingContext* ctx, Datum val)
+{
+    Cash v = DatumGetCash(val);
+    char buf[128];
+    char* pBuf = &buf[127];
+    bool minus = (v < 0);
+    /* Magnitude in unsigned arithmetic: negating the most negative Cash as a signed value would overflow. */
+    uint64 mag = minus ? (uint64) 0 - (uint64) v : (uint64) v;
+    struct lconv *lconvert = PGLC_localeconv();
+    int points = lconvert->frac_digits;
+
+    /* A fraction-digit count outside 0..10 is replaced by 2. */
+    if (points < 0 || points > 10)
+        points = 2;
+
+    buf[127] = 0;
+    /* Digits are produced right to left, starting just before the terminating NUL. */
+    do {
+        *(--pBuf) = (char) ('0' + mag % 10);
+        --points;
+
+        if (points == 0)
+            *(--pBuf) = '.';
+
+        mag /= 10;
+    } while (mag || points >= 0);
+    if (minus)
+        *(--pBuf) = '-';
+
+    appendCtxString(ctx, pBuf);
+}
+/* Emits a boolean as the bare word true or false. */
+static void printBool(LogicalDecodingContext* ctx, Datum val)
+{
+ appendCtxString(ctx, DatumGetBool(val) ? "true" : "false");
+}
+/* Emits a value through its type output function: numbers bare, bit strings as B'...', everything else as a quoted literal. */
+static void printDefault(LogicalDecodingContext* ctx,
+ Datum val, Oid typid, Oid typoutput)
+{ // Output by means of the type's standard output function (..._out)
+ char* dataAsChar = OidOutputFunctionCall(typoutput, val);
+ switch (typid)
+ {
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ case OIDOID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ appendCtxString(ctx, dataAsChar); /* numeric types are written unquoted */
+ break;
+ /* bit strings are written as B'...' */
+ case BITOID:
+ case VARBITOID:
+ checkFlushCtx(ctx, (int)strlen(dataAsChar) + 3);
+ appendStringInfo(ctx->out, "B'%s'", dataAsChar);
+ break;
+ /* every other type: quoted literal, embedded quotes doubled, sliced when the record fills up */
+ default:
+ {
+ const int maxCharSize = pg_database_encoding_max_length();
+ const char* pBegin;
+ const char* pEnd = dataAsChar;
+ int overflowRemain = checkFlushCtx(ctx, maxCharSize + 1);
+
+ appendStringInfoChar(ctx->out, cQuoteChar);
+ --overflowRemain;
+ // unlike printCharVarchar,
+ // the length is not known here, but the string is known to end with a NUL
+ for (pBegin = dataAsChar; *pBegin; pBegin = pEnd)
+ {
+ bool overflow;
+ while (*pEnd &&
+ *pEnd != cQuoteChar &&
+ (int)(pEnd - pBegin) < overflowRemain)
+ ++pEnd;
+ overflow = (int)(pEnd - pBegin) >= overflowRemain;
+ if (pEnd != pBegin)
+ {
+ if (overflow && *pEnd && *pEnd != cQuoteChar)
+ {
+ int32 L = (int32)(pEnd - pBegin);
+ if (!truncateIfIncmoplete(maxCharSize, pBegin, &L))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+ errmsg("invalid string value")
+ ));
+ pEnd = pBegin + L; /* continue right after the bytes that were kept */
+ }
+ appendBinaryStringInfo(ctx->out,
+ pBegin, (int)(pEnd - pBegin));
+ }
+
+ if (*pEnd == cQuoteChar)
+ {
+ overflowRemain = checkFlushCtx(ctx, maxCharSize + 2);
+ appendStringInfoChar(ctx->out, *pEnd);
+ appendStringInfoChar(ctx->out, *pEnd);
+ ++pEnd;
+ overflowRemain -= 2;
+ }
+ else if (overflow)
+ overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+ else
+ overflowRemain = checkFlushCtx(ctx, maxCharSize);
+ }
+ if (overflowRemain < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar);
+ }
+ break;
+ }
+ pfree(dataAsChar);
+}
+/* Appends the columns of tuple to the current record, each preceded by one space; dropped columns are skipped. The tableName argument is not used here. */
+static void printTuple(LogicalDecodingContext* ctx,
+ TupleDesc tupdesc, HeapTuple tuple,
+ bool skip_nulls, char* tableName)
+{
+
+ if (tuple == NULL)
+ appendCtxString(ctx, " (no-tuple-data)");
+ else
+ {
+ int natt;
+ for (natt = 0; natt < tupdesc->natts; natt++)
+ {
+ bool typisvarlena;
+ Oid typoutput;
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, natt);
+ Oid typid = attr->atttypid;
+ bool isnull;
+ Datum origval;
+
+ if (attr->attisdropped)
+ continue;
+ if (attr->attnum < 0) // Don't print system columns,
+ continue;//oid will already have been printed if present.
+
+ origval = heap_getattr(tuple, natt + 1, tupdesc, &isnull);
+ if (isnull)
+ {
+ if (!skip_nulls)
+ appendCtxString(ctx, " null");
+ continue;
+ }
+
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, ' ');
+
+ getTypeOutputInfo(typid, &typoutput, &typisvarlena);
+
+ if (typisvarlena)
+ {
+ if (VARATT_IS_EXTERNAL_ONDISK(origval))
+ appendCtxString(ctx, "unchanged-toast-datum");
+ else
+ {
+ Datum val = PointerGetDatum(PG_DETOAST_DATUM(origval));
+ /* dispatch on the column type; val is the detoasted value */
+ if (typid == BPCHAROID ||
+ typid == VARCHAROID ||
+ typid == TEXTOID)
+ printCharVarchar(ctx, val);
+ else if (typid == BYTEAOID)
+ printByts(ctx, val);
+ else if (typid > FirstNormalObjectId) {
+ readTypeOID("mchar", &MCHAROID);
+ readTypeOID("mvarchar", &MVARCHAROID);
+ /* the mchar/mvarchar OIDs are looked up by name and kept in file-level statics */
+ if (typid == MCHAROID)
+ printMChar(ctx, val);
+ else if (typid == MVARCHAROID)
+ printMVarchar(ctx, val);
+ else
+ printDefault(ctx, val, typid, typoutput);
+ } else
+ printDefault(ctx, val, typid, typoutput);
+ /* free the detoasted copy when detoasting produced a new one */
+ if (DatumGetPointer(val) != DatumGetPointer(origval))
+ pfree(DatumGetPointer(val));
+ }
+
+ }
+ else
+ {
+ switch (typid)
+ {
+ case MONEYOID:
+ printMoney(ctx, origval);
+ break;
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ printTimestamp(ctx, origval);
+ break;
+ case DATEOID:
+ printDate(ctx, origval);
+ break;
+ case TIMEOID:
+ printTime(ctx, origval);
+ break;
+ case BOOLOID:
+ printBool(ctx, origval);
+ break;
+ default:
+ printDefault(ctx, origval, typid, typoutput);
+ break;
+ }
+ }
+ }
+ }
+}
+/* Emits the transaction-start record ("B", with the xid when include_xids is set) unless it was already written for this transaction. */
+static void printTransaction(DecodingData* data,
+    LogicalDecodingContext* ctx, ReorderBufferTXN* txn)
+{
+    if (!data->xact_wrote_changes)
+    {
+        OutputPluginPrepareWrite(ctx, false);
+        if (data->include_xids)
+            appendStringInfo(ctx->out, "B %u", txn->xid);
+        else
+            appendStringInfoString(ctx->out, "B");
+        OutputPluginWrite(ctx, false);
+        data->xact_wrote_changes = true;
+    }
+}
+/* Change callback: emits one "I"/"U"/"D" record for a row change, preceded by the "B" record when it is still missing. */
+static void decode_change(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, Relation relation, ReorderBufferChange* change)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    MemoryContext saved;
+    char* tableName;
+    const char* tag = NULL;     /* record prefix; stays NULL for an unexpected action */
+    HeapTuple tuple = NULL;
+    bool skip_nulls = false;
+
+    if (state->skip_change)
+        return;
+    saved = MemoryContextSwitchTo(state->context);
+    tableName = RelationGetRelationName(relation);
+    printTransaction(state, ctx, txn);
+    prepareFlushedCtx(ctx);
+    switch (change->action)
+    {
+        case REORDER_BUFFER_CHANGE_INSERT:
+            tag = "I ";
+            tuple = change->data.tp.newtuple;
+            break;
+        case REORDER_BUFFER_CHANGE_UPDATE:
+            tag = "U ";
+            tuple = change->data.tp.newtuple;
+            break;
+        case REORDER_BUFFER_CHANGE_DELETE:
+            tag = "D ";
+            tuple = change->data.tp.oldtuple;
+            skip_nulls = true;  /* deletes print the old tuple and leave NULL columns out */
+            break;
+        default:
+            Assert(false);
+    }
+    if (tag != NULL)
+    {
+        appendStringInfoString(ctx->out, tag);
+        appendStringInfoString(ctx->out, tableName);
+        printTuple(ctx, RelationGetDescr(relation), tuple, skip_nulls, tableName);
+    }
+    MemoryContextSwitchTo(saved);
+    OutputPluginWrite(ctx, true);
+    MemoryContextReset(state->context);
+}
+/* Truncate callback: emits a single "T " record listing the names of the truncated relations, separated by ", ". */
+static void decode_truncate(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, int nrelations,
+    Relation relations[], ReorderBufferChange* change)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    MemoryContext saved;
+    int relno;
+
+    if (state->skip_change)
+        return;
+
+    /* Emit the "B" record first if this transaction has not produced output yet. */
+    printTransaction(state, ctx, txn);
+
+    saved = MemoryContextSwitchTo(state->context);
+    OutputPluginPrepareWrite(ctx, true);
+    appendStringInfoString(ctx->out, "T ");
+    for (relno = 0; relno < nrelations; relno++)
+    {
+        /* every name but the first is preceded by the separator */
+        if (relno != 0)
+            appendStringInfoString(ctx->out, ", ");
+        appendStringInfoString(ctx->out,
+                               RelationGetRelationName(relations[relno]));
+    }
+    MemoryContextSwitchTo(saved);
+
+    /* The record is written as a whole; the scratch context is emptied afterwards. */
+    OutputPluginWrite(ctx, true);
+    MemoryContextReset(state->context);
+}
diff --git a/contrib/dbcopies_decoding/expected/simple.out b/contrib/dbcopies_decoding/expected/simple.out
new file mode 100644
index 00000000000..4053ec1935a
--- /dev/null
+++ b/contrib/dbcopies_decoding/expected/simple.out
@@ -0,0 +1,28 @@
+-- predictability
+SET synchronous_commit = on;
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+SELECT 'init' FROM pg_create_logical_replication_slot('dbcopies_slot', 'dbcopies_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('dbcopies_slot', NULL, NULL, 'include-xids', '0');
+ data
+-------------------------------
+ B
+ I replication_example 1 1 '1'
+ I replication_example 2 1 '2'
+ C
+(4 rows)
+
+SELECT pg_drop_replication_slot('dbcopies_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
diff --git a/contrib/dbcopies_decoding/logical.conf b/contrib/dbcopies_decoding/logical.conf
new file mode 100644
index 00000000000..367f7066514
--- /dev/null
+++ b/contrib/dbcopies_decoding/logical.conf
@@ -0,0 +1,2 @@
+wal_level = logical
+max_replication_slots = 4
diff --git a/contrib/dbcopies_decoding/meson.build b/contrib/dbcopies_decoding/meson.build
new file mode 100644
index 00000000000..9fea46ca4c7
--- /dev/null
+++ b/contrib/dbcopies_decoding/meson.build
@@ -0,0 +1,38 @@
+dbcopies_decoding_sources = files(
+ 'dbcopies_decoding.c',
+ '../mchar/mchar_recode.c'
+)
+
+if host_system == 'windows'
+ dbcopies_decoding_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'dbcopies_decoding',
+ '--FILEDESC', 'dbcopies_decoding',])
+endif
+# The module compiles mchar_recode.c from the mchar contrib directory and uses its headers.
+dbcopies_decoding = shared_module('dbcopies_decoding',
+ dbcopies_decoding_sources,
+ include_directories: '../mchar',
+ kwargs: contrib_mod_args + {
+ 'dependencies': [icu_i18n, contrib_mod_args['dependencies']],
+ },
+)
+contrib_targets += dbcopies_decoding
+# NOTE(review): install_data() is called without any files here - confirm whether this call is needed.
+install_data(
+ kwargs: contrib_data_args,
+)
+# Regression test registration; the server is started with the settings from logical.conf.
+tests += {
+ 'name': 'dbcopies_decoding',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'simple'
+ ],
+ 'regress_args': ['--temp-config', files('logical.conf')],
+ # Disabled because these tests require "wal_level=logical", which
+ # typical installcheck users do not have (e.g. buildfarm clients).
+ 'runningcheck': false,
+ },
+}
diff --git a/contrib/dbcopies_decoding/sql/simple.sql b/contrib/dbcopies_decoding/sql/simple.sql
new file mode 100644
index 00000000000..1e9d2f72323
--- /dev/null
+++ b/contrib/dbcopies_decoding/sql/simple.sql
@@ -0,0 +1,15 @@
+-- predictability
+SET synchronous_commit = on;
+
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+
+SELECT 'init' FROM pg_create_logical_replication_slot('dbcopies_slot', 'dbcopies_decoding');
+
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+
+SELECT data FROM pg_logical_slot_get_changes('dbcopies_slot', NULL, NULL, 'include-xids', '0');
+
+SELECT pg_drop_replication_slot('dbcopies_slot');
diff --git a/contrib/fasttrun/Makefile b/contrib/fasttrun/Makefile
new file mode 100644
index 00000000000..78e92b86cbe
--- /dev/null
+++ b/contrib/fasttrun/Makefile
@@ -0,0 +1,17 @@
+MODULE_big = fasttrun
+OBJS = fasttrun.o
+DATA = fasttrun--2.0.sql fasttrun--unpackaged--2.0.sql
+DOCS = README.fasttrun
+REGRESS = fasttrun
+EXTENSION=fasttrun
+
+PG_CONFIG ?= pg_config
+ifdef USE_PGXS
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/fasttrun
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/fasttrun/README.fasttrun b/contrib/fasttrun/README.fasttrun
new file mode 100644
index 00000000000..b6d1b41a6d2
--- /dev/null
+++ b/contrib/fasttrun/README.fasttrun
@@ -0,0 +1,16 @@
+select fasttruncate('TABLE_NAME');
+
+The function truncates a temporary table without increasing the size
+of pg_class.
+
+Warning: function isn't transaction safe!
+
+For tests:
+create or replace function f() returns void as $$
+begin
+for i in 1..1000
+loop
+ PERFORM fasttruncate('tt1');
+end loop;
+end;
+$$ language plpgsql;
diff --git a/contrib/fasttrun/expected/fasttrun.out b/contrib/fasttrun/expected/fasttrun.out
new file mode 100644
index 00000000000..ef64fa6400e
--- /dev/null
+++ b/contrib/fasttrun/expected/fasttrun.out
@@ -0,0 +1,115 @@
+CREATE EXTENSION fasttrun;
+create table persist ( a int );
+insert into persist values (1);
+select fasttruncate('persist');
+ERROR: Relation isn't a temporary table
+insert into persist values (2);
+select * from persist order by a;
+ a
+---
+ 1
+ 2
+(2 rows)
+
+create temp table temp1 (a int);
+insert into temp1 values (1);
+BEGIN;
+create temp table temp2 (a int);
+insert into temp2 values (1);
+select * from temp1 order by a;
+ a
+---
+ 1
+(1 row)
+
+select * from temp2 order by a;
+ a
+---
+ 1
+(1 row)
+
+insert into temp1 (select * from generate_series(1,10000));
+insert into temp2 (select * from generate_series(1,11000));
+analyze temp2;
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | t | t
+(2 rows)
+
+select fasttruncate('temp1');
+ fasttruncate
+--------------
+
+(1 row)
+
+select fasttruncate('temp2');
+ fasttruncate
+--------------
+
+(1 row)
+
+insert into temp1 values (-2);
+insert into temp2 values (-2);
+select * from temp1 order by a;
+ a
+----
+ -2
+(1 row)
+
+select * from temp2 order by a;
+ a
+----
+ -2
+(1 row)
+
+COMMIT;
+select * from temp1 order by a;
+ a
+----
+ -2
+(1 row)
+
+select * from temp2 order by a;
+ a
+----
+ -2
+(1 row)
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | f | f
+(2 rows)
+
+select fasttruncate('temp1');
+ fasttruncate
+--------------
+
+(1 row)
+
+select fasttruncate('temp2');
+ fasttruncate
+--------------
+
+(1 row)
+
+select * from temp1 order by a;
+ a
+---
+(0 rows)
+
+select * from temp2 order by a;
+ a
+---
+(0 rows)
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | f | f
+(2 rows)
+
diff --git a/contrib/fasttrun/fasttrun--2.0.sql b/contrib/fasttrun/fasttrun--2.0.sql
new file mode 100644
index 00000000000..708c2753151
--- /dev/null
+++ b/contrib/fasttrun/fasttrun--2.0.sql
@@ -0,0 +1,6 @@
+\echo Use "CREATE EXTENSION fasttrun" to load this file. \quit
+
+
+CREATE OR REPLACE FUNCTION fasttruncate(text)
+RETURNS void AS 'MODULE_PATHNAME'
+LANGUAGE C RETURNS NULL ON NULL INPUT VOLATILE;
diff --git a/contrib/fasttrun/fasttrun--unpackaged--2.0.sql b/contrib/fasttrun/fasttrun--unpackaged--2.0.sql
new file mode 100644
index 00000000000..3a071f077e1
--- /dev/null
+++ b/contrib/fasttrun/fasttrun--unpackaged--2.0.sql
@@ -0,0 +1,3 @@
+\echo Use "CREATE EXTENSION fasttrun FROM unpackaged" to load this file. \quit
+
+ALTER EXTENSION fasttrun ADD function fasttruncate(text);
diff --git a/contrib/fasttrun/fasttrun.c b/contrib/fasttrun/fasttrun.c
new file mode 100644
index 00000000000..494fcf6c7b3
--- /dev/null
+++ b/contrib/fasttrun/fasttrun.c
@@ -0,0 +1,90 @@
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "storage/bufmgr.h"
+#include "catalog/namespace.h"
+#include "utils/lsyscache.h"
+#include "utils/builtins.h"
+#include <fmgr.h>
+#include <funcapi.h>
+#include <access/heapam.h>
+#include <catalog/pg_type.h>
+#include <catalog/heap.h>
+#include <commands/vacuum.h>
+#include <utils/regproc.h>
+#include <utils/varlena.h>
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+PG_FUNCTION_INFO_V1(fasttruncate);
+Datum	fasttruncate(PG_FUNCTION_ARGS);
+/*
+ * fasttruncate(text): truncate the named temporary table with heap_truncate()
+ * and run ANALYZE on it when its pg_class entry still shows pages or tuples.
+ * Raises an error unless the name resolves to an ordinary, temporary table.
+ */
+Datum
+fasttruncate(PG_FUNCTION_ARGS) {
+	char	*relname = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	List	*relname_list;
+	RangeVar	*relvar;
+	Oid		relOid;
+	Relation	rel;
+	bool	makeanalyze = false;
+
+	relname_list = stringToQualifiedNameList(relname, NULL);
+	relvar = makeRangeVarFromNameList(relname_list);
+	relOid = RangeVarGetRelid(relvar, AccessExclusiveLock, false);
+
+	if ( get_rel_relkind(relOid) != RELKIND_RELATION )
+		elog(ERROR,"Relation isn't a ordinary table");
+
+	rel = table_open(relOid, NoLock);
+
+	if ( !isTempNamespace(get_rel_namespace(relOid)) )
+		elog(ERROR,"Relation isn't a temporary table");
+
+	heap_truncate(list_make1_oid(relOid));
+
+	if ( rel->rd_rel->relpages > 0 || rel->rd_rel->reltuples > 0 )
+		makeanalyze = true;
+
+	/*
+	 * heap_truncate doesn't unlock the table,
+	 * so we should unlock it.
+	 */
+	table_close(rel, AccessExclusiveLock);
+
+	if ( makeanalyze ) {
+		VacuumParams	params;
+		VacuumRelation	*vacrel;
+		MemoryContext	cntx;
+
+		memset(&params, 0, sizeof(params));	/* no indeterminate members */
+		params.options = VACOPT_ANALYZE;
+		params.freeze_min_age = -1;
+		params.freeze_table_age = -1;
+		params.multixact_freeze_min_age = -1;
+		params.multixact_freeze_table_age = -1;
+		params.is_wraparound = false;
+		params.log_min_duration = -1;
+
+		vacrel = makeNode(VacuumRelation);
+		vacrel->relation = relvar;
+		vacrel->oid = relOid;
+		vacrel->va_cols = NULL;
+
+		cntx = AllocSetContextCreate(CurrentMemoryContext,
+						"Vacuum", ALLOCSET_DEFAULT_SIZES);
+		vacuum(list_make1(vacrel), &params,
+			GetAccessStrategy(BAS_VACUUM), cntx, false);
+		MemoryContextDelete(cntx);
+	}
+
+	PG_RETURN_VOID();
+}
diff --git a/contrib/fasttrun/fasttrun.control b/contrib/fasttrun/fasttrun.control
new file mode 100644
index 00000000000..7862c0bf8ad
--- /dev/null
+++ b/contrib/fasttrun/fasttrun.control
@@ -0,0 +1,5 @@
+comment = 'fast transaction-unsafe truncate'
+default_version = '2.0'
+module_pathname = '$libdir/fasttrun'
+relocatable = true
+trusted = true
diff --git a/contrib/fasttrun/meson.build b/contrib/fasttrun/meson.build
new file mode 100644
index 00000000000..8d6c5aeda79
--- /dev/null
+++ b/contrib/fasttrun/meson.build
@@ -0,0 +1,37 @@
+fasttrun_sources = files(
+ 'fasttrun.c'
+)
+
+if host_system == 'windows'
+ fasttrun_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'fasttrun',
+ '--FILEDESC', 'fasttrun',])
+endif
+
+fasttrun = shared_module('fasttrun',
+ fasttrun_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += fasttrun
+
+install_data(
+ 'fasttrun.control',
+ 'fasttrun--2.0.sql',
+ 'fasttrun--unpackaged--2.0.sql',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'fasttrun',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'fasttrun'
+ ]
+ },
+}
+
+# TODO: DOCS = README.fasttrun
\ No newline at end of file
diff --git a/contrib/fasttrun/sql/fasttrun.sql b/contrib/fasttrun/sql/fasttrun.sql
new file mode 100644
index 00000000000..0e3cb6c9beb
--- /dev/null
+++ b/contrib/fasttrun/sql/fasttrun.sql
@@ -0,0 +1,48 @@
+CREATE EXTENSION fasttrun;
+
+create table persist ( a int );
+insert into persist values (1);
+select fasttruncate('persist');
+insert into persist values (2);
+select * from persist order by a;
+
+create temp table temp1 (a int);
+insert into temp1 values (1);
+
+BEGIN;
+
+create temp table temp2 (a int);
+insert into temp2 values (1);
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+insert into temp1 (select * from generate_series(1,10000));
+insert into temp2 (select * from generate_series(1,11000));
+
+analyze temp2;
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+
+select fasttruncate('temp1');
+select fasttruncate('temp2');
+
+insert into temp1 values (-2);
+insert into temp2 values (-2);
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+COMMIT;
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+
+select fasttruncate('temp1');
+select fasttruncate('temp2');
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
diff --git a/contrib/fulleq/Makefile b/contrib/fulleq/Makefile
new file mode 100644
index 00000000000..99cc8aca35e
--- /dev/null
+++ b/contrib/fulleq/Makefile
@@ -0,0 +1,28 @@
+MODULE_big = fulleq
+OBJS = fulleq.o
+DOCS = README.fulleq
+REGRESS = fulleq
+DATA_built = fulleq--2.0.sql fulleq--unpackaged--2.0.sql
+EXTENSION=fulleq
+# Types the extension covers; generate.sh carries its own copy of this list.
+ARGTYPE = bool bytea char name int8 int2 int4 text \
+	oid xid cid oidvector float4 float8 macaddr \
+	inet cidr varchar date time timestamp timestamptz \
+	interval timetz
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/fulleq
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+# Use $(srcdir) for generate.sh: it is valid for in-tree, VPATH and PGXS builds.
+fulleq--2.0.sql: fulleq--2.0.sql.in
+	$(srcdir)/generate.sh packaged "$<" > "$@"
+
+fulleq--unpackaged--2.0.sql: fulleq--unpackaged--2.0.sql.in
+	$(srcdir)/generate.sh unpackaged "$<" > "$@"
+
diff --git a/contrib/fulleq/README.fulleq b/contrib/fulleq/README.fulleq
new file mode 100644
index 00000000000..93bf0cad20e
--- /dev/null
+++ b/contrib/fulleq/README.fulleq
@@ -0,0 +1,2 @@
+Introduces the operator ==, which returns true when
+both operands are equal or both are NULL.
diff --git a/contrib/fulleq/expected/fulleq.out b/contrib/fulleq/expected/fulleq.out
new file mode 100644
index 00000000000..452f8593432
--- /dev/null
+++ b/contrib/fulleq/expected/fulleq.out
@@ -0,0 +1,61 @@
+CREATE EXTENSION fulleq;
+select 4::int == 4;
+ ?column?
+----------
+ t
+(1 row)
+
+select 4::int == 5;
+ ?column?
+----------
+ f
+(1 row)
+
+select 4::int == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::int == 5;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::int == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+select '4'::text == '4';
+ ?column?
+----------
+ t
+(1 row)
+
+select '4'::text == '5';
+ ?column?
+----------
+ f
+(1 row)
+
+select '4'::text == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::text == '5';
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::text == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
diff --git a/contrib/fulleq/fulleq--2.0.sql.in b/contrib/fulleq/fulleq--2.0.sql.in
new file mode 100644
index 00000000000..c270647c720
--- /dev/null
+++ b/contrib/fulleq/fulleq--2.0.sql.in
@@ -0,0 +1,25 @@
+-- For ARGTYPE
+
+CREATE OR REPLACE FUNCTION isfulleq_ARGTYPE(ARGTYPE, ARGTYPE)
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION fullhash_ARGTYPE(ARGTYPE)
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == (
+ LEFTARG = ARGTYPE,
+ RIGHTARG = ARGTYPE,
+ PROCEDURE = isfulleq_ARGTYPE,
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES
+);
+
+CREATE OPERATOR CLASS ARGTYPE_fill_ops
+ FOR TYPE ARGTYPE USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_ARGTYPE(ARGTYPE);
diff --git a/contrib/fulleq/fulleq--unpackaged--2.0.sql.in b/contrib/fulleq/fulleq--unpackaged--2.0.sql.in
new file mode 100644
index 00000000000..8d759d8221f
--- /dev/null
+++ b/contrib/fulleq/fulleq--unpackaged--2.0.sql.in
@@ -0,0 +1,10 @@
+-- For ARGTYPE
+
+ALTER EXTENSION fulleq ADD FUNCTION isfulleq_ARGTYPE(ARGTYPE, ARGTYPE);
+
+ALTER EXTENSION fulleq ADD FUNCTION fullhash_ARGTYPE(ARGTYPE);
+
+ALTER EXTENSION fulleq ADD OPERATOR == (ARGTYPE, ARGTYPE);
+
+ALTER EXTENSION fulleq ADD OPERATOR CLASS ARGTYPE_fill_ops USING hash;
+
diff --git a/contrib/fulleq/fulleq.c b/contrib/fulleq/fulleq.c
new file mode 100644
index 00000000000..e435be4b93a
--- /dev/null
+++ b/contrib/fulleq/fulleq.c
@@ -0,0 +1,112 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "access/hash.h"
+#include "catalog/pg_collation.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/timestamp.h"
+#include "utils/date.h"
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+#define NULLHASHVALUE		(-2147483647)
+/* FULLEQ_FUNC: emit isfulleq_<type> (NULL-safe equality) and fullhash_<type> (its hash). */
+#define	FULLEQ_FUNC(type, cmpfunc, hashfunc)			\
+PG_FUNCTION_INFO_V1( isfulleq_##type );					\
+Datum	isfulleq_##type(PG_FUNCTION_ARGS);				\
+Datum													\
+isfulleq_##type(PG_FUNCTION_ARGS) {						\
+	if ( PG_ARGISNULL(0) && PG_ARGISNULL(1) )			\
+		PG_RETURN_BOOL(true);							\
+	else if ( PG_ARGISNULL(0) || PG_ARGISNULL(1) )		\
+		PG_RETURN_BOOL(false);							\
+	/* both arguments are non-NULL: defer to cmpfunc */	\
+	PG_RETURN_DATUM( DirectFunctionCall2Coll( cmpfunc,	\
+			DEFAULT_COLLATION_OID,						\
+			PG_GETARG_DATUM(0),							\
+			PG_GETARG_DATUM(1)							\
+	) );												\
+}														\
+														\
+PG_FUNCTION_INFO_V1( fullhash_##type );					\
+Datum	fullhash_##type(PG_FUNCTION_ARGS);				\
+Datum													\
+fullhash_##type(PG_FUNCTION_ARGS) {						\
+	if ( PG_ARGISNULL(0) )								\
+		PG_RETURN_INT32(NULLHASHVALUE);					\
+	/* hash under the collation used for comparison; hashtext() fails without one */ \
+	PG_RETURN_DATUM( DirectFunctionCall1Coll( hashfunc,	\
+			DEFAULT_COLLATION_OID,						\
+			PG_GETARG_DATUM(0) ) );						\
+}
+
+
+static Datum
+hashint2vector(PG_FUNCTION_ARGS)
+{
+	/* Hash the dim1 int16 values of the int2vector argument as raw bytes. */
+	int2vector *vec = (int2vector *) PG_GETARG_POINTER(0);
+	return hash_any((unsigned char *) vec->values, vec->dim1 * sizeof(int16));
+}
+
+/*
+ * int2vectoreq - bytewise equality of two int2vector arguments.
+ * Vectors with a different element count (dim1) are never equal;
+ * otherwise the int16 payloads are compared with memcmp().
+ */
+static Datum
+int2vectoreq(PG_FUNCTION_ARGS)
+{
+	int2vector *lhs = (int2vector *) PG_GETARG_POINTER(0);
+	int2vector *rhs = (int2vector *) PG_GETARG_POINTER(1);
+	bool		same = (lhs->dim1 == rhs->dim1) &&
+		memcmp(lhs->values, rhs->values, lhs->dim1 * sizeof(int16)) == 0;
+	PG_RETURN_BOOL(same);
+}
+
+
+FULLEQ_FUNC( bool , booleq , hashchar );
+FULLEQ_FUNC( bytea , byteaeq , hashvarlena );
+FULLEQ_FUNC( char , chareq , hashchar );
+FULLEQ_FUNC( name , nameeq , hashname );
+FULLEQ_FUNC( int8 , int8eq , hashint8 );
+FULLEQ_FUNC( int2 , int2eq , hashint2 );
+FULLEQ_FUNC( int4 , int4eq , hashint4 );
+FULLEQ_FUNC( text , texteq , hashtext );
+FULLEQ_FUNC( oid , oideq , hashoid );
+FULLEQ_FUNC( xid , xideq , hashint4 );
+FULLEQ_FUNC( cid , cideq , hashint4 );
+FULLEQ_FUNC( oidvector , oidvectoreq , hashoidvector );
+FULLEQ_FUNC( float4 , float4eq , hashfloat4 );
+FULLEQ_FUNC( float8 , float8eq , hashfloat8 );
+/* abstime and reltime are not instantiated here with abstimeeq/reltimeeq; */
+/* see the dummy_eq-based instantiations at the end of this file. */
+FULLEQ_FUNC( macaddr , macaddr_eq , hashmacaddr );
+FULLEQ_FUNC( inet , network_eq , hashinet );
+FULLEQ_FUNC( cidr , network_eq , hashinet );
+FULLEQ_FUNC( varchar , texteq , hashtext );
+FULLEQ_FUNC( date , date_eq , hashint4 );
+FULLEQ_FUNC( time , time_eq , hashfloat8 ); /* NOTE(review): hashed as float8 - confirm intended */
+FULLEQ_FUNC( timestamp , timestamp_eq , hashfloat8 ); /* NOTE(review): hashed as float8 - confirm intended */
+FULLEQ_FUNC( timestamptz , timestamp_eq , hashfloat8 ); /* NOTE(review): hashed as float8 - confirm intended */
+FULLEQ_FUNC( interval , interval_eq , interval_hash );
+FULLEQ_FUNC( timetz , timetz_eq , timetz_hash );
+
+/*
+ * v10 dropped support for the int2vector equality and hash operators in
+ * commit 5c80642aa8de8393b08cd3cbf612b325cedd98dc, but for compatibility
+ * we still provide these operators.
+ */
+FULLEQ_FUNC( int2vector , int2vectoreq , hashint2vector );
+
+static Datum
+dummy_eq(PG_FUNCTION_ARGS)
+{
+	elog(ERROR, "unimplemented");	/* placeholder comparator: always raises */
+	return (Datum) 0;				/* keep compiler quiet */
+}
+
+FULLEQ_FUNC( abstime , dummy_eq , hashint4 );
+FULLEQ_FUNC( reltime , dummy_eq , hashint4 );
diff --git a/contrib/fulleq/fulleq.control b/contrib/fulleq/fulleq.control
new file mode 100644
index 00000000000..30a26c65fff
--- /dev/null
+++ b/contrib/fulleq/fulleq.control
@@ -0,0 +1,5 @@
+comment = 'exact equal operation'
+default_version = '2.0'
+module_pathname = '$libdir/fulleq'
+relocatable = true
+trusted = true
diff --git a/contrib/fulleq/generate.sh b/contrib/fulleq/generate.sh
new file mode 100755
index 00000000000..bc925e78b98
--- /dev/null
+++ b/contrib/fulleq/generate.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -e
+# Usage: generate.sh packaged|unpackaged TEMPLATE   (generated SQL goes to stdout)
+type="$1"
+template="$2"
+
+if [ "$type" = "packaged" ]; then
+	echo '\echo Use "CREATE EXTENSION fulleq" to load this file. \quit'
+elif [ "$type" = "unpackaged" ]; then
+	echo '\echo Use "CREATE EXTENSION fulleq FROM unpackaged" to load this file. \quit'
+	echo 'DROP OPERATOR CLASS IF EXISTS int2vector_fill_ops USING hash;'
+	echo 'DROP OPERATOR FAMILY IF EXISTS int2vector_fill_ops USING hash;'
+	echo 'DROP FUNCTION IF EXISTS fullhash_int2vector(int2vector);'
+	echo 'DROP OPERATOR IF EXISTS == (int2vector, int2vector);'
+	echo 'DROP FUNCTION IF EXISTS isfulleq_int2vector(int2vector, int2vector);'
+else
+	echo "invalid arguments" >&2
+	exit 1
+fi
+
+# One copy of the template is emitted per type, with ARGTYPE replaced by its name.
+ARGTYPE=(
+	bool
+	bytea
+	char
+	name
+	int8
+	int2
+	int4
+	text
+	oid
+	xid
+	cid
+	oidvector
+	float4
+	float8
+	macaddr
+	inet
+	cidr
+	varchar
+	date
+	time
+	timestamp
+	timestamptz
+	interval
+	timetz
+)
+
+for type in "${ARGTYPE[@]}"; do
+	sed -e "s/ARGTYPE/$type/g" < "$template"
+done
diff --git a/contrib/fulleq/meson.build b/contrib/fulleq/meson.build
new file mode 100644
index 00000000000..3402dd0c764
--- /dev/null
+++ b/contrib/fulleq/meson.build
@@ -0,0 +1,53 @@
+fulleq_sources = files(
+ 'fulleq.c'
+)
+
+if host_system == 'windows'
+ fulleq_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'fulleq',
+ '--FILEDESC', 'fulleq',])
+endif
+
+contrib_targets += custom_target('fulleq--2.0.sql',
+ input: 'fulleq--2.0.sql.in',
+ output: 'fulleq--2.0.sql',
+ command: [meson.current_source_dir() / 'generate.sh', 'packaged', '@INPUT@'],
+ capture: true,
+ install: true,
+ install_dir: contrib_data_args['install_dir'],
+)
+
+contrib_targets += custom_target('fulleq--unpackaged--2.0.sql',
+ input: 'fulleq--unpackaged--2.0.sql.in',
+ output: 'fulleq--unpackaged--2.0.sql',
+ command: [meson.current_source_dir() / 'generate.sh', 'unpackaged', '@INPUT@'],
+ capture: true,
+ install: true,
+ install_dir: contrib_data_args['install_dir'],
+)
+
+fulleq = shared_module('fulleq',
+ fulleq_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += fulleq
+
+install_data(
+ 'fulleq.control',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'fulleq',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'fulleq'
+ ]
+ },
+}
+
+# TODO: DOCS = README.fulleq
\ No newline at end of file
diff --git a/contrib/fulleq/sql/fulleq.sql b/contrib/fulleq/sql/fulleq.sql
new file mode 100644
index 00000000000..d43abeb34b7
--- /dev/null
+++ b/contrib/fulleq/sql/fulleq.sql
@@ -0,0 +1,13 @@
+CREATE EXTENSION fulleq;
+
+select 4::int == 4;
+select 4::int == 5;
+select 4::int == NULL;
+select NULL::int == 5;
+select NULL::int == NULL;
+
+select '4'::text == '4';
+select '4'::text == '5';
+select '4'::text == NULL;
+select NULL::text == '5';
+select NULL::text == NULL;
diff --git a/contrib/mchar/Changes b/contrib/mchar/Changes
new file mode 100644
index 00000000000..b7f6e0c5718
--- /dev/null
+++ b/contrib/mchar/Changes
@@ -0,0 +1,20 @@
+2.0 make an extension
+0.17 add == operation:
+ a == b => ( a = b or a is null and b is null )
+0.16 fix pg_dump - now mchar in pg_catalog scheme, not public
+ fix bug in mvarchar_substr()
+0.15 add upper()/lower()
+0.14 Add ESCAPE for LIKE, SIMILAR TO [ESCAPE], POSIX regexp
+0.13 Outer binary format is now different from
+ inner: it's just a UTF-16 string
+0.12 Fix copy binary
+0.11 Force UTF-8 convertor if server_encoding='UTF8'
+0.10 add (mchar|mvarchar)_(send|recv) functions to
+ allow binary copying. Note: that functions
+ don't recode values.
+0.9 index support for like, improve recoding functions
+0.8 initial support for like optimization with index:
+	there is still no algorithm to find the nearest greater string
+0.7 hash indexes and enable a hash joins
+0.6 implicit casting mchar-mvarchar
+ cross type comparison operations
diff --git a/contrib/mchar/Makefile b/contrib/mchar/Makefile
new file mode 100644
index 00000000000..81826afd296
--- /dev/null
+++ b/contrib/mchar/Makefile
@@ -0,0 +1,31 @@
+MODULE_big = mchar
+OBJS = mchar_io.o mchar_proc.o mchar_op.o mchar_recode.o mchar_like.o
+EXTENSION=mchar
+DATA = mchar--2.2.1.sql mchar--2.0.1--2.1.sql mchar--2.0--2.1.sql \
+ mchar--2.1.1--2.2.sql mchar--2.1--2.2.sql \
+ mchar--2.2--2.2.1.sql \
+ mchar--unpackaged--2.0.sql
+DOCS = README.mchar
+REGRESS = init mchar mvarchar mm like compat
+ENCODING = UTF8
+
+PG_CPPFLAGS=-I/usr/local/include
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/mchar
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+ifeq ($(PORTNAME),win32)
+ICUNAME=icuin
+else
+ICUNAME=icui18n
+endif
+
+SHLIB_LINK += -L/usr/local/lib -licuuc -l$(ICUNAME) -Wl,-rpath,'$$ORIGIN'
+
diff --git a/contrib/mchar/README.mchar b/contrib/mchar/README.mchar
new file mode 100644
index 00000000000..479a7d1f40a
--- /dev/null
+++ b/contrib/mchar/README.mchar
@@ -0,0 +1,20 @@
+MCHAR & MVARCHAR
+ type modifier
+ length()
+ substr(str, pos[, length])
+ || - concatenation with any (mchar,mvarchar) arguments
+ < <= = >= > - case-insensitive comparisons (libICU)
+ &< &<= &= &>= &> - case-sensitive comparisons (libICU)
+ implicit casting mchar<->mvarchar
+ B-tree and hash index
+ LIKE [ESCAPE]
+ SIMILAR TO [ESCAPE]
+ ~ (POSIX regexp)
+ index support for LIKE
+
+
+Authors:
+ Oleg Bartunov <oleg@sai.msu.ru>
+ Teodor Sigaev <teodor@sigaev.ru>
+
+
diff --git a/contrib/mchar/expected/compat.out b/contrib/mchar/expected/compat.out
new file mode 100644
index 00000000000..480a286e8f6
--- /dev/null
+++ b/contrib/mchar/expected/compat.out
@@ -0,0 +1,66 @@
+--- table based checks
+select '<' || ch || '>', '<' || vch || '>' from chvch;
+ ?column? | ?column?
+----------------+--------------
+ <No spaces > | <No spaces>
+ <One space > | <One space >
+ <1 space > | <1 space >
+(3 rows)
+
+select * from chvch where vch = 'One space';
+ ch | vch
+--------------+------------
+ One space | One space
+(1 row)
+
+select * from chvch where vch = 'One space ';
+ ch | vch
+--------------+------------
+ One space | One space
+(1 row)
+
+select * from ch where chcol = 'abcd' order by chcol;
+ chcol
+----------------------------------
+ abcd
+ AbcD
+(2 rows)
+
+select * from ch t1 join ch t2 on t1.chcol = t2.chcol order by t1.chcol, t2.chcol;
+ chcol | chcol
+----------------------------------+----------------------------------
+ abcd | AbcD
+ abcd | abcd
+ AbcD | AbcD
+ AbcD | abcd
+ abcz | abcz
+ defg | dEfg
+ defg | defg
+ dEfg | dEfg
+ dEfg | defg
+ ee | Ee
+ ee | ee
+ Ee | Ee
+ Ee | ee
+(13 rows)
+
+select * from ch where chcol > 'abcd' and chcol<'ee';
+ chcol
+----------------------------------
+ abcz
+ defg
+ dEfg
+(3 rows)
+
+select * from ch order by chcol;
+ chcol
+----------------------------------
+ abcd
+ AbcD
+ abcz
+ defg
+ dEfg
+ ee
+ Ee
+(7 rows)
+
diff --git a/contrib/mchar/expected/init.out b/contrib/mchar/expected/init.out
new file mode 100644
index 00000000000..7bae978ec35
--- /dev/null
+++ b/contrib/mchar/expected/init.out
@@ -0,0 +1,18 @@
+CREATE EXTENSION mchar;
+create table ch (
+ chcol mchar(32)
+) without oids;
+insert into ch values('abcd');
+insert into ch values('AbcD');
+insert into ch values('abcz');
+insert into ch values('defg');
+insert into ch values('dEfg');
+insert into ch values('ee');
+insert into ch values('Ee');
+create table chvch (
+ ch mchar(12),
+ vch mvarchar(12)
+) without oids;
+insert into chvch values('No spaces', 'No spaces');
+insert into chvch values('One space ', 'One space ');
+insert into chvch values('1 space', '1 space ');
diff --git a/contrib/mchar/expected/like.out b/contrib/mchar/expected/like.out
new file mode 100644
index 00000000000..a3f47f8c710
--- /dev/null
+++ b/contrib/mchar/expected/like.out
@@ -0,0 +1,841 @@
+-- simplest examples
+-- E061-04 like predicate
+set standard_conforming_strings=off;
+SELECT 'hawkeye'::mchar LIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'H%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'H%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'indio%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'indio%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'h%eye' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%eye' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE '_ndio' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE '_ndio' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'in__o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'in__o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'in_o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'in_o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'H%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'H%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'indio%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'indio%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%eye' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%eye' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE '_ndio' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE '_ndio' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'in__o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'in__o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'in_o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'in_o' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- unused escape character
+SELECT 'hawkeye'::mchar LIKE 'h%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'ind_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'ind_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%awkeye'::mchar LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%awkeye'::mchar NOT LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE '_ndio'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE '_ndio'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_d_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_d%o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d%o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character same as pattern character
+SELECT 'maca'::mchar LIKE 'm%aca' ESCAPE '%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'maca'::mchar NOT LIKE 'm%aca' ESCAPE '%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'ma%a'::mchar LIKE 'm%a%%a' ESCAPE '%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'ma%a'::mchar NOT LIKE 'm%a%%a' ESCAPE '%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bear'::mchar LIKE 'b_ear' ESCAPE '_'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'bear'::mchar NOT LIKE 'b_ear'::mchar ESCAPE '_' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar LIKE 'b_e__r' ESCAPE '_'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'be_r'::mchar NOT LIKE 'b_e__r' ESCAPE '_'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar LIKE '__e__r' ESCAPE '_'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar NOT LIKE '__e__r'::mchar ESCAPE '_' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- unused escape character
+SELECT 'hawkeye'::mvarchar LIKE 'h%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'ind_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'ind_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%awkeye'::mvarchar LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%awkeye'::mvarchar NOT LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE '_ndio'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE '_ndio'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character same as pattern character
+SELECT 'maca'::mvarchar LIKE 'm%aca' ESCAPE '%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'maca'::mvarchar NOT LIKE 'm%aca' ESCAPE '%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'ma%a'::mvarchar LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'ma%a'::mvarchar NOT LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bear'::mvarchar LIKE 'b_ear' ESCAPE '_'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'bear'::mvarchar NOT LIKE 'b_ear'::mvarchar ESCAPE '_' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'be_r'::mvarchar NOT LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar LIKE '__e__r' ESCAPE '_'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar NOT LIKE '__e__r'::mvarchar ESCAPE '_' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- similar to
+SELECT 'abc'::mchar SIMILAR TO 'abc'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO 'a'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO '%(b|d)%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO '(b|c)%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO 'abc'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO 'a'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO '%(b|d)%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO '(b|c)%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- index support
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+set enable_seqscan = off;
+explain (costs off)
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Sort Key: chcol USING &<
+ -> Index Only Scan using qq on ch
+ Index Cond: ((chcol >= 'aB'::mvarchar) AND (chcol < 'aC'::mvarchar))
+ Filter: (chcol ~~ 'aB_d'::mvarchar)
+(5 rows)
+
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+set enable_seqscan = on;
+create table testt (f1 mchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ QUERY PLAN
+---------------------------------------------------
+ Index Only Scan using testindex on testt
+ Filter: ((f1)::mvarchar ~~ 'Abc\\-%'::mvarchar)
+(2 rows)
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+set enable_seqscan = on;
+drop table testt;
+create table testt (f1 mvarchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E' %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+ 0000000001
+ 0000000002
+(4 rows)
+
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ QUERY PLAN
+----------------------------------------------------------------------
+ Index Only Scan using testindex on testt
+ Index Cond: ((f1 >= 'Abc-'::mvarchar) AND (f1 < 'Abc.'::mvarchar))
+ Filter: ((f1)::mvarchar ~~ 'Abc\\-%'::mvarchar)
+(3 rows)
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E' %'::mchar;
+ f1
+------------
+ 0000000001
+ 0000000002
+ Abc-000001
+ Abc-000002
+(4 rows)
+
+set enable_seqscan = on;
+drop table testt;
+CREATE TABLE test ( code mchar(5) NOT NULL );
+insert into test values('1111 ');
+insert into test values('111 ');
+insert into test values('11 ');
+insert into test values('1 ');
+SELECT * FROM test WHERE code LIKE ('% ');
+ code
+-------
+ 1
+(1 row)
+
+set escape_string_warning = off;
+SELECT CASE WHEN ('_'::text SIMILAR TO '[\\_]'::text ESCAPE '\\'::text) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+SELECT CASE WHEN ('_'::mchar SIMILAR TO '[\\_]'::mchar ESCAPE '\\'::mchar) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+SELECT CASE WHEN ('_'::mvarchar SIMILAR TO '[\\_]'::mvarchar ESCAPE '\\'::mvarchar) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+reset escape_string_warning;
+reset standard_conforming_strings;
diff --git a/contrib/mchar/expected/mchar.out b/contrib/mchar/expected/mchar.out
new file mode 100644
index 00000000000..8f2009c50d0
--- /dev/null
+++ b/contrib/mchar/expected/mchar.out
@@ -0,0 +1,382 @@
+-- I/O tests
+select '1'::mchar;
+ mchar
+-------
+ 1
+(1 row)
+
+select '2 '::mchar;
+ mchar
+-------
+ 2
+(1 row)
+
+select '10 '::mchar;
+ mchar
+-------
+ 10
+(1 row)
+
+select '1'::mchar(2);
+ mchar
+-------
+ 1
+(1 row)
+
+select '2 '::mchar(2);
+ mchar
+-------
+ 2
+(1 row)
+
+select '3 '::mchar(2);
+ mchar
+-------
+ 3
+(1 row)
+
+select '10 '::mchar(2);
+ mchar
+-------
+ 10
+(1 row)
+
+select ' '::mchar(10);
+ mchar
+------------
+
+(1 row)
+
+select ' '::mchar;
+ mchar
+-------
+
+(1 row)
+
+-- operations & functions
+select length('1'::mchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mchar);
+ length
+--------
+ 2
+(1 row)
+
+select length('1'::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('3 '::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mchar(2));
+ length
+--------
+ 2
+(1 row)
+
+select length(' '::mchar(10));
+ length
+--------
+ 0
+(1 row)
+
+select length(' '::mchar);
+ length
+--------
+ 0
+(1 row)
+
+select 'asd'::mchar(10) || '>'::mchar(10);
+ ?column?
+----------------------
+ asd >
+(1 row)
+
+select length('asd'::mchar(10) || '>'::mchar(10));
+ length
+--------
+ 11
+(1 row)
+
+select 'asd'::mchar(2) || '>'::mchar(10);
+ ?column?
+--------------
+ as>
+(1 row)
+
+select length('asd'::mchar(2) || '>'::mchar(10));
+ length
+--------
+ 3
+(1 row)
+
+-- Comparisons
+select 'asdf'::mchar = 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(3);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar < 'aSdf'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf '::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf '::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select max(ch) from chvch;
+ max
+--------------
+ One space
+(1 row)
+
+select min(ch) from chvch;
+ min
+--------------
+ 1 space
+(1 row)
+
+select substr('1234567890'::mchar, 3) = '34567890' as "34567890";
+ 34567890
+----------
+ f
+(1 row)
+
+select substr('1234567890'::mchar, 4, 3) = '456' as "456";
+ 456
+-----
+ t
+(1 row)
+
+select lower('asdfASDF'::mchar);
+ lower
+----------
+ asdfasdf
+(1 row)
+
+select upper('asdfASDF'::mchar);
+ upper
+----------
+ ASDFASDF
+(1 row)
+
+select 'asd'::mchar == 'aSd'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asd'::mchar == 'aCd'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asd'::mchar == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL == 'aCd'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::mchar == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+--Note: here we use different space symbols, be carefull to copy it!
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+ v | count
+-------+-------
+ 4 242 | 2
+ aSDF | 2
+(2 rows)
+
+set enable_hashagg=off;
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+ v | count
+-------+-------
+ 4 242 | 2
+ aSDF | 2
+(2 rows)
+
+reset enable_hashagg;
diff --git a/contrib/mchar/expected/mm.out b/contrib/mchar/expected/mm.out
new file mode 100644
index 00000000000..c5b36c21611
--- /dev/null
+++ b/contrib/mchar/expected/mm.out
@@ -0,0 +1,855 @@
+select 'asd'::mchar::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar;
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar;
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar(5);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar(5);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mvarchar::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar;
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar;
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar(5);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar(5);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 ';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 '::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 '::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mvarchar || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 ';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 '::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 '::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 ';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 '::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 '::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(4) || '143';
+ ?column?
+----------
+ asd 143
+(1 row)
+
+select 'asd'::mchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mvarchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mvarchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 1 where 'f'::mchar='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar='FOO'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar='FOO '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO'::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar='FOO '::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar='FOO'::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar='FOO '::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+Select 'f'::mchar(1) Union Select 'o'::mvarchar(1);
+ mchar
+-------
+ f
+ o
+(2 rows)
+
+Select 'f'::mvarchar(1) Union Select 'o'::mchar(1);
+ mvarchar
+----------
+ f
+ o
+(2 rows)
+
+select * from chvch where ch=vch;
+ ch | vch
+--------------+------------
+ No spaces | No spaces
+ One space | One space
+ 1 space | 1 space
+(3 rows)
+
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+ chcol
+----------------------------------
+ ee
+ Ee
+(2 rows)
+
+create index qq on ch (chcol);
+set enable_seqscan=off;
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+ chcol
+----------------------------------
+ ee
+ Ee
+(2 rows)
+
+set enable_seqscan=on;
+--\copy chvch to 'results/chvch.dump' binary
+--truncate table chvch;
+--\copy chvch from 'results/chvch.dump' binary
+--test joins
+CREATE TABLE a (mchar2 MCHAR(2) NOT NULL);
+CREATE TABLE c (mvarchar255 mvarchar NOT NULL);
+SELECT * FROM a, c WHERE mchar2 = mvarchar255;
+ mchar2 | mvarchar255
+--------+-------------
+(0 rows)
+
+SELECT * FROM a, c WHERE mvarchar255 = mchar2;
+ mchar2 | mvarchar255
+--------+-------------
+(0 rows)
+
+DROP TABLE a;
+DROP TABLE c;
+select * from (values
+ ('е'::mchar),('ё'),('еа'),('еб'),('ее'),('еж'),('ёа'),('ёб'),('ёё'),('ёж'),('ёе'),('её'))
+ z order by 1;
+ column1
+---------
+ е
+ ё
+ еа
+ ёа
+ еб
+ ёб
+ ее
+ её
+ ёе
+ ёё
+ еж
+ ёж
+(12 rows)
+
+select 'ё'::mchar = 'е';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'Ё'::mchar = 'Е';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'й'::mchar = 'и';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'Й'::mchar = 'И';
+ ?column?
+----------
+ f
+(1 row)
+
+select mvarchar_icase_cmp('ёа','еб'), mvarchar_icase_cmp('еб','ё'),
+ mvarchar_icase_cmp('ё', 'ёа');
+ mvarchar_icase_cmp | mvarchar_icase_cmp | mvarchar_icase_cmp
+--------------------+--------------------+--------------------
+ -1 | 1 | -1
+(1 row)
+
diff --git a/contrib/mchar/expected/mvarchar.out b/contrib/mchar/expected/mvarchar.out
new file mode 100644
index 00000000000..5c866b43e71
--- /dev/null
+++ b/contrib/mchar/expected/mvarchar.out
@@ -0,0 +1,363 @@
+-- I/O tests
+select '1'::mvarchar;
+ mvarchar
+----------
+ 1
+(1 row)
+
+select '2 '::mvarchar;
+ mvarchar
+----------
+ 2
+(1 row)
+
+select '10 '::mvarchar;
+ mvarchar
+--------------
+ 10
+(1 row)
+
+select '1'::mvarchar(2);
+ mvarchar
+----------
+ 1
+(1 row)
+
+select '2 '::mvarchar(2);
+ mvarchar
+----------
+ 2
+(1 row)
+
+select '3 '::mvarchar(2);
+ mvarchar
+----------
+ 3
+(1 row)
+
+select '10 '::mvarchar(2);
+ mvarchar
+----------
+ 10
+(1 row)
+
+select ' '::mvarchar(10);
+ mvarchar
+------------
+
+(1 row)
+
+select ' '::mvarchar;
+ mvarchar
+--------------------
+
+(1 row)
+
+-- operations & functions
+select length('1'::mvarchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mvarchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mvarchar);
+ length
+--------
+ 2
+(1 row)
+
+select length('1'::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('3 '::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mvarchar(2));
+ length
+--------
+ 2
+(1 row)
+
+select length(' '::mvarchar(10));
+ length
+--------
+ 0
+(1 row)
+
+select length(' '::mvarchar);
+ length
+--------
+ 0
+(1 row)
+
+select 'asd'::mvarchar(10) || '>'::mvarchar(10);
+ ?column?
+----------
+ asd>
+(1 row)
+
+select length('asd'::mvarchar(10) || '>'::mvarchar(10));
+ length
+--------
+ 4
+(1 row)
+
+select 'asd'::mvarchar(2) || '>'::mvarchar(10);
+ ?column?
+----------
+ as>
+(1 row)
+
+select length('asd'::mvarchar(2) || '>'::mvarchar(10));
+ length
+--------
+ 3
+(1 row)
+
+-- Comparisons
+select 'asdf'::mvarchar = 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(3);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf '::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf '::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select max(vch) from chvch;
+ max
+------------
+ One space
+(1 row)
+
+select min(vch) from chvch;
+ min
+----------
+ 1 space
+(1 row)
+
+select substr('1234567890'::mvarchar, 3) = '34567890' as "34567890";
+ 34567890
+----------
+ f
+(1 row)
+
+select substr('1234567890'::mvarchar, 4, 3) = '456' as "456";
+ 456
+-----
+ t
+(1 row)
+
+select lower('asdfASDF'::mvarchar);
+ lower
+----------
+ asdfasdf
+(1 row)
+
+select upper('asdfASDF'::mvarchar);
+ upper
+----------
+ ASDFASDF
+(1 row)
+
+select 'asd'::mvarchar == 'aSd'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asd'::mvarchar == 'aCd'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asd'::mvarchar == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL == 'aCd'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::mvarchar == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
diff --git a/contrib/mchar/mchar--2.0--2.1.sql b/contrib/mchar/mchar--2.0--2.1.sql
new file mode 100644
index 00000000000..a794772f376
--- /dev/null
+++ b/contrib/mchar/mchar--2.0--2.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.1'" to load this file. \quit
+
+-- Upgrade 2.0 -> 2.1: register textlike_support as the planner support
+-- function of both LIKE implementations that take an mvarchar pattern.
+ALTER FUNCTION mchar_like(mchar, mvarchar) SUPPORT textlike_support;
+ALTER FUNCTION mvarchar_like(mvarchar, mvarchar) SUPPORT textlike_support;
diff --git a/contrib/mchar/mchar--2.0.1--2.1.sql b/contrib/mchar/mchar--2.0.1--2.1.sql
new file mode 100644
index 00000000000..a794772f376
--- /dev/null
+++ b/contrib/mchar/mchar--2.0.1--2.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.1'" to load this file. \quit
+
+-- Upgrade 2.0.1 -> 2.1: register textlike_support as the planner support
+-- function of both LIKE implementations that take an mvarchar pattern.
+ALTER FUNCTION mchar_like(mchar, mvarchar) SUPPORT textlike_support;
+ALTER FUNCTION mvarchar_like(mvarchar, mvarchar) SUPPORT textlike_support;
diff --git a/contrib/mchar/mchar--2.1--2.2.sql b/contrib/mchar/mchar--2.1--2.2.sql
new file mode 100644
index 00000000000..98689671499
--- /dev/null
+++ b/contrib/mchar/mchar--2.1--2.2.sql
@@ -0,0 +1,28 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2'" to load this file. \quit
+
+-- similar_to_escape() overloads for mchar and mvarchar, each with and without
+-- an explicit escape argument; both arities of a type map to one C symbol.
+-- NOTE(review): none is declared STRICT -- confirm the C code checks for NULL
+-- arguments itself.
+
+CREATE FUNCTION similar_to_escape(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
diff --git a/contrib/mchar/mchar--2.1.1--2.2.sql b/contrib/mchar/mchar--2.1.1--2.2.sql
new file mode 100644
index 00000000000..98689671499
--- /dev/null
+++ b/contrib/mchar/mchar--2.1.1--2.2.sql
@@ -0,0 +1,28 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2'" to load this file. \quit
+
+-- similar_to_escape() overloads for mchar and mvarchar, each with and without
+-- an explicit escape argument; both arities of a type map to one C symbol.
+-- NOTE(review): none is declared STRICT -- confirm the C code checks for NULL
+-- arguments itself.
+
+CREATE FUNCTION similar_to_escape(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
diff --git a/contrib/mchar/mchar--2.2--2.2.1.sql b/contrib/mchar/mchar--2.2--2.2.1.sql
new file mode 100644
index 00000000000..e663aa24a5d
--- /dev/null
+++ b/contrib/mchar/mchar--2.2--2.2.1.sql
@@ -0,0 +1,17 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2.1'" to load this file. \quit
+
+-- Support function implemented in the extension's shared library; with no
+-- explicit link symbol the C symbol name equals the SQL name.
+CREATE OR REPLACE FUNCTION mvarchar_support(internal)
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT
+ PARALLEL SAFE;
+
+-- Attach it to mvarchar(mvarchar, integer, boolean) -- presumably the typmod
+-- coercion function of mvarchar; verify against the install script.
+ALTER FUNCTION mvarchar(mvarchar, integer, boolean)
+ SUPPORT mvarchar_support;
+
+
diff --git a/contrib/mchar/mchar--2.2.1.sql b/contrib/mchar/mchar--2.2.1.sql
new file mode 100644
index 00000000000..2f975b64edd
--- /dev/null
+++ b/contrib/mchar/mchar--2.2.1.sql
@@ -0,0 +1,1352 @@
+\echo Use "CREATE EXTENSION mchar" to load this file. \quit
+
+-- I/O functions
+
+CREATE FUNCTION mchartypmod_in(cstring[])
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchartypmod_out(int4)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_in(cstring)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_out(mchar)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_send(mchar)
+RETURNS bytea
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_recv(internal)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE TYPE mchar ( -- base type; its handlers are the C functions declared just above
+ INTERNALLENGTH = -1, -- -1 means a variable-length value
+ INPUT = mchar_in, -- text representation -> internal
+ OUTPUT = mchar_out, -- internal -> text representation
+ TYPMOD_IN = mchartypmod_in, -- parses the type modifier list
+ TYPMOD_OUT = mchartypmod_out, -- prints the type modifier
+ RECEIVE = mchar_recv, -- binary input
+ SEND = mchar_send, -- binary output
+ STORAGE = extended -- value may be compressed and/or moved out of line (TOAST)
+);
+
+CREATE FUNCTION mchar(mchar, integer, boolean)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE CAST (mchar as mchar)
+WITH FUNCTION mchar(mchar, integer, boolean) as IMPLICIT;
+
+CREATE FUNCTION mvarchar_in(cstring)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_out(mvarchar)
+RETURNS cstring
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_send(mvarchar)
+RETURNS bytea
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_recv(internal)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE TYPE mvarchar ( -- base type; its I/O handlers are the mvarchar_* C functions declared just above
+ INTERNALLENGTH = -1, -- -1 means a variable-length value
+ INPUT = mvarchar_in, -- text representation -> internal
+ OUTPUT = mvarchar_out, -- internal -> text representation
+ TYPMOD_IN = mchartypmod_in, -- type modifier handlers are shared with mchar
+ TYPMOD_OUT = mchartypmod_out,
+ RECEIVE = mvarchar_recv, -- binary input
+ SEND = mvarchar_send, -- binary output
+ STORAGE = extended -- value may be compressed and/or moved out of line (TOAST)
+);
+
+CREATE FUNCTION mvarchar_support(internal) -- planner support function; attached to mvarchar(mvarchar, integer, boolean) below
+RETURNS internal
+LANGUAGE C
+IMMUTABLE STRICT PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar(mvarchar, integer, boolean)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT
+SUPPORT mvarchar_support;
+
+CREATE CAST (mvarchar as mvarchar)
+WITH FUNCTION mvarchar(mvarchar, integer, boolean) as IMPLICIT;
+
+-- Operations and functions
+
+CREATE FUNCTION length(mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'mchar_length'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION upper(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_upper'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION lower(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_lower'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_hash(mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_concat(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR || (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_concat
+);
+
+CREATE FUNCTION mchar_like(mchar, mvarchar) -- backs the ~~ operator for (mchar, mvarchar) defined below
+RETURNS bool
+SUPPORT textlike_support -- NOTE(review): not defined in this script, presumably the built-in text LIKE support function; confirm it copes with mchar/mvarchar operands
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_notlike(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR ~~ ( -- the operator that "mchar LIKE mvarchar" resolves to
+ FUNCTION = mchar_like,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ NEGATOR = !~~,
+ RESTRICT = likesel,
+ JOIN = likejoinsel
+);
+
+CREATE OPERATOR !~~ ( -- the operator that "mchar NOT LIKE mvarchar" resolves to
+ FUNCTION = mchar_notlike,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ NEGATOR = ~~,
+ RESTRICT = nlikesel,
+ JOIN = nlikejoinsel
+);
+
+CREATE FUNCTION mchar_regexeq(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_regexne(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR ~ (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_regexeq,
+ RESTRICT = regexeqsel,
+ JOIN = regexeqjoinsel,
+ NEGATOR = '!~'
+);
+
+CREATE OPERATOR !~ (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_regexne,
+ RESTRICT = regexnesel,
+ JOIN = regexnejoinsel,
+ NEGATOR = '~'
+);
+
+CREATE FUNCTION similar_escape(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION length(mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME', 'mvarchar_length'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION upper(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_upper'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION lower(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_lower'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_hash(mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_concat(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR || (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_concat
+);
+
+CREATE FUNCTION mvarchar_like(mvarchar, mvarchar)
+RETURNS bool
+SUPPORT textlike_support
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION like_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_like_escape'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_notlike(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR ~~ (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_like,
+ RESTRICT = likesel,
+ JOIN = likejoinsel,
+ NEGATOR = '!~~'
+);
+
+CREATE OPERATOR !~~ (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_notlike,
+ RESTRICT = nlikesel,
+ JOIN = nlikejoinsel,
+ NEGATOR = '~~'
+);
+
+CREATE FUNCTION mvarchar_regexeq(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_regexne(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR ~ (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_regexeq,
+ RESTRICT = regexeqsel,
+ JOIN = regexeqjoinsel,
+ NEGATOR = '!~'
+);
+
+CREATE OPERATOR !~ (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_regexne,
+ RESTRICT = regexnesel,
+ JOIN = regexnejoinsel,
+ NEGATOR = '~'
+);
+
+CREATE FUNCTION similar_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION substr (mchar, int4)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_substring_no_len'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION substr (mchar, int4, int4)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_substring'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION substr (mvarchar, int4)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_substring_no_len'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION substr (mvarchar, int4, int4)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_substring'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+-- Comparing
+-- MCHAR
+
+CREATE FUNCTION mchar_icase_cmp(mchar, mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_eq(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_ne(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_lt(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_le(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_gt(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_icase_ge(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR < ( -- mchar < mchar, evaluated by mchar_icase_lt
+ FUNCTION = mchar_icase_lt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = >,
+ NEGATOR = >=,
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > ( -- mchar > mchar, evaluated by mchar_icase_gt
+ FUNCTION = mchar_icase_gt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = <,
+ NEGATOR = <=,
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- mchar <= mchar, evaluated by mchar_icase_le
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_icase_le,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= ( -- mchar >= mchar, evaluated by mchar_icase_ge
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_icase_ge,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mchar = mchar via mchar_icase_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR = (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES,
+ HASHES
+);
+
+CREATE OPERATOR <> ( -- mchar <> mchar, evaluated by mchar_icase_ne
+ FUNCTION = mchar_icase_ne,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = <>,
+ NEGATOR = =,
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mchar_case_cmp(mchar, mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_eq(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_ne(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_lt(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_le(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_gt(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_case_ge(mchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR &< (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_lt,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_gt,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- mchar &<= mchar, evaluated by mchar_case_le
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_le,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="-style operators; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR &>= ( -- mchar &>= mchar, evaluated by mchar_case_ge
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_ge,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="-style operators; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mchar &= mchar via mchar_case_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR &= (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR &<> (
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- MVARCHAR
+
+CREATE FUNCTION mvarchar_icase_cmp(mvarchar, mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_eq(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_ne(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_lt(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_le(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_gt(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_icase_ge(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR < (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_lt,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_gt,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- mvarchar <= mvarchar, evaluated by mvarchar_icase_le
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_le,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= ( -- mvarchar >= mvarchar, evaluated by mvarchar_icase_ge
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_ge,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mvarchar = mvarchar via mvarchar_icase_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR = (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES,
+ HASHES
+);
+
+CREATE OPERATOR <> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mvarchar_case_cmp(mvarchar, mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_eq(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_ne(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_lt(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_le(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_gt(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mvarchar_case_ge(mvarchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR &< (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_lt,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_gt,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- mvarchar &<= mvarchar, evaluated by mvarchar_case_le
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_le,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="-style operators; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR &>= ( -- mvarchar &>= mvarchar, evaluated by mvarchar_case_ge
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_ge,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="-style operators; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mvarchar &= mvarchar via mvarchar_case_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR &= (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR &<> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- MCHAR <> MVARCHAR
+
+CREATE FUNCTION mc_mv_icase_cmp(mchar, mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_eq(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_ne(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_lt(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_le(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_gt(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_icase_ge(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR < (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_lt,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_gt,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- mchar <= mvarchar, evaluated by mc_mv_icase_le
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_le,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= ( -- mchar >= mvarchar, evaluated by mc_mv_icase_ge
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_ge,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mchar = mvarchar via mc_mv_icase_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR = (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR <> (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mc_mv_case_cmp(mchar, mvarchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_eq(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_ne(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_lt(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_le(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_gt(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mc_mv_case_ge(mchar, mvarchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR &< (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_lt,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_gt,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- mchar &<= mvarchar, evaluated by mc_mv_case_le
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_le,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="-style operators; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR &>= ( -- mchar &>= mvarchar, evaluated by mc_mv_case_ge
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_ge,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="-style operators; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mchar &= mvarchar via mc_mv_case_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR &= (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR &<> (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- MVARCHAR <> MCHAR
+
+CREATE FUNCTION mv_mc_icase_cmp(mvarchar, mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_eq(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_ne(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_lt(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_le(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_gt(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_icase_ge(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR < (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_lt,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_gt,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- mvarchar <= mchar, evaluated by mv_mc_icase_le
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_le,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR >= ( -- mvarchar >= mchar, evaluated by mv_mc_icase_ge
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_ge,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mvarchar = mchar via mv_mc_icase_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR = (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR <> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mv_mc_case_cmp(mvarchar, mchar)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_eq(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_ne(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_lt(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_le(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_gt(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mv_mc_case_ge(mvarchar, mchar)
+RETURNS bool
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+
+CREATE OPERATOR &< (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_lt,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_gt,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- mvarchar &<= mchar, evaluated by mv_mc_case_le
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_le,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarlesel, -- estimator for inclusive "<="-style operators; scalarltsel is meant for strict "<"
+ JOIN = scalarlejoinsel
+);
+
+CREATE OPERATOR &>= ( -- mvarchar &>= mchar, evaluated by mv_mc_case_ge
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_ge,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargesel, -- estimator for inclusive ">="-style operators; scalargtsel is meant for strict ">"
+ JOIN = scalargejoinsel
+);
+
+-- mvarchar &= mchar via mv_mc_case_eq. MERGES replaces the obsolete SORT1/SORT2 options, which only ever meant "set MERGES".
+CREATE OPERATOR &= (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ MERGES
+);
+
+CREATE OPERATOR &<> (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- MCHAR - MVARCHAR operations
+
+CREATE FUNCTION mchar_mvarchar_concat(mchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR || (
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mchar_mvarchar_concat
+);
+
+CREATE FUNCTION mvarchar_mchar_concat(mvarchar, mchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OPERATOR || (
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mvarchar_mchar_concat
+);
+
+CREATE FUNCTION mvarchar_mchar(mvarchar, integer, boolean)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE CAST (mvarchar as mchar)
+WITH FUNCTION mvarchar_mchar(mvarchar, integer, boolean) as IMPLICIT;
+
+CREATE FUNCTION mchar_mvarchar(mchar, integer, boolean)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE CAST (mchar as mvarchar)
+WITH FUNCTION mchar_mvarchar(mchar, integer, boolean) as IMPLICIT;
+
+-- Aggregates
+
+CREATE FUNCTION mchar_larger(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+-- max(mchar): folds its input through mchar_larger; SORTOP names the ">" operator associated with this aggregate.
+CREATE AGGREGATE max(mchar) (
+ STYPE = mchar,
+ SFUNC = mchar_larger,
+ SORTOP = >
+);
+
+CREATE FUNCTION mchar_smaller(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+-- min(mchar): folds its input through mchar_smaller; SORTOP names the "<" operator associated with this aggregate.
+CREATE AGGREGATE min(mchar) (
+ STYPE = mchar,
+ SFUNC = mchar_smaller,
+ SORTOP = <
+);
+
+CREATE FUNCTION mvarchar_larger(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+-- max(mvarchar): folds its input through mvarchar_larger; SORTOP names the ">" operator associated with this aggregate.
+CREATE AGGREGATE max(mvarchar) (
+ STYPE = mvarchar,
+ SFUNC = mvarchar_larger,
+ SORTOP = >
+);
+
+CREATE FUNCTION mvarchar_smaller(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+-- min(mvarchar): folds its input through mvarchar_smaller; SORTOP names the "<" operator associated with this aggregate.
+CREATE AGGREGATE min(mvarchar) (
+ STYPE = mvarchar,
+ SFUNC = mvarchar_smaller,
+ SORTOP = <
+);
+
+-- B-tree and hash index support
+CREATE OPERATOR FAMILY icase_ops USING btree;
+CREATE OPERATOR FAMILY case_ops USING btree;
+
+CREATE OPERATOR CLASS mchar_icase_ops -- default btree operator class for mchar, placed in family icase_ops
+DEFAULT FOR TYPE mchar USING btree FAMILY icase_ops AS
+ OPERATOR 1 < , -- strategies 1..5 without explicit types: the (mchar, mchar) operators
+ OPERATOR 2 <= ,
+ OPERATOR 3 = ,
+ OPERATOR 4 >= ,
+ OPERATOR 5 > ,
+ FUNCTION 1 mchar_icase_cmp(mchar, mchar), -- support function 1 for (mchar, mchar)
+ OPERATOR 1 < (mchar, mvarchar), -- cross-type (mchar, mvarchar) members of the same class
+ OPERATOR 2 <= (mchar, mvarchar),
+ OPERATOR 3 = (mchar, mvarchar),
+ OPERATOR 4 >= (mchar, mvarchar),
+ OPERATOR 5 > (mchar, mvarchar),
+ FUNCTION 1 mc_mv_icase_cmp(mchar, mvarchar); -- support function 1 for (mchar, mvarchar)
+
+CREATE OPERATOR CLASS mchar_case_ops
+FOR TYPE mchar USING btree FAMILY case_ops AS
+ OPERATOR 1 &< ,
+ OPERATOR 2 &<= ,
+ OPERATOR 3 &= ,
+ OPERATOR 4 &>= ,
+ OPERATOR 5 &> ,
+ FUNCTION 1 mchar_case_cmp(mchar, mchar),
+ OPERATOR 1 &< (mchar, mvarchar),
+ OPERATOR 2 &<= (mchar, mvarchar),
+ OPERATOR 3 &= (mchar, mvarchar),
+ OPERATOR 4 &>= (mchar, mvarchar),
+ OPERATOR 5 &> (mchar, mvarchar),
+ FUNCTION 1 mc_mv_case_cmp(mchar, mvarchar);
+
+CREATE OPERATOR CLASS mchar_icase_ops -- default hash operator class for mchar; same name as the btree class above, different access method
+DEFAULT FOR TYPE mchar USING hash AS
+ OPERATOR 1 = , -- the (mchar, mchar) "=" operator, which is declared HASHES
+ FUNCTION 1 mchar_hash(mchar); -- hash support function 1
+
+CREATE OPERATOR CLASS mvarchar_icase_ops
+DEFAULT FOR TYPE mvarchar USING btree FAMILY icase_ops AS
+ OPERATOR 1 < ,
+ OPERATOR 2 <= ,
+ OPERATOR 3 = ,
+ OPERATOR 4 >= ,
+ OPERATOR 5 > ,
+ FUNCTION 1 mvarchar_icase_cmp(mvarchar, mvarchar),
+ OPERATOR 1 < (mvarchar, mchar),
+ OPERATOR 2 <= (mvarchar, mchar),
+ OPERATOR 3 = (mvarchar, mchar),
+ OPERATOR 4 >= (mvarchar, mchar),
+ OPERATOR 5 > (mvarchar, mchar),
+ FUNCTION 1 mv_mc_icase_cmp(mvarchar, mchar);
+
+CREATE OPERATOR CLASS mvarchar_case_ops
+FOR TYPE mvarchar USING btree FAMILY case_ops AS
+ OPERATOR 1 &< ,
+ OPERATOR 2 &<= ,
+ OPERATOR 3 &= ,
+ OPERATOR 4 &>= ,
+ OPERATOR 5 &> ,
+ FUNCTION 1 mvarchar_case_cmp(mvarchar, mvarchar),
+ OPERATOR 1 &< (mvarchar, mchar),
+ OPERATOR 2 &<= (mvarchar, mchar),
+ OPERATOR 3 &= (mvarchar, mchar),
+ OPERATOR 4 &>= (mvarchar, mchar),
+ OPERATOR 5 &> (mvarchar, mchar),
+ FUNCTION 1 mv_mc_case_cmp(mvarchar, mchar);
+
+CREATE OPERATOR CLASS mvarchar_icase_ops
+DEFAULT FOR TYPE mvarchar USING hash AS
+ OPERATOR 1 = ,
+ FUNCTION 1 mvarchar_hash(mvarchar);
+
+
+-- Index support for LIKE
+
+CREATE FUNCTION mchar_pattern_fixed_prefix(internal, internal, internal)
+RETURNS int4
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE FUNCTION mchar_greaterstring(internal)
+RETURNS internal
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT;
+
+CREATE OR REPLACE FUNCTION isfulleq_mchar(mchar, mchar)
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION fullhash_mchar(mchar)
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == ( -- mchar == mchar, evaluated by isfulleq_mchar
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = isfulleq_mchar, -- declared CALLED ON NULL INPUT above, so it also runs for NULL operands
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES -- hash support is supplied by operator class mchar_fill_ops (fullhash_mchar) below
+);
+
+CREATE OPERATOR CLASS mchar_fill_ops
+ FOR TYPE mchar USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_mchar(mchar);
+
+CREATE OR REPLACE FUNCTION isfulleq_mvarchar(mvarchar, mvarchar)
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION fullhash_mvarchar(mvarchar)
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == (
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = isfulleq_mvarchar,
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES
+);
+
+CREATE OPERATOR CLASS mvarchar_fill_ops
+ FOR TYPE mvarchar USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_mvarchar(mvarchar);
+
+CREATE FUNCTION similar_to_escape(mchar)
+ RETURNS mchar
+ AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+ LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mchar, mchar)
+ RETURNS mchar
+ AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+ LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar)
+ RETURNS mvarchar
+ AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+ LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar)
+ RETURNS mvarchar
+ AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+ LANGUAGE C IMMUTABLE;
+
diff --git a/contrib/mchar/mchar--unpackaged--2.0.sql b/contrib/mchar/mchar--unpackaged--2.0.sql
new file mode 100644
index 00000000000..1acc4ccec1e
--- /dev/null
+++ b/contrib/mchar/mchar--unpackaged--2.0.sql
@@ -0,0 +1,404 @@
+\echo Use "CREATE EXTENSION mchar FROM unpackaged" to load this file. \quit
+
+-- I/O functions
+
+ALTER EXTENSION mchar ADD FUNCTION mchartypmod_in(cstring[]);
+
+ALTER EXTENSION mchar ADD FUNCTION mchartypmod_out(int4);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_in(cstring);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_out(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_send(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_recv(internal);
+
+ALTER EXTENSION mchar ADD TYPE mchar;
+
+ALTER EXTENSION mchar ADD FUNCTION mchar(mchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mchar as mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_in(cstring);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_out(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_send(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_recv(internal);
+
+ALTER EXTENSION mchar ADD TYPE mvarchar;
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar(mvarchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mvarchar as mvarchar);
+
+--Operations and functions
+
+ALTER EXTENSION mchar ADD FUNCTION length(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION upper(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION lower(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_hash(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_concat(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_like(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_notlike(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~~ (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~~ (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_regexeq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_regexne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~ (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~ (mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION similar_escape(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION length(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION upper(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION lower(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_hash(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_concat(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_like(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION like_escape(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_notlike(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_regexeq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_regexne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION similar_escape(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mchar, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mchar, int4, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mvarchar, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mvarchar, int4, int4);
+
+-- Comparing
+-- MCHAR
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_cmp(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_eq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_ne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_lt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_le(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_gt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_ge(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_cmp(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_eq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_ne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_lt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_le(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_gt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_ge(mchar, mchar);
+
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mchar, mchar);
+
+--MVARCHAR
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_cmp(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_eq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_ne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_lt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_le(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_gt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_ge(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_cmp(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_eq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_ne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_lt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_le(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_gt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_ge(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mvarchar, mvarchar);
+
+-- MCHAR <> MVARCHAR
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_cmp(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_eq(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_ne(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_lt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_le(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_gt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_ge(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_cmp(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_eq(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_ne(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_lt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_le(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_gt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_ge(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mchar, mvarchar);
+
+-- MVARCHAR <> MCHAR
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_cmp(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_eq(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_ne(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_lt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_le(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_gt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_ge(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_cmp(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_eq(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_ne(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_lt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_le(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_gt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_ge(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mvarchar, mchar);
+
+-- MCHAR - MVARCHAR operations
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_mvarchar_concat(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_mchar_concat(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_mchar(mvarchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mvarchar as mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_mvarchar(mchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mchar as mvarchar);
+
+-- Aggregates
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_larger(mchar, mchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE max (mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_smaller(mchar, mchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE min (mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_larger(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE max (mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_smaller(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE min (mvarchar);
+
+-- B-tree and hash index support
+ALTER EXTENSION mchar ADD OPERATOR FAMILY icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR FAMILY case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_icase_ops USING hash;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_icase_ops USING hash;
+
+
+-- Index support for LIKE
+
+-- mchar_pattern_fixed_prefix may exist with a different number of arguments, so it is added without a signature
+ALTER EXTENSION mchar ADD FUNCTION mchar_pattern_fixed_prefix;
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_greaterstring(internal);
+
+ALTER EXTENSION mchar ADD FUNCTION isfulleq_mchar(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION fullhash_mchar(mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR == (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_fill_ops USING hash;
+
+ALTER EXTENSION mchar ADD FUNCTION isfulleq_mvarchar(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION fullhash_mvarchar(mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR == (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_fill_ops USING hash;
+
+
diff --git a/contrib/mchar/mchar.control b/contrib/mchar/mchar.control
new file mode 100644
index 00000000000..02668a5d617
--- /dev/null
+++ b/contrib/mchar/mchar.control
@@ -0,0 +1,6 @@
+# mchar extension
+comment = 'SQL Server text type'
+default_version = '2.2.1'
+module_pathname = '$libdir/mchar'
+relocatable = true
+trusted = true
diff --git a/contrib/mchar/mchar.h b/contrib/mchar/mchar.h
new file mode 100644
index 00000000000..2bfd14004cd
--- /dev/null
+++ b/contrib/mchar/mchar.h
@@ -0,0 +1,64 @@
+#ifndef __MCHAR_H__
+#define __MCHAR_H__
+
+#include "postgres.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "varatt.h"
+
+typedef struct {
+ int32 len; /* varlena length word; read and written through VARSIZE()/SET_VARSIZE() */
+ int32 typmod; /* declared length in characters; mchar_strip() stores -1 for "no declared length" */
+ UChar data[1]; /* UChar (UTF-16) units; actually variable length, sized via the length word */
+} MChar;
+
+#define MCHARHDRSZ offsetof(MChar, data) /* bytes that precede the character data */
+#define MCHARLENGTH(m) ( VARSIZE(m)-MCHARHDRSZ ) /* payload size in bytes */
+#define UCHARLENGTH(m) ( MCHARLENGTH(m)/sizeof(UChar) ) /* payload size in UChar units (not code points) */
+
+#define DatumGetMChar(m) ((MChar*)DatumGetPointer(m)) /* plain pointer cast, no detoasting */
+#define MCharGetDatum(m) PointerGetDatum(m)
+
+#define PG_GETARG_MCHAR(n) ((MChar*) PG_DETOAST_DATUM(PG_GETARG_DATUM(n))) /* detoasted argument n */
+#define PG_GETARG_MCHAR_COPY(n) ((MChar*) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n))) /* detoasted private copy */
+
+#define PG_RETURN_MCHAR(m) PG_RETURN_POINTER(m)
+
+typedef struct {
+ int32 len; /* varlena length word; read and written through VARSIZE()/SET_VARSIZE() */
+ UChar data[1]; /* UChar (UTF-16) units; actually variable length, sized via the length word */
+} MVarChar;
+
+#define MVARCHARHDRSZ offsetof(MVarChar, data) /* bytes that precede the character data */
+#define MVARCHARLENGTH(m) ( VARSIZE(m)-MVARCHARHDRSZ ) /* payload size in bytes */
+#define UVARCHARLENGTH(m) ( MVARCHARLENGTH(m)/sizeof(UChar) ) /* payload size in UChar units (not code points) */
+
+#define DatumGetMVarChar(m) ((MVarChar*)DatumGetPointer(m)) /* plain pointer cast, no detoasting */
+#define MVarCharGetDatum(m) PointerGetDatum(m)
+
+#define PG_GETARG_MVARCHAR(n) ((MVarChar*) PG_DETOAST_DATUM(PG_GETARG_DATUM(n))) /* detoasted argument n */
+#define PG_GETARG_MVARCHAR_COPY(n) ((MVarChar*) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n))) /* detoasted private copy */
+
+#define PG_RETURN_MVARCHAR(m) PG_RETURN_POINTER(m)
+
+
+int Char2UChar(const char * src, int srclen, UChar *dst);
+int UChar2Char(const UChar * src, int srclen, char *dst);
+int UChar2Wchar(UChar * src, int srclen, pg_wchar *dst);
+int UCharCompare(UChar * a, int alen, UChar *b, int blen);
+int UCharCaseCompare(UChar * a, int alen, UChar *b, int blen);
+
+void FillWhiteSpace( UChar *dst, int n );
+
+int lengthWithoutSpaceVarChar(MVarChar *m);
+int lengthWithoutSpaceChar(MChar *m);
+
+extern PGDLLEXPORT Datum mchar_hash(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum mvarchar_hash(PG_FUNCTION_ARGS);
+
+int m_isspace(UChar c); /* is == ' ' */
+
+Datum hash_uchar( UChar *s, int len );
+#endif
diff --git a/contrib/mchar/mchar_io.c b/contrib/mchar/mchar_io.c
new file mode 100644
index 00000000000..d6c2ac7d393
--- /dev/null
+++ b/contrib/mchar/mchar_io.c
@@ -0,0 +1,403 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include <utils/array.h>
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+PG_FUNCTION_INFO_V1(mchar_in);			/* text input for mchar */
+Datum mchar_in(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mchar_out);			/* text output for mchar */
+Datum mchar_out(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mchar);				/* length coercion mchar -> mchar(n) */
+Datum mchar(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(mvarchar_in);		/* text input for mvarchar */
+Datum mvarchar_in(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mvarchar_out);		/* text output for mvarchar */
+Datum mvarchar_out(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mvarchar);			/* length coercion mvarchar -> mvarchar(n) */
+Datum mvarchar(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(mvarchar_support);	/* planner support function for mvarchar() */
+Datum mvarchar_support(PG_FUNCTION_ARGS);	/* name must match the function defined in this file */
+
+PG_FUNCTION_INFO_V1(mchartypmod_in);
+/* Typmod input for mchar/mvarchar: accepts exactly one modifier (the length, >= 1) and returns it as int4. */
+Datum
+mchartypmod_in(PG_FUNCTION_ARGS) {
+	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
+	int32	   *tl;
+	int			n;
+
+	tl = ArrayGetIntegerTypmods(ta, &n);
+
+	if (n != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid type modifier")));
+	if (*tl < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("length for type mchar/mvarchar must be at least 1")));
+
+	PG_RETURN_INT32(*tl);	/* go through the fmgr macro instead of relying on an implicit int32 -> Datum conversion */
+}
+
+PG_FUNCTION_INFO_V1(mchartypmod_out);
+/* Typmod output: renders a positive modifier as "(N)" and anything else as the empty string. */
+Datum
+mchartypmod_out(PG_FUNCTION_ARGS) {
+	int32		length = PG_GETARG_INT32(0);
+	char	   *text = (char *) palloc(64);
+
+	/* Start from the empty string; it is overwritten only for a positive length. */
+	text[0] = '\0';
+	if (length > 0)
+		snprintf(text, 64, "(%d)", (int) length);
+
+	PG_RETURN_CSTRING(text);
+}
+
+static void
+mchar_strip( MChar * m, int atttypmod ) {
+ int maxlen;
+ /* Normalize m in place: cut it to atttypmod characters, record the typmod, drop trailing blanks. */
+ if ( atttypmod<=0 ) {
+ atttypmod =-1; /* every non-positive typmod is stored as -1 */
+ } else {
+ int charlen = u_countChar32( m->data, UCHARLENGTH(m) );
+ /* charlen counts code points, whereas UCHARLENGTH() counts UChar units */
+ if ( charlen > atttypmod ) {
+ int i=0;
+ U16_FWD_N( m->data, i, UCHARLENGTH(m), atttypmod); /* i becomes the unit offset just past atttypmod code points */
+ SET_VARSIZE( m, sizeof(UChar) * i + MCHARHDRSZ );
+ }
+ }
+
+ m->typmod = atttypmod;
+ /* UCHARLENGTH() already reflects any truncation done above; now back up over trailing m_isspace() units. */
+ maxlen = UCHARLENGTH(m);
+ while( maxlen>0 && m_isspace( m->data[ maxlen-1 ] ) )
+ maxlen--;
+
+ SET_VARSIZE(m, sizeof(UChar) * maxlen + MCHARHDRSZ);
+}
+
+
+Datum
+mchar_in(PG_FUNCTION_ARGS) {
+	char	   *input = PG_GETARG_CSTRING(0);
+#ifdef NOT_USED
+	Oid			typelem = PG_GETARG_OID(1);
+#endif
+	int32		atttypmod = PG_GETARG_INT32(2);
+	int32		inputlen = strlen(input);
+	int32		nunits;
+	MChar	   *value;
+	/* Text input for mchar: check the bytes with pg_verifymbstr(), then convert them with Char2UChar(). */
+	pg_verifymbstr(input, inputlen, false);
+	/* The buffer is sized from the input byte count; the real size is set once the converted length is known. */
+	value = (MChar *) palloc(MCHARHDRSZ + inputlen * sizeof(UChar) * 4 /* upper limit of length */);
+	nunits = Char2UChar(input, inputlen, value->data);
+	SET_VARSIZE(value, MCHARHDRSZ + sizeof(UChar) * nunits);
+	/* mchar_strip() applies the declared length, stores the typmod and removes trailing blanks. */
+	mchar_strip(value, atttypmod);
+	PG_RETURN_MCHAR(value);
+}
+
+Datum
+mchar_out(PG_FUNCTION_ARGS) {
+ MChar *in = PG_GETARG_MCHAR(0);
+ char *out;
+ size_t size, inlen = UCHARLENGTH(in);
+ size_t charlen = u_countChar32(in->data, inlen);
+ /* Text output for mchar: converts the stored units and appends spaces up to the stored typmod. */
+ Assert( in->typmod < 0 || charlen<=in->typmod );
+ size = ( in->typmod < 0 ) ? inlen : in->typmod; /* character budget: stored units, or the declared length */
+ size *= pg_database_encoding_max_length(); /* scaled by the widest character of the database encoding */
+
+ out = (char*)palloc( size+1 ); /* +1 for the terminating NUL */
+ size = UChar2Char( in->data, inlen, out ); /* size is reused: the return value is the write offset from here on */
+ /* Fewer characters stored than declared: pad with one byte ' ' per missing character. */
+ if ( in->typmod>0 && charlen < in->typmod ) {
+ memset( out+size, ' ', in->typmod - charlen);
+ size += in->typmod - charlen;
+ }
+
+ out[size] = '\0';
+
+ PG_FREE_IF_COPY(in,0);
+
+ PG_RETURN_CSTRING(out);
+}
+
+Datum
+mchar(PG_FUNCTION_ARGS) {
+	MChar	   *original = PG_GETARG_MCHAR(0);
+	int32		typmod = PG_GETARG_INT32(1);
+#ifdef NOT_USED
+	bool		isExplicit = PG_GETARG_BOOL(2);
+#endif
+	size_t		nbytes = VARSIZE(original);
+	MChar	   *copy = palloc(nbytes);
+
+	/* Length coercion mchar -> mchar(n): work on a private copy so the argument itself is never modified. */
+	memcpy(copy, original, nbytes);
+	PG_FREE_IF_COPY(original, 0);
+	/* mchar_strip() cuts the copy to typmod characters, records the typmod and drops trailing blanks. */
+	mchar_strip(copy, typmod);
+	PG_RETURN_MCHAR(copy);
+}
+
+Datum
+mvarchar_in(PG_FUNCTION_ARGS) {
+	char	   *s = PG_GETARG_CSTRING(0);
+#ifdef NOT_USED
+	Oid			typelem = PG_GETARG_OID(1);
+#endif
+	int32		atttypmod = PG_GETARG_INT32(2);
+	MVarChar   *result;
+	int32		slen = strlen(s), rlen;
+
+	/* Text input for mvarchar: check the bytes with pg_verifymbstr(), then convert them with Char2UChar(). */
+	pg_verifymbstr(s, slen, false);
+	result = (MVarChar*)palloc( MVARCHARHDRSZ + slen * sizeof(UChar) * 2 /* upper limit of length */ );
+	rlen = Char2UChar( s, slen, result->data );
+	SET_VARSIZE(result, sizeof(UChar) * rlen + MVARCHARHDRSZ);
+	/* An over-long value is a user data error: raise it with SQLSTATE 22001 rather than elog's internal-error code. */
+	if ( atttypmod > 0 && atttypmod < u_countChar32(result->data, UVARCHARLENGTH(result)) )
+		ereport(ERROR, (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+				errmsg("value too long for type mvarchar(%d)", atttypmod)));
+	PG_RETURN_MVARCHAR(result);
+}
+
+Datum
+mvarchar_out(PG_FUNCTION_ARGS) {
+	MVarChar   *value = PG_GETARG_MVARCHAR(0);
+	size_t		nunits = UVARCHARLENGTH(value);
+	size_t		nbytes;
+	char	   *text;
+
+	/* Text output for mvarchar: convert the stored UChar units back into a NUL-terminated C string. */
+	/* The buffer allows pg_database_encoding_max_length() bytes per stored unit, plus the terminator. */
+	text = (char *) palloc(nunits * pg_database_encoding_max_length() + 1);
+	nbytes = UChar2Char(value->data, nunits, text);
+	text[nbytes] = '\0';
+
+	PG_FREE_IF_COPY(value, 0);
+
+	PG_RETURN_CSTRING(text);
+}
+
+static void
+mvarchar_strip(MVarChar *m, int atttypmod) {
+	int charlen = u_countChar32(m->data, UVARCHARLENGTH(m));
+	/* Truncate m in place to at most atttypmod code points; no-op when atttypmod < 0 or the value already fits. */
+	if ( atttypmod>=0 && atttypmod < charlen ) {
+		int i=0;
+		U16_FWD_N( m->data, i, UVARCHARLENGTH(m), atttypmod);	/* bound is in UChar units: using charlen here cut too early or split a surrogate pair */
+		SET_VARSIZE(m, sizeof(UChar) * i + MVARCHARHDRSZ);
+	}
+}
+
+Datum
+mvarchar(PG_FUNCTION_ARGS) {
+	MVarChar   *source = PG_GETARG_MVARCHAR(0);
+	MVarChar   *result;
+	int32		typmod = PG_GETARG_INT32(1);
+	bool		isExplicit = PG_GETARG_BOOL(2);
+	int			charlen = u_countChar32(source->data, UVARCHARLENGTH(source));
+
+	/* Length coercion to mvarchar(n): the argument is copied first so it is never modified in place. */
+	result = palloc( VARSIZE(source) );
+	memcpy( result, source, VARSIZE(source) );
+	PG_FREE_IF_COPY(source,0);
+	/* Too long: an explicit cast truncates silently, an implicit one fails with SQLSTATE 22001. */
+	if ( typmod>=0 && typmod < charlen ) {
+		if ( isExplicit )
+			mvarchar_strip(result, typmod);
+		else
+			ereport(ERROR, (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION), errmsg("value too long for type mvarchar(%d)", typmod)));
+	}
+	PG_RETURN_MVARCHAR(result);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_mchar);
+/* Cast mvarchar -> mchar: copy the payload behind an MChar header, then let mchar_strip() fit it to typmod. */
+Datum
+mvarchar_mchar(PG_FUNCTION_ARGS) {
+	MVarChar   *varvalue = PG_GETARG_MVARCHAR(0);
+	int32		typmod = PG_GETARG_INT32(1);
+#ifdef NOT_USED
+	bool		isExplicit = PG_GETARG_BOOL(2);
+#endif
+	size_t		payload = MVARCHARLENGTH(varvalue);	/* data bytes, header excluded */
+	MChar	   *fixed = palloc(payload + MCHARHDRSZ);
+
+	SET_VARSIZE(fixed, payload + MCHARHDRSZ);
+	memcpy(fixed->data, varvalue->data, payload);
+
+	PG_FREE_IF_COPY(varvalue, 0);
+
+	mchar_strip(fixed, typmod);	/* also fills in fixed->typmod, which is not set before this call */
+
+	PG_RETURN_MCHAR(fixed);
+}
+
+PG_FUNCTION_INFO_V1(mchar_mvarchar);
+/* Cast mchar -> mvarchar: copy the stored units, pad up to the source typmod, then apply the target typmod. */
+Datum
+mchar_mvarchar(PG_FUNCTION_ARGS) {
+	MChar	   *source = PG_GETARG_MCHAR(0);
+	MVarChar   *result;
+	int32		typmod = PG_GETARG_INT32(1);
+	int32		scharlen = u_countChar32(source->data, UCHARLENGTH(source));
+	int32		curlen, maxcharlen;
+#ifdef NOT_USED
+	bool		isExplicit = PG_GETARG_BOOL(2);
+#endif
+
+	maxcharlen = (source->typmod > 0) ? source->typmod : scharlen;
+	/* Room for two UChar units per character, the most a single code point can take in UTF-16. */
+	result = palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * maxcharlen );
+
+	curlen = UCHARLENGTH( source );	/* curlen is in UChar units, scharlen in code points */
+	if ( curlen > 0 )
+		memcpy( result->data, source->data, MCHARLENGTH(source) );
+	if ( source->typmod > 0 && scharlen < source->typmod ) {	/* fewer characters stored than declared */
+		FillWhiteSpace( result->data + curlen, source->typmod-scharlen );
+		curlen += source->typmod-scharlen;
+	}
+	SET_VARSIZE(result, MVARCHARHDRSZ + curlen *sizeof(UChar));
+
+	PG_FREE_IF_COPY(source,0);
+
+	mvarchar_strip( result, typmod );
+
+	PG_RETURN_MVARCHAR(result);	/* result is an MVarChar, so use the matching return macro */
+}
+
+PG_FUNCTION_INFO_V1(mchar_send);
+/* Binary output for mchar: the stored UChar units, followed by FillWhiteSpace() padding up to typmod. */
+Datum
+mchar_send(PG_FUNCTION_ARGS) {
+	MChar	   *value = PG_GETARG_MCHAR(0);
+	size_t		nunits = UCHARLENGTH(value);
+	size_t		nchars = u_countChar32(value->data, nunits);
+	StringInfoData msg;
+
+	Assert( value->typmod < 0 || nchars<=value->typmod );
+
+	pq_begintypsend(&msg);
+	pq_sendbytes(&msg, (char*)value->data, nunits * sizeof(UChar) );
+
+	if ( value->typmod>0 && nchars < value->typmod ) {
+		int			npad = value->typmod - nchars;
+		UChar	   *pad = palloc( sizeof(UChar) * npad );
+
+		FillWhiteSpace( pad, npad );
+		pq_sendbytes(&msg, (char*)pad, sizeof(UChar) * npad);
+		pfree(pad);
+	}
+
+	PG_FREE_IF_COPY(value,0);
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&msg));
+}
+
+PG_FUNCTION_INFO_V1(mchar_recv);
+/* Binary input for mchar: the rest of the message is taken as raw UChar units, then normalized by mchar_strip(). */
+Datum
+mchar_recv(PG_FUNCTION_ARGS) {
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	MChar	   *res;
+	int			nbytes;
+#ifdef NOT_USED
+	Oid			typelem = PG_GETARG_OID(1);
+#endif
+	int32		atttypmod = PG_GETARG_INT32(2);
+
+	nbytes = buf->len - buf->cursor;
+	if (nbytes % sizeof(UChar) != 0)	/* client data is untrusted: a dangling half unit cannot be valid */
+		ereport(ERROR, (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), errmsg("invalid length in external \"mchar\" value")));
+	res = (MChar*)palloc( nbytes + MCHARHDRSZ );
+	res->typmod = -1;
+	SET_VARSIZE(res, nbytes + MCHARHDRSZ);	/* the length word is only written through SET_VARSIZE() */
+	pq_copymsgbytes(buf, (char*)res->data, nbytes);
+	/* Same post-processing as mchar_in(): enforce the declared length and strip trailing blanks. */
+	mchar_strip( res, atttypmod );
+	PG_RETURN_MCHAR(res);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_send);
+/* Binary output for mvarchar: the stored UChar units are sent as they are. */
+Datum
+mvarchar_send(PG_FUNCTION_ARGS) {
+	MVarChar   *value = PG_GETARG_MVARCHAR(0);
+	StringInfoData msg;
+
+	pq_begintypsend(&msg);
+	/* UVARCHARLENGTH() counts UChar units; pq_sendbytes() is given the matching byte count. */
+	pq_sendbytes(&msg, (char *) value->data, UVARCHARLENGTH(value) * sizeof(UChar));
+
+	PG_FREE_IF_COPY(value, 0);
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&msg));
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_recv);
+/* Binary input for mvarchar: the rest of the message is taken as raw UChar units, cut to typmod if needed. */
+Datum
+mvarchar_recv(PG_FUNCTION_ARGS) {
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	MVarChar   *res;
+	int			nbytes;
+#ifdef NOT_USED
+	Oid			typelem = PG_GETARG_OID(1);
+#endif
+	int32		atttypmod = PG_GETARG_INT32(2);
+
+	nbytes = buf->len - buf->cursor;
+	if (nbytes % sizeof(UChar) != 0)	/* mvarchar_strip() only resizes when truncating, so an odd byte would be stored */
+		ereport(ERROR, (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION), errmsg("invalid length in external \"mvarchar\" value")));
+	res = (MVarChar*)palloc( nbytes + MVARCHARHDRSZ );
+	SET_VARSIZE(res, nbytes + MVARCHARHDRSZ);	/* the length word is only written through SET_VARSIZE() */
+	pq_copymsgbytes(buf, (char*)res->data, nbytes);
+
+	mvarchar_strip( res, atttypmod );	/* over-long binary input is truncated here, not rejected */
+	PG_RETURN_MVARCHAR(res);
+}
+
+Datum
+mvarchar_support(PG_FUNCTION_ARGS)
+{
+	Node	   *request = (Node *) PG_GETARG_POINTER(0);
+	FuncExpr   *call;
+	Node	   *arg;
+	Node	   *lenarg;
+	int32		have;
+	int32		want;
+
+	/* Support function: only SupportRequestSimplify is handled; any other request gets NULL. */
+	if (!IsA(request, SupportRequestSimplify))
+		PG_RETURN_POINTER(NULL);
+	/* Nothing can be decided unless the second argument (the target typmod) is a non-null constant. */
+	call = ((SupportRequestSimplify *) request)->fcall;
+	lenarg = (Node *) lsecond(call->args);
+	if (!IsA(lenarg, Const) || ((Const *) lenarg)->constisnull)
+		PG_RETURN_POINTER(NULL);
+
+	/* Negative target typmod, or the input's known typmod does not exceed it: return the relabeled input. */
+	arg = (Node *) linitial(call->args);
+	have = exprTypmod(arg);
+	want = DatumGetInt32(((Const *) lenarg)->constvalue);
+	if (want < 0 || (have >= 0 && have <= want))
+		PG_RETURN_POINTER(relabel_to_typmod(arg, want));
+	PG_RETURN_POINTER(NULL);
+}
diff --git a/contrib/mchar/mchar_like.c b/contrib/mchar/mchar_like.c
new file mode 100644
index 00000000000..47b4d7302c7
--- /dev/null
+++ b/contrib/mchar/mchar_like.c
@@ -0,0 +1,984 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+
+#include "catalog/pg_collation.h"
+#include "utils/selfuncs.h"
+#include "utils/memutils.h"
+#include "nodes/primnodes.h"
+#include "nodes/makefuncs.h"
+#include "nodes/supportnodes.h"
+#include "regex/regex.h"
+
+/*
+** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+** Rich $alz is now <rsalz@bbn.com>.
+** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
+**
+** This code was shamelessly stolen from the "pql" code by myself and
+** slightly modified :)
+**
+** All references to the word "star" were replaced by "percent"
+** All references to the word "wild" were replaced by "like"
+**
+** All the nice shell RE matching stuff was replaced by just "_" and "%"
+**
+** As I don't have a copy of the SQL standard handy I wasn't sure whether
+** to leave in the '\' escape character handling.
+**
+** Keith Parks. <keith@mtcc.demon.co.uk>
+**
+** SQL92 lets you specify the escape character by saying
+** LIKE <pattern> ESCAPE <escape character>. We are a small operation
+** so we force you to use '\'. - ay 7/95
+**
+** Now we have the like_escape() function that converts patterns with
+** any specified escape character (or none at all) to the internal
+** default escape character, which is still '\'. - tgl 9/2000
+**
+** The code is rewritten to avoid requiring null-terminated strings,
+** which in turn allows us to leave out some memcpy() operations.
+** This code should be faster and take less memory, but no promises...
+** - thomas 2000-08-06
+**
+** Adapted for UTF-16 by teodor
+*/
+
+#define LIKE_TRUE 1 /* the text matches the pattern */
+#define LIKE_FALSE 0 /* no match at this text position */
+#define LIKE_ABORT (-1) /* text ran out before the pattern: no later start position can match */
+
+
+/*
+ * Compares the single characters at p1 and p2 with UCharCaseCompare() (1 = equal).
+ * Input is trusted to be well-formed UTF-16; at most two UChars are read at each pointer.
+ */
+static int
+uchareq(UChar *p1, UChar *p2) {
+	int	units1 = 0, units2 = 0;
+
+	U16_FWD_1(p1, units1, 2);
+	U16_FWD_1(p2, units2, 2);
+	return UCharCaseCompare(p1, units1, p2, units2) == 0;
+}
+/* Step p past one UTF-16 character (one or two UChars) and reduce plen accordingly. */
+#define NextChar(p, plen) \
+ do { \
+ int __l = 0; \
+ U16_FWD_1((p), __l, (plen));\
+ (p) +=__l; \
+ (plen) -=__l; \
+ } while(0)
+/* Copy one UTF-16 character from src to dst, advancing both pointers and reducing srclen. */
+#define CopyAdvChar(dst, src, srclen) \
+ do { \
+ int __l = 0; \
+ U16_FWD_1((src), __l, (srclen));\
+ (srclen) -= __l; \
+ while (__l-- > 0) \
+ *(dst)++ = *(src)++; \
+ } while (0)
+
+/* UChar forms of the ASCII characters used by the matchers below; all 0 until SET_UCHAR has run. */
+static UChar UCharPercent = 0;
+static UChar UCharBackSlesh = 0;
+static UChar UCharUnderLine = 0;
+static UChar UCharStar = 0;
+static UChar UCharDotDot = 0;
+static UChar UCharUp = 0;
+static UChar UCharLBracket = 0;
+static UChar UCharQ = 0;
+static UChar UCharRBracket = 0;
+static UChar UCharDollar = 0;
+static UChar UCharDot = 0;
+static UChar UCharLFBracket = 0;
+static UChar UCharRFBracket = 0;
+static UChar UCharQuote = 0;
+static UChar UCharSpace = 0;
+static UChar UCharOne = 0;
+static UChar UCharComma = 0;
+static UChar UCharLQBracket = 0;
+static UChar UCharRQBracket = 0;
+/* Store the UChar equivalent of the single char c into uc, using u_charsToUChars(). */
+#define MkUChar(uc, c) do { \
+ char __c = (c); \
+ u_charsToUChars( &__c, &(uc), 1 ); \
+} while(0)
+/* Lazy one-time setup of the constants above; UCharPercent == 0 means "not initialized yet". */
+#define SET_UCHAR if ( UCharPercent == 0 ) { \
+ MkUChar( UCharPercent, '%' ); \
+ MkUChar( UCharBackSlesh, '\\' ); \
+ MkUChar( UCharUnderLine, '_' ); \
+ MkUChar( UCharStar, '*' ); \
+ MkUChar( UCharDotDot, ':' ); \
+ MkUChar( UCharUp, '^' ); \
+ MkUChar( UCharLBracket, '(' ); \
+ MkUChar( UCharQ, '?' ); \
+ MkUChar( UCharRBracket, ')' ); \
+ MkUChar( UCharDollar, '$' ); \
+ MkUChar( UCharDot, '.' ); \
+ MkUChar( UCharLFBracket, '{' ); \
+ MkUChar( UCharRFBracket, '}' ); \
+ MkUChar( UCharQuote, '"' ); \
+ MkUChar( UCharSpace, ' ' ); \
+ MkUChar( UCharOne, '1' ); \
+ MkUChar( UCharComma, ',' ); \
+ MkUChar( UCharLQBracket, '[' ); \
+ MkUChar( UCharRQBracket, ']' ); \
+ }
+
+/* Nonzero only for the plain ' ' character; no other whitespace is recognized. */
+int
+m_isspace(UChar c) {
+	SET_UCHAR;
+	return c == UCharSpace ? 1 : 0;
+}
+
+/*
+ * Matches text t (tlen UChars) against LIKE pattern p (plen UChars), with '%',
+ * '_' and '\' acting as wildcard, single-character wildcard and escape.
+ * Returns LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT when the text ran out first.
+ */
+static int
+MatchUChar(UChar *t, int tlen, UChar *p, int plen) {
+	SET_UCHAR;
+
+	/* Fast path for find substring pattern */
+	if ((plen >= 2) && p[0] == UCharPercent && p[plen-1] == UCharPercent && !u_strFindFirst(p+1, plen-2, &UCharPercent, 1) && !u_strFindFirst(p+1, plen-2, &UCharBackSlesh, 1) && !u_strFindFirst(p+1, plen-2, &UCharUnderLine, 1))
+	{
+		if (plen-2 > tlen)
+			return LIKE_FALSE;
+
+		/* Long inputs only: lower-case both sides once and do a plain substring search. */
+		if (tlen > 100 || plen > 100)
+		{
+			UChar	*tbuf, *pbuf;
+			int	tbufsz = tlen + 512;
+			int	pbufsz = plen + 512;
+			int	tbuflen, pbuflen;
+			bool	found;
+			UErrorCode	status1 = U_ZERO_ERROR;
+			UErrorCode	status2 = U_ZERO_ERROR;
+
+			tbuf = malloc(tbufsz*sizeof(UChar));
+			pbuf = malloc(pbufsz*sizeof(UChar));
+			tbuflen = u_strToLower(tbuf, tbufsz, t, tlen, NULL, &status1);
+			pbuflen = u_strToLower(pbuf, pbufsz, p+1, plen-2, NULL, &status2);
+			if (tbuflen < tbufsz && pbuflen < pbufsz && U_SUCCESS(status1) && U_SUCCESS(status2))
+			{
+				found = u_strFindFirst(tbuf, tbuflen, pbuf, pbuflen) != NULL;
+				free(tbuf);
+				free(pbuf);
+				return found ? LIKE_TRUE : LIKE_FALSE;
+			}
+			/* Conversion failed or did not fit: fall through to the general matcher. */
+			free(tbuf);
+			free(pbuf);
+		}
+	}
+
+	/* Fast path for match-everything pattern */
+	if ((plen == 1) && (*p == UCharPercent))
+		return LIKE_TRUE;
+
+	while ((tlen > 0) && (plen > 0)) {
+		if (*p == UCharBackSlesh) {
+			/* Next pattern char must match literally, whatever it is */
+			NextChar(p, plen);
+			if ((plen <= 0) || !uchareq(t, p))
+				return LIKE_FALSE;
+		} else if (*p == UCharPercent) {
+			/* %% is the same as % according to the SQL standard */
+			/* Advance past all %'s */
+			while ((plen > 0) && (*p == UCharPercent))
+				NextChar(p, plen);
+			/* Trailing percent matches everything. */
+			if (plen <= 0)
+				return LIKE_TRUE;
+
+			/*
+			 * Otherwise, scan for a text position at which we can match the
+			 * rest of the pattern.
+			 */
+			while (tlen > 0) {
+				/*
+				 * Optimization to prevent most recursion: don't recurse
+				 * unless first pattern char might match this text char.
+				 */
+				if (uchareq(t, p) || (*p == UCharBackSlesh) || (*p == UCharUnderLine)) {
+					int	matched = MatchUChar(t, tlen, p, plen);
+
+					if (matched != LIKE_FALSE)
+						return matched; /* TRUE or ABORT */
+				}
+
+				NextChar(t, tlen);
+			}
+
+			/*
+			 * End of text with no match, so no point in trying later places
+			 * to start matching this pattern.
+			 */
+			return LIKE_ABORT;
+		} else if ((*p != UCharUnderLine) && !uchareq(t, p)) {
+			/*
+			 * Not the single-character wildcard and no explicit match? Then
+			 * time to quit...
+			 */
+			return LIKE_FALSE;
+		}
+
+		NextChar(t, tlen);
+		NextChar(p, plen);
+	}
+
+	if (tlen > 0)
+		return LIKE_FALSE; /* end of pattern, but not of text */
+
+	/* End of input string. Do we have matching pattern remaining? */
+	while ((plen > 0) && (*p == UCharPercent)) /* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+
+	/* Pattern remains but the text is exhausted: later start positions cannot match. */
+	return LIKE_ABORT;
+}
+
+/* SQL-callable: true when the mvarchar text (arg 0) matches the LIKE pattern (arg 1) per MatchUChar(). */
+PG_FUNCTION_INFO_V1( mvarchar_like );
+Datum
+mvarchar_like( PG_FUNCTION_ARGS ) {
+	MVarChar	*subject = PG_GETARG_MVARCHAR(0);
+	MVarChar	*pattern = PG_GETARG_MVARCHAR(1);
+	bool	matched;
+
+	matched = MatchUChar( subject->data, UVARCHARLENGTH(subject),
+						  pattern->data, UVARCHARLENGTH(pattern) ) == LIKE_TRUE;
+
+	PG_FREE_IF_COPY(subject,0);
+	PG_FREE_IF_COPY(pattern,1);
+	PG_RETURN_BOOL(matched);
+}
+
+/* SQL-callable: logical negation of mvarchar_like() on the same two arguments. */
+PG_FUNCTION_INFO_V1( mvarchar_notlike );
+Datum
+mvarchar_notlike( PG_FUNCTION_ARGS ) {
+	Datum	liked;
+
+	liked = DirectFunctionCall2( mvarchar_like,
+								 PG_GETARG_DATUM(0),
+								 PG_GETARG_DATUM(1) );
+	PG_RETURN_BOOL( !DatumGetBool(liked) );
+}
+
+/*
+ * Removes the spaces that precede a trailing run of % and _ (as in '111 %'). Returns src itself when nothing is removed, otherwise a palloc'd copy; *dstlen receives the resulting length in UChars.
+ */
+static UChar *
+removeTrailingSpaces( UChar *src, int srclen, int *dstlen, bool *isSpecialLast) {
+ UChar* dst = src;
+ UChar *ptr, *dptr, *markptr;
+ /* *isSpecialLast is set when the last pattern character is whitespace, % or _. */
+ *dstlen = srclen;
+ ptr = src + srclen-1;
+ SET_UCHAR;
+
+ *isSpecialLast = ( srclen > 0 && (u_isspace(*ptr) || *ptr == UCharPercent || *ptr == UCharUnderLine ) ) ? true : false;
+ while( ptr>=src ) {
+ if ( *ptr == UCharPercent || *ptr == UCharUnderLine ) {
+ if ( ptr==src )
+ return dst; /* first character */
+
+ if ( *(ptr-1) == UCharBackSlesh )
+ return dst; /* use src as is */
+
+ if ( u_isspace( *(ptr-1) ) ) {
+ ptr--;
+ break; /* % or _ is after space which should be removed */
+ }
+ } else {
+ return dst;
+ }
+ ptr--;
+ }
+ /* Here ptr is on the space just before the trailing wildcards (or before src when srclen is 0). */
+ markptr = ptr+1;
+ dst = (UChar*)palloc( sizeof(UChar) * srclen );
+
+ /* find last non-space character */
+ while( ptr>=src && u_isspace(*ptr) )
+ ptr--;
+ /* Result = src[0..ptr] followed by the wildcard tail that starts at markptr. */
+ dptr = dst + (ptr-src+1);
+
+ if ( ptr>=src )
+ memcpy( dst, src, sizeof(UChar) * (ptr-src+1) );
+
+ while( markptr - src < srclen ) {
+ *dptr = *markptr;
+ dptr++;
+ markptr++;
+ }
+
+ *dstlen = dptr - dst;
+ return dst;
+}
+
+/* Returns src's data space-padded to typmod characters, or the data itself when no padding is needed; *newlen is in UChars. */
+static UChar*
+addTrailingSpace( MChar *src, int *newlen ) {
+	int	srclen = UCHARLENGTH(src);
+	int	scharlen = u_countChar32(src->data, srclen);
+	int	padlen = src->typmod - scharlen;
+
+	if ( padlen > 0 ) {
+		UChar	*res = (UChar*) palloc( sizeof(UChar) * (srclen + src->typmod) );
+		memcpy( res, src->data, sizeof(UChar) * srclen );
+		FillWhiteSpace( res + srclen, padlen );
+		/* Count UChars, not characters: with surrogate pairs srclen exceeds scharlen. */
+		*newlen = srclen + padlen;
+		return res;
+	}
+	*newlen = srclen;
+	return src->data;
+}
+
+/* SQL-callable: mchar text (arg 0) LIKE mvarchar pattern (arg 1), with trailing-space adjustment on both sides. */
+PG_FUNCTION_INFO_V1( mchar_like );
+Datum
+mchar_like( PG_FUNCTION_ARGS ) {
+	MChar	*subject = PG_GETARG_MCHAR(0);
+	MVarChar	*pattern = PG_GETARG_MVARCHAR(1);
+	bool	pad_subject = false;
+	UChar	*pat_data;
+	UChar	*subj_data = subject->data;
+	int	pat_len = 0;
+	int	subj_len = UCHARLENGTH(subject);
+	int	verdict;
+
+	/* Trim spaces before trailing wildcards and learn whether the text must be padded. */
+	pat_data = removeTrailingSpaces( pattern->data, UVARCHARLENGTH(pattern),
+									 &pat_len, &pad_subject );
+	if ( pad_subject )
+		subj_data = addTrailingSpace( subject, &subj_len );
+
+	verdict = MatchUChar( subj_data, subj_len, pat_data, pat_len );
+
+	/* Release only the buffers that the helpers allocated. */
+	if ( pat_data != pattern->data )
+		pfree( pat_data );
+	if ( subj_data != subject->data )
+		pfree( subj_data );
+
+	PG_FREE_IF_COPY(subject,0);
+	PG_FREE_IF_COPY(pattern,1);
+	PG_RETURN_BOOL(verdict == LIKE_TRUE);
+}
+
+/* SQL-callable: logical negation of mchar_like() on the same two arguments. */
+PG_FUNCTION_INFO_V1( mchar_notlike );
+Datum
+mchar_notlike( PG_FUNCTION_ARGS ) {
+	/* mchar_like() returns a boolean Datum, so decode it as one. */
+	bool	res = DatumGetBool( DirectFunctionCall2(
+						mchar_like,
+						PG_GETARG_DATUM(0),
+						PG_GETARG_DATUM(1)
+					));
+	PG_RETURN_BOOL( !res );
+}
+
+
+
+/*
+ * Extracts the literal (wildcard-free) leading part of a LIKE pattern Const.
+ * Args: pattern Const, Pattern_Type, Const ** that receives the prefix (NULL if none).
+ * Returns a Pattern_Prefix_Status as int32; only Pattern_Type_Like is handled.
+ */
+PG_FUNCTION_INFO_V1( mchar_pattern_fixed_prefix );
+Datum
+mchar_pattern_fixed_prefix( PG_FUNCTION_ARGS ) {
+	Const	*patt = (Const*)PG_GETARG_POINTER(0);
+	Pattern_Type	ptype = (Pattern_Type)PG_GETARG_INT32(1);
+	Const	**prefix = (Const**)PG_GETARG_POINTER(2);
+	UChar	*spatt;
+	int32	slen, prefixlen=0, i=0;
+	MVarChar	*sprefix;
+
+	*prefix = NULL;
+
+	if ( ptype != Pattern_Type_Like )
+		PG_RETURN_INT32(Pattern_Prefix_None);
+
+	SET_UCHAR;
+
+	spatt = ((MVarChar*)DatumGetPointer(patt->constvalue))->data;
+	slen = UVARCHARLENGTH( DatumGetPointer(patt->constvalue) );
+
+	sprefix = (MVarChar*)palloc( MCHARHDRSZ /*The biggest hdr!! */ + sizeof(UChar) * slen );
+
+	/* Copy literal UChars up to the first unescaped wildcard. */
+	while( prefixlen < slen && i < slen ) {
+		if ( spatt[i] == UCharPercent || spatt[i] == UCharUnderLine )
+			break;
+		else if ( spatt[i] == UCharBackSlesh ) {
+			i++;	/* a backslash makes the following UChar literal */
+			if ( i>= slen )
+				break;
+		}
+		sprefix->data[ prefixlen++ ] = spatt[i++];
+	}
+
+	/* Trailing whitespace is dropped from the prefix. */
+	while( prefixlen > 0 ) {
+		if ( ! u_isspace( sprefix->data[ prefixlen-1 ] ) )
+			break;
+		prefixlen--;
+	}
+
+	if ( prefixlen == 0 ) {
+		pfree( sprefix );
+		PG_RETURN_INT32(Pattern_Prefix_None);
+	}
+
+	SET_VARSIZE(sprefix, sizeof(UChar) * prefixlen + MVARCHARHDRSZ);
+
+	/* NOTE(review): constlen is passed as VARSIZE(sprefix) rather than the type's typlen -- confirm this is intended. */
+	*prefix = makeConst( patt->consttype, -1, InvalidOid, VARSIZE(sprefix), PointerGetDatum(sprefix), false, false );
+
+	/* The whole pattern was copied as a literal: report an exact match. */
+	if ( prefixlen == slen )
+		PG_RETURN_INT32( Pattern_Prefix_Exact );
+	PG_RETURN_INT32( Pattern_Prefix_Partial );
+}
+
+/* True when left compares strictly less than right under UCharCaseCompare(). */
+static bool
+checkCmp( UChar *left, int32 leftlen, UChar *right, int32 rightlen ) {
+	return UCharCaseCompare( left, leftlen, right, rightlen ) < 0;
+}
+
+/* Given a pattern Const, builds a Const whose value compares greater than it under UCharCaseCompare(); returns NULL if none is found. */
+PG_FUNCTION_INFO_V1( mchar_greaterstring );
+Datum mchar_greaterstring( PG_FUNCTION_ARGS );
+Datum
+mchar_greaterstring( PG_FUNCTION_ARGS ) {
+ Const *patt = (Const*)PG_GETARG_POINTER(0);
+ char *src = (char*)DatumGetPointer( patt->constvalue );
+ int dstlen, srclen = VARSIZE(src);
+ char *dst = palloc( srclen );
+ UChar *ptr, *srcptr;
+ /* Work on a private copy of the value: its last UChar is incremented in place below. */
+ memcpy( dst, src, srclen );
+
+ srclen = dstlen = UVARCHARLENGTH( dst );
+ ptr = ((MVarChar*)dst)->data;
+ srcptr = ((MVarChar*)src)->data;
+
+ while( dstlen > 0 ) {
+ UChar *lastchar = ptr + dstlen - 1;
+
+ if ( !U16_IS_LEAD( *lastchar ) ) {
+ while( *lastchar<0xffff ) {
+ /* Try the next UChar value; accept it once the candidate compares greater than the source. */
+ (*lastchar)++;
+
+ if ( ublock_getCode(*lastchar) == UBLOCK_INVALID_CODE || !checkCmp( srcptr, srclen, ptr, dstlen ) )
+ continue;
+ else {
+ SET_VARSIZE(dst, sizeof(UChar) * dstlen + MVARCHARHDRSZ);
+
+ PG_RETURN_POINTER( makeConst( patt->consttype, -1,
+ InvalidOid, VARSIZE(dst), PointerGetDatum(dst), false, false ) );
+ }
+ }
+ }
+ /* Nothing greater found at this position: drop the last UChar and retry on the shorter string. */
+ dstlen--;
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * Rewrites LIKE pattern pat (plen UChars) so that '\' is its escape character,
+ * given the requested escape string esc (elen UChars; empty means no escape).
+ * Output goes to result, which must hold up to 2 * plen UChars; returns the
+ * output length in UChars. Raises ERROR if esc is longer than one character.
+ */
+static int
+do_like_escape( UChar *pat, int plen, UChar *esc, int elen, UChar *result) {
+	UChar	*in = pat;
+	UChar	*out = result;
+	UChar	*esc_end = esc;
+	bool	escaped = false;
+
+	SET_UCHAR;
+
+	if ( elen == 0 ) {
+		/*
+		 * No escape character is wanted. Double any backslashes in the
+		 * pattern to make them act like ordinary characters.
+		 */
+		while (plen > 0) {
+			if ( *in == UCharBackSlesh )
+				*out++ = UCharBackSlesh;
+			CopyAdvChar(out, in, plen);
+		}
+		return ( out - result );
+	}
+
+	/* The specified escape must be only a single character. */
+	NextChar(esc_end, elen);
+	if (elen != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+				 errmsg("invalid escape string"),
+				 errhint("Escape string must be empty or one character.")));
+
+	/* If specified escape is '\', just copy the pattern as-is. */
+	if ( *esc == UCharBackSlesh ) {
+		memcpy(result, pat, plen * sizeof(UChar));
+		return plen;
+	}
+
+	/*
+	 * Otherwise, convert occurrences of the specified escape character to
+	 * '\', and double occurrences of '\' --- unless they immediately
+	 * follow an escape character!
+	 */
+	while (plen > 0) {
+		if ( uchareq(in, esc) && !escaped ) {
+			/* unescaped escape character: emit a backslash in its place */
+			*out++ = UCharBackSlesh;
+			NextChar(in, plen);
+			escaped = true;
+			continue;
+		}
+		if ( *in == UCharBackSlesh ) {
+			*out++ = UCharBackSlesh;
+			if (!escaped)
+				*out++ = UCharBackSlesh;
+			NextChar(in, plen);
+		} else
+			CopyAdvChar(out, in, plen);
+		escaped = false;
+	}
+
+	return ( out - result );
+}
+
+/* SQL-callable: converts pattern (arg 0) with escape string (arg 1) via do_like_escape(); the buffer is sized at twice the pattern length. */
+PG_FUNCTION_INFO_V1( mvarchar_like_escape );
+Datum
+mvarchar_like_escape( PG_FUNCTION_ARGS ) {
+	MVarChar	*pattern = PG_GETARG_MVARCHAR(0);
+	MVarChar	*escape = PG_GETARG_MVARCHAR(1);
+	int	patlen = UVARCHARLENGTH(pattern);
+	MVarChar	*converted;
+	int	outlen;
+
+	converted = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar)*2*patlen );
+	outlen = do_like_escape( pattern->data, patlen,
+							 escape->data, UVARCHARLENGTH(escape), converted->data );
+	converted->len = MVARCHARHDRSZ + outlen * sizeof(UChar);
+	SET_VARSIZE(converted, converted->len);
+	PG_FREE_IF_COPY(pattern,0);
+	PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MVARCHAR(converted);
+}
+/* Parent memory context for cached regexes; created on first use in URE_compile_and_cache(). */
+static MemoryContext McharRgCntx;
+/* Cache capacity, and one cache entry: its memory context, source pattern (length in UChars), compile flags and compiled regex. */
+#define RE_CACHE_SIZE 32
+typedef struct ReCache {
+ MemoryContext cntx;
+ UChar *pattern;
+ int length;
+ int flags;
+ regex_t re;
+} ReCache;
+/* re_array[0 .. num_res-1] are the live entries, most recently used first. */
+static int num_res = 0;
+static ReCache re_array[RE_CACHE_SIZE]; /* cached re's */
+static const int mchar_regex_flavor = REG_ADVANCED | REG_ICASE;
+
+/*
+ * Returns the compiled regex for text_re (text_re_len UChars) with cflags,
+ * compiling and caching it on a miss. The cache holds up to RE_CACHE_SIZE
+ * entries, most recently used first; a pattern that fails to compile raises ERROR.
+ */
+static regex_t *
+URE_compile_and_cache(UChar *text_re, int text_re_len, int cflags) {
+	pg_wchar	*pattern;
+	size_t	pattern_len;
+	int	i;
+	int	regcomp_result;
+	ReCache	re_temp;
+	char	errMsg[128];
+	MemoryContext	oldcntx;
+	char	*patternId;
+
+	for (i = 0; i < num_res; i++) {
+		if ( re_array[i].length == text_re_len &&
+			 re_array[i].flags == cflags &&
+			 memcmp(re_array[i].pattern, text_re, sizeof(UChar)*text_re_len) == 0 ) {
+			/* Found, move it to front */
+			if ( i>0 ) {
+				re_temp = re_array[i];
+				memmove(&re_array[1], &re_array[0], i * sizeof(ReCache));
+				re_array[0] = re_temp;
+			}
+			return &re_array[0].re;
+		}
+	}
+
+	if (McharRgCntx == NULL)
+		McharRgCntx = AllocSetContextCreate(TopMemoryContext,
+											"McharRgCntx", ALLOCSET_SMALL_SIZES);
+
+	pattern = (pg_wchar *) palloc((1 + text_re_len) * sizeof(pg_wchar));
+	pattern_len = UChar2Wchar(text_re, text_re_len, pattern);
+
+	/* Compile inside a private context; it is re-parented only once the entry is complete. */
+	re_temp.cntx = AllocSetContextCreate(CurrentMemoryContext,
+										 "McharRegex", ALLOCSET_SMALL_SIZES);
+	oldcntx = MemoryContextSwitchTo(re_temp.cntx);
+
+	regcomp_result = pg_regcomp(&re_temp.re, pattern, pattern_len,
+								cflags, DEFAULT_COLLATION_OID);
+	pfree( pattern );
+
+	if (regcomp_result != REG_OKAY) {
+		pg_regerror(regcomp_result, &re_temp.re, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("invalid regular expression: %s", errMsg)));
+	}
+
+	re_temp.pattern = palloc(text_re_len * sizeof(UChar));
+	memcpy(re_temp.pattern, text_re, text_re_len*sizeof(UChar));
+	re_temp.length = text_re_len;
+	re_temp.flags = cflags;
+
+	/*
+	 * The identifier is the pattern as converted by UChar2Char() (presumably to the database encoding); one
+	 * UChar can need more than sizeof(UChar) bytes there (3 in UTF-8 from U+0800 up), so size for the maximum.
+	 */
+	patternId = palloc0(text_re_len * MAX_MULTIBYTE_CHAR_LEN + 1);
+	UChar2Char(re_temp.pattern, text_re_len, patternId);
+	MemoryContextSetIdentifier(re_temp.cntx, patternId);
+
+	/* Cache full: drop the last (least recently used) entry. */
+	if (num_res >= RE_CACHE_SIZE) {
+		--num_res;
+		Assert(num_res < RE_CACHE_SIZE);
+		MemoryContextDelete(re_array[num_res].cntx);
+	}
+
+	/* Move the new entry's context under McharRgCntx and insert the entry at the front. */
+	MemoryContextSetParent(re_temp.cntx, McharRgCntx);
+	if (num_res > 0)
+		memmove(&re_array[1], &re_array[0], num_res * sizeof(ReCache));
+	re_array[0] = re_temp;
+	num_res++;
+
+	MemoryContextSwitchTo(oldcntx);
+	return &re_array[0].re;
+}
+
+/*
+ * Runs regex pat (via the cache) against dat; both lengths are in UChars.
+ * Returns true on a match and false on no match; any other result raises ERROR.
+ */
+static bool
+URE_compile_and_execute(UChar *pat, int pat_len, UChar *dat, int dat_len,
+						int cflags, int nmatch, regmatch_t *pmatch) {
+	pg_wchar	*wide;
+	size_t	wide_len;
+	regex_t	*compiled;
+	int	rc;
+
+	/* pg_regexec() takes pg_wchar input, so convert the subject first. */
+	wide = (pg_wchar *) palloc((1+dat_len) * sizeof(pg_wchar));
+	wide_len = UChar2Wchar(dat, dat_len, wide);
+
+	compiled = URE_compile_and_cache(pat, pat_len, cflags);
+
+	rc = pg_regexec(compiled, wide, wide_len, 0, NULL, nmatch, pmatch, 0);
+	pfree(wide);
+
+	if (rc == REG_OKAY)
+		return true;
+	if (rc != REG_NOMATCH) {
+		char	failure[128];
+
+		/* re failed??? */
+		pg_regerror(rc, compiled, failure, sizeof(failure));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("regular expression failed: %s", failure)));
+	}
+	return false;
+}
+
+/* SQL-callable: true when mchar text (arg 0) matches regex (arg 1) compiled with mchar_regex_flavor. */
+PG_FUNCTION_INFO_V1( mchar_regexeq );
+Datum
+mchar_regexeq( PG_FUNCTION_ARGS ) {
+	MChar	*subject = PG_GETARG_MCHAR(0);
+	MChar	*regex = PG_GETARG_MCHAR(1);
+	bool	matched;
+
+	matched = URE_compile_and_execute(regex->data, UCHARLENGTH(regex),
+									  subject->data, UCHARLENGTH(subject),
+									  mchar_regex_flavor,
+									  0, NULL);
+
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(matched);
+}
+
+/* SQL-callable: true when mchar text (arg 0) does NOT match regex (arg 1) compiled with mchar_regex_flavor. */
+PG_FUNCTION_INFO_V1( mchar_regexne );
+Datum
+mchar_regexne( PG_FUNCTION_ARGS ) {
+	MChar	*subject = PG_GETARG_MCHAR(0);
+	MChar	*regex = PG_GETARG_MCHAR(1);
+	bool	matched;
+
+	matched = URE_compile_and_execute(regex->data, UCHARLENGTH(regex),
+									  subject->data, UCHARLENGTH(subject),
+									  mchar_regex_flavor,
+									  0, NULL);
+
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(!matched);
+}
+
+/* SQL-callable: true when mvarchar text (arg 0) matches regex (arg 1) compiled with mchar_regex_flavor. */
+PG_FUNCTION_INFO_V1( mvarchar_regexeq );
+Datum
+mvarchar_regexeq( PG_FUNCTION_ARGS ) {
+	MVarChar	*subject = PG_GETARG_MVARCHAR(0);
+	MVarChar	*regex = PG_GETARG_MVARCHAR(1);
+	bool	matched;
+
+	matched = URE_compile_and_execute(regex->data, UVARCHARLENGTH(regex),
+									  subject->data, UVARCHARLENGTH(subject),
+									  mchar_regex_flavor,
+									  0, NULL);
+
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(matched);
+}
+
+/* SQL-callable: true when mvarchar text (arg 0) does NOT match regex (arg 1) compiled with mchar_regex_flavor. */
+PG_FUNCTION_INFO_V1( mvarchar_regexne );
+Datum
+mvarchar_regexne( PG_FUNCTION_ARGS ) {
+	MVarChar	*subject = PG_GETARG_MVARCHAR(0);
+	MVarChar	*regex = PG_GETARG_MVARCHAR(1);
+	bool	matched;
+
+	matched = URE_compile_and_execute(regex->data, UVARCHARLENGTH(regex),
+									  subject->data, UVARCHARLENGTH(subject),
+									  mchar_regex_flavor,
+									  0, NULL);
+
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(!matched);
+}
+/* Converts SQL SIMILAR TO pattern p (plen UChars) into an anchored regex written to result; returns the output length in UChars. */
+static int
+do_similar_escape(UChar *p, int plen, UChar *e, int elen, UChar *result) {
+ UChar *r;
+ bool afterescape = false;
+ bool incharclass = false;
+ int nquotes = 0;
+
+ SET_UCHAR;
+ /* e == NULL or elen < 0 selects '\' as escape; elen == 0 means no escape; otherwise exactly one UChar is required. */
+ if (e==NULL || elen <0 ) {
+ e = &UCharBackSlesh;
+ elen = 1;
+ } else {
+ if ( elen == 0 )
+ e = NULL;
+ else if ( elen != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+ }
+
+ /*
+ * For an explanation of the translation below, see ./utils/adt/regexp.c
+ */
+ r = result;
+ /* Emit the prefix "^(?:"; the closing ")$" is appended after the loop. */
+ *r++ = UCharUp;
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+
+ while( plen>0 ) {
+ UChar pchar = *p;
+
+ if (afterescape)
+ {
+ if (pchar == UCharQuote && !incharclass) /* escape-double-quote? */
+ {
+ if (nquotes == 0)
+ {
+ *r++ = UCharRBracket;
+ *r++ = UCharLFBracket;
+ *r++ = UCharOne;
+ *r++ = UCharComma;
+ *r++ = UCharOne;
+ *r++ = UCharRFBracket;
+ *r++ = UCharQ;
+ *r++ = UCharLBracket;
+ }
+ else if (nquotes == 1)
+ {
+ *r++ = UCharRBracket;
+ *r++ = UCharLFBracket;
+ *r++ = UCharOne;
+ *r++ = UCharComma;
+ *r++ = UCharOne;
+ *r++ = UCharRFBracket;
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
+ errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
+ nquotes++;
+ }
+ else
+ {
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ }
+ afterescape = false;
+ }
+ else if (e && elen > 0 && pchar == *e)
+ {
+ afterescape = true;
+ }
+ else if (incharclass)
+ {
+ if (pchar == UCharBackSlesh)
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ if (pchar == UCharRQBracket)
+ incharclass = false;
+ }
+ else if (pchar == UCharLQBracket)
+ {
+ *r++ = pchar;
+ incharclass = true;
+ }
+ else if (pchar == UCharPercent)
+ {
+ *r++ = UCharDot;
+ *r++ = UCharStar;
+ }
+ else if (pchar == UCharUnderLine)
+ *r++ = UCharDot;
+ else if (pchar == UCharLBracket)
+ {
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+ }
+ else if (pchar == UCharBackSlesh || pchar == UCharDot ||
+ pchar == UCharUp || pchar == UCharDollar)
+ {
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ }
+ else
+ *r++ = pchar;
+ /* The pattern is consumed one UChar at a time, not one code point at a time. */
+ p++, plen--;
+ }
+
+ *r++ = UCharRBracket;
+ *r++ = UCharDollar;
+
+ return r-result;
+}
+
+/* SQL-callable for mchar: passes pattern (arg 0) and optional escape (arg 1) to do_similar_escape(); a NULL pattern yields NULL. */
+PG_FUNCTION_INFO_V1( mchar_similar_escape );
+Datum
+mchar_similar_escape( PG_FUNCTION_ARGS ) {
+	MChar	*pattern, *regex, *escape = NULL;
+	UChar	*esc_data = NULL;
+	int	esc_len = -1, outlen;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pattern = PG_GETARG_MCHAR(0);
+
+	/* An absent or NULL escape argument is passed on as (NULL, -1). */
+	if (PG_NARGS() >= 2 && !PG_ARGISNULL(1)) {
+		escape = PG_GETARG_MCHAR(1);
+		esc_data = escape->data;
+		esc_len = UCHARLENGTH(escape);
+	}
+
+	regex = (MChar*)palloc( MCHARHDRSZ + sizeof(UChar)*(23 + 3*UCHARLENGTH(pattern)) );
+	outlen = do_similar_escape( pattern->data, UCHARLENGTH(pattern),
+								esc_data, esc_len, regex->data );
+	regex->len = MCHARHDRSZ + outlen * sizeof(UChar);
+	regex->typmod=-1;
+	SET_VARSIZE(regex, regex->len);
+
+	PG_FREE_IF_COPY(pattern,0);
+	if ( escape )
+		PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MCHAR(regex);
+}
+
+/* SQL-callable for mvarchar: passes pattern (arg 0) and optional escape (arg 1) to do_similar_escape(); a NULL pattern yields NULL. */
+PG_FUNCTION_INFO_V1( mvarchar_similar_escape );
+Datum
+mvarchar_similar_escape( PG_FUNCTION_ARGS ) {
+	MVarChar	*pattern, *regex, *escape = NULL;
+	UChar	*esc_data = NULL;
+	int	esc_len = -1, outlen;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pattern = PG_GETARG_MVARCHAR(0);
+
+	/* An absent or NULL escape argument is passed on as (NULL, -1). */
+	if (PG_NARGS() >= 2 && !PG_ARGISNULL(1)) {
+		escape = PG_GETARG_MVARCHAR(1);
+		esc_data = escape->data;
+		esc_len = UVARCHARLENGTH(escape);
+	}
+
+	regex = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar)*(23 + 3*UVARCHARLENGTH(pattern)) );
+	outlen = do_similar_escape( pattern->data, UVARCHARLENGTH(pattern),
+								esc_data, esc_len, regex->data );
+	regex->len = MVARCHARHDRSZ + outlen * sizeof(UChar);
+	SET_VARSIZE(regex, regex->len);
+
+	PG_FREE_IF_COPY(pattern,0);
+	if ( escape )
+		PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MVARCHAR(regex);
+}
+
+#define RE_CACHE_SIZE 32 /* NOTE(review): repeats the identical definition made earlier in this file; nothing follows it here */
diff --git a/contrib/mchar/mchar_op.c b/contrib/mchar/mchar_op.c
new file mode 100644
index 00000000000..4694d9cf3c3
--- /dev/null
+++ b/contrib/mchar/mchar_op.c
@@ -0,0 +1,449 @@
+#include "mchar.h"
+
+/* Length of m in UChars, not counting trailing characters accepted by m_isspace(). */
+int
+lengthWithoutSpaceVarChar(MVarChar *m) {
+	int	len = UVARCHARLENGTH(m);
+	for ( ; len > 0; len-- )
+		if ( !m_isspace( m->data[ len-1 ] ) )
+			break;
+	return len;
+}
+
+/* Length of m in UChars, not counting trailing characters accepted by m_isspace(). */
+int
+lengthWithoutSpaceChar(MChar *m) {
+	int	len = UCHARLENGTH(m);
+	for ( ; len > 0; len-- )
+		if ( !m_isspace( m->data[ len-1 ] ) )
+			break;
+	return len;
+}
+
+/* Compares two mchar values with UCharCaseCompare(), ignoring trailing spaces on both sides. */
+static inline int
+mchar_icase_compare( MChar *a, MChar *b ) {
+	int	alen = lengthWithoutSpaceChar(a);
+	int	blen = lengthWithoutSpaceChar(b);
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+/* Compares two mchar values with UCharCompare(), ignoring trailing spaces on both sides. */
+static inline int
+mchar_case_compare( MChar *a, MChar *b ) {
+	int	alen = lengthWithoutSpaceChar(a);
+	int	blen = lengthWithoutSpaceChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+/* Generates SQL-callable mchar_<c>_<type>(a, b), which returns (mchar_<c>_compare(a, b) <action> 0) through PG_RETURN_<ret>. */
+#define MCHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mchar_##c##_##type ); \
+Datum mchar_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mchar_##c##_##type(PG_FUNCTION_ARGS) { \
+ MChar *a = PG_GETARG_MCHAR(0); \
+ MChar *b = PG_GETARG_MCHAR(1); \
+ int res = mchar_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+
+/* The "cmp" variants pass "+" as the action, so "res + 0" returns the raw comparison result as int32. */
+MCHARCMPFUNC( case, eq, ==, BOOL )
+MCHARCMPFUNC( case, ne, !=, BOOL )
+MCHARCMPFUNC( case, lt, <, BOOL )
+MCHARCMPFUNC( case, le, <=, BOOL )
+MCHARCMPFUNC( case, ge, >=, BOOL )
+MCHARCMPFUNC( case, gt, >, BOOL )
+MCHARCMPFUNC( case, cmp, +, INT32 )
+
+MCHARCMPFUNC( icase, eq, ==, BOOL )
+MCHARCMPFUNC( icase, ne, !=, BOOL )
+MCHARCMPFUNC( icase, lt, <, BOOL )
+MCHARCMPFUNC( icase, le, <=, BOOL )
+MCHARCMPFUNC( icase, ge, >=, BOOL )
+MCHARCMPFUNC( icase, gt, >, BOOL )
+MCHARCMPFUNC( icase, cmp, +, INT32 )
+
+/* Returns whichever argument is greater under mchar_icase_compare(); on a tie, the second. */
+PG_FUNCTION_INFO_V1( mchar_larger );
+Datum
+mchar_larger( PG_FUNCTION_ARGS ) {
+	MChar	*first = PG_GETARG_MCHAR(0);
+	MChar	*second = PG_GETARG_MCHAR(1);
+
+	/* Neither input is freed here: one of them is the result. */
+	if ( mchar_icase_compare(first, second) > 0 )
+		PG_RETURN_MCHAR(first);
+	PG_RETURN_MCHAR(second);
+}
+
+/* Returns whichever argument is smaller under mchar_icase_compare(); on a tie, the second. */
+PG_FUNCTION_INFO_V1( mchar_smaller );
+Datum
+mchar_smaller( PG_FUNCTION_ARGS ) {
+	MChar	*first = PG_GETARG_MCHAR(0);
+	MChar	*second = PG_GETARG_MCHAR(1);
+
+	/* Neither input is freed here: one of them is the result. */
+	if ( mchar_icase_compare(first, second) < 0 )
+		PG_RETURN_MCHAR(first);
+	PG_RETURN_MCHAR(second);
+}
+
+
+/* mchar || mchar: each operand with a positive typmod is space-padded to that length before joining; the result has typmod -1. */
+PG_FUNCTION_INFO_V1( mchar_concat );
+Datum
+mchar_concat( PG_FUNCTION_ARGS ) {
+	MChar	*left = PG_GETARG_MCHAR(0);
+	MChar	*right = PG_GETARG_MCHAR(1);
+	MChar	*joined;
+	int	lunits = UCHARLENGTH(left);
+	int	runits = UCHARLENGTH(right);
+	int	lchars = u_countChar32(left->data, lunits);
+	int	rchars = u_countChar32(right->data, runits);
+	int	lpad = 0, rpad = 0, capacity, used;
+
+	/* Spaces needed to bring each operand up to its declared length. */
+	if ( left->typmod > 0 && lchars < left->typmod )
+		lpad = left->typmod - lchars;
+	if ( right->typmod > 0 && rchars < right->typmod )
+		rpad = right->typmod - rchars;
+
+	/* Budget in characters; the allocation reserves two UChars for each. */
+	capacity = ((left->typmod<=0) ? lchars : left->typmod) +
+			   ((right->typmod<=0) ? rchars : right->typmod);
+	joined = (MChar*)palloc( MCHARHDRSZ + sizeof(UChar) * 2 * capacity );
+
+	if ( lunits > 0 )
+		memcpy( joined->data, left->data, MCHARLENGTH(left) );
+	used = lunits;
+	if ( lpad > 0 )
+		FillWhiteSpace( joined->data + used, lpad );
+	used += lpad;
+
+	if ( runits > 0 )
+		memcpy( joined->data + used, right->data, MCHARLENGTH(right) );
+	used += runits;
+	if ( rpad > 0 )
+		FillWhiteSpace( joined->data + used, rpad );
+	used += rpad;
+	joined->typmod = -1;
+	SET_VARSIZE(joined, sizeof(UChar) * used + MCHARHDRSZ);
+	PG_FREE_IF_COPY(left,0);
+	PG_FREE_IF_COPY(right,1);
+	PG_RETURN_MCHAR(joined);
+}
+
+/* Compares two mvarchar values with UCharCaseCompare(), ignoring trailing spaces on both sides. */
+static inline int
+mvarchar_icase_compare( MVarChar *a, MVarChar *b ) {
+	int	alen = lengthWithoutSpaceVarChar(a);
+	int	blen = lengthWithoutSpaceVarChar(b);
+
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+/* Compares two mvarchar values with UCharCompare(), ignoring trailing spaces on both sides. */
+static inline int
+mvarchar_case_compare( MVarChar *a, MVarChar *b ) {
+	int	alen = lengthWithoutSpaceVarChar(a);
+	int	blen = lengthWithoutSpaceVarChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+/* Generates SQL-callable mvarchar_<c>_<type>(a, b), which returns (mvarchar_<c>_compare(a, b) <action> 0) through PG_RETURN_<ret>. */
+#define MVARCHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mvarchar_##c##_##type ); \
+Datum mvarchar_##c##_##type(PG_FUNCTION_ARGS); \
+Datum \
+mvarchar_##c##_##type(PG_FUNCTION_ARGS) { \
+ MVarChar *a = PG_GETARG_MVARCHAR(0); \
+ MVarChar *b = PG_GETARG_MVARCHAR(1); \
+ int res = mvarchar_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+
+/* The "cmp" variants pass "+" as the action, so "res + 0" returns the raw comparison result as int32. */
+MVARCHARCMPFUNC( case, eq, ==, BOOL )
+MVARCHARCMPFUNC( case, ne, !=, BOOL )
+MVARCHARCMPFUNC( case, lt, <, BOOL )
+MVARCHARCMPFUNC( case, le, <=, BOOL )
+MVARCHARCMPFUNC( case, ge, >=, BOOL )
+MVARCHARCMPFUNC( case, gt, >, BOOL )
+MVARCHARCMPFUNC( case, cmp, +, INT32 )
+
+MVARCHARCMPFUNC( icase, eq, ==, BOOL )
+MVARCHARCMPFUNC( icase, ne, !=, BOOL )
+MVARCHARCMPFUNC( icase, lt, <, BOOL )
+MVARCHARCMPFUNC( icase, le, <=, BOOL )
+MVARCHARCMPFUNC( icase, ge, >=, BOOL )
+MVARCHARCMPFUNC( icase, gt, >, BOOL )
+MVARCHARCMPFUNC( icase, cmp, +, INT32 )
+
+/* Returns whichever argument is greater under mvarchar_icase_compare(); on a tie, the second. */
+PG_FUNCTION_INFO_V1( mvarchar_larger );
+Datum
+mvarchar_larger( PG_FUNCTION_ARGS ) {
+	MVarChar	*first = PG_GETARG_MVARCHAR(0);
+	MVarChar	*second = PG_GETARG_MVARCHAR(1);
+
+	/* Neither input is freed here: one of them is the result. */
+	if ( mvarchar_icase_compare(first, second) > 0 )
+		PG_RETURN_MVARCHAR(first);
+	PG_RETURN_MVARCHAR(second);
+}
+
+/* Returns whichever argument is smaller under mvarchar_icase_compare(); on a tie, the second. */
+PG_FUNCTION_INFO_V1( mvarchar_smaller );
+Datum
+mvarchar_smaller( PG_FUNCTION_ARGS ) {
+	MVarChar	*first = PG_GETARG_MVARCHAR(0);
+	MVarChar	*second = PG_GETARG_MVARCHAR(1);
+
+	/* Neither input is freed here: one of them is the result. */
+	if ( mvarchar_icase_compare(first, second) < 0 )
+		PG_RETURN_MVARCHAR(first);
+	PG_RETURN_MVARCHAR(second);
+}
+
+/*
+ * mvarchar || mvarchar: the UChar payload of the second argument is appended
+ * to that of the first; no padding is involved.
+ */
+PG_FUNCTION_INFO_V1( mvarchar_concat );
+Datum
+mvarchar_concat( PG_FUNCTION_ARGS ) {
+	MVarChar	*left = PG_GETARG_MVARCHAR(0);
+	MVarChar	*right = PG_GETARG_MVARCHAR(1);
+	MVarChar	*joined;
+	int	lunits = UVARCHARLENGTH(left);
+	int	runits = UVARCHARLENGTH(right);
+	int	lchars = u_countChar32(left->data, lunits);
+	int	rchars = u_countChar32(right->data, runits);
+
+	/* The allocation reserves two UChars for every character of either input. */
+	joined = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * (lchars + rchars) );
+
+	if ( lunits > 0 )
+		memcpy( joined->data, left->data, MVARCHARLENGTH(left) );
+	if ( runits > 0 )
+		memcpy( joined->data + lunits, right->data, MVARCHARLENGTH(right) );
+
+	SET_VARSIZE(joined, sizeof(UChar) * (lunits + runits) + MVARCHARHDRSZ);
+
+	PG_FREE_IF_COPY(left,0);
+	PG_FREE_IF_COPY(right,1);
+	PG_RETURN_MVARCHAR(joined);
+}
+
+/* mchar || mvarchar: the mchar operand is space-padded to its positive typmod, then the mvarchar payload is appended. */
+PG_FUNCTION_INFO_V1( mchar_mvarchar_concat );
+Datum
+mchar_mvarchar_concat( PG_FUNCTION_ARGS ) {
+	MChar	*left = PG_GETARG_MCHAR(0);
+	MVarChar	*right = PG_GETARG_MVARCHAR(1);
+	MVarChar	*joined;
+	int	lunits = UCHARLENGTH(left);
+	int	runits = UVARCHARLENGTH(right);
+	int	lchars = u_countChar32(left->data, lunits);
+	int	rchars = u_countChar32(right->data, runits);
+	int	lpad = 0, capacity, used;
+
+	if ( left->typmod > 0 && lchars < left->typmod )
+		lpad = left->typmod - lchars;
+
+	capacity = ((left->typmod<=0) ? lchars : left->typmod) + rchars;
+	joined = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * capacity );
+
+	if ( lunits > 0 )
+		memcpy( joined->data, left->data, MCHARLENGTH(left) );
+	used = lunits;
+	if ( lpad > 0 )
+		FillWhiteSpace( joined->data + used, lpad );
+	used += lpad;
+
+	if ( runits > 0 )
+		memcpy( joined->data + used, right->data, MVARCHARLENGTH(right) );
+	used += runits;
+
+	SET_VARSIZE(joined, sizeof(UChar) * used + MVARCHARHDRSZ);
+	PG_FREE_IF_COPY(left,0);
+	PG_FREE_IF_COPY(right,1);
+	PG_RETURN_MVARCHAR(joined);
+}
+
+/* mvarchar || mchar: the mchar payload is appended to the mvarchar payload and then space-padded to its positive typmod. */
+PG_FUNCTION_INFO_V1( mvarchar_mchar_concat );
+Datum
+mvarchar_mchar_concat( PG_FUNCTION_ARGS ) {
+	MVarChar	*left = PG_GETARG_MVARCHAR(0);
+	MChar	*right = PG_GETARG_MCHAR(1);
+	MVarChar	*joined;
+	int	lunits = UVARCHARLENGTH(left);
+	int	runits = UCHARLENGTH(right);
+	int	lchars = u_countChar32(left->data, lunits);
+	int	rchars = u_countChar32(right->data, runits);
+	int	rpad = 0, capacity, used;
+
+	if ( right->typmod > 0 && rchars < right->typmod )
+		rpad = right->typmod - rchars;
+
+	capacity = lchars + ((right->typmod<=0) ? rchars : right->typmod);
+	joined = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * capacity );
+
+	if ( lunits > 0 )
+		memcpy( joined->data, left->data, MVARCHARLENGTH(left) );
+	used = lunits;
+
+	if ( runits > 0 )
+		memcpy( joined->data + used, right->data, MCHARLENGTH(right) );
+	used += runits;
+	if ( rpad > 0 )
+		FillWhiteSpace( joined->data + used, rpad );
+	used += rpad;
+
+	SET_VARSIZE(joined, sizeof(UChar) * used + MVARCHARHDRSZ);
+	PG_FREE_IF_COPY(left,0);
+	PG_FREE_IF_COPY(right,1);
+	PG_RETURN_MVARCHAR(joined);
+}
+
+/*
+ * mchar <> mvarchar
+ */
+static inline int
+mc_mv_icase_compare( MChar *a, MVarChar *b ) {
+    int lhs_units = lengthWithoutSpaceChar(a);      /* length the mchar side is compared at */
+    int rhs_units = lengthWithoutSpaceVarChar(b);   /* same for the mvarchar side */
+
+    return UCharCaseCompare( a->data, lhs_units, b->data, rhs_units );
+}
+/* mchar vs mvarchar through UCharCompare(); operand lengths come from the lengthWithoutSpace*() helpers. */
+static inline int
+mc_mv_case_compare( MChar *a, MVarChar *b ) {
+    int lhs_units = lengthWithoutSpaceChar(a);
+    int rhs_units = lengthWithoutSpaceVarChar(b);
+
+    return UCharCompare( a->data, lhs_units, b->data, rhs_units );
+}
+/* Defines SQL-callable mc_mv_<c>_<type>(mchar, mvarchar): calls mc_mv_<c>_compare() and returns `res action 0` via PG_RETURN_<ret>. */
+#define MC_MV_CHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mc_mv_##c##_##type ); \
+Datum mc_mv_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mc_mv_##c##_##type(PG_FUNCTION_ARGS) { \
+ MChar *a = PG_GETARG_MCHAR(0); \
+ MVarChar *b = PG_GETARG_MVARCHAR(1); \
+ int res = mc_mv_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+
+/* The cmp variants pass `+` as the action, so `res + 0` hands back the comparison result itself as an int32. */
+MC_MV_CHARCMPFUNC( case, eq, ==, BOOL )
+MC_MV_CHARCMPFUNC( case, ne, !=, BOOL )
+MC_MV_CHARCMPFUNC( case, lt, <, BOOL )
+MC_MV_CHARCMPFUNC( case, le, <=, BOOL )
+MC_MV_CHARCMPFUNC( case, ge, >=, BOOL )
+MC_MV_CHARCMPFUNC( case, gt, >, BOOL )
+MC_MV_CHARCMPFUNC( case, cmp, +, INT32 )
+
+MC_MV_CHARCMPFUNC( icase, eq, ==, BOOL )
+MC_MV_CHARCMPFUNC( icase, ne, !=, BOOL )
+MC_MV_CHARCMPFUNC( icase, lt, <, BOOL )
+MC_MV_CHARCMPFUNC( icase, le, <=, BOOL )
+MC_MV_CHARCMPFUNC( icase, ge, >=, BOOL )
+MC_MV_CHARCMPFUNC( icase, gt, >, BOOL )
+MC_MV_CHARCMPFUNC( icase, cmp, +, INT32 )
+
+/*
+ * mvarchar <> mchar
+ */
+static inline int
+mv_mc_icase_compare( MVarChar *a, MChar *b ) {
+    int lhs_units = lengthWithoutSpaceVarChar(a);   /* length the mvarchar side is compared at */
+    int rhs_units = lengthWithoutSpaceChar(b);      /* same for the mchar side */
+
+    return UCharCaseCompare( a->data, lhs_units, b->data, rhs_units );
+}
+/* mvarchar vs mchar through UCharCompare(); operand lengths come from the lengthWithoutSpace*() helpers. */
+static inline int
+mv_mc_case_compare( MVarChar *a, MChar *b ) {
+    int lhs_units = lengthWithoutSpaceVarChar(a);
+    int rhs_units = lengthWithoutSpaceChar(b);
+
+    return UCharCompare( a->data, lhs_units, b->data, rhs_units );
+}
+/* Defines SQL-callable mv_mc_<c>_<type>(mvarchar, mchar): calls mv_mc_<c>_compare() and returns `res action 0` via PG_RETURN_<ret>. */
+#define MV_MC_CHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mv_mc_##c##_##type ); \
+Datum mv_mc_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mv_mc_##c##_##type(PG_FUNCTION_ARGS) { \
+ MVarChar *a = PG_GETARG_MVARCHAR(0); \
+ MChar *b = PG_GETARG_MCHAR(1); \
+ int res = mv_mc_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+
+/* The cmp variants pass `+` as the action, so `res + 0` hands back the comparison result itself as an int32. */
+MV_MC_CHARCMPFUNC( case, eq, ==, BOOL )
+MV_MC_CHARCMPFUNC( case, ne, !=, BOOL )
+MV_MC_CHARCMPFUNC( case, lt, <, BOOL )
+MV_MC_CHARCMPFUNC( case, le, <=, BOOL )
+MV_MC_CHARCMPFUNC( case, ge, >=, BOOL )
+MV_MC_CHARCMPFUNC( case, gt, >, BOOL )
+MV_MC_CHARCMPFUNC( case, cmp, +, INT32 )
+
+MV_MC_CHARCMPFUNC( icase, eq, ==, BOOL )
+MV_MC_CHARCMPFUNC( icase, ne, !=, BOOL )
+MV_MC_CHARCMPFUNC( icase, lt, <, BOOL )
+MV_MC_CHARCMPFUNC( icase, le, <=, BOOL )
+MV_MC_CHARCMPFUNC( icase, ge, >=, BOOL )
+MV_MC_CHARCMPFUNC( icase, gt, >, BOOL )
+MV_MC_CHARCMPFUNC( icase, cmp, +, INT32 )
+/* Value fullhash_<type>() returns when its argument is NULL. */
+#define NULLHASHVALUE (-2147483647)
+/* Defines isfulleq_<type>() (two NULLs are equal, exactly one NULL is unequal, otherwise cmpfunc decides) and fullhash_<type>() (NULLHASHVALUE for NULL, otherwise hashfunc). */
+#define FULLEQ_FUNC(type, cmpfunc, hashfunc) \
+PG_FUNCTION_INFO_V1( isfulleq_##type ); \
+Datum isfulleq_##type(PG_FUNCTION_ARGS); \
+Datum \
+isfulleq_##type(PG_FUNCTION_ARGS) { \
+ if ( PG_ARGISNULL(0) && PG_ARGISNULL(1) ) \
+ PG_RETURN_BOOL(true); \
+ else if ( PG_ARGISNULL(0) || PG_ARGISNULL(1) ) \
+ PG_RETURN_BOOL(false); \
+ \
+ PG_RETURN_DATUM( DirectFunctionCall2( cmpfunc, \
+ PG_GETARG_DATUM(0), \
+ PG_GETARG_DATUM(1) \
+ ) ); \
+} \
+ \
+PG_FUNCTION_INFO_V1( fullhash_##type ); \
+Datum fullhash_##type(PG_FUNCTION_ARGS); \
+Datum \
+fullhash_##type(PG_FUNCTION_ARGS) { \
+ if ( PG_ARGISNULL(0) ) \
+ PG_RETURN_INT32(NULLHASHVALUE); \
+ \
+ PG_RETURN_DATUM( DirectFunctionCall1( hashfunc, \
+ PG_GETARG_DATUM(0) \
+ ) ); \
+}
+
+FULLEQ_FUNC( mchar, mchar_icase_eq, mchar_hash );
+FULLEQ_FUNC( mvarchar, mvarchar_icase_eq, mvarchar_hash );
+
diff --git a/contrib/mchar/mchar_proc.c b/contrib/mchar/mchar_proc.c
new file mode 100644
index 00000000000..edabfb5eb66
--- /dev/null
+++ b/contrib/mchar/mchar_proc.c
@@ -0,0 +1,315 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+/* Length of an mchar in code points, not counting trailing UChars for which m_isspace() is true. */
+PG_FUNCTION_INFO_V1(mchar_length);
+Datum mchar_length(PG_FUNCTION_ARGS);
+
+Datum
+mchar_length(PG_FUNCTION_ARGS) {
+    MChar *value = PG_GETARG_MCHAR(0);
+    int32 units = UCHARLENGTH(value);   /* UChars still counted as content */
+    int32 points;
+
+    for ( ; units > 0; units-- )
+        if ( !m_isspace( value->data[ units-1 ] ) )
+            break;
+
+    points = u_countChar32(value->data, units);
+    PG_FREE_IF_COPY(value,0);
+    PG_RETURN_INT32(points);
+}
+/* Length of an mvarchar in code points, not counting trailing UChars for which m_isspace() is true. */
+PG_FUNCTION_INFO_V1(mvarchar_length);
+Datum mvarchar_length(PG_FUNCTION_ARGS);
+
+Datum
+mvarchar_length(PG_FUNCTION_ARGS) {
+    MVarChar *value = PG_GETARG_MVARCHAR(0);
+    int32 units = UVARCHARLENGTH(value);   /* UChars still counted as content */
+    int32 points;
+
+    for ( ; units > 0; units-- )
+        if ( !m_isspace( value->data[ units-1 ] ) )
+            break;
+
+    points = u_countChar32(value->data, units);
+    PG_FREE_IF_COPY(value,0);
+    PG_RETURN_INT32(points);
+}
+/* Copies into dst the part of str (strl UChars) selected by a 1-based start and a length, both counted in code points; returns the number of UChars copied. */
+static int32
+uchar_substring(
+ UChar *str, int32 strl,
+ int32 start, int32 length, bool length_not_specified,
+ UChar *dst) {
+ int32 S = start-1; /* start position */
+ int32 S1; /* adjusted start position */
+ int32 L1; /* adjusted substring length */
+ int32 subbegin=0, subend=0;
+
+ S1 = Max(S, 0);
+ if (length_not_specified)
+ L1 = -1; /* NOTE(review): ICU documents U16_FWD_N as advancing only while n > 0, so this sentinel appears to make the no-length form copy nothing - confirm against the expected regress output before changing */
+ else {
+ /* end position */
+ int32 E = S + length;
+
+ /*
+ * A negative value for L is the only way for the end position to
+ * be before the start. SQL99 says to throw an error.
+ */
+
+ if (E < S)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+
+ /*
+ * A zero or negative value for the end position can happen if the
+ * start was negative or one. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 0)
+ return 0;
+
+ L1 = E - S1;
+ }
+ /* step subbegin forward by S1 code points; a start at or past the end of str yields an empty result */
+ U16_FWD_N( str, subbegin, strl, S1 );
+ if ( subbegin >= strl )
+ return 0;
+ subend = subbegin;
+ U16_FWD_N( str, subend, strl, L1 );
+
+ memcpy( dst, str+subbegin, sizeof(UChar)*(subend-subbegin) );
+
+ return subend-subbegin;
+}
+/* Substring of an mchar with an explicit length; the result buffer is as large as the input and keeps the input's typmod. */
+PG_FUNCTION_INFO_V1(mchar_substring);
+Datum mchar_substring(PG_FUNCTION_ARGS);
+Datum
+mchar_substring(PG_FUNCTION_ARGS) {
+    MChar *input = PG_GETARG_MCHAR(0);
+    int32 from = PG_GETARG_INT32(1);
+    int32 count = PG_GETARG_INT32(2);
+    MChar *piece = (MChar*)palloc( VARSIZE(input) );
+    int32 units;
+
+    /* uchar_substring() fills piece->data and reports how many UChars it copied */
+    units = uchar_substring( input->data, UCHARLENGTH(input),
+                             from, count, false, piece->data );
+
+    piece->typmod = input->typmod;
+    SET_VARSIZE(piece, MCHARHDRSZ + units *sizeof(UChar));
+
+    PG_FREE_IF_COPY(input, 0);
+    PG_RETURN_MCHAR(piece);
+}
+/* Substring of an mchar without an explicit length: calls uchar_substring() with length_not_specified = true. */
+PG_FUNCTION_INFO_V1(mchar_substring_no_len);
+Datum mchar_substring_no_len(PG_FUNCTION_ARGS);
+Datum
+mchar_substring_no_len(PG_FUNCTION_ARGS) {
+    MChar *input = PG_GETARG_MCHAR(0);
+    int32 from = PG_GETARG_INT32(1);
+    MChar *piece = (MChar*)palloc( VARSIZE(input) );
+    int32 units;
+
+    /* uchar_substring() fills piece->data and reports how many UChars it copied */
+    units = uchar_substring( input->data, UCHARLENGTH(input),
+                             from, -1, true,
+                             piece->data );
+
+    piece->typmod = input->typmod;
+    SET_VARSIZE(piece, MCHARHDRSZ + units *sizeof(UChar));
+
+    PG_FREE_IF_COPY(input, 0);
+    PG_RETURN_MCHAR(piece);
+}
+/* Substring of an mvarchar with an explicit length; the result buffer is as large as the input. */
+PG_FUNCTION_INFO_V1(mvarchar_substring);
+Datum mvarchar_substring(PG_FUNCTION_ARGS);
+Datum
+mvarchar_substring(PG_FUNCTION_ARGS) {
+    MVarChar *input = PG_GETARG_MVARCHAR(0);
+    int32 from = PG_GETARG_INT32(1);
+    int32 count = PG_GETARG_INT32(2);
+    MVarChar *piece = (MVarChar*)palloc( VARSIZE(input) );
+    int32 units;
+
+    /* uchar_substring() fills piece->data and reports how many UChars it copied */
+    units = uchar_substring( input->data, UVARCHARLENGTH(input),
+                             from, count, false, piece->data );
+
+    SET_VARSIZE(piece, MVARCHARHDRSZ + units *sizeof(UChar));
+
+    PG_FREE_IF_COPY(input, 0);
+    PG_RETURN_MVARCHAR(piece);
+}
+/* Substring of an mvarchar without an explicit length: calls uchar_substring() with length_not_specified = true. */
+PG_FUNCTION_INFO_V1(mvarchar_substring_no_len);
+Datum mvarchar_substring_no_len(PG_FUNCTION_ARGS);
+Datum
+mvarchar_substring_no_len(PG_FUNCTION_ARGS) {
+    MVarChar *input = PG_GETARG_MVARCHAR(0);
+    int32 from = PG_GETARG_INT32(1);
+    MVarChar *piece = (MVarChar*)palloc( VARSIZE(input) );
+    int32 units;
+
+    /* uchar_substring() fills piece->data and reports how many UChars it copied */
+    units = uchar_substring( input->data, UVARCHARLENGTH(input),
+                             from, -1, true,
+                             piece->data );
+
+    SET_VARSIZE(piece, MVARCHARHDRSZ + units *sizeof(UChar));
+
+    PG_FREE_IF_COPY(input, 0);
+    PG_RETURN_MVARCHAR(piece);
+}
+/* Hash of an mvarchar: passes the data, at the length given by lengthWithoutSpaceVarChar(), to hash_uchar(). */
+PG_FUNCTION_INFO_V1(mvarchar_hash);
+Datum
+mvarchar_hash(PG_FUNCTION_ARGS) {
+    MVarChar *value = PG_GETARG_MVARCHAR(0);
+    int      hashed_units = lengthWithoutSpaceVarChar(value);
+    Datum    digest = hash_uchar( value->data, hashed_units );
+
+    /* the hash is already computed, so the argument copy (if any) can be released */
+    PG_FREE_IF_COPY(value, 0);
+    PG_RETURN_DATUM( digest );
+}
+/* Hash of an mchar: passes the data, at the length given by lengthWithoutSpaceChar(), to hash_uchar(). */
+PG_FUNCTION_INFO_V1(mchar_hash);
+Datum
+mchar_hash(PG_FUNCTION_ARGS) {
+    MChar *value = PG_GETARG_MCHAR(0);
+    int   hashed_units = lengthWithoutSpaceChar(value);
+    Datum digest = hash_uchar( value->data, hashed_units );
+
+    /* the hash is already computed, so the argument copy (if any) can be released */
+    PG_FREE_IF_COPY(value, 0);
+    PG_RETURN_DATUM( digest );
+}
+
+PG_FUNCTION_INFO_V1(mchar_upper);
+Datum mchar_upper(PG_FUNCTION_ARGS);
+Datum
+mchar_upper(PG_FUNCTION_ARGS) {
+ MChar *src = PG_GETARG_MCHAR(0);
+ MChar *dst = (MChar*)palloc( VARSIZE(src) * 2 );
+
+ dst->len = MCHARHDRSZ;
+ dst->typmod = src->typmod;
+ if ( UCHARLENGTH(src) != 0 ) {
+ int length;
+ UErrorCode err=0;
+
+ length = u_strToUpper( dst->data, VARSIZE(src) * 2 - MCHARHDRSZ,
+ src->data, UCHARLENGTH(src),
+ NULL, &err );
+
+ Assert( length <= VARSIZE(src) * 2 - MCHARHDRSZ );
+
+ if ( U_FAILURE(err) )
+ elog(ERROR,"ICU u_strToUpper fails and returns %d (%s)", err, u_errorName(err));
+
+ dst->len += sizeof(UChar) * length;
+ }
+
+ SET_VARSIZE( dst, dst->len );
+ PG_FREE_IF_COPY(src, 0);
+ PG_RETURN_MCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mchar_lower);
+Datum mchar_lower(PG_FUNCTION_ARGS);
+Datum
+mchar_lower(PG_FUNCTION_ARGS) {
+ MChar *src = PG_GETARG_MCHAR(0);
+ MChar *dst = (MChar*)palloc( VARSIZE(src) * 2 );
+
+ dst->len = MCHARHDRSZ;
+ dst->typmod = src->typmod;
+ if ( UCHARLENGTH(src) != 0 ) {
+ int length;
+ UErrorCode err=0;
+
+ length = u_strToLower( dst->data, VARSIZE(src) * 2 - MCHARHDRSZ,
+ src->data, UCHARLENGTH(src),
+ NULL, &err );
+
+ Assert( length <= VARSIZE(src) * 2 - MCHARHDRSZ );
+
+ if ( U_FAILURE(err) )
+ elog(ERROR,"ICU u_strToLower fails and returns %d (%s)", err, u_errorName(err));
+
+ dst->len += sizeof(UChar) * length;
+ }
+
+ SET_VARSIZE( dst, dst->len );
+ PG_FREE_IF_COPY(src, 0);
+ PG_RETURN_MCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_upper);
+Datum mvarchar_upper(PG_FUNCTION_ARGS);
+Datum
+mvarchar_upper(PG_FUNCTION_ARGS) {
+ MVarChar *src = PG_GETARG_MVARCHAR(0);
+ MVarChar *dst = (MVarChar*)palloc( VARSIZE(src) * 2 );
+
+ dst->len = MVARCHARHDRSZ;
+
+ if ( UVARCHARLENGTH(src) != 0 ) {
+ int length;
+ UErrorCode err=0;
+
+ length = u_strToUpper( dst->data, VARSIZE(src) * 2 - MVARCHARHDRSZ,
+ src->data, UVARCHARLENGTH(src),
+ NULL, &err );
+
+ Assert( length <= VARSIZE(src) * 2 - MVARCHARHDRSZ );
+
+ if ( U_FAILURE(err) )
+ elog(ERROR,"ICU u_strToUpper fails and returns %d (%s)", err, u_errorName(err));
+
+ dst->len += sizeof(UChar) * length;
+ }
+
+ SET_VARSIZE( dst, dst->len );
+ PG_FREE_IF_COPY(src, 0);
+ PG_RETURN_MVARCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_lower);
+Datum mvarchar_lower(PG_FUNCTION_ARGS);
+Datum
+mvarchar_lower(PG_FUNCTION_ARGS) {
+ MVarChar *src = PG_GETARG_MVARCHAR(0);
+ MVarChar *dst = (MVarChar*)palloc( VARSIZE(src) * 2 );
+
+ dst->len = MVARCHARHDRSZ;
+
+ if ( UVARCHARLENGTH(src) != 0 ) {
+ int length;
+ UErrorCode err=0;
+
+ length = u_strToLower( dst->data, VARSIZE(src) * 2 - MVARCHARHDRSZ,
+ src->data, UVARCHARLENGTH(src),
+ NULL, &err );
+
+ Assert( length <= VARSIZE(src) * 2 - MVARCHARHDRSZ );
+
+ if ( U_FAILURE(err) )
+ elog(ERROR,"ICU u_strToLower fails and returns %d (%s)", err, u_errorName(err));
+
+ dst->len += sizeof(UChar) * length;
+ }
+
+ SET_VARSIZE( dst, dst->len );
+ PG_FREE_IF_COPY(src, 0);
+ PG_RETURN_MVARCHAR( dst );
+}
+
+
diff --git a/contrib/mchar/mchar_recode.c b/contrib/mchar/mchar_recode.c
new file mode 100644
index 00000000000..12bc6d4f3aa
--- /dev/null
+++ b/contrib/mchar/mchar_recode.c
@@ -0,0 +1,166 @@
+#include "mchar.h"
+#include "access/hash.h"
+
+#include "unicode/ucol.h"
+#include "unicode/ucnv.h"
+
+static UConverter *cnvDB = NULL;
+static UCollator *colCaseInsensitive = NULL;
+static UCollator *colCaseSensitive = NULL;
+
+static void
+createUObjs() {
+ if ( !cnvDB ) {
+ UErrorCode err = 0;
+
+ if ( GetDatabaseEncoding() == PG_UTF8 )
+ cnvDB = ucnv_open("UTF8", &err);
+ else
+ cnvDB = ucnv_open(NULL, &err);
+ if ( U_FAILURE(err) || cnvDB == NULL )
+ elog(ERROR,"ICU ucnv_open returns %d (%s)", err, u_errorName(err));
+ }
+
+ if ( !colCaseInsensitive ) {
+ UErrorCode err = 0;
+
+ colCaseInsensitive = ucol_open("", &err);
+ if ( U_FAILURE(err) || cnvDB == NULL ) {
+ if ( colCaseSensitive )
+ ucol_close( colCaseSensitive );
+ colCaseSensitive = NULL;
+ elog(ERROR,"ICU ucol_open returns %d (%s)", err, u_errorName(err));
+ }
+
+ ucol_setStrength( colCaseInsensitive, UCOL_SECONDARY );
+ }
+
+ if ( !colCaseSensitive ) {
+ UErrorCode err = 0;
+
+ colCaseSensitive = ucol_open("", &err);
+ if ( U_FAILURE(err) || cnvDB == NULL ) {
+ if ( colCaseSensitive )
+ ucol_close( colCaseSensitive );
+ colCaseSensitive = NULL;
+ elog(ERROR,"ICU ucol_open returns %d (%s)", err, u_errorName(err));
+ }
+
+ ucol_setAttribute(colCaseSensitive, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &err);
+ if (U_FAILURE(err)) {
+ if ( colCaseSensitive )
+ ucol_close( colCaseSensitive );
+ colCaseSensitive = NULL;
+ elog(ERROR,"ICU ucol_setAttribute returns %d (%s)", err, u_errorName(err));
+ }
+ }
+}
+/* Converts srclen bytes of src to UChars through cnvDB; ICU is told dst holds srclen*4 UChars (assumed to be the caller's guarantee - TODO confirm). */
+int
+Char2UChar(const char * src, int srclen, UChar *dst) {
+    UErrorCode status = 0;
+    int        written;
+
+    createUObjs();   /* makes sure cnvDB is open */
+
+    written = ucnv_toUChars( cnvDB, dst, srclen*4, src, srclen, &status );
+    if ( U_FAILURE(status) )
+        elog(ERROR,"ICU ucnv_toUChars returns %d (%s)", status, u_errorName(status));
+    return written;
+}
+/* Converts srclen UChars of src to bytes through cnvDB; ICU is told dst holds srclen*4 bytes (assumed to be the caller's guarantee - TODO confirm). */
+int
+UChar2Char(const UChar * src, int srclen, char *dst) {
+    UErrorCode status = 0;
+    int        written;
+
+    createUObjs();   /* makes sure cnvDB is open */
+
+    written = ucnv_fromUChars( cnvDB, dst, srclen*4, src, srclen, &status );
+    if ( U_FAILURE(status) )
+        elog(ERROR,"ICU ucnv_fromUChars returns %d (%s)", status, u_errorName(status));
+    return written;
+}
+/* UChar -> pg_wchar in two hops: UChar2Char() into a scratch byte buffer, then pg_mb2wchar_with_len(); returns the latter's result. */
+int
+UChar2Wchar(UChar * src, int srclen, pg_wchar *dst) {
+    char *scratch = palloc(sizeof(char)*srclen*4);   /* the same bound UChar2Char() hands to ICU */
+    int  nbytes;
+    int  nwchars;
+
+    nbytes = UChar2Char(src, srclen, scratch);
+    nwchars = pg_mb2wchar_with_len( scratch, dst, nbytes );
+    pfree(scratch);
+    return nwchars;
+}
+/* Cached UChar for a blank; 0 means it has not been converted yet. */
+static UChar UCharWhiteSpace = 0;
+
+void
+FillWhiteSpace( UChar *dst, int n ) {
+    int i;
+    /* first use: obtain the blank by converting the one-byte UTF-8 string " " */
+    if ( UCharWhiteSpace == 0 ) {
+        UErrorCode status = 0;
+        int        produced;
+
+        u_strFromUTF8( &UCharWhiteSpace, 1, &produced, " ", 1, &status);
+        Assert( produced==1 );
+        Assert( !U_FAILURE(status) );
+    }
+    for ( i = 0; i < n; i++ )
+        dst[i] = UCharWhiteSpace;
+}
+/* Compares two UChar strings with ucol_strcoll() on colCaseInsensitive (the collator createUObjs() sets to UCOL_SECONDARY). */
+int
+UCharCaseCompare(UChar * a, int alen, UChar *b, int blen) {
+    UCollationResult order;
+
+    createUObjs();   /* collators are opened on first use */
+    order = ucol_strcoll( colCaseInsensitive, a, alen, b, blen );
+
+    return (int) order;
+}
+/* Compares two UChar strings with ucol_strcoll() on colCaseSensitive (the collator createUObjs() sets to UCOL_UPPER_FIRST). */
+int
+UCharCompare(UChar * a, int alen, UChar *b, int blen) {
+    UCollationResult order;
+
+    createUObjs();   /* collators are opened on first use */
+    order = ucol_strcoll( colCaseSensitive, a, alen, b, blen );
+
+    return (int) order;
+}
+/* Hashes s[0..len) by feeding the sort key that colCaseInsensitive produces for it to hash_any(); an empty string hashes as hash_any(NULL, 0). */
+Datum
+hash_uchar( UChar *s, int len ) {
+    int32 capacity;
+    int32 keylen;
+    uint8 *key;
+    Datum hashed;
+
+    if ( len == 0 )
+        return hash_any( NULL, 0 );
+
+    createUObjs();
+
+    /* try buffers of 2*len, 4*len, ... bytes until the reported key length is below the buffer size */
+    capacity = len * 2;
+    for (;;)
+    {
+        key = palloc(capacity);
+        keylen = ucol_getSortKey(colCaseInsensitive, s, len, key, capacity);
+        if (keylen == 0)
+            elog(ERROR,"ICU ucol_getSortKey fails");
+        if (keylen < capacity)
+            break;
+
+        pfree(key);
+        capacity *= 2;
+    }
+
+    hashed = hash_any( (unsigned char*) key, keylen);
+    pfree(key);
+    return hashed;
+}
+
diff --git a/contrib/mchar/meson.build b/contrib/mchar/meson.build
new file mode 100644
index 00000000000..2e22ae8a15b
--- /dev/null
+++ b/contrib/mchar/meson.build
@@ -0,0 +1,51 @@
+mchar_sources = files(
+ 'mchar_io.c',
+ 'mchar_proc.c',
+ 'mchar_op.c',
+ 'mchar_recode.c',
+ 'mchar_like.c'
+)
+# Windows only: append what rc_lib_gen generates from win32ver_rc (NAME and FILEDESC both 'mchar') to the sources.
+if host_system == 'windows'
+ mchar_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'mchar',
+ '--FILEDESC', 'mchar',])
+endif
+# Shared module built with the common contrib arguments, with icu_i18n added to their dependency list.
+mchar = shared_module('mchar',
+ mchar_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': [icu_i18n, contrib_mod_args['dependencies']],
+ },
+)
+contrib_targets += mchar
+# Installed data: the control file plus the base and upgrade SQL scripts.
+install_data(
+ 'mchar.control',
+ 'mchar--2.2.1.sql',
+ 'mchar--2.0.1--2.1.sql',
+ 'mchar--2.0--2.1.sql',
+ 'mchar--2.1.1--2.2.sql',
+ 'mchar--2.1--2.2.sql',
+ 'mchar--2.2--2.2.1.sql',
+ 'mchar--unpackaged--2.0.sql',
+ kwargs: contrib_data_args,
+)
+# Regression suite; 'init' comes first because it creates the extension and the tables the other scripts query.
+tests += {
+ 'name': 'mchar',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'init',
+ 'mchar',
+ 'mvarchar',
+ 'mm',
+ 'like',
+ 'compat',
+ ]
+ },
+}
+
+# TODO: DOCS = README.mchar
\ No newline at end of file
diff --git a/contrib/mchar/sql/compat.sql b/contrib/mchar/sql/compat.sql
new file mode 100644
index 00000000000..d5b6a986960
--- /dev/null
+++ b/contrib/mchar/sql/compat.sql
@@ -0,0 +1,11 @@
+--- table based checks
+
+select '<' || ch || '>', '<' || vch || '>' from chvch;
+select * from chvch where vch = 'One space';
+select * from chvch where vch = 'One space ';
+
+select * from ch where chcol = 'abcd' order by chcol;
+select * from ch t1 join ch t2 on t1.chcol = t2.chcol order by t1.chcol, t2.chcol;
+select * from ch where chcol > 'abcd' and chcol<'ee';
+select * from ch order by chcol;
+
diff --git a/contrib/mchar/sql/init.sql b/contrib/mchar/sql/init.sql
new file mode 100644
index 00000000000..04310044458
--- /dev/null
+++ b/contrib/mchar/sql/init.sql
@@ -0,0 +1,23 @@
+CREATE EXTENSION mchar;
+
+create table ch (
+ chcol mchar(32)
+) without oids;
+
+insert into ch values('abcd');
+insert into ch values('AbcD');
+insert into ch values('abcz');
+insert into ch values('defg');
+insert into ch values('dEfg');
+insert into ch values('ee');
+insert into ch values('Ee');
+
+create table chvch (
+ ch mchar(12),
+ vch mvarchar(12)
+) without oids;
+
+insert into chvch values('No spaces', 'No spaces');
+insert into chvch values('One space ', 'One space ');
+insert into chvch values('1 space', '1 space ');
+
diff --git a/contrib/mchar/sql/like.sql b/contrib/mchar/sql/like.sql
new file mode 100644
index 00000000000..c29cf4eb6f9
--- /dev/null
+++ b/contrib/mchar/sql/like.sql
@@ -0,0 +1,231 @@
+-- simplest examples
+-- E061-04 like predicate
+set standard_conforming_strings=off;
+
+SELECT 'hawkeye'::mchar LIKE 'h%' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%' AS "false";
+
+SELECT 'hawkeye'::mchar LIKE 'H%' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'H%' AS "false";
+
+SELECT 'hawkeye'::mchar LIKE 'indio%' AS "false";
+SELECT 'hawkeye'::mchar NOT LIKE 'indio%' AS "true";
+
+SELECT 'hawkeye'::mchar LIKE 'h%eye' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%eye' AS "false";
+
+SELECT 'indio'::mchar LIKE '_ndio' AS "true";
+SELECT 'indio'::mchar NOT LIKE '_ndio' AS "false";
+
+SELECT 'indio'::mchar LIKE 'in__o' AS "true";
+SELECT 'indio'::mchar NOT LIKE 'in__o' AS "false";
+
+SELECT 'indio'::mchar LIKE 'in_o' AS "false";
+SELECT 'indio'::mchar NOT LIKE 'in_o' AS "true";
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%' AS "false";
+
+SELECT 'hawkeye'::mvarchar LIKE 'H%' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'H%' AS "false";
+
+SELECT 'hawkeye'::mvarchar LIKE 'indio%' AS "false";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'indio%' AS "true";
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%eye' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%eye' AS "false";
+
+SELECT 'indio'::mvarchar LIKE '_ndio' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE '_ndio' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'in__o' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE 'in__o' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'in_o' AS "false";
+SELECT 'indio'::mvarchar NOT LIKE 'in_o' AS "true";
+
+-- unused escape character
+SELECT 'hawkeye'::mchar LIKE 'h%'::mchar ESCAPE '#' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mchar LIKE 'ind_o'::mchar ESCAPE '$' AS "true";
+SELECT 'indio'::mchar NOT LIKE 'ind_o'::mchar ESCAPE '$' AS "false";
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+SELECT 'h%'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%%'::mchar ESCAPE '#' AS "true";
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'h%awkeye'::mchar LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "true";
+SELECT 'h%awkeye'::mchar NOT LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mchar LIKE '_ndio'::mchar ESCAPE '$' AS "true";
+SELECT 'indio'::mchar NOT LIKE '_ndio'::mchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mchar LIKE 'i$_d_o'::mchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d_o'::mchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mchar LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "false";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "true";
+
+SELECT 'i_dio'::mchar LIKE 'i$_d%o'::mchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d%o'::mchar ESCAPE '$' AS "false";
+
+-- escape character same as pattern character
+SELECT 'maca'::mchar LIKE 'm%aca' ESCAPE '%'::mchar AS "true";
+SELECT 'maca'::mchar NOT LIKE 'm%aca' ESCAPE '%'::mchar AS "false";
+
+SELECT 'ma%a'::mchar LIKE 'm%a%%a' ESCAPE '%'::mchar AS "true";
+SELECT 'ma%a'::mchar NOT LIKE 'm%a%%a' ESCAPE '%'::mchar AS "false";
+
+SELECT 'bear'::mchar LIKE 'b_ear' ESCAPE '_'::mchar AS "true";
+SELECT 'bear'::mchar NOT LIKE 'b_ear'::mchar ESCAPE '_' AS "false";
+
+SELECT 'be_r'::mchar LIKE 'b_e__r' ESCAPE '_'::mchar AS "true";
+SELECT 'be_r'::mchar NOT LIKE 'b_e__r' ESCAPE '_'::mchar AS "false";
+
+SELECT 'be_r'::mchar LIKE '__e__r' ESCAPE '_'::mchar AS "false";
+SELECT 'be_r'::mchar NOT LIKE '__e__r'::mchar ESCAPE '_' AS "true";
+
+-- unused escape character
+SELECT 'hawkeye'::mvarchar LIKE 'h%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'ind_o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE 'ind_o'::mvarchar ESCAPE '$' AS "false";
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'h%awkeye'::mvarchar LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%awkeye'::mvarchar NOT LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mvarchar LIKE '_ndio'::mvarchar ESCAPE '$' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE '_ndio'::mvarchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "false";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "true";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "false";
+
+-- escape character same as pattern character
+SELECT 'maca'::mvarchar LIKE 'm%aca' ESCAPE '%'::mvarchar AS "true";
+SELECT 'maca'::mvarchar NOT LIKE 'm%aca' ESCAPE '%'::mvarchar AS "false";
+
+SELECT 'ma%a'::mvarchar LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "true";
+SELECT 'ma%a'::mvarchar NOT LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "false";
+
+SELECT 'bear'::mvarchar LIKE 'b_ear' ESCAPE '_'::mvarchar AS "true";
+SELECT 'bear'::mvarchar NOT LIKE 'b_ear'::mvarchar ESCAPE '_' AS "false";
+
+SELECT 'be_r'::mvarchar LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "true";
+SELECT 'be_r'::mvarchar NOT LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "false";
+
+SELECT 'be_r'::mvarchar LIKE '__e__r' ESCAPE '_'::mvarchar AS "false";
+SELECT 'be_r'::mvarchar NOT LIKE '__e__r'::mvarchar ESCAPE '_' AS "true";
+
+-- similar to
+
+SELECT 'abc'::mchar SIMILAR TO 'abc'::mchar AS "true";
+SELECT 'abc'::mchar SIMILAR TO 'a'::mchar AS "false";
+SELECT 'abc'::mchar SIMILAR TO '%(b|d)%'::mchar AS "true";
+SELECT 'abc'::mchar SIMILAR TO '(b|c)%'::mchar AS "false";
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar AS "false";
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar ESCAPE '#' AS "true";
+
+SELECT 'abc'::mvarchar SIMILAR TO 'abc'::mvarchar AS "true";
+SELECT 'abc'::mvarchar SIMILAR TO 'a'::mvarchar AS "false";
+SELECT 'abc'::mvarchar SIMILAR TO '%(b|d)%'::mvarchar AS "true";
+SELECT 'abc'::mvarchar SIMILAR TO '(b|c)%'::mvarchar AS "false";
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar AS "false";
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar ESCAPE '#' AS "true";
+
+-- index support
+
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+set enable_seqscan = off;
+explain (costs off)
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+set enable_seqscan = on;
+
+
+create table testt (f1 mchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+set enable_seqscan = on;
+drop table testt;
+
+create table testt (f1 mvarchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+select * from testt where f1::mchar like E' %'::mchar;
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+select * from testt where f1::mchar like E' %'::mchar;
+set enable_seqscan = on;
+drop table testt;
+
+
+CREATE TABLE test ( code mchar(5) NOT NULL );
+insert into test values('1111 ');
+insert into test values('111 ');
+insert into test values('11 ');
+insert into test values('1 ');
+
+SELECT * FROM test WHERE code LIKE ('% ');
+
+set escape_string_warning = off;
+SELECT CASE WHEN ('_'::text SIMILAR TO '[\\_]'::text ESCAPE '\\'::text) THEN TRUE ELSE FALSE END ;
+SELECT CASE WHEN ('_'::mchar SIMILAR TO '[\\_]'::mchar ESCAPE '\\'::mchar) THEN TRUE ELSE FALSE END ;
+SELECT CASE WHEN ('_'::mvarchar SIMILAR TO '[\\_]'::mvarchar ESCAPE '\\'::mvarchar) THEN TRUE ELSE FALSE END ;
+reset escape_string_warning;
+reset standard_conforming_strings;
+
+
diff --git a/contrib/mchar/sql/mchar.sql b/contrib/mchar/sql/mchar.sql
new file mode 100644
index 00000000000..8850aa3e9d7
--- /dev/null
+++ b/contrib/mchar/sql/mchar.sql
@@ -0,0 +1,90 @@
+-- I/O tests
+
+select '1'::mchar;
+select '2 '::mchar;
+select '10 '::mchar;
+
+select '1'::mchar(2);
+select '2 '::mchar(2);
+select '3 '::mchar(2);
+select '10 '::mchar(2);
+
+select ' '::mchar(10);
+select ' '::mchar;
+
+-- operations & functions
+
+select length('1'::mchar);
+select length('2 '::mchar);
+select length('10 '::mchar);
+
+select length('1'::mchar(2));
+select length('2 '::mchar(2));
+select length('3 '::mchar(2));
+select length('10 '::mchar(2));
+
+select length(' '::mchar(10));
+select length(' '::mchar);
+
+select 'asd'::mchar(10) || '>'::mchar(10);
+select length('asd'::mchar(10) || '>'::mchar(10));
+select 'asd'::mchar(2) || '>'::mchar(10);
+select length('asd'::mchar(2) || '>'::mchar(10));
+
+-- Comparisons
+
+select 'asdf'::mchar = 'aSdf'::mchar;
+select 'asdf'::mchar = 'aSdf '::mchar;
+select 'asdf'::mchar = 'aSdf 1'::mchar(4);
+select 'asdf'::mchar = 'aSdf 1'::mchar(5);
+select 'asdf'::mchar = 'aSdf 1'::mchar(6);
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(5);
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(3);
+
+select 'asdf'::mchar < 'aSdf'::mchar;
+select 'asdf'::mchar < 'aSdf '::mchar;
+select 'asdf'::mchar < 'aSdf 1'::mchar(4);
+select 'asdf'::mchar < 'aSdf 1'::mchar(5);
+select 'asdf'::mchar < 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar <= 'aSdf'::mchar;
+select 'asdf'::mchar <= 'aSdf '::mchar;
+select 'asdf'::mchar <= 'aSdf 1'::mchar(4);
+select 'asdf'::mchar <= 'aSdf 1'::mchar(5);
+select 'asdf'::mchar <= 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar >= 'aSdf'::mchar;
+select 'asdf'::mchar >= 'aSdf '::mchar;
+select 'asdf'::mchar >= 'aSdf 1'::mchar(4);
+select 'asdf'::mchar >= 'aSdf 1'::mchar(5);
+select 'asdf'::mchar >= 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar > 'aSdf'::mchar;
+select 'asdf'::mchar > 'aSdf '::mchar;
+select 'asdf'::mchar > 'aSdf 1'::mchar(4);
+select 'asdf'::mchar > 'aSdf 1'::mchar(5);
+select 'asdf'::mchar > 'aSdf 1'::mchar(6);
+
+select max(ch) from chvch;
+select min(ch) from chvch;
+
+select substr('1234567890'::mchar, 3) = '34567890' as "34567890";
+select substr('1234567890'::mchar, 4, 3) = '456' as "456";
+
+select lower('asdfASDF'::mchar);
+select upper('asdfASDF'::mchar);
+
+select 'asd'::mchar == 'aSd'::mchar;
+select 'asd'::mchar == 'aCd'::mchar;
+select 'asd'::mchar == NULL;
+select NULL == 'aCd'::mchar;
+select NULL::mchar == NULL;
+
+
+--Note: here we use different space symbols, be carefull to copy it!
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+set enable_hashagg=off;
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+reset enable_hashagg;
diff --git a/contrib/mchar/sql/mm.sql b/contrib/mchar/sql/mm.sql
new file mode 100644
index 00000000000..2e11b937040
--- /dev/null
+++ b/contrib/mchar/sql/mm.sql
@@ -0,0 +1,196 @@
+select 'asd'::mchar::mvarchar;
+select 'asd '::mchar::mvarchar;
+select 'asd'::mchar(2)::mvarchar;
+select 'asd '::mchar(2)::mvarchar;
+select 'asd'::mchar(5)::mvarchar;
+select 'asd '::mchar(5)::mvarchar;
+select 'asd'::mchar::mvarchar(2);
+select 'asd '::mchar::mvarchar(2);
+select 'asd'::mchar(2)::mvarchar(2);
+select 'asd '::mchar(2)::mvarchar(2);
+select 'asd'::mchar(5)::mvarchar(2);
+select 'asd '::mchar(5)::mvarchar(2);
+select 'asd'::mchar::mvarchar(5);
+select 'asd '::mchar::mvarchar(5);
+select 'asd'::mchar(2)::mvarchar(5);
+select 'asd '::mchar(2)::mvarchar(5);
+select 'asd'::mchar(5)::mvarchar(5);
+select 'asd '::mchar(5)::mvarchar(5);
+
+select 'asd'::mvarchar::mchar;
+select 'asd '::mvarchar::mchar;
+select 'asd'::mvarchar(2)::mchar;
+select 'asd '::mvarchar(2)::mchar;
+select 'asd'::mvarchar(5)::mchar;
+select 'asd '::mvarchar(5)::mchar;
+select 'asd'::mvarchar::mchar(2);
+select 'asd '::mvarchar::mchar(2);
+select 'asd'::mvarchar(2)::mchar(2);
+select 'asd '::mvarchar(2)::mchar(2);
+select 'asd'::mvarchar(5)::mchar(2);
+select 'asd '::mvarchar(5)::mchar(2);
+select 'asd'::mvarchar::mchar(5);
+select 'asd '::mvarchar::mchar(5);
+select 'asd'::mvarchar(2)::mchar(5);
+select 'asd '::mvarchar(2)::mchar(5);
+select 'asd'::mvarchar(5)::mchar(5);
+select 'asd '::mvarchar(5)::mchar(5);
+
+select 'asd'::mchar || '123';
+select 'asd'::mchar || '123'::mchar;
+select 'asd'::mchar || '123'::mvarchar;
+
+select 'asd '::mchar || '123';
+select 'asd '::mchar || '123'::mchar;
+select 'asd '::mchar || '123'::mvarchar;
+
+select 'asd '::mchar || '123 ';
+select 'asd '::mchar || '123 '::mchar;
+select 'asd '::mchar || '123 '::mvarchar;
+
+
+select 'asd'::mvarchar || '123';
+select 'asd'::mvarchar || '123'::mchar;
+select 'asd'::mvarchar || '123'::mvarchar;
+
+select 'asd '::mvarchar || '123';
+select 'asd '::mvarchar || '123'::mchar;
+select 'asd '::mvarchar || '123'::mvarchar;
+
+select 'asd '::mvarchar || '123 ';
+select 'asd '::mvarchar || '123 '::mchar;
+select 'asd '::mvarchar || '123 '::mvarchar;
+
+
+select 'asd'::mchar(2) || '123';
+select 'asd'::mchar(2) || '123'::mchar;
+select 'asd'::mchar(2) || '123'::mvarchar;
+
+
+select 'asd '::mchar(2) || '123';
+select 'asd '::mchar(2) || '123'::mchar;
+select 'asd '::mchar(2) || '123'::mvarchar;
+
+
+select 'asd '::mchar(2) || '123 ';
+select 'asd '::mchar(2) || '123 '::mchar;
+select 'asd '::mchar(2) || '123 '::mvarchar;
+
+select 'asd'::mvarchar(2) || '123';
+select 'asd'::mvarchar(2) || '123'::mchar;
+select 'asd'::mvarchar(2) || '123'::mvarchar;
+
+select 'asd '::mvarchar(2) || '123';
+select 'asd '::mvarchar(2) || '123'::mchar;
+select 'asd '::mvarchar(2) || '123'::mvarchar;
+
+select 'asd '::mvarchar(2) || '123 ';
+select 'asd '::mvarchar(2) || '123 '::mchar;
+select 'asd '::mvarchar(2) || '123 '::mvarchar;
+
+select 'asd'::mchar(4) || '143';
+select 'asd'::mchar(4) || '123'::mchar;
+select 'asd'::mchar(4) || '123'::mvarchar;
+
+select 'asd '::mchar(4) || '123';
+select 'asd '::mchar(4) || '123'::mchar;
+select 'asd '::mchar(4) || '123'::mvarchar;
+
+select 'asd '::mchar(4) || '123 ';
+select 'asd '::mchar(4) || '123 '::mchar;
+select 'asd '::mchar(4) || '123 '::mvarchar;
+
+select 'asd'::mvarchar(4) || '123';
+select 'asd'::mvarchar(4) || '123'::mchar;
+select 'asd'::mvarchar(4) || '123'::mvarchar;
+
+select 'asd '::mvarchar(4) || '123';
+select 'asd '::mvarchar(4) || '123'::mchar;
+select 'asd '::mvarchar(4) || '123'::mvarchar;
+
+select 'asd '::mvarchar(4) || '123 ';
+select 'asd '::mvarchar(4) || '123 '::mchar;
+select 'asd '::mvarchar(4) || '123 '::mvarchar;
+
+
+select 'asd '::mvarchar(4) || '123 '::mchar(4);
+select 'asd '::mvarchar(4) || '123 '::mvarchar(4);
+select 'asd '::mvarchar(4) || '123'::mchar(4);
+select 'asd '::mvarchar(4) || '123'::mvarchar(4);
+
+
+select 1 where 'f'::mchar='F'::mvarchar;
+select 1 where 'f'::mchar='F '::mvarchar;
+select 1 where 'f '::mchar='F'::mvarchar;
+select 1 where 'f '::mchar='F '::mvarchar;
+
+select 1 where 'f'::mchar='F'::mvarchar(2);
+select 1 where 'f'::mchar='F '::mvarchar(2);
+select 1 where 'f '::mchar='F'::mvarchar(2);
+select 1 where 'f '::mchar='F '::mvarchar(2);
+
+select 1 where 'f'::mchar(2)='F'::mvarchar;
+select 1 where 'f'::mchar(2)='F '::mvarchar;
+select 1 where 'f '::mchar(2)='F'::mvarchar;
+select 1 where 'f '::mchar(2)='F '::mvarchar;
+
+select 1 where 'f'::mchar(2)='F'::mvarchar(2);
+select 1 where 'f'::mchar(2)='F '::mvarchar(2);
+select 1 where 'f '::mchar(2)='F'::mvarchar(2);
+select 1 where 'f '::mchar(2)='F '::mvarchar(2);
+
+select 1 where 'foo'::mchar='FOO'::mvarchar;
+select 1 where 'foo'::mchar='FOO '::mvarchar;
+select 1 where 'foo '::mchar='FOO'::mvarchar;
+select 1 where 'foo '::mchar='FOO '::mvarchar;
+
+select 1 where 'foo'::mchar='FOO'::mvarchar(2);
+select 1 where 'foo'::mchar='FOO '::mvarchar(2);
+select 1 where 'foo '::mchar='FOO'::mvarchar(2);
+select 1 where 'foo '::mchar='FOO '::mvarchar(2);
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar;
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar;
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar;
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar;
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar(2);
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar(2);
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar(2);
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar(2);
+
+Select 'f'::mchar(1) Union Select 'o'::mvarchar(1);
+Select 'f'::mvarchar(1) Union Select 'o'::mchar(1);
+
+select * from chvch where ch=vch;
+
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+create index qq on ch (chcol);
+set enable_seqscan=off;
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+set enable_seqscan=on;
+
+
+--\copy chvch to 'results/chvch.dump' binary
+--truncate table chvch;
+--\copy chvch from 'results/chvch.dump' binary
+
+--test joins
+CREATE TABLE a (mchar2 MCHAR(2) NOT NULL);
+CREATE TABLE c (mvarchar255 mvarchar NOT NULL);
+SELECT * FROM a, c WHERE mchar2 = mvarchar255;
+SELECT * FROM a, c WHERE mvarchar255 = mchar2;
+DROP TABLE a;
+DROP TABLE c;
+
+select * from (values
+ ('е'::mchar),('ё'),('еа'),('еб'),('ее'),('еж'),('ёа'),('ёб'),('ёё'),('ёж'),('ёе'),('её'))
+ z order by 1;
+
+select 'ё'::mchar = 'е';
+select 'Ё'::mchar = 'Е';
+select 'й'::mchar = 'и';
+select 'Й'::mchar = 'И';
+
+select mvarchar_icase_cmp('ёа','еб'), mvarchar_icase_cmp('еб','ё'),
+ mvarchar_icase_cmp('ё', 'ёа');
diff --git a/contrib/mchar/sql/mvarchar.sql b/contrib/mchar/sql/mvarchar.sql
new file mode 100644
index 00000000000..91b0981075d
--- /dev/null
+++ b/contrib/mchar/sql/mvarchar.sql
@@ -0,0 +1,82 @@
+-- I/O tests
+
+select '1'::mvarchar;
+select '2 '::mvarchar;
+select '10 '::mvarchar;
+
+select '1'::mvarchar(2);
+select '2 '::mvarchar(2);
+select '3 '::mvarchar(2);
+select '10 '::mvarchar(2);
+
+select ' '::mvarchar(10);
+select ' '::mvarchar;
+
+-- operations & functions
+
+select length('1'::mvarchar);
+select length('2 '::mvarchar);
+select length('10 '::mvarchar);
+
+select length('1'::mvarchar(2));
+select length('2 '::mvarchar(2));
+select length('3 '::mvarchar(2));
+select length('10 '::mvarchar(2));
+
+select length(' '::mvarchar(10));
+select length(' '::mvarchar);
+
+select 'asd'::mvarchar(10) || '>'::mvarchar(10);
+select length('asd'::mvarchar(10) || '>'::mvarchar(10));
+select 'asd'::mvarchar(2) || '>'::mvarchar(10);
+select length('asd'::mvarchar(2) || '>'::mvarchar(10));
+
+-- Comparisons
+
+select 'asdf'::mvarchar = 'aSdf'::mvarchar;
+select 'asdf'::mvarchar = 'aSdf '::mvarchar;
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(6);
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(3);
+
+select 'asdf'::mvarchar < 'aSdf'::mvarchar;
+select 'asdf'::mvarchar < 'aSdf '::mvarchar;
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar <= 'aSdf'::mvarchar;
+select 'asdf'::mvarchar <= 'aSdf '::mvarchar;
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar >= 'aSdf'::mvarchar;
+select 'asdf'::mvarchar >= 'aSdf '::mvarchar;
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar > 'aSdf'::mvarchar;
+select 'asdf'::mvarchar > 'aSdf '::mvarchar;
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(6);
+
+select max(vch) from chvch;
+select min(vch) from chvch;
+
+select substr('1234567890'::mvarchar, 3) = '34567890' as "34567890";
+select substr('1234567890'::mvarchar, 4, 3) = '456' as "456";
+
+select lower('asdfASDF'::mvarchar);
+select upper('asdfASDF'::mvarchar);
+
+select 'asd'::mvarchar == 'aSd'::mvarchar;
+select 'asd'::mvarchar == 'aCd'::mvarchar;
+select 'asd'::mvarchar == NULL;
+select NULL == 'aCd'::mvarchar;
+select NULL::mvarchar == NULL;
+
diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d639..b1784a348df 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -14,6 +14,7 @@ contrib_doc_args = {
subdir('amcheck')
subdir('auth_delay')
+subdir('auto_dump')
subdir('auto_explain')
subdir('basic_archive')
subdir('bloom')
@@ -23,11 +24,14 @@ subdir('btree_gin')
subdir('btree_gist')
subdir('citext')
subdir('cube')
+subdir('dbcopies_decoding')
subdir('dblink')
subdir('dict_int')
subdir('dict_xsyn')
subdir('earthdistance')
+subdir('fasttrun')
subdir('file_fdw')
+subdir('fulleq')
subdir('fuzzystrmatch')
subdir('hstore')
subdir('hstore_plperl')
@@ -40,7 +44,9 @@ subdir('jsonb_plpython')
subdir('lo')
subdir('ltree')
subdir('ltree_plpython')
+subdir('mchar')
subdir('oid2name')
+subdir('online_analyze')
subdir('pageinspect')
subdir('passwordcheck')
subdir('pg_buffercache')
@@ -55,7 +61,9 @@ subdir('pgstattuple')
subdir('pg_surgery')
subdir('pg_trgm')
subdir('pg_visibility')
+subdir('pg_wait_sampling')
subdir('pg_walinspect')
+subdir('plantuner')
subdir('postgres_fdw')
subdir('seg')
subdir('sepgsql')
diff --git a/contrib/online_analyze/COPYRIGHT b/contrib/online_analyze/COPYRIGHT
new file mode 100644
index 00000000000..75fea1f35d6
--- /dev/null
+++ b/contrib/online_analyze/COPYRIGHT
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
diff --git a/contrib/online_analyze/Makefile b/contrib/online_analyze/Makefile
new file mode 100644
index 00000000000..333add2b09b
--- /dev/null
+++ b/contrib/online_analyze/Makefile
@@ -0,0 +1,16 @@
+MODULE_big = online_analyze
+OBJS = online_analyze.o
+#DATA_built = online_analyze.sql
+DOCS = README.online_analyze
+#REGRESS = online_analyze
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/online_analyze
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
diff --git a/contrib/online_analyze/README.online_analyze b/contrib/online_analyze/README.online_analyze
new file mode 100644
index 00000000000..d72f17db424
--- /dev/null
+++ b/contrib/online_analyze/README.online_analyze
@@ -0,0 +1,46 @@
+Module makes an analyze call immediately after INSERT/UPDATE/DELETE/SELECT INTO
+for affected table(s).
+
+Supported versions of PostgreSQL: 8.4.*, 9.0.*, 9.1.*, 9.2.*, 9.3.*, 9.4*, 9.5*,
+ 9.6*
+
+Usage: LOAD 'online_analyze';
+
+Custom variables (default values are shown):
+online_analyze.enable = on
+ Enables on-line analyze
+
+online_analyze.local_tracking = off
+	Per-backend tracking for temp tables (does not use system statistics)
+
+online_analyze.verbose = on
+ Execute ANALYZE VERBOSE
+
+online_analyze.scale_factor = 0.1
+ Fraction of table size to start on-line analyze (similar to
+ autovacuum_analyze_scale_factor)
+
+online_analyze.threshold = 50
+ Min number of row updates before on-line analyze (similar to
+ autovacuum_analyze_threshold)
+
+online_analyze.min_interval = 10000
+ Minimum time interval between analyze call per table (in milliseconds)
+
+online_analyze.lower_limit = 0
+ Min number of rows in table to analyze
+
+online_analyze.table_type = "all"
+ Type(s) of table for online analyze: all, persistent, temporary, none
+
+online_analyze.exclude_tables = ""
+	List of tables that will not be analyzed online
+
+online_analyze.include_tables = ""
+	List of tables that will be analyzed online.
+	online_analyze.include_tables overrides online_analyze.exclude_tables.
+
+online_analyze.capacity_threshold = 100000
+ Maximum number of temporary tables to store in local cache
+
+Author: Teodor Sigaev <teodor@sigaev.ru>
diff --git a/contrib/online_analyze/meson.build b/contrib/online_analyze/meson.build
new file mode 100644
index 00000000000..e427099e8e4
--- /dev/null
+++ b/contrib/online_analyze/meson.build
@@ -0,0 +1,24 @@
+online_analyze_sources = files(
+ 'online_analyze.c'
+)
+
+if host_system == 'windows'
+ online_analyze_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'online_analyze',
+ '--FILEDESC', 'online_analyze',])
+endif
+
+online_analyze = shared_module('online_analyze',
+ online_analyze_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += online_analyze
+
+install_data(
+ kwargs: contrib_data_args,
+)
+
+
+# TODO: DOCS = README.online_analyze
\ No newline at end of file
diff --git a/contrib/online_analyze/online_analyze.c b/contrib/online_analyze/online_analyze.c
new file mode 100644
index 00000000000..412030864f2
--- /dev/null
+++ b/contrib/online_analyze/online_analyze.c
@@ -0,0 +1,1408 @@
+/*
+ * Copyright (c) 2011 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "postgres.h"
+
+#include "pgstat.h"
+#include "miscadmin.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "commands/vacuum.h"
+#include "executor/executor.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "storage/bufmgr.h"
+#include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+#include "utils/guc.h"
+#if PG_VERSION_NUM >= 90200
+#include "catalog/pg_class.h"
+#include "nodes/primnodes.h"
+#include "tcop/utility.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/timestamp.h"
+#if PG_VERSION_NUM >= 90500
+#include "nodes/makefuncs.h"
+#if PG_VERSION_NUM >= 100000
+#include "utils/varlena.h"
+#include "utils/regproc.h"
+#if PG_VERSION_NUM >= 130000
+#include "common/hashfn.h"
+#endif
+#endif
+#endif
+#endif
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+static bool online_analyze_enable = true;
+static bool online_analyze_local_tracking = false;
+static bool online_analyze_verbose = true;
+static double online_analyze_scale_factor = 0.1;
+static int online_analyze_threshold = 50;
+static int online_analyze_capacity_threshold = 100000;
+static double online_analyze_min_interval = 10000;
+static int online_analyze_lower_limit = 0;
+
+static ExecutorEnd_hook_type oldExecutorEndHook = NULL;
+#if PG_VERSION_NUM >= 90200
+static ProcessUtility_hook_type oldProcessUtilityHook = NULL;
+#endif
+
+#if PG_VERSION_NUM >= 120000
+#define VACOPT_NOWAIT VACOPT_SKIP_LOCKED
+#endif
+
+typedef enum CmdKind
+{
+	CK_SELECT = CMD_SELECT,	/* first four reuse the executor's CmdType values */
+	CK_UPDATE = CMD_UPDATE,
+	CK_INSERT = CMD_INSERT,
+	CK_DELETE = CMD_DELETE,
+	CK_TRUNCATE,			/* makeAnalyze() always analyzes after this one */
+	CK_FASTTRUNCATE,		/* counters are reset, but no analyze is forced */
+	CK_CREATE,				/* counted like CK_INSERT by makeAnalyze() */
+	CK_ANALYZE,				/* only resets the locally tracked counters */
+	CK_VACUUM				/* only forces a re-read of the statistics */
+} CmdKind;
+
+
+typedef enum	/* bit mask: makeAnalyze() tests (online_analyze_table_type & reltype) */
+{
+	OATT_ALL = 0x03,		/* OATT_PERSISTENT | OATT_TEMPORARY */
+	OATT_PERSISTENT = 0x01,
+	OATT_TEMPORARY = 0x02,
+	OATT_NONE = 0x00		/* only tables in the include list are analyzed */
+} OnlineAnalyzeTableType;
+
+static const struct config_enum_entry online_analyze_table_type_options[] =
+{
+ {"all", OATT_ALL, false},
+ {"persistent", OATT_PERSISTENT, false},
+ {"temporary", OATT_TEMPORARY, false},
+ {"none", OATT_NONE, false},
+ {NULL, 0, false},
+};
+
+static int online_analyze_table_type = (int)OATT_ALL;
+
+typedef struct TableList {
+	int nTables;		/* number of valid entries in tables[] */
+	Oid *tables;		/* malloc'd OID array, sorted ascending for matchOid() */
+	char *tableStr;		/* list as text, re-parsed by lateInit(); presumably the GUC string - confirm */
+	bool inited;		/* false until lateInit() has processed tableStr */
+} TableList;
+
+static TableList excludeTables = {0, NULL, NULL, false};
+static TableList includeTables = {0, NULL, NULL, false};
+
+typedef struct OnlineAnalyzeTableStat {
+	Oid tableid;						/* relation OID; key of the relstats hash lookup */
+	bool rereadStat;					/* true: makeAnalyze() refreshes the fields below from pgstat */
+	PgStat_Counter n_tuples;			/* live + dead tuples, adjusted locally after each command */
+	PgStat_Counter mod_since_analyze;	/* rows modified since the last analyze */
+	TimestampTz last_autoanalyze_time;	/* also set by makeAnalyze() after its own analyze */
+	TimestampTz last_analyze_time;		/* set when a CK_ANALYZE command is seen */
+} OnlineAnalyzeTableStat;
+
+static MemoryContext onlineAnalyzeMemoryContext = NULL;
+static HTAB *relstats = NULL;
+
+static void relstatsInit(void);
+
+#if PG_VERSION_NUM < 100000
+static int	/* qsort comparator: ascending OID order */
+oid_cmp(const void *a, const void *b)
+{
+	Oid		lhs = *(Oid*)a;
+	Oid		rhs = *(Oid*)b;
+	return (lhs > rhs) - (lhs < rhs);
+}
+#endif
+
+/*
+ * Parse a comma-separated list of table names and, when doit is true, store
+ * the matching relation OIDs (sorted ascending, as matchOid() expects) in *tbl.
+ *
+ * Returns newval on success and NULL when the list cannot be split into
+ * identifiers.  Resolving names requires catalog access, so outside of a
+ * running transaction only the syntax is checked and both lists are marked
+ * as not initialized; lateInit() repeats the assignment later.
+ */
+static const char *
+tableListAssign(const char * newval, bool doit, TableList *tbl)
+{
+	const char *result = NULL;
+	char	   *rawname;
+	List	   *namelist;
+	ListCell   *l;
+	Oid		   *newOids = NULL;
+	int			nOids = 0, i = 0;
+
+	rawname = pstrdup(newval);
+
+	if (!SplitIdentifierString(rawname, ',', &namelist))
+		goto cleanup;
+
+	if (MyProcNumber == INVALID_PROC_NUMBER || !IsUnderPostmaster ||
+		!IsTransactionState())
+	{
+		includeTables.inited = false;
+		excludeTables.inited = false;
+		result = newval;
+		goto cleanup;	/* release rawname/namelist instead of leaking them */
+	}
+
+	if (doit)
+	{
+		nOids = list_length(namelist);
+		newOids = malloc(sizeof(Oid) * (nOids+1));
+		if (!newOids)
+			elog(ERROR,"could not allocate %d bytes",
+				 (int)(sizeof(Oid) * (nOids+1)));
+	}
+
+	foreach(l, namelist)
+	{
+		char	*curname = (char *) lfirst(l);
+#if PG_VERSION_NUM >= 160000
+		Oid		relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname, NULL)), NoLock, true);
+#elif PG_VERSION_NUM >= 90200
+		Oid		relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname)), NoLock, true);
+#else
+		Oid		relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname)), true);
+#endif
+
+		if (relOid == InvalidOid)
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+			elog(WARNING,"'%s' does not exist", curname);
+			continue;
+		}
+		else if ( get_rel_relkind(relOid) != RELKIND_RELATION )
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+			elog(WARNING,"'%s' is not an table", curname);
+			continue;
+		}
+		else if (doit)
+		{
+			newOids[i++] = relOid;
+		}
+	}
+
+	if (doit)
+	{
+		tbl->nTables = i;
+		free(tbl->tables);		/* free(NULL) is a no-op */
+		tbl->tables = newOids;
+		if (tbl->nTables > 1)
+			qsort(tbl->tables, tbl->nTables, sizeof(tbl->tables[0]), oid_cmp);
+	}
+
+	result = newval;
+
+cleanup:
+	pfree(rawname);
+	list_free(namelist);
+	return result;
+}
+
+#if PG_VERSION_NUM >= 90100
+/*
+ * Check hook for the exclude list: tableListAssign() runs with doit = false,
+ * so no OIDs are stored in excludeTables here.
+ * Returns false when tableListAssign() rejects the value (returns NULL).
+ */
+static bool
+excludeTablesCheck(char **newval, void **extra, GucSource source)
+{
+	const char *checked = tableListAssign(*newval, false, &excludeTables);
+
+	if (checked == NULL)
+		return false;
+	*newval = (char*)checked;
+	return true;
+}
+
+static void
+excludeTablesAssign(const char *newval, void *extra)
+{
+	tableListAssign(newval, true, &excludeTables);	/* doit = true: store the resolved OIDs; result is ignored */
+}
+
+/*
+ * Check hook for the include list: tableListAssign() runs with doit = false,
+ * so no OIDs are stored in includeTables here.
+ * Returns false when tableListAssign() rejects the value (returns NULL).
+ */
+static bool
+includeTablesCheck(char **newval, void **extra, GucSource source)
+{
+	const char *checked = tableListAssign(*newval, false, &includeTables);
+
+	if (checked == NULL)
+		return false;
+	*newval = (char*)checked;
+	return true;
+}
+
+static void
+includeTablesAssign(const char *newval, void *extra)
+{
+	tableListAssign(newval, true, &includeTables);	/* doit = true: store the resolved OIDs; result is ignored */
+}
+
+#else /* PG_VERSION_NUM < 90100 */
+
+static const char *
+excludeTablesAssign(const char * newval, bool doit, GucSource source)
+{
+	return tableListAssign(newval, doit, &excludeTables);	/* pre-9.1 branch: doit is passed straight through */
+}
+
+static const char *
+includeTablesAssign(const char * newval, bool doit, GucSource source)
+{
+	return tableListAssign(newval, doit, &includeTables);	/* pre-9.1 branch: doit is passed straight through */
+}
+
+#endif
+
+/* Resolve both table lists once catalog access is possible (no-op otherwise). */
+static void
+lateInit(void)
+{
+	TableList	*tl[] = {&includeTables, &excludeTables};
+
+	if (MyProcNumber == INVALID_PROC_NUMBER || !IsUnderPostmaster ||
+		!IsTransactionState())
+		return; /* we aren't in connected state */
+
+	for (int i = 0; i < (int) lengthof(tl); i++)
+	{
+		TableList	*tbl = tl[i];
+
+		if (tbl->inited == false && tbl->tableStr != NULL)	/* never pstrdup(NULL) */
+			tableListAssign(tbl->tableStr, true, tbl);
+		tbl->inited = true;
+	}
+}
+
+/* Render tbl as a palloc'd "schema.table, schema.table" string. */
+static const char*
+tableListShow(TableList *tbl)
+{
+	char	*val, *ptr;
+	int		i, len;
+
+	lateInit();
+
+	len = 1 /* \0 */ + tbl->nTables * (2 * NAMEDATALEN + 2 /* ', ' */ + 1 /* . */);
+	ptr = val = palloc(len);
+	*ptr ='\0';
+	for(i=0; i<tbl->nTables; i++)
+	{
+		char	*relname = get_rel_name(tbl->tables[i]);
+		Oid		nspOid = get_rel_namespace(tbl->tables[i]);
+		char	*nspname = get_namespace_name(nspOid);
+
+		if ( relname == NULL || nspOid == InvalidOid || nspname == NULL )
+			continue;
+		/* separator depends on output so far: entry 0 may have been skipped */
+		ptr += snprintf(ptr, len - (ptr - val), "%s%s.%s",
+							(ptr == val) ? "" : ", ",
+							nspname, relname);
+	}
+
+	return val;
+}
+
+static const char*
+excludeTablesShow(void)
+{
+	return tableListShow(&excludeTables);	/* palloc'd "schema.table, ..." text of the exclude list */
+}
+
+static const char*
+includeTablesShow(void)
+{
+	return tableListShow(&includeTables);	/* palloc'd "schema.table, ..." text of the include list */
+}
+
+/* Binary search for oid in the ascending-sorted tbl->tables array. */
+static bool
+matchOid(TableList *tbl, Oid oid)
+{
+	int		lo = 0;
+	int		hi = tbl->nTables;	/* search window is [lo, hi) */
+
+	while (lo < hi)
+	{
+		int		mid = lo + (hi - lo) / 2;
+		Oid		probe = tbl->tables[mid];
+
+		if (probe == oid)
+			return true;
+		if (probe < oid)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	return false;
+}
+
+#if PG_VERSION_NUM >= 90500
+/* Build a schema-qualified RangeVar for the relation relOid. */
+static RangeVar*
+makeRangeVarFromOid(Oid relOid)
+{
+	char	*schemaName = get_namespace_name(get_rel_namespace(relOid));
+	char	*tableName = get_rel_name(relOid);
+
+	/* -1 is passed as makeRangeVar()'s third (location) argument, as before */
+	return makeRangeVar(schemaName, tableName, -1);
+}
+#endif
+
+/*
+ * Account for a command that touched relOid and run ANALYZE on it when due.
+ * naffected is the number of rows changed: 0 means nothing to do, a negative
+ * value means "unknown".  Only plain tables selected by the table_type and
+ * include/exclude settings are considered.
+ */
+static void
+makeAnalyze(Oid relOid, CmdKind operation, int64 naffected)
+{
+	TimestampTz				now = GetCurrentTimestamp();
+	Relation				rel;
+	OnlineAnalyzeTableType	reltype;
+	bool					found = false, newTable = false;
+	OnlineAnalyzeTableStat	*rstat, dummyrstat;
+	PgStat_StatTabEntry		*tabentry = NULL;
+
+	if (relOid == InvalidOid)
+		return;
+
+	if (naffected == 0)
+		/* return if there are no changes */
+		return;
+	else if (naffected < 0)
+		/* number of affected rows is unknown */
+		naffected = 0;
+
+	rel = RelationIdGetRelation(relOid);
+	if (rel == NULL || rel->rd_rel->relkind != RELKIND_RELATION)
+	{
+		if (rel != NULL)	/* NULL: the relation could not be opened */
+			RelationClose(rel);
+		return;
+	}
+
+	reltype =
+#if PG_VERSION_NUM >= 90100
+		(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+#else
+		(rel->rd_istemp || rel->rd_islocaltemp)
+#endif
+			? OATT_TEMPORARY : OATT_PERSISTENT;
+
+	RelationClose(rel);
+
+	/*
+	 * includeTables overrides excludeTables
+	 */
+	switch(online_analyze_table_type)
+	{
+		case OATT_ALL:
+			if (get_rel_relkind(relOid) != RELKIND_RELATION ||
+				(matchOid(&excludeTables, relOid) == true &&
+				matchOid(&includeTables, relOid) == false))
+				return;
+			break;
+		case OATT_NONE:
+			if (get_rel_relkind(relOid) != RELKIND_RELATION ||
+				matchOid(&includeTables, relOid) == false)
+				return;
+			break;
+		case OATT_TEMPORARY:
+		case OATT_PERSISTENT:
+		default:
+			/*
+			 * skip analyze if relation's type does not match
+			 * online_analyze_table_type
+			 */
+			if ((online_analyze_table_type & reltype) == 0 ||
+				matchOid(&excludeTables, relOid) == true)
+			{
+				if (matchOid(&includeTables, relOid) == false)
+					return;
+			}
+			break;
+	}
+
+	/*
+	 * Do not store data about persistent table in local memory because we
+	 * could not track changes of them: they could be changed by another
+	 * backends. So always get a pgstat table entry.
+	 */
+	if (reltype == OATT_TEMPORARY)
+		rstat = hash_search(relstats, &relOid, HASH_ENTER, &found);
+	else
+		rstat = &dummyrstat; /* found == false for following if */
+
+	if (!found)
+	{
+		MemSet(rstat, 0, sizeof(*rstat));
+		rstat->tableid = relOid;
+		newTable = true;
+	}
+
+	if (operation == CK_VACUUM)
+	{
+		/* force reread because vacuum could change n_tuples */
+		rstat->rereadStat = true;
+		return;
+	}
+	else if (operation == CK_ANALYZE)
+	{
+		/* only analyze */
+		rstat->mod_since_analyze = 0;
+		rstat->last_analyze_time = now;
+		if (newTable)
+			rstat->rereadStat = true;
+		return;
+	}
+
+	Assert(rstat->tableid == relOid);
+
+	if (
+		/* do not reread data if it was a truncation */
+		operation != CK_TRUNCATE && operation != CK_FASTTRUNCATE &&
+		/* read for persistent table and for temp table if it is allowed */
+		(reltype == OATT_PERSISTENT || online_analyze_local_tracking == false) &&
+		/* read only for new table or we know that it's needed */
+		(newTable == true || rstat->rereadStat == true)
+	   )
+	{
+		rstat->rereadStat = false;
+
+		tabentry = pgstat_fetch_stat_tabentry(relOid);
+
+		if (tabentry)
+		{
+			rstat->n_tuples =
+#if PG_VERSION_NUM >= 160000
+				tabentry->dead_tuples + tabentry->live_tuples;
+#else
+				tabentry->n_dead_tuples + tabentry->n_live_tuples;
+#endif
+
+			rstat->mod_since_analyze =
+#if PG_VERSION_NUM >= 160000
+				tabentry->mod_since_analyze;
+#elif PG_VERSION_NUM >= 90000
+				tabentry->changes_since_analyze;
+#else
+				tabentry->n_live_tuples + tabentry->n_dead_tuples -
+					tabentry->last_anl_tuples;
+#endif
+
+			rstat->last_autoanalyze_time =
+#if PG_VERSION_NUM >= 160000
+				tabentry->last_autoanalyze_time;
+#else
+				tabentry->autovac_analyze_timestamp;
+#endif
+
+			rstat->last_analyze_time =
+#if PG_VERSION_NUM >= 160000
+				tabentry->last_analyze_time;
+#else
+				tabentry->analyze_timestamp;
+#endif
+		}
+	}
+
+	if (newTable ||
+		/* force analyze after truncate, fasttruncate already did analyze */
+		operation == CK_TRUNCATE || (
+		/* do not analyze too often, if both stamps are exceeded then go */
+		TimestampDifferenceExceeds(rstat->last_analyze_time, now, online_analyze_min_interval) &&
+		TimestampDifferenceExceeds(rstat->last_autoanalyze_time, now, online_analyze_min_interval) &&
+		/* do not analyze too small tables */
+		rstat->n_tuples + rstat->mod_since_analyze + naffected > online_analyze_lower_limit &&
+		/* be in sync with relation_needs_vacanalyze */
+		((double)(rstat->mod_since_analyze + naffected)) >=
+			 online_analyze_scale_factor * ((double)rstat->n_tuples) +
+			 (double)online_analyze_threshold))
+	{
+#if PG_VERSION_NUM < 90500
+		VacuumStmt				vacstmt;
+#else
+		VacuumParams			vacstmt;
+#endif
+		TimestampTz				startStamp, endStamp;
+		int						flags;
+
+#ifdef PGPRO_EE
+		/* ATX is not compatible with online_analyze */
+		if (getNestLevelATX() != 0)
+			return;
+#endif
+
+		memset(&startStamp, 0, sizeof(startStamp)); /* keep compiler quiet */
+
+		memset(&vacstmt, 0, sizeof(vacstmt));
+
+		vacstmt.freeze_min_age = -1;
+		vacstmt.freeze_table_age = -1; /* ??? */
+
+#if PG_VERSION_NUM < 90500
+		vacstmt.type = T_VacuumStmt;
+		vacstmt.relation = NULL;
+		vacstmt.va_cols = NIL;
+#if PG_VERSION_NUM >= 90000
+		vacstmt.options = VACOPT_ANALYZE;
+		if (online_analyze_verbose)
+			vacstmt.options |= VACOPT_VERBOSE;
+#else
+		vacstmt.vacuum = vacstmt.full = false;
+		vacstmt.analyze = true;
+		vacstmt.verbose = online_analyze_verbose;
+#endif
+#else
+		vacstmt.multixact_freeze_min_age = -1;
+		vacstmt.multixact_freeze_table_age = -1;
+		vacstmt.log_min_duration = -1;
+#endif
+
+		if (online_analyze_verbose)
+			startStamp = GetCurrentTimestamp();
+
+		flags = VACOPT_ANALYZE | VACOPT_NOWAIT |
+					((online_analyze_verbose) ? VACOPT_VERBOSE : 0);
+
+#if PG_VERSION_NUM >= 120000
+		vacstmt.options = flags;
+#endif
+		analyze_rel(relOid,
+#if PG_VERSION_NUM < 90500
+			&vacstmt
+#if PG_VERSION_NUM >= 90018
+			, true
+#endif
+			, GetAccessStrategy(BAS_VACUUM)
+#if (PG_VERSION_NUM >= 90000) && (PG_VERSION_NUM < 90004)
+			, true
+#endif
+#else
+			makeRangeVarFromOid(relOid),
+#if PG_VERSION_NUM < 120000
+			flags,
+#endif
+			&vacstmt, NULL, true, GetAccessStrategy(BAS_VACUUM)
+#endif
+		);
+
+		/* Make changes visible to subsequent calls */
+		CommandCounterIncrement();
+
+		if (online_analyze_verbose)
+		{
+			long	secs;
+			int		microsecs;
+
+			endStamp = GetCurrentTimestamp();
+			TimestampDifference(startStamp, endStamp, &secs, &microsecs);
+			elog(INFO, "analyze \"%s\" took %.02f seconds",
+				get_rel_name(relOid),
+				((double)secs) + ((double)microsecs)/1.0e6);
+		}
+
+		rstat->last_autoanalyze_time = now;
+		rstat->mod_since_analyze = 0;
+
+		switch(operation)
+		{
+			case CK_CREATE:
+			case CK_INSERT:
+			case CK_UPDATE:
+				rstat->n_tuples += naffected;
+				/* FALLTHROUGH */
+			case CK_DELETE:
+				rstat->rereadStat = (reltype == OATT_PERSISTENT);
+				break;
+			case CK_TRUNCATE:
+			case CK_FASTTRUNCATE:
+				rstat->rereadStat = false;
+				rstat->n_tuples = 0;
+				break;
+			default:
+				break;
+		}
+
+		/* update last analyze timestamp in local memory of backend */
+		if (tabentry)
+		{
+#if PG_VERSION_NUM >= 160000
+			tabentry->last_analyze_time = now;
+			tabentry->mod_since_analyze = 0;
+#else
+			tabentry->analyze_timestamp = now;
+			tabentry->changes_since_analyze = 0;
+#endif
+		}
+		/* (forcing a stat reload via pgstat_clear_snapshot() for new tables is disabled) */
+	}
+	else
+	{
+#if PG_VERSION_NUM >= 90000
+		if (tabentry)
+#if PG_VERSION_NUM >= 160000
+			tabentry->mod_since_analyze += naffected;
+#else
+			tabentry->changes_since_analyze += naffected;
+#endif
+#endif
+		switch(operation)
+		{
+			case CK_CREATE:
+			case CK_INSERT:
+				rstat->mod_since_analyze += naffected;
+				rstat->n_tuples += naffected;
+				break;
+			case CK_UPDATE:
+				rstat->mod_since_analyze += 2 * naffected;
+				rstat->n_tuples += naffected;
+				break;
+			case CK_DELETE:
+				rstat->mod_since_analyze += naffected;
+				break;
+			case CK_TRUNCATE:
+			case CK_FASTTRUNCATE:
+				rstat->mod_since_analyze = 0;
+				rstat->n_tuples = 0;
+				break;
+			default:
+				break;
+		}
+	}
+
+	/* Reset local cache if we are over limit */
+	if (hash_get_num_entries(relstats) > online_analyze_capacity_threshold)
+		relstatsInit();
+}
+
+/*
+ * isFastTruncateCall
+ *		Detect a query of the form "SELECT fasttruncate('<table name>')".
+ *
+ * Returns the text Const holding the table name when the planned statement is
+ * a SELECT whose single target entry is a call of a function that
+ *	- has an OID >= FirstNormalObjectId (i.e. is not a built-in),
+ *	- is neither set-returning nor variadic and returns void,
+ *	- takes exactly one argument, which is a non-NULL text constant,
+ *	- is named "fasttruncate".
+ * Returns NULL for anything else.
+ *
+ * The function is matched by name only, so the result has to be safe for any
+ * look-alike: a NULL constant is rejected here because the caller detoasts
+ * constvalue unconditionally, and a failed name lookup is treated as
+ * "no match" instead of being handed to strcmp().
+ */
+static Const*
+isFastTruncateCall(QueryDesc *queryDesc)
+{
+	TargetEntry	*te;
+	FuncExpr	*fe;
+	Const		*constval;
+	char		*funcname;
+	bool		matched;
+
+	/* Only a planned SELECT with exactly one output column qualifies */
+	if (!(
+		queryDesc->plannedstmt &&
+		queryDesc->operation == CMD_SELECT &&
+		queryDesc->plannedstmt->planTree &&
+		queryDesc->plannedstmt->planTree->targetlist &&
+		list_length(queryDesc->plannedstmt->planTree->targetlist) == 1
+		))
+		return NULL;
+
+	te = linitial(queryDesc->plannedstmt->planTree->targetlist);
+
+	if (!IsA(te, TargetEntry))
+		return NULL;
+
+	fe = (FuncExpr*)te->expr;
+
+	/* The column must be a plain one-argument call of a user-defined void function */
+	if (!(
+		fe && IsA(fe, FuncExpr) &&
+		fe->funcid >= FirstNormalObjectId &&
+		fe->funcretset == false &&
+		fe->funcresulttype == VOIDOID &&
+		fe->funcvariadic == false &&
+		list_length(fe->args) == 1
+		))
+		return NULL;
+
+	constval = linitial(fe->args);
+
+	/* The argument must be a text constant that actually carries a value */
+	if (!(
+		IsA(constval,Const) &&
+		constval->consttype == TEXTOID &&
+		!constval->constisnull
+		))
+		return NULL;
+
+	/* get_func_name() returns NULL when the function cannot be looked up */
+	funcname = get_func_name(fe->funcid);
+	if (funcname == NULL)
+		return NULL;
+
+	matched = (strcmp(funcname, "fasttruncate") == 0);
+	pfree(funcname);
+
+	return matched ? constval : NULL;
+}
+
+
+extern PGDLLIMPORT void onlineAnalyzeHooker(QueryDesc *queryDesc);
+/*
+ * onlineAnalyzeHooker
+ *		ExecutorEnd hook: hand every relation touched by the finished query to
+ *		makeAnalyze(), then continue with the previous hook (or the standard
+ *		ExecutorEnd when there is none).
+ *
+ * Two kinds of queries are recognised while online_analyze.enable is on:
+ *	- SELECT fasttruncate('name') (PG >= 9.2): the named table is reported
+ *	  as CK_FASTTRUNCATE with an unknown (-1) row count;
+ *	- INSERT/UPDATE/DELETE (plus SELECT ... INTO before PG 9.2): each plain
+ *	  result relation is reported with the executor's processed-row count,
+ *	  or -1 when no executor state is available.
+ */
+void
+onlineAnalyzeHooker(QueryDesc *queryDesc)
+{
+	int64		nrows = -1;
+	Const	   *nameConst;
+
+	if (queryDesc->estate != NULL)
+		nrows = queryDesc->estate->es_processed;
+
+	lateInit();
+
+#if PG_VERSION_NUM >= 90200
+	if (online_analyze_enable)
+	{
+		nameConst = isFastTruncateCall(queryDesc);
+
+		if (nameConst != NULL)
+		{
+			char	   *relname = text_to_cstring(DatumGetTextP(nameConst->constvalue));
+			List	   *qualified;
+			RangeVar   *target;
+
+			/* stringToQualifiedNameList() gained a second argument in PG 16 */
+#if PG_VERSION_NUM >= 160000
+			qualified = stringToQualifiedNameList(relname, NULL);
+#else
+			qualified = stringToQualifiedNameList(relname);
+#endif
+			target = makeRangeVarFromNameList(qualified);
+
+			makeAnalyze(RangeVarGetRelid(target, NoLock, false),
+						CK_FASTTRUNCATE, -1);
+		}
+	}
+#endif
+
+	if (online_analyze_enable && queryDesc->plannedstmt != NULL)
+	{
+		PlannedStmt *stmt = queryDesc->plannedstmt;
+		bool		isDml = (queryDesc->operation == CMD_INSERT ||
+							 queryDesc->operation == CMD_UPDATE ||
+							 queryDesc->operation == CMD_DELETE);
+
+#if PG_VERSION_NUM < 90200
+		if (queryDesc->operation == CMD_SELECT)
+		{
+			/* SELECT ... INTO creates and fills the target table */
+			if (stmt->intoClause)
+			{
+				Oid			relOid = RangeVarGetRelid(stmt->intoClause->rel, true);
+
+				makeAnalyze(relOid, queryDesc->operation, nrows);
+			}
+		}
+		else
+#endif
+		if (isDml && stmt->resultRelations && stmt->rtable)
+		{
+			ListCell   *lc;
+
+			foreach(lc, stmt->resultRelations)
+			{
+				/* resultRelations holds 1-based indexes into the range table */
+				RangeTblEntry *rte = list_nth(stmt->rtable, lfirst_int(lc) - 1);
+
+				if (rte->rtekind != RTE_RELATION)
+					continue;
+
+				makeAnalyze(rte->relid, (CmdKind)queryDesc->operation, nrows);
+			}
+		}
+	}
+
+	if (oldExecutorEndHook != NULL)
+		oldExecutorEndHook(queryDesc);
+	else
+		standard_ExecutorEnd(queryDesc);
+}
+
+/*
+ * OIDs of tables dropped in the current transaction.  The list cells are
+ * allocated in TopTransactionContext, so the pointer must never be kept
+ * across the end of the transaction that built the list.
+ */
+static List *toremove = NIL;
+
+/*
+ * removeTable called on transaction end, see call RegisterXactCallback() below
+ *
+ * On commit the cached statistics of every table queued in toremove are
+ * deleted from relstats.  On abort the drops did not happen, so the queue is
+ * simply forgotten.  PREPARE TRANSACTION also ends the local transaction and
+ * releases its memory, so the queue is forgotten there as well; the cached
+ * entries are kept because the drop is not final yet.  All other events
+ * (e.g. pre-commit) leave the queue untouched.
+ */
+static void
+removeTable(XactEvent event, void *arg)
+{
+	ListCell	*cell;
+
+	switch(event)
+	{
+		case XACT_EVENT_COMMIT:
+			break;
+		case XACT_EVENT_ABORT:
+		case XACT_EVENT_PREPARE:
+			/* list memory goes away with the transaction: drop the pointer */
+			toremove = NIL;
+			return;
+		default:
+			return;
+	}
+
+	foreach(cell, toremove)
+	{
+		Oid		relOid = lfirst_oid(cell);
+
+		hash_search(relstats, &relOid, HASH_REMOVE, NULL);
+	}
+
+	toremove = NIL;
+}
+
+#if PG_VERSION_NUM >= 120000
+/*
+ * parse_vacuum_opt
+ *		Translate the DefElem option list of a VACUUM/ANALYZE statement into
+ *		a VACOPT_* bitmask.
+ *
+ * The mask starts with VACOPT_VACUUM or VACOPT_ANALYZE depending on which
+ * command was issued; every recognised option name then adds its flag.
+ * Unrecognised names are ignored.  Only the option name is examined: the
+ * option's argument (if any) is not looked at.
+ */
+static int
+parse_vacuum_opt(VacuumStmt *vacstmt)
+{
+	static const struct
+	{
+		const char *name;
+		int			flag;
+	}			known[] = {
+		{"verbose", VACOPT_VERBOSE},
+		{"skip_locked", VACOPT_SKIP_LOCKED},
+		{"analyze", VACOPT_ANALYZE},
+		{"freeze", VACOPT_FREEZE},
+		{"full", VACOPT_FULL},
+		{"disable_page_skipping", VACOPT_DISABLE_PAGE_SKIPPING}
+	};
+	int			result;
+	ListCell   *cell;
+
+	if (vacstmt->is_vacuumcmd)
+		result = VACOPT_VACUUM;
+	else
+		result = VACOPT_ANALYZE;
+
+	foreach(cell, vacstmt->options)
+	{
+		DefElem    *elem = (DefElem *) lfirst(cell);
+		size_t		i;
+
+		for (i = 0; i < sizeof(known) / sizeof(known[0]); i++)
+		{
+			if (strcmp(elem->defname, known[i].name) == 0)
+			{
+				result |= known[i].flag;
+				break;
+			}
+		}
+	}
+
+	return result;
+}
+#endif
+
+
+#if PG_VERSION_NUM >= 90200
+/*
+ * onlineAnalyzeHookerUtility
+ *		ProcessUtility hook (installed by _PG_init()).
+ *
+ * Before the utility statement runs, it is classified and the affected table
+ * names are collected:
+ *	- CREATE TABLE AS: the target table, reported as CK_CREATE;
+ *	- TRUNCATE: the listed tables, reported as CK_TRUNCATE;
+ *	- DROP TABLE: nothing is reported; the OIDs of the tables are queued in
+ *	  toremove instead;
+ *	- VACUUM / ANALYZE: the listed tables, reported as CK_VACUUM or
+ *	  CK_ANALYZE; without a table list the local cache is reset as a whole.
+ * The statement is then executed through the previous hook or
+ * standard_ProcessUtility(), and afterwards makeAnalyze() is called for each
+ * collected table with an unknown (-1) row count.
+ *
+ * The parameter list follows the ProcessUtility_hook signature of the
+ * PostgreSQL version being compiled against, hence the nested #if's.
+ */
+static void
+onlineAnalyzeHookerUtility(
+#if PG_VERSION_NUM >= 100000
+ PlannedStmt *pstmt,
+#else
+ Node *parsetree,
+#endif
+ const char *queryString,
+#if PG_VERSION_NUM >= 140000
+ bool readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ ProcessUtilityContext context, ParamListInfo params,
+#if PG_VERSION_NUM >= 100000
+ QueryEnvironment *queryEnv,
+#endif
+#else
+ ParamListInfo params, bool isTopLevel,
+#endif
+ DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+ QueryCompletion *completionTag
+#else
+ char *completionTag
+#endif
+) {
+ /* tables to report after the statement has run, and the operation kind */
+ List *tblnames = NIL;
+ CmdKind op = CK_INSERT;
+#if PG_VERSION_NUM >= 100000
+ /* on PG 10+ the utility parse tree is wrapped in a PlannedStmt */
+ Node *parsetree = NULL;
+
+ if (pstmt->commandType == CMD_UTILITY)
+ parsetree = pstmt->utilityStmt;
+#endif
+
+ lateInit();
+
+ if (parsetree && online_analyze_enable)
+ {
+ if (IsA(parsetree, CreateTableAsStmt) &&
+ ((CreateTableAsStmt*)parsetree)->into)
+ {
+ /* keep a private copy of the target RangeVar for use after execution */
+ tblnames =
+ list_make1((RangeVar*)copyObject(((CreateTableAsStmt*)parsetree)->into->rel));
+ op = CK_CREATE;
+ }
+ else if (IsA(parsetree, TruncateStmt))
+ {
+ tblnames = list_copy(((TruncateStmt*)parsetree)->relations);
+ op = CK_TRUNCATE;
+ }
+ else if (IsA(parsetree, DropStmt) &&
+ ((DropStmt*)parsetree)->removeType == OBJECT_TABLE)
+ {
+ ListCell *cell;
+
+ /*
+ * Resolve the names now, before the statement is executed below, and
+ * queue the OIDs; removeTable() purges them from relstats on commit.
+ */
+ foreach(cell, ((DropStmt*)parsetree)->objects)
+ {
+ List *relname = (List *) lfirst(cell);
+ RangeVar *rel = makeRangeVarFromNameList(relname);
+ Oid relOid = RangeVarGetRelid(rel, NoLock, true);
+
+ if (OidIsValid(relOid))
+ {
+ MemoryContext ctx;
+
+ /* the queue must outlive this statement, so build it in TopTransactionContext */
+ ctx = MemoryContextSwitchTo(TopTransactionContext);
+ toremove = lappend_oid(toremove, relOid);
+ MemoryContextSwitchTo(ctx);
+ }
+ }
+ }
+ else if (IsA(parsetree, VacuumStmt))
+ {
+ VacuumStmt *vac = (VacuumStmt*)parsetree;
+ /* PG 12+ stores options as a DefElem list; older versions as a bitmask */
+ int options =
+#if PG_VERSION_NUM >= 120000
+ parse_vacuum_opt(vac)
+#else
+ vac->options
+#endif
+ ;
+
+
+ /* PG 11+ allows several relations per command (list of VacuumRelation) */
+#if PG_VERSION_NUM >= 110000
+ tblnames = vac->rels;
+#else
+ if (vac->relation)
+ tblnames = list_make1(vac->relation);
+#endif
+
+ if (options & (VACOPT_VACUUM | VACOPT_FULL | VACOPT_FREEZE))
+ {
+ /* optionally with analyze */
+ op = CK_VACUUM;
+
+ /* drop all collected stat */
+ if (tblnames == NIL)
+ relstatsInit();
+ }
+ else if (options & VACOPT_ANALYZE)
+ {
+ op = CK_ANALYZE;
+
+ /* should reset all counters */
+ if (tblnames == NIL)
+ {
+ HASH_SEQ_STATUS hs;
+ OnlineAnalyzeTableStat *rstat;
+ TimestampTz now = GetCurrentTimestamp();
+
+ hash_seq_init(&hs, relstats);
+
+ while((rstat = hash_seq_search(&hs)) != NULL)
+ {
+ rstat->mod_since_analyze = 0;
+ rstat->last_analyze_time = now;
+ }
+ }
+ }
+ else
+ tblnames = NIL;
+ }
+ }
+
+ /*
+ * On PG 10+ the chained hook takes the PlannedStmt, not the bare parse
+ * tree.  Temporarily aliasing the name lets the two calls below be written
+ * once for all versions.
+ */
+#if PG_VERSION_NUM >= 100000
+#define parsetree pstmt
+#endif
+
+ if (oldProcessUtilityHook)
+ oldProcessUtilityHook(parsetree, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ context, params,
+#if PG_VERSION_NUM >= 100000
+ queryEnv,
+#endif
+#else
+ params, isTopLevel,
+#endif
+ dest, completionTag);
+ else
+ standard_ProcessUtility(parsetree, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ context, params,
+#if PG_VERSION_NUM >= 100000
+ queryEnv,
+#endif
+#else
+ params, isTopLevel,
+#endif
+ dest, completionTag);
+
+#if PG_VERSION_NUM >= 100000
+#undef parsetree
+#endif
+
+ /* the statement has run: report every collected table */
+ if (tblnames) {
+ ListCell *l;
+
+ foreach(l, tblnames)
+ {
+ /* VACUUM/ANALYZE on PG 11+ yields VacuumRelation nodes, everything else RangeVars */
+ RangeVar *tblname =
+#if PG_VERSION_NUM >= 110000
+ (IsA(lfirst(l), VacuumRelation)) ?
+ ((VacuumRelation*)lfirst(l))->relation :
+#endif
+ (RangeVar*)lfirst(l);
+ Oid tblOid;
+
+ Assert(IsA(tblname, RangeVar));
+
+ /*
+ * missing_ok is true, so tblOid may be InvalidOid here.
+ * NOTE(review): confirm that makeAnalyze() tolerates an invalid OID.
+ */
+ tblOid = RangeVarGetRelid(tblname, NoLock, true);
+ makeAnalyze(tblOid, op, -1);
+ }
+ }
+}
+#endif
+
+
+/*
+ * relstatsInit
+ *		(Re)create the backend-local relstats hash table, empty.
+ *
+ * The first call creates a dedicated memory context below CacheMemoryContext;
+ * later calls reset that context, which discards the previous table and all
+ * of its entries, before a fresh table keyed by relation OID is built in it.
+ */
+static void
+relstatsInit(void)
+{
+	HASHCTL		ctl;
+
+	if (onlineAnalyzeMemoryContext == NULL)
+	{
+		/* first call: nothing may exist yet */
+		Assert(relstats == NULL);
+
+#if PG_VERSION_NUM < 90600
+		onlineAnalyzeMemoryContext =
+			AllocSetContextCreate(CacheMemoryContext,
+			"online_analyze storage context",
+			ALLOCSET_DEFAULT_MINSIZE,
+			ALLOCSET_DEFAULT_INITSIZE,
+			ALLOCSET_DEFAULT_MAXSIZE
+			);
+#else
+		onlineAnalyzeMemoryContext =
+			AllocSetContextCreate(CacheMemoryContext,
+			"online_analyze storage context", ALLOCSET_DEFAULT_SIZES);
+#endif
+	}
+	else
+	{
+		/* subsequent call: throw away the old table wholesale */
+		Assert(relstats != NULL);
+		MemoryContextReset(onlineAnalyzeMemoryContext);
+	}
+
+	MemSet(&ctl, 0, sizeof(ctl));
+	ctl.keysize = sizeof(Oid);
+	ctl.entrysize = sizeof(OnlineAnalyzeTableStat);
+	ctl.hash = oid_hash;
+	ctl.hcxt = onlineAnalyzeMemoryContext;
+
+	relstats = hash_create("online_analyze storage", 1024, &ctl,
+						   HASH_FUNCTION | HASH_CONTEXT | HASH_ELEM);
+}
+
+void _PG_init(void);
+/*
+ * _PG_init
+ *		Module load entry point.
+ *
+ * Creates the local statistics cache, chains this module into the
+ * ExecutorEnd and (PG >= 9.2) ProcessUtility hooks, defines the
+ * online_analyze.* configuration parameters and registers the transaction
+ * callback that purges cache entries of dropped tables.
+ *
+ * The DefineCustom*Variable() argument lists differ between PostgreSQL
+ * versions, hence the #if's: from 8.4 on a boot value and a flags argument
+ * are passed, and from 9.1 on an additional (check) hook slot exists.
+ * Wherever a boot value is required, the variable's current value is passed.
+ */
+void
+_PG_init(void)
+{
+ relstatsInit();
+
+ /* remember the previous hooks so they can be chained to (and restored) */
+ oldExecutorEndHook = ExecutorEnd_hook;
+
+ ExecutorEnd_hook = onlineAnalyzeHooker;
+
+#if PG_VERSION_NUM >= 90200
+ oldProcessUtilityHook = ProcessUtility_hook;
+
+ ProcessUtility_hook = onlineAnalyzeHookerUtility;
+#endif
+
+
+ /* master switch, checked by both hooks */
+ DefineCustomBoolVariable(
+ "online_analyze.enable",
+ "Enable on-line analyze",
+ "Enables analyze of table directly after insert/update/delete/select into",
+ &online_analyze_enable,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_enable,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomBoolVariable(
+ "online_analyze.local_tracking",
+ "Per backend tracking",
+ "Per backend tracking for temp tables (do not use system statistic)",
+ &online_analyze_local_tracking,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_local_tracking,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomBoolVariable(
+ "online_analyze.verbose",
+ "Verbosity of on-line analyze",
+ "Make ANALYZE VERBOSE after table's changes",
+ &online_analyze_verbose,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_verbose,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /* allowed range: 0.0 .. 1.0 */
+ DefineCustomRealVariable(
+ "online_analyze.scale_factor",
+ "fraction of table size to start on-line analyze",
+ "fraction of table size to start on-line analyze",
+ &online_analyze_scale_factor,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_scale_factor,
+#endif
+ 0.0,
+ 1.0,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /* allowed range: 0 .. INT_MAX (0x7fffffff) */
+ DefineCustomIntVariable(
+ "online_analyze.threshold",
+ "min number of row updates before on-line analyze",
+ "min number of row updates before on-line analyze",
+ &online_analyze_threshold,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_threshold,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /* compared against the number of relstats entries to decide on a cache reset */
+ DefineCustomIntVariable(
+ "online_analyze.capacity_threshold",
+ "Max local cache table capacity",
+ "Max local cache table capacity",
+ &online_analyze_capacity_threshold,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_capacity_threshold,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomRealVariable(
+ "online_analyze.min_interval",
+ "minimum time interval between analyze call (in milliseconds)",
+ "minimum time interval between analyze call (in milliseconds)",
+ &online_analyze_min_interval,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_min_interval,
+#endif
+ 0.0,
+ 1e30,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /* enum GUC; accepted values come from online_analyze_table_type_options */
+ DefineCustomEnumVariable(
+ "online_analyze.table_type",
+ "Type(s) of table for online analyze: all(default), persistent, temporary, none",
+ NULL,
+ &online_analyze_table_type,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_table_type,
+#endif
+ online_analyze_table_type_options,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /*
+ * The two table-list parameters default to an empty string and use
+ * dedicated check/assign/show hooks (no separate check hook before 9.1).
+ */
+ DefineCustomStringVariable(
+ "online_analyze.exclude_tables",
+ "List of tables which will not online analyze",
+ NULL,
+ &excludeTables.tableStr,
+#if PG_VERSION_NUM >= 80400
+ "",
+#endif
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ excludeTablesCheck,
+ excludeTablesAssign,
+#else
+ excludeTablesAssign,
+#endif
+ excludeTablesShow
+ );
+
+ DefineCustomStringVariable(
+ "online_analyze.include_tables",
+ "List of tables which will online analyze",
+ NULL,
+ &includeTables.tableStr,
+#if PG_VERSION_NUM >= 80400
+ "",
+#endif
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ includeTablesCheck,
+ includeTablesAssign,
+#else
+ includeTablesAssign,
+#endif
+ includeTablesShow
+ );
+
+ DefineCustomIntVariable(
+ "online_analyze.lower_limit",
+ "min number of rows in table to analyze",
+ "min number of rows in table to analyze",
+ &online_analyze_lower_limit,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_lower_limit,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /* removeTable() drops cached stats of tables dropped in a committed transaction */
+ RegisterXactCallback(removeTable, NULL);
+}
+
+#if PG_VERSION_NUM < 150000
+void _PG_fini(void);
+/*
+ * _PG_fini
+ *		Module unload entry point (only compiled for PostgreSQL < 15).
+ *
+ * Undoes what _PG_init() installed: restores the previous executor/utility
+ * hooks, removes the transaction callback so that no pointer into the
+ * unloaded library stays registered, and releases the parsed include/exclude
+ * table lists.
+ */
+void
+_PG_fini(void)
+{
+	ExecutorEnd_hook = oldExecutorEndHook;
+#if PG_VERSION_NUM >= 90200
+	ProcessUtility_hook = oldProcessUtilityHook;
+#endif
+
+	/* counterpart of RegisterXactCallback(removeTable, NULL) in _PG_init() */
+	UnregisterXactCallback(removeTable, NULL);
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(excludeTables.tables);
+	free(includeTables.tables);
+
+	excludeTables.tables = includeTables.tables = NULL;
+	excludeTables.nTables = includeTables.nTables = 0;
+}
+#endif
diff --git a/contrib/pg_stat_statements/expected/level_tracking.out b/contrib/pg_stat_statements/expected/level_tracking.out
index 8213fcd2e61..b9ace0ab500 100644
--- a/contrib/pg_stat_statements/expected/level_tracking.out
+++ b/contrib/pg_stat_statements/expected/level_tracking.out
@@ -194,15 +194,14 @@ EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab
(6 rows)
EXPLAIN (COSTS OFF) SELECT 1 UNION SELECT 2;
- QUERY PLAN
---------------------------
- Unique
- -> Sort
- Sort Key: (1)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ QUERY PLAN
+--------------------
+ HashAggregate
+ Group Key: (1)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
@@ -320,15 +319,14 @@ EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab
(6 rows)
EXPLAIN (COSTS OFF) SELECT 1 UNION SELECT 2;
- QUERY PLAN
---------------------------
- Unique
- -> Sort
- Sort Key: (1)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ QUERY PLAN
+--------------------
+ HashAggregate
+ Group Key: (1)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
@@ -383,25 +381,23 @@ EXPLAIN (COSTS OFF) (SELECT 1, 2, 3)\; EXPLAIN (COSTS OFF) (SELECT 1, 2, 3, 4);
(1 row)
EXPLAIN (COSTS OFF) SELECT 1, 2 UNION SELECT 3, 4\; EXPLAIN (COSTS OFF) (SELECT 1, 2, 3) UNION SELECT 3, 4, 5;
+ QUERY PLAN
+-----------------------
+ HashAggregate
+ Group Key: (1), (2)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
+
QUERY PLAN
----------------------------
- Unique
- -> Sort
- Sort Key: (1), (2)
- -> Append
- -> Result
- -> Result
-(6 rows)
-
- QUERY PLAN
----------------------------------
- Unique
- -> Sort
- Sort Key: (1), (2), (3)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ HashAggregate
+ Group Key: (1), (2), (3)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
@@ -674,25 +670,23 @@ EXPLAIN (COSTS OFF) MERGE INTO stats_track_tab USING (SELECT id FROM generate_se
(1 row)
EXPLAIN (COSTS OFF) SELECT 1, 2 UNION SELECT 3, 4\; EXPLAIN (COSTS OFF) (SELECT 1, 2, 3) UNION SELECT 3, 4, 5;
+ QUERY PLAN
+-----------------------
+ HashAggregate
+ Group Key: (1), (2)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
+
QUERY PLAN
----------------------------
- Unique
- -> Sort
- Sort Key: (1), (2)
- -> Append
- -> Result
- -> Result
-(6 rows)
-
- QUERY PLAN
----------------------------------
- Unique
- -> Sort
- Sort Key: (1), (2), (3)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ HashAggregate
+ Group Key: (1), (2), (3)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
@@ -777,15 +771,14 @@ EXPLAIN (COSTS OFF) WITH a AS (SELECT 4) MERGE INTO stats_track_tab
(6 rows)
EXPLAIN (COSTS OFF) WITH a AS (select 4) SELECT 1 UNION SELECT 2;
- QUERY PLAN
---------------------------
- Unique
- -> Sort
- Sort Key: (1)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ QUERY PLAN
+--------------------
+ HashAggregate
+ Group Key: (1)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
@@ -871,15 +864,14 @@ EXPLAIN (COSTS OFF) WITH a AS (SELECT 4) MERGE INTO stats_track_tab
(6 rows)
EXPLAIN (COSTS OFF) WITH a AS (select 4) SELECT 1 UNION SELECT 2;
- QUERY PLAN
---------------------------
- Unique
- -> Sort
- Sort Key: (1)
- -> Append
- -> Result
- -> Result
-(6 rows)
+ QUERY PLAN
+--------------------
+ HashAggregate
+ Group Key: (1)
+ -> Append
+ -> Result
+ -> Result
+(5 rows)
SELECT toplevel, calls, query FROM pg_stat_statements
ORDER BY query COLLATE "C";
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 9b3a33118f4..5d1f53b9eda 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -890,7 +890,7 @@ pgss_planner(Query *parse,
int cursorOptions,
ParamListInfo boundParams)
{
- PlannedStmt *result;
+ PlannedStmt *result = NULL;
/*
* We can't process the query if no query_string is provided, as
Submodule contrib/pg_wait_sampling 00000000000...91b163ddc37 (new submodule)
diff --git a/contrib/pg_wait_sampling/.gitignore b/contrib/pg_wait_sampling/.gitignore
new file mode 100644
index 0000000..e066fb5
--- /dev/null
+++ b/contrib/pg_wait_sampling/.gitignore
@@ -0,0 +1,6 @@
+*.o
+*.so
+/.deps/
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_wait_sampling/.travis.yml b/contrib/pg_wait_sampling/.travis.yml
new file mode 100644
index 0000000..f68e8de
--- /dev/null
+++ b/contrib/pg_wait_sampling/.travis.yml
@@ -0,0 +1,36 @@
+dist: jammy
+language: c
+env:
+- PG_MAJOR=19 SNAPSHOT=1
+- PG_MAJOR=18
+- PG_MAJOR=17
+- PG_MAJOR=16
+- PG_MAJOR=15
+- PG_MAJOR=14
+- PG_MAJOR=13
+before_script:
+- curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
+- |
+ if [ -n "${SNAPSHOT}" ]; then
+ echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg-snapshot main ${PG_MAJOR}"
+ elif [ -n "${BETA}" ]; then
+ echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main ${PG_MAJOR}"
+ else
+ echo "deb https://apt.postgresql.org/pub/repos/apt/ $(lsb_release -cs)-pgdg main"
+ fi | sudo tee /etc/apt/sources.list.d/postgresql.list
+- |
+ if [ -n "${SNAPSHOT}" ]; then
+ {
+ echo "Package: *"
+ echo "Pin: origin apt.postgresql.org"
+ echo "Pin-Priority: 600"
+ } | sudo tee /etc/apt/preferences.d/pgdg.pref
+ fi
+- sudo apt-get update
+- sudo systemctl stop postgresql
+- sudo apt-get install -y --no-install-recommends postgresql-client-${PG_MAJOR} postgresql-${PG_MAJOR} postgresql-server-dev-${PG_MAJOR}
+- sudo systemctl stop postgresql
+script: ./run-tests.sh
+after_script:
+- cat regression.diffs
+- cat logfile
diff --git a/contrib/pg_wait_sampling/LICENSE b/contrib/pg_wait_sampling/LICENSE
new file mode 100644
index 0000000..46c4b8f
--- /dev/null
+++ b/contrib/pg_wait_sampling/LICENSE
@@ -0,0 +1,11 @@
+pg_wait_sampling is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses.
+
+Copyright (c) 2015-2025, Postgres Professional
+Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+Portions Copyright (c) 1994, The Regents of the University of California
+
+Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
+
+IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/contrib/pg_wait_sampling/Makefile b/contrib/pg_wait_sampling/Makefile
new file mode 100644
index 0000000..32711a3
--- /dev/null
+++ b/contrib/pg_wait_sampling/Makefile
@@ -0,0 +1,22 @@
+# contrib/pg_wait_sampling/Makefile
+
+MODULE_big = pg_wait_sampling
+OBJS = pg_wait_sampling.o collector.o
+
+EXTENSION = pg_wait_sampling
+DATA = pg_wait_sampling--1.1.sql pg_wait_sampling--1.0--1.1.sql
+
+REGRESS = load queries
+
+EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_wait_sampling
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_wait_sampling/README.md b/contrib/pg_wait_sampling/README.md
new file mode 100644
index 0000000..bbdbd20
--- /dev/null
+++ b/contrib/pg_wait_sampling/README.md
@@ -0,0 +1,187 @@
+[Build Status](https://app.travis-ci.com/postgrespro/pg_wait_sampling)
+[License](https://raw.githubusercontent.com/postgrespro/pg_wait_sampling/master/LICENSE)
+
+`pg_wait_sampling` – sampling based statistics of wait events
+=============================================================
+
+Introduction
+------------
+
+PostgreSQL provides information about the current wait event of a particular
+process. However, in order to gather descriptive statistics of server
+behavior, the user has to sample the current wait event multiple times.
+`pg_wait_sampling` is an extension for collecting sampling statistics of wait
+events.
+
+The module must be loaded by adding `pg_wait_sampling` to
+`shared_preload_libraries` in postgresql.conf, because it requires additional
+shared memory and launches background worker. This means that a server restart
+is needed to add or remove the module.
+
+When used with `pg_stat_statements` it is recommended to put `pg_stat_statements`
+before `pg_wait_sampling` in `shared_preload_libraries` so queryIds of
+utility statements are not rewritten by the former.
+
+When `pg_wait_sampling` is enabled, it collects two kinds of statistics.
+
+ * History of wait events. It's implemented as an in-memory ring buffer where
+ samples of each process's wait events are written with a given (configurable)
+ period. Therefore, for each running process the user can see some number of
+ recent samples depending on the history size (configurable). Assuming there is
+ a client who periodically reads this history and dumps it somewhere, the user
+ can have a continuous history.
+ * Waits profile. It's implemented as an in-memory hash table where counts
+ of samples are accumulated per process and per wait event
+ (and per query with `pg_stat_statements`). This hash
+ table can be reset by user request. Assuming there is a client who
+ periodically dumps the profile and resets it, the user can have statistics of
+ the intensity of wait events over time.
+
+In combination with `pg_stat_statements` this extension can also provide
+per query statistics.
+
+`pg_wait_sampling` launches special background worker for gathering the
+statistics above.
+
+Availability
+------------
+
+`pg_wait_sampling` is implemented as an extension and not available in default
+PostgreSQL installation. It is available from
+[github](https://github.com/postgrespro/pg_wait_sampling)
+under the same license as
+[PostgreSQL](http://www.postgresql.org/about/licence/)
+and supports PostgreSQL 13+.
+
+Installation
+------------
+
+Pre-built `pg_wait_sampling` packages are provided in official PostgreSQL
+repository: https://download.postgresql.org/pub/repos/
+
+Manual build
+------------
+
+`pg_wait_sampling` is a PostgreSQL extension which requires PostgreSQL 13 or
+higher. Before building and installing it, you should ensure the following:
+
+ * PostgreSQL version is 13 or higher.
+ * You have the development package of PostgreSQL installed or you built
+ PostgreSQL from source.
+ * Your PATH variable is configured so that the `pg_config` command is available,
+ or the PG_CONFIG variable is set.
+
+Typical installation procedure may look like this:
+
+ $ git clone https://github.com/postgrespro/pg_wait_sampling.git
+ $ cd pg_wait_sampling
+ $ make USE_PGXS=1
+ $ sudo make USE_PGXS=1 install
+
+Then add `shared_preload_libraries = pg_wait_sampling` to `postgresql.conf` and
+restart the server.
+
+To test your installation:
+
+ $ make USE_PGXS=1 installcheck
+
+To create the extension in the target database:
+
+ CREATE EXTENSION pg_wait_sampling;
+
+Compilation on Windows is not supported, since the extension uses symbols from PostgreSQL
+that are not exported.
+
+Usage
+-----
+
+`pg_wait_sampling` interacts with the user through a set of views and functions.
+
+`pg_wait_sampling_current` view – information about current wait events for
+all processes including background workers.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+
+`pg_wait_sampling_get_current(pid int4)` returns the same table for single given
+process.
+
+`pg_wait_sampling_history` view – history of wait events obtained by sampling into
+in-memory ring buffer.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| ts | timestamptz | Sample timestamp |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+
+`pg_wait_sampling_profile` view – profile of wait events obtained by sampling into
+in-memory hash table.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+| count | text | Count of samples |
+
+`pg_wait_sampling_reset_profile()` function resets the profile.
+
+The work of wait event statistics collector worker is controlled by following
+GUCs.
+
+| Parameter name | Data type | Description | Default value |
+|----------------------------------| --------- |---------------------------------------------|--------------:|
+| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 |
+| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds | 10 |
+| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 |
+| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true |
+| pg_wait_sampling.profile_queries | enum | Whether profile should be per query | top |
+| pg_wait_sampling.sample_cpu | bool | Whether on CPU backends should be sampled | true |
+
+If `pg_wait_sampling.profile_pid` is set to false, the sampling profile is not
+collected in a per-process manner. In this case the value of pid will
+always be zero and the corresponding row contains samples across all processes.
+
+If `pg_wait_sampling.profile_queries` is set to `none`, `queryid` field in
+views will be zero. If it is set to `top`, queryIds only of top level statements
+are recorded. If it is set to `all`, queryIds of nested statements are recorded.
+
+If `pg_wait_sampling.sample_cpu` is set to true then processes that are not
+waiting on anything are also sampled. The wait event columns for such processes
+will be NULL.
+
+Values of these GUC variables can be changed only in config file or with ALTER SYSTEM.
+Then you need to reload server's configuration (such as with pg_reload_conf function)
+for changes to take effect.
+
+See
+[PostgreSQL documentation](http://www.postgresql.org/docs/devel/static/monitoring-stats.html#WAIT-EVENT-TABLE)
+for list of possible wait events.
+
+Contribution
+------------
+
+Please note that `pg_wait_sampling` is still under development and, while
+it's stable and tested, it may contain some bugs. Don't hesitate to raise
+[issues at github](https://github.com/postgrespro/pg_wait_sampling/issues) with
+your bug reports.
+
+If you find some functionality missing in `pg_wait_sampling` and feel able
+to implement it, you're welcome to make pull requests.
+
+Authors
+-------
+
+ * Alexander Korotkov <a.korotkov@postgrespro.ru>, Postgres Professional,
+ Moscow, Russia
+ * Ildus Kurbangaliev <i.kurbangaliev@gmail.com>, Postgres Professional,
+ Moscow, Russia
+
diff --git a/contrib/pg_wait_sampling/collector.c b/contrib/pg_wait_sampling/collector.c
new file mode 100644
index 0000000..721299f
--- /dev/null
+++ b/contrib/pg_wait_sampling/collector.c
@@ -0,0 +1,486 @@
+/*
+ * collector.c
+ * Collector of wait event history and profile.
+ *
+ * Copyright (c) 2015-2025, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/collector.c
+ */
+#include "postgres.h"
+
+#include <signal.h>
+
+#include "compat.h"
+#include "miscadmin.h"
+#include "pg_wait_sampling.h"
+#include "pgstat.h"
+#include "postmaster/bgworker.h"
+#include "postmaster/interrupt.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lock.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/procsignal.h"
+#include "storage/shm_mq.h"
+#include "utils/guc.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "utils/timestamp.h"
+
+static volatile sig_atomic_t shutdown_requested = false;
+
+static void handle_sigterm(SIGNAL_ARGS);
+
+/*
+ * Describe the wait collector process and hand it to the bgworker machinery.
+ */
+void
+pgws_register_wait_collector(void)
+{
+ BackgroundWorker bgw;
+
+ /* Zero every field first, then fill in only what the collector needs */
+ memset(&bgw, 0, sizeof(bgw));
+ snprintf(bgw.bgw_name, BGW_MAXLEN, "pg_wait_sampling collector");
+ snprintf(bgw.bgw_library_name, BGW_MAXLEN, "pg_wait_sampling");
+ snprintf(bgw.bgw_function_name, BGW_MAXLEN, CppAsString(pgws_collector_main));
+ bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
+ bgw.bgw_start_time = BgWorkerStart_ConsistentState;
+ bgw.bgw_restart_time = 1;
+ bgw.bgw_main_arg = (Datum) 0;
+ bgw.bgw_notify_pid = 0;
+ RegisterBackgroundWorker(&bgw);
+}
+
+/*
+ * Set up an empty history buffer with room for 'count' zeroed items.
+ */
+static void
+alloc_history(History *observations, int count)
+{
+ observations->items = (HistoryItem *) palloc0(count * sizeof(HistoryItem));
+ observations->wraparound = false;
+ observations->count = count;
+ observations->index = 0;
+}
+
+/*
+ * Resize the history buffer to 'count' items, keeping collected entries.
+ */
+static void
+realloc_history(History *observations, int count)
+{
+ HistoryItem *newitems;
+ int copyCount,
+ i,
+ j;
+
+ /* Allocate new array for history */
+ newitems = (HistoryItem *) palloc0(sizeof(HistoryItem) * count);
+
+ /* A wrapped buffer is full; otherwise only slots below index are used */
+ if (observations->wraparound)
+ copyCount = observations->count;
+ else
+ copyCount = observations->index;
+
+ copyCount = Min(copyCount, count);
+
+ i = 0;
+ if (observations->wraparound)
+ j = observations->index; /* oldest entry: the slot to be overwritten next */
+ else
+ j = 0;
+ while (i < copyCount)
+ {
+ if (j >= observations->count)
+ j = 0; /* wrap around the end of the old array */
+ memcpy(&newitems[i], &observations->items[j], sizeof(HistoryItem));
+ i++;
+ j++;
+ }
+
+ /* Switch to new array; the next write lands right after the copied entries */
+ pfree(observations->items);
+ observations->items = newitems;
+ observations->index = copyCount;
+ observations->count = count;
+ observations->wraparound = false;
+}
+
+static void
+handle_sigterm(SIGNAL_ARGS) /* SIGTERM handler: flag a shutdown request and set our latch */
+{
+ int save_errno = errno; /* restored on exit so the handler leaves errno unchanged */
+
+ shutdown_requested = true; /* checked in the main loop of pgws_collector_main() */
+ if (MyProc)
+ SetLatch(&MyProc->procLatch); /* the collector waits on this latch in WaitLatch() */
+ errno = save_errno;
+}
+
+/*
+ * Hand out the next slot of the circular history buffer.
+ */
+static HistoryItem *
+get_next_observation(History *observations)
+{
+ HistoryItem *slot;
+
+ /* Past the last slot: start over at the first and remember we wrapped */
+ if (observations->index >= observations->count)
+ {
+ observations->wraparound = true;
+ observations->index = 0;
+ }
+ slot = &observations->items[observations->index];
+ observations->index++;
+ return slot;
+}
+
+/*
+ * Sample every PGPROC slot once and record the result in the history
+ * buffer (write_history) and/or the profile hash (write_profile).
+ */
+static void
+probe_waits(History *observations, HTAB *profile_hash,
+ bool write_history, bool write_profile, bool profile_pid)
+{
+ int i,
+ newSize;
+ TimestampTz ts = GetCurrentTimestamp(); /* one timestamp shared by all samples of this pass */
+
+ /* Resize the history buffer if pgws_historySize differs from its current size */
+ newSize = pgws_historySize;
+ if (observations->count != newSize)
+ realloc_history(observations, newSize);
+
+ /* Iterate PGPROCs under shared lock */
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+ for (i = 0; i < ProcGlobal->allProcCount; i++)
+ {
+ HistoryItem item,
+ *observation;
+ PGPROC *proc = &ProcGlobal->allProcs[i];
+
+ if (!pgws_should_sample_proc(proc, &item.pid, &item.wait_event_info))
+ continue; /* this slot is not to be sampled */
+
+ if (pgws_profileQueries)
+ item.queryId = pgws_proc_queryids[i]; /* same index as in allProcs */
+ else
+ item.queryId = 0;
+
+ item.ts = ts;
+
+ /* Append a copy of the sample to the history buffer */
+ if (write_history)
+ {
+ observation = get_next_observation(observations);
+ *observation = item;
+ }
+
+ /* Count the sample in the profile; the item itself serves as the hash key */
+ if (write_profile)
+ {
+ ProfileItem *profileItem;
+ bool found;
+
+ if (!profile_pid)
+ item.pid = 0; /* aggregate over all processes; history copy was taken above */
+
+ profileItem = (ProfileItem *) hash_search(profile_hash, &item, HASH_ENTER, &found); /* NOTE(review): presumably HistoryItem and ProfileItem share their leading key fields -- confirm in pg_wait_sampling.h */
+ if (found)
+ profileItem->count++;
+ else
+ profileItem->count = 1; /* first sample for this key */
+ }
+ }
+ LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Send history to the shm_mq: item count first, then one message per item.
+ */
+static void
+send_history(History *observations, shm_mq_handle *mqh)
+{
+ Size count,
+ i;
+ shm_mq_result mq_result;
+
+ /* A wrapped buffer is completely filled; otherwise only slots below index */
+ if (observations->wraparound)
+ count = observations->count;
+ else
+ count = observations->index;
+
+ /* Send array size first since receive_array expects this */
+ mq_result = shm_mq_send_compat(mqh, sizeof(count), &count, false, true); /* nowait = false, force_flush = true */
+ if (mq_result == SHM_MQ_DETACHED)
+ {
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue has been detached")));
+ return;
+ }
+ for (i = 0; i < count; i++) /* array order, which is not chronological after a wraparound */
+ {
+ mq_result = shm_mq_send_compat(mqh,
+ sizeof(HistoryItem),
+ &observations->items[i],
+ false,
+ true);
+ if (mq_result == SHM_MQ_DETACHED)
+ {
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue has been detached")));
+ return; /* give up on the remaining items */
+ }
+ }
+}
+
+/*
+ * Send profile to the shm_mq: entry count first, then one message per entry.
+ */
+static void
+send_profile(HTAB *profile_hash, shm_mq_handle *mqh)
+{
+ HASH_SEQ_STATUS scan_status;
+ ProfileItem *item;
+ Size count = hash_get_num_entries(profile_hash);
+ shm_mq_result mq_result;
+
+ /* Send array size first since receive_array expects this */
+ mq_result = shm_mq_send_compat(mqh, sizeof(count), &count, false, true); /* nowait = false, force_flush = true */
+ if (mq_result == SHM_MQ_DETACHED)
+ {
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue has been detached")));
+ return;
+ }
+ hash_seq_init(&scan_status, profile_hash);
+ while ((item = (ProfileItem *) hash_seq_search(&scan_status)) != NULL)
+ {
+ mq_result = shm_mq_send_compat(mqh, sizeof(ProfileItem), item, false,
+ true);
+ if (mq_result == SHM_MQ_DETACHED)
+ {
+ hash_seq_term(&scan_status); /* scan is abandoned early, so end it explicitly */
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue has been detached")));
+ return;
+ }
+ }
+}
+
+/*
+ * Create an empty hash table for the wait profile.
+ */
+static HTAB *
+make_profile_hash(void)
+{
+ HASHCTL hash_ctl; /* only keysize and entrysize are set, matching HASH_ELEM */
+
+ if (pgws_profileQueries)
+ hash_ctl.keysize = offsetof(ProfileItem, count); /* key: every field before count */
+ else
+ hash_ctl.keysize = offsetof(ProfileItem, queryId); /* key: every field before queryId */
+
+ hash_ctl.entrysize = sizeof(ProfileItem);
+ return hash_create("Waits profile hash", 1024, &hash_ctl,
+ HASH_ELEM | HASH_BLOBS);
+}
+
+/*
+ * Delta from tz1 to tz2 in whole milliseconds.
+ */
+static int64
+millisecs_diff(TimestampTz tz1, TimestampTz tz2)
+{
+ long secs;
+ int microsecs;
+
+ TimestampDifference(tz1, tz2, &secs, &microsecs);
+
+ /* Widen before multiplying: long may be only 32 bits wide */
+ return (int64) secs * 1000 + microsecs / 1000;
+}
+
+/*
+ * Main routine of the collector bgworker: sample waits and serve requests.
+ */
+void
+pgws_collector_main(Datum main_arg)
+{
+ HTAB *profile_hash = NULL;
+ History observations;
+ MemoryContext old_context,
+ collector_context;
+ TimestampTz current_ts,
+ history_ts,
+ profile_ts;
+
+ /*
+ * Establish signal handlers.
+ *
+ * We want to respond to the ProcSignal notifications. This is done in
+ * the upstream provided procsignal_sigusr1_handler, which is
+ * automatically used if a bgworker connects to a database. Our worker
+ * doesn't connect to any database, but it still calls InitPostgres,
+ * which initializes a new backend and thus participates in the
+ * ProcSignal infrastructure, so the handler is installed explicitly.
+ */
+ pqsignal(SIGTERM, handle_sigterm);
+ pqsignal(SIGHUP, SignalHandlerForConfigReload);
+ pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+ BackgroundWorkerUnblockSignals();
+ InitPostgresCompat(NULL, InvalidOid, NULL, InvalidOid, 0, NULL);
+ SetProcessingMode(NormalProcessing);
+
+ /* Make pg_wait_sampling recognisable in pg_stat_activity */
+ pgstat_report_appname("pg_wait_sampling collector");
+
+ profile_hash = make_profile_hash();
+ pgws_collector_hdr->latch = &MyProc->procLatch;
+
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "pg_wait_sampling collector");
+ collector_context = AllocSetContextCreate(TopMemoryContext,
+ "pg_wait_sampling context", ALLOCSET_DEFAULT_SIZES);
+ old_context = MemoryContextSwitchTo(collector_context);
+ alloc_history(&observations, pgws_historySize);
+ MemoryContextSwitchTo(old_context);
+
+ ereport(LOG, (errmsg("pg_wait_sampling collector started")));
+
+ /* Start counting time for history and profile samples */
+ profile_ts = history_ts = GetCurrentTimestamp();
+
+ while (1)
+ {
+ int rc;
+ shm_mq_handle *mqh;
+ int64 history_diff,
+ profile_diff;
+ bool write_history,
+ write_profile;
+
+ /* We need an explicit call for at least ProcSignal notifications. */
+ CHECK_FOR_INTERRUPTS();
+
+ if (ConfigReloadPending)
+ {
+ ConfigReloadPending = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ }
+
+ /* Calculate time to next sample for history or profile */
+ current_ts = GetCurrentTimestamp();
+
+ history_diff = millisecs_diff(history_ts, current_ts);
+ profile_diff = millisecs_diff(profile_ts, current_ts);
+
+ write_history = (history_diff >= (int64) pgws_historyPeriod);
+ write_profile = (profile_diff >= (int64) pgws_profilePeriod);
+
+ if (write_history || write_profile)
+ {
+ probe_waits(&observations, profile_hash,
+ write_history, write_profile, pgws_profilePid);
+
+ if (write_history)
+ {
+ history_ts = current_ts;
+ history_diff = 0;
+ }
+
+ if (write_profile)
+ {
+ profile_ts = current_ts;
+ profile_diff = 0;
+ }
+ }
+
+ /* Shutdown if requested */
+ if (shutdown_requested)
+ break;
+
+ /*
+ * Sleep until the next history or profile sample is due, or until the
+ * latch is set (e.g. for a request made through shared memory).
+ */
+ rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ Min(pgws_historyPeriod - (int) history_diff,
+ pgws_profilePeriod - (int) profile_diff), PG_WAIT_EXTENSION);
+
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+
+ /* Handle request if any */
+ if (pgws_collector_hdr->request != NO_REQUEST)
+ {
+ LOCKTAG tag;
+ SHMRequest request;
+
+ pgws_init_lock_tag(&tag, PGWS_COLLECTOR_LOCK);
+
+ LockAcquire(&tag, ExclusiveLock, false, false);
+ request = pgws_collector_hdr->request;
+ pgws_collector_hdr->request = NO_REQUEST;
+
+ if (request == HISTORY_REQUEST || request == PROFILE_REQUEST)
+ {
+ shm_mq_result mq_result;
+
+ /* Send history or profile */
+ shm_mq_set_sender(pgws_collector_mq, MyProc);
+ mqh = shm_mq_attach(pgws_collector_mq, NULL, NULL);
+ mq_result = shm_mq_wait_for_attach(mqh);
+ switch (mq_result)
+ {
+ case SHM_MQ_SUCCESS:
+ switch (request)
+ {
+ case HISTORY_REQUEST:
+ send_history(&observations, mqh);
+ break;
+ case PROFILE_REQUEST:
+ send_profile(profile_hash, mqh);
+ break;
+ default:
+ Assert(false);
+ }
+ break;
+ case SHM_MQ_DETACHED:
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue have been "
+ "detached")));
+ break;
+ default:
+ Assert(false);
+ }
+ shm_mq_detach(mqh);
+ }
+ else if (request == PROFILE_RESET)
+ {
+ /* Reset profile hash */
+ hash_destroy(profile_hash);
+ profile_hash = make_profile_hash();
+ }
+ LockRelease(&tag, ExclusiveLock, false);
+ }
+ }
+
+ MemoryContextReset(collector_context);
+
+ ereport(LOG, (errmsg("pg_wait_sampling collector shutting down")));
+ proc_exit(0);
+}
diff --git a/contrib/pg_wait_sampling/compat.h b/contrib/pg_wait_sampling/compat.h
new file mode 100644
index 0000000..32aef49
--- /dev/null
+++ b/contrib/pg_wait_sampling/compat.h
@@ -0,0 +1,50 @@
+/*
+ * compat.h
+ * Definitions for function wrappers compatible between PG versions.
+ *
+ * Copyright (c) 2015-2025, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/compat.h
+ */
+#ifndef __COMPAT_H__
+#define __COMPAT_H__
+
+#include "miscadmin.h"
+#include "storage/shm_mq.h"
+
+static inline shm_mq_result /* shm_mq_send() wrapper that hides the signature change at PG15 */
+shm_mq_send_compat(shm_mq_handle *mqh, Size nbytes, const void *data,
+ bool nowait, bool force_flush)
+{
+#if PG_VERSION_NUM >= 150000 /* force_flush is passed through */
+ return shm_mq_send(mqh, nbytes, data, nowait, force_flush);
+#else /* older signature: force_flush is ignored */
+ return shm_mq_send(mqh, nbytes, data, nowait);
+#endif
+}
+
+#if PG_VERSION_NUM < 170000
+#define INIT_PG_LOAD_SESSION_LIBS 0x0001
+#define INIT_PG_OVERRIDE_ALLOW_CONNS 0x0002
+#endif
+
+static inline void /* InitPostgres() wrapper taking the flags-style argument list on every version */
+InitPostgresCompat(const char *in_dbname, Oid dboid,
+ const char *username, Oid useroid,
+ bits32 flags,
+ char *out_dbname)
+{
+#if PG_VERSION_NUM >= 170000 /* flags bitmask is passed through as is */
+ InitPostgres(in_dbname, dboid, username, useroid, flags, out_dbname);
+#elif PG_VERSION_NUM >= 150000 /* each flag bit becomes its own argument */
+ InitPostgres(in_dbname, dboid, username, useroid,
+ flags & INIT_PG_LOAD_SESSION_LIBS,
+ flags & INIT_PG_OVERRIDE_ALLOW_CONNS, out_dbname);
+#else /* only INIT_PG_OVERRIDE_ALLOW_CONNS is forwarded; out_dbname moves before it */
+ InitPostgres(in_dbname, dboid, username, useroid, out_dbname,
+ flags & INIT_PG_OVERRIDE_ALLOW_CONNS);
+#endif
+}
+
+#endif
diff --git a/contrib/pg_wait_sampling/conf.add b/contrib/pg_wait_sampling/conf.add
new file mode 100644
index 0000000..54c013d
--- /dev/null
+++ b/contrib/pg_wait_sampling/conf.add
@@ -0,0 +1 @@
+shared_preload_libraries = 'pg_wait_sampling'
diff --git a/contrib/pg_wait_sampling/expected/load.out b/contrib/pg_wait_sampling/expected/load.out
new file mode 100644
index 0000000..b7de0ac
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/load.out
@@ -0,0 +1,31 @@
+CREATE EXTENSION pg_wait_sampling;
+\d pg_wait_sampling_current
+View "public.pg_wait_sampling_current"
+ Column | Type | Modifiers
+------------+---------+-----------
+ pid | integer |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+
+\d pg_wait_sampling_history
+ View "public.pg_wait_sampling_history"
+ Column | Type | Modifiers
+------------+--------------------------+-----------
+ pid | integer |
+ ts | timestamp with time zone |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+
+\d pg_wait_sampling_profile
+View "public.pg_wait_sampling_profile"
+ Column | Type | Modifiers
+------------+---------+-----------
+ pid | integer |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+ count | bigint |
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/expected/load_1.out b/contrib/pg_wait_sampling/expected/load_1.out
new file mode 100644
index 0000000..1a1358a
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/load_1.out
@@ -0,0 +1,31 @@
+CREATE EXTENSION pg_wait_sampling;
+\d pg_wait_sampling_current
+ View "public.pg_wait_sampling_current"
+ Column | Type | Collation | Nullable | Default
+------------+---------+-----------+----------+---------
+ pid | integer | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+
+\d pg_wait_sampling_history
+ View "public.pg_wait_sampling_history"
+ Column | Type | Collation | Nullable | Default
+------------+--------------------------+-----------+----------+---------
+ pid | integer | | |
+ ts | timestamp with time zone | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+
+\d pg_wait_sampling_profile
+ View "public.pg_wait_sampling_profile"
+ Column | Type | Collation | Nullable | Default
+------------+---------+-----------+----------+---------
+ pid | integer | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+ count | bigint | | |
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/expected/queries.out b/contrib/pg_wait_sampling/expected/queries.out
new file mode 100644
index 0000000..722df5f
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/queries.out
@@ -0,0 +1,48 @@
+CREATE EXTENSION pg_wait_sampling;
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_current)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_history)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+-- Some dummy checks just to be sure that all our functions work and return something.
+SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid());
+ test
+------
+ t
+(1 row)
+
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile();
+ test
+------
+ t
+(1 row)
+
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history();
+ test
+------
+ t
+(1 row)
+
+SELECT pg_wait_sampling_reset_profile();
+ pg_wait_sampling_reset_profile
+--------------------------------
+
+(1 row)
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/meson.build b/contrib/pg_wait_sampling/meson.build
new file mode 100644
index 0000000..c3c3dc9
--- /dev/null
+++ b/contrib/pg_wait_sampling/meson.build
@@ -0,0 +1,41 @@
+# Copyright (c) 2025, Postgres Professional
+
+# Does not support the PGXS infrastructure at this time. Please, compile as part
+# of the contrib source tree.
+
+pg_wait_sampling_sources = files(
+ 'collector.c',
+ 'pg_wait_sampling.c',
+)
+
+if host_system == 'windows'
+ pg_wait_sampling_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_wait_sampling',
+ '--FILEDESC', 'pg_wait_sampling - provides information about the current wait events for particular processes.',])
+endif
+
+pg_wait_sampling = shared_module('pg_wait_sampling',
+ pg_wait_sampling_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_wait_sampling
+
+install_data(
+ 'pg_wait_sampling.control',
+ 'pg_wait_sampling--1.0--1.1.sql',
+ 'pg_wait_sampling--1.1.sql',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'pg_wait_sampling',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'load',
+ 'queries',
+ ],
+ 'regress_args': ['--temp-config', files('conf.add')],
+ },
+}
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql b/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql
new file mode 100644
index 0000000..3831394
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql
@@ -0,0 +1,70 @@
+/* contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql */
+
+DROP FUNCTION pg_wait_sampling_get_current (
+ pid int4,
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text
+) CASCADE;
+
+DROP FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text
+) CASCADE;
+
+DROP FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT count bigint
+) CASCADE;
+
+CREATE FUNCTION pg_wait_sampling_get_current (
+ pid int4,
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE CALLED ON NULL INPUT;
+
+CREATE VIEW pg_wait_sampling_current AS
+ SELECT * FROM pg_wait_sampling_get_current(NULL::integer);
+
+GRANT SELECT ON pg_wait_sampling_current TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_history AS
+ SELECT * FROM pg_wait_sampling_get_history();
+
+GRANT SELECT ON pg_wait_sampling_history TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8,
+ OUT count int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_profile AS
+ SELECT * FROM pg_wait_sampling_get_profile();
+
+GRANT SELECT ON pg_wait_sampling_profile TO PUBLIC;
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql b/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql
new file mode 100644
index 0000000..e1bdf6a
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql
@@ -0,0 +1,60 @@
+/* contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_wait_sampling" to load this file. \quit
+
+CREATE FUNCTION pg_wait_sampling_get_current (
+ pid int4,
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE CALLED ON NULL INPUT;
+
+CREATE VIEW pg_wait_sampling_current AS
+ SELECT * FROM pg_wait_sampling_get_current(NULL::integer);
+
+GRANT SELECT ON pg_wait_sampling_current TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_history AS
+ SELECT * FROM pg_wait_sampling_get_history();
+
+GRANT SELECT ON pg_wait_sampling_history TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8,
+ OUT count int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_profile AS
+ SELECT * FROM pg_wait_sampling_get_profile();
+
+GRANT SELECT ON pg_wait_sampling_profile TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_reset_profile()
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_wait_sampling_reset_profile() FROM PUBLIC;
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.c b/contrib/pg_wait_sampling/pg_wait_sampling.c
new file mode 100644
index 0000000..7e8abb9
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.c
@@ -0,0 +1,1181 @@
+/*
+ * pg_wait_sampling.c
+ * Track information about wait events.
+ *
+ * Copyright (c) 2015-2025, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/pg_wait_sampling.c
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_type_d.h"
+#include "executor/executor.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "optimizer/planner.h"
+#include "pg_wait_sampling.h"
+#include "pgstat.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lock.h"
+#include "storage/lwlock.h"
+#include "storage/proc.h"
+#include "storage/shm_mq.h"
+#include "storage/shm_toc.h"
+#include "storage/shmem.h"
+#include "tcop/utility.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/timestamp.h"
+
+#if PG_VERSION_NUM < 150000
+#include "postmaster/autovacuum.h"
+#include "replication/walsender.h"
+#endif
+
+PG_MODULE_MAGIC;
+
+void _PG_init(void);
+
+static bool shmem_initialized = false;
+
+/* Hooks variables */
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorRun_hook_type prev_ExecutorRun = NULL;
+static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
+static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
+static planner_hook_type planner_hook_next = NULL;
+static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+
+/* Current nesting depth of planner/Executor calls */
+static int nesting_level = 0;
+
+/* Pointers to shared memory objects */
+shm_mq *pgws_collector_mq = NULL;
+uint64 *pgws_proc_queryids = NULL;
+CollectorShmqHeader *pgws_collector_hdr = NULL;
+
+/* Receiver (backend) local shm_mq pointers and lock */
+static shm_mq *recv_mq = NULL;
+static shm_mq_handle *recv_mqh = NULL;
+static LOCKTAG queueTag;
+
+/* Hook functions */
+#if PG_VERSION_NUM >= 150000
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+#endif
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static PGPROC *search_proc(int backendPid);
+static PlannedStmt *pgws_planner_hook(Query *parse,
+#if PG_VERSION_NUM >= 130000
+ const char *query_string,
+#endif
+ int cursorOptions, ParamListInfo boundParams
+#if PG_VERSION_NUM >= 190000
+ , ExplainState *es
+#endif
+ );
+static void pgws_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void pgws_ExecutorRun(QueryDesc *queryDesc,
+ ScanDirection direction,
+ uint64 count
+#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 180000
+ ,bool execute_once
+#endif
+);
+static void pgws_ExecutorFinish(QueryDesc *queryDesc);
+static void pgws_ExecutorEnd(QueryDesc *queryDesc);
+static void pgws_ProcessUtility(PlannedStmt *pstmt,
+ const char *queryString,
+#if PG_VERSION_NUM >= 140000
+ bool readOnlyTree,
+#endif
+ ProcessUtilityContext context,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv,
+ DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+ QueryCompletion *qc
+#else
+ char *completionTag
+#endif
+);
+
+/*---- GUC variables ----*/
+
+typedef enum
+{
+ PGWS_PROFILE_QUERIES_NONE, /* profile no statements */
+ PGWS_PROFILE_QUERIES_TOP, /* only top level statements */
+ PGWS_PROFILE_QUERIES_ALL /* all statements, including nested ones */
+} PGWSTrackLevel;
+
+static const struct config_enum_entry pgws_profile_queries_options[] =
+{
+ {"none", PGWS_PROFILE_QUERIES_NONE, false},
+ {"off", PGWS_PROFILE_QUERIES_NONE, false},
+ {"no", PGWS_PROFILE_QUERIES_NONE, false},
+ {"false", PGWS_PROFILE_QUERIES_NONE, false},
+ {"0", PGWS_PROFILE_QUERIES_NONE, false},
+ {"top", PGWS_PROFILE_QUERIES_TOP, false},
+ {"on", PGWS_PROFILE_QUERIES_TOP, false},
+ {"yes", PGWS_PROFILE_QUERIES_TOP, false},
+ {"true", PGWS_PROFILE_QUERIES_TOP, false},
+ {"1", PGWS_PROFILE_QUERIES_TOP, false},
+ {"all", PGWS_PROFILE_QUERIES_ALL, false},
+ {NULL, 0, false}
+};
+
+int pgws_historySize = 5000; /* GUC pg_wait_sampling.history_size */
+int pgws_historyPeriod = 10; /* GUC pg_wait_sampling.history_period */
+int pgws_profilePeriod = 10; /* GUC pg_wait_sampling.profile_period */
+bool pgws_profilePid = true; /* GUC pg_wait_sampling.profile_pid */
+int pgws_profileQueries = PGWS_PROFILE_QUERIES_TOP; /* GUC pg_wait_sampling.profile_queries */
+bool pgws_sampleCpu = true; /* GUC pg_wait_sampling.sample_cpu */
+
+#define pgws_enabled(level) \
+ ((pgws_profileQueries == PGWS_PROFILE_QUERIES_ALL) || \
+ (pgws_profileQueries == PGWS_PROFILE_QUERIES_TOP && (level) == 0))
+
+/*
+ * Calculate the maximum number of processes (one query id slot each).
+ *
+ * The value has to be in sync with ProcGlobal->allProcCount, initialized in
+ * InitProcGlobal() (proc.c).
+ *
+ */
+static int
+get_max_procs_count(void)
+{
+ int count = 0;
+
+ /* First, add the maximum number of backends (MaxBackends). */
+#if PG_VERSION_NUM >= 150000
+
+ /*
+ * On pg15+, we can directly access the MaxBackends variable, as it will
+ * have already been initialized in shmem_request_hook.
+ */
+ Assert(MaxBackends > 0);
+ count += MaxBackends;
+#else
+
+ /*
+ * On older versions, we need to compute MaxBackends: bgworkers,
+ * autovacuum workers and launcher. This has to be in sync with the value
+ * computed in InitializeMaxBackends() (postinit.c)
+ *
+ * Note that we need to calculate the value as it won't be initialized
+ * when we need it during _PG_init().
+ *
+ * Note also that the value returned during _PG_init() might be different
+ * from the value returned later if some third-party modules change one of
+ * the underlying GUC. This isn't ideal but can't lead to a crash, as the
+ * value returned during _PG_init() is only used to ask for additional
+ * shmem with RequestAddinShmemSpace(), and postgres has an extra 100kB of
+ * shmem to compensate some small unaccounted usage. So if the value
+ * later changes, we will allocate and initialize the new (and correct)
+ * memory size, which will either work thanks to the extra 100kB of
+ * shmem, or fail (and prevent postgres startup) due to an out of shared
+ * memory error.
+ */
+ count += MaxConnections + autovacuum_max_workers + 1
+ + max_worker_processes;
+
+ /*
+ * Starting with pg12, wal senders aren't part of MaxConnections anymore
+ * and have to be accounted for.
+ */
+ count += max_wal_senders;
+#endif /* pg 15- */
+ /* End of MaxBackends calculation. */
+
+ /* Add AuxiliaryProcs */
+ count += NUM_AUXILIARY_PROCS;
+
+ return count;
+}
+
+/*
+ * Compute the size of the shared memory segment used by the extension.
+ */
+static Size
+pgws_shmem_size(void)
+{
+ shm_toc_estimator estimator;
+
+ shm_toc_initialize_estimator(&estimator);
+
+ /* One table-of-contents key per chunk estimated below */
+ shm_toc_estimate_keys(&estimator, 3);
+
+ /* Collector header */
+ shm_toc_estimate_chunk(&estimator, sizeof(CollectorShmqHeader));
+ /* Collector message queue */
+ shm_toc_estimate_chunk(&estimator, (Size) COLLECTOR_QUEUE_SIZE);
+ /* One query id per process slot */
+ shm_toc_estimate_chunk(&estimator, sizeof(uint64) * get_max_procs_count());
+
+ /* Fold chunk and key estimates into the final size */
+ return shm_toc_estimate(&estimator);
+}
+
+#if PG_VERSION_NUM >= 150000
+/*
+ * Hook run when shared memory may be requested: reserve the extension's share.
+ *
+ * Keep this in step with the equivalent request made directly from
+ * _PG_init() on pg14 and below.
+ */
+static void
+pgws_shmem_request(void)
+{
+ /* Give the previously installed hook, if any, its turn first */
+ if (prev_shmem_request_hook != NULL)
+ prev_shmem_request_hook();
+ RequestAddinShmemSpace(pgws_shmem_size());
+}
+#endif
+
+/*
+ * shmem_startup hook: create the shared objects, or attach to existing ones.
+ */
+static void
+pgws_shmem_startup(void)
+{
+ bool found;
+ Size segsize = pgws_shmem_size();
+ void *pgws;
+ shm_toc *toc;
+
+ pgws = ShmemInitStruct("pg_wait_sampling", segsize, &found);
+
+ if (!found)
+ {
+ /* Create shared objects; TOC keys: 0 = header, 1 = queue, 2 = query ids */
+ toc = shm_toc_create(PG_WAIT_SAMPLING_MAGIC, pgws, segsize);
+
+ pgws_collector_hdr = shm_toc_allocate(toc, sizeof(CollectorShmqHeader));
+ shm_toc_insert(toc, 0, pgws_collector_hdr);
+ pgws_collector_mq = shm_toc_allocate(toc, COLLECTOR_QUEUE_SIZE);
+ shm_toc_insert(toc, 1, pgws_collector_mq);
+ pgws_proc_queryids = shm_toc_allocate(toc,
+ sizeof(uint64) * get_max_procs_count());
+ shm_toc_insert(toc, 2, pgws_proc_queryids);
+ MemSet(pgws_proc_queryids, 0, sizeof(uint64) * get_max_procs_count()); /* every slot starts with query id 0 */
+ }
+ else
+ {
+ /* Attach to existing shared objects, using the same keys as above */
+ toc = shm_toc_attach(PG_WAIT_SAMPLING_MAGIC, pgws);
+ pgws_collector_hdr = shm_toc_lookup(toc, 0, false);
+ pgws_collector_mq = shm_toc_lookup(toc, 1, false);
+ pgws_proc_queryids = shm_toc_lookup(toc, 2, false);
+ }
+
+ shmem_initialized = true; /* tested by check_shmem() */
+
+ if (prev_shmem_startup_hook)
+ prev_shmem_startup_hook(); /* chain to the previously installed hook */
+}
+
+/*
+ * Raise an error unless the shared memory objects have been set up.
+ */
+static void
+check_shmem(void)
+{
+ if (shmem_initialized)
+ return;
+
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("pg_wait_sampling shared memory wasn't initialized yet")));
+}
+
+static void /* NOTE(review): presumably registered as an error/exit cleanup callback -- confirm at the call site */
+pgws_cleanup_callback(int code, Datum arg)
+{
+ elog(DEBUG3, "pg_wait_sampling cleanup: detaching shm_mq and releasing queue lock");
+ shm_mq_detach(recv_mqh); /* backend-local receiver handle */
+ LockRelease(&queueTag, ExclusiveLock, false); /* code and arg are not used */
+}
+
+/*
+ * Module load callback; a no-op unless loaded via shared_preload_libraries.
+ */
+void
+_PG_init(void)
+{
+ if (!process_shared_preload_libraries_in_progress)
+ return;
+
+#if PG_VERSION_NUM < 150000
+
+ /*
+ * Request additional shared resources. (These are no-ops if we're not in
+ * the postmaster process.) We'll allocate or attach to the shared
+ * resources in pgws_shmem_startup().
+ *
+ * If you change code here, don't forget to also report the modifications
+ * in pgws_shmem_request() for pg15 and later.
+ */
+ RequestAddinShmemSpace(pgws_shmem_size());
+#endif
+
+ pgws_register_wait_collector();
+
+ /*
+ * Install hooks, saving the previous value of each one.
+ */
+#if PG_VERSION_NUM >= 150000
+ prev_shmem_request_hook = shmem_request_hook;
+ shmem_request_hook = pgws_shmem_request;
+#endif
+ prev_shmem_startup_hook = shmem_startup_hook;
+ shmem_startup_hook = pgws_shmem_startup;
+ planner_hook_next = planner_hook;
+ planner_hook = pgws_planner_hook;
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = pgws_ExecutorStart;
+ prev_ExecutorRun = ExecutorRun_hook;
+ ExecutorRun_hook = pgws_ExecutorRun;
+ prev_ExecutorFinish = ExecutorFinish_hook;
+ ExecutorFinish_hook = pgws_ExecutorFinish;
+ prev_ExecutorEnd = ExecutorEnd_hook;
+ ExecutorEnd_hook = pgws_ExecutorEnd;
+ prev_ProcessUtility = ProcessUtility_hook;
+ ProcessUtility_hook = pgws_ProcessUtility;
+
+ /* Define GUC variables; all of them use context PGC_SIGHUP */
+ DefineCustomIntVariable("pg_wait_sampling.history_size",
+ "Sets size of waits history.",
+ NULL,
+ &pgws_historySize,
+ 5000,
+ 100,
+ INT_MAX,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("pg_wait_sampling.history_period",
+ "Sets period of waits history sampling.",
+ NULL,
+ &pgws_historyPeriod,
+ 10,
+ 1,
+ INT_MAX,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("pg_wait_sampling.profile_period",
+ "Sets period of waits profile sampling.",
+ NULL,
+ &pgws_profilePeriod,
+ 10,
+ 1,
+ INT_MAX,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_wait_sampling.profile_pid",
+ "Sets whether profile should be collected per pid.",
+ NULL,
+ &pgws_profilePid,
+ true,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomEnumVariable("pg_wait_sampling.profile_queries",
+ "Sets whether profile should be collected per query.",
+ NULL,
+ &pgws_profileQueries,
+ PGWS_PROFILE_QUERIES_TOP,
+ pgws_profile_queries_options,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomBoolVariable("pg_wait_sampling.sample_cpu",
+ "Sets whether not waiting backends should be sampled.",
+ NULL,
+ &pgws_sampleCpu,
+ true,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+#if PG_VERSION_NUM >= 150000
+ MarkGUCPrefixReserved("pg_wait_sampling");
+#endif
+}
+
+/*
+ * Look up the PGPROC entry whose pid equals the given one.  The caller is
+ * expected to hold ProcArrayLock already.
+ *
+ * A pid of 0 stands for the calling backend and yields MyProc.  When no
+ * entry matches, an ERROR is raised, so NULL is never actually returned.
+ */
+static PGPROC *
+search_proc(int pid)
+{
+	PGPROC	   *cur;
+
+	if (pid == 0)
+		return MyProc;
+
+	/*
+	 * pid is non-zero from here on, so entries whose pid field is 0 can
+	 * never compare equal and need no separate test.
+	 */
+	for (cur = ProcGlobal->allProcs;
+		 cur < ProcGlobal->allProcs + ProcGlobal->allProcCount;
+		 cur++)
+	{
+		if (cur->pid == pid)
+			return cur;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_INTERNAL_ERROR),
+			 errmsg("backend with pid=%d not found", pid)));
+	return NULL;
+}
+
+/*
+ * Tell whether a PGPROC entry belongs in profiles and output views.
+ *
+ * The pid and wait_event_info read from the entry are always stored into
+ * *pid_p and *wait_event_info_p, whatever the verdict, so the caller can use
+ * exactly the values the decision was based on.
+ *
+ * Returns false for an entry without a wait event when
+ * pg_wait_sampling.sample_cpu is off, for an entry with pid 0 or with a
+ * latch owner_pid of 0, and for the calling process itself; true otherwise.
+ */
+bool
+pgws_should_sample_proc(PGPROC *proc, int *pid_p, uint32 *wait_event_info_p)
+{
+	const int	proc_pid = proc->pid;
+	const uint32 event_info = proc->wait_event_info;
+
+	/* Hand the observed values back before deciding anything */
+	*pid_p = proc_pid;
+	*wait_event_info_p = event_info;
+
+	/* Not waiting, and sampling of non-waiting backends is switched off */
+	if (!pgws_sampleCpu && event_info == 0)
+		return false;
+
+	/*
+	 * On PostgreSQL versions < 17 the PGPROC->pid field is not reset on
+	 * process exit, which would make exited processes show up with a null
+	 * wait event.  DisownLatch() resets owner_pid during ProcKill(), so the
+	 * latch owner is checked as well.
+	 */
+	return proc_pid != 0 &&
+		proc->procLatch.owner_pid != 0 &&
+		proc_pid != MyProcPid;
+}
+
+/* Cross-call state of pg_wait_sampling_get_current() */
+typedef struct
+{
+ HistoryItem *items; /* rows to return, filled on the first call */
+ TimestampTz ts; /* GetCurrentTimestamp() taken on the first call */
+} WaitCurrentContext;
+
+/*
+ * SQL-callable set-returning function behind
+ * pg_wait_sampling_get_current(pid int4) and the pg_wait_sampling_current
+ * view.
+ *
+ * Argument 0 (int4, may be NULL):
+ *	 - not NULL: return a single row for the backend with that pid (0 means
+ *	   the calling backend); search_proc() raises an ERROR if there is no
+ *	   such backend;
+ *	 - NULL: return one row per PGPROC entry accepted by
+ *	   pgws_should_sample_proc().
+ *
+ * Result columns: pid int4, type text, event text, queryid int8.  "type" and
+ * "event" are NULL when the corresponding pgstat_get_wait_event*() lookup
+ * returns NULL.
+ *
+ * All rows are captured during the first call, while ProcArrayLock is held
+ * in shared mode; the following calls only format the captured items.
+ * Raises an ERROR if the module's shared memory is not initialized.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_current);
+Datum
+pg_wait_sampling_get_current(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	WaitCurrentContext *params;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext oldcontext;
+		TupleDesc	tupdesc;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		/* Everything allocated below must survive until the last call */
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		params = (WaitCurrentContext *) palloc0(sizeof(WaitCurrentContext));
+		params->ts = GetCurrentTimestamp();
+
+		funcctx->user_fctx = params;
+		tupdesc = CreateTemplateTupleDesc(4);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "type",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "event",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "queryid",
+						   INT8OID, -1, 0);
+
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+		if (!PG_ARGISNULL(0))
+		{
+			/* pg_wait_sampling_get_current(pid int4) function */
+			HistoryItem *item;
+			PGPROC	   *proc;
+
+			/* The argument is an int4, so fetch it as a signed value */
+			proc = search_proc(PG_GETARG_INT32(0));
+			params->items = (HistoryItem *) palloc0(sizeof(HistoryItem));
+			item = &params->items[0];
+			item->pid = proc->pid;
+			item->wait_event_info = proc->wait_event_info;
+			item->queryId = pgws_proc_queryids[proc - ProcGlobal->allProcs];
+			funcctx->max_calls = 1;
+		}
+		else
+		{
+			/* pg_wait_sampling_current view */
+			int			procCount = ProcGlobal->allProcCount,
+						i,
+						j = 0;
+
+			params->items = (HistoryItem *) palloc0(sizeof(HistoryItem) * procCount);
+			for (i = 0; i < procCount; i++)
+			{
+				PGPROC	   *proc = &ProcGlobal->allProcs[i];
+
+				/*
+				 * pgws_should_sample_proc() stores the pid and wait event it
+				 * examined straight into the candidate item, so an accepted
+				 * row carries exactly the values the filter saw.  They are
+				 * deliberately not read from the PGPROC a second time: a
+				 * later read could observe different values and produce a
+				 * row that contradicts the filter.  A rejected candidate is
+				 * simply overwritten by the next one.
+				 */
+				if (!pgws_should_sample_proc(proc,
+											 &params->items[j].pid,
+											 &params->items[j].wait_event_info))
+					continue;
+
+				params->items[j].queryId = pgws_proc_queryids[i];
+				j++;
+			}
+			funcctx->max_calls = j;
+		}
+
+		LWLockRelease(ProcArrayLock);
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+	params = (WaitCurrentContext *) funcctx->user_fctx;
+
+	if (funcctx->call_cntr < funcctx->max_calls)
+	{
+		HeapTuple	tuple;
+		Datum		values[4];
+		bool		nulls[4];
+		const char *event_type,
+				   *event;
+		HistoryItem *item;
+
+		item = &params->items[funcctx->call_cntr];
+
+		/* Make and return next tuple to caller */
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		event_type = pgstat_get_wait_event_type(item->wait_event_info);
+		event = pgstat_get_wait_event(item->wait_event_info);
+		values[0] = Int32GetDatum(item->pid);
+		if (event_type)
+			values[1] = PointerGetDatum(cstring_to_text(event_type));
+		else
+			nulls[1] = true;
+		if (event)
+			values[2] = PointerGetDatum(cstring_to_text(event));
+		else
+			nulls[2] = true;
+
+		values[3] = UInt64GetDatum(item->queryId);
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+	else
+	{
+		/* nothing left */
+		SRF_RETURN_DONE(funcctx);
+	}
+}
+
+/* Cross-call state of pg_wait_sampling_get_profile() */
+typedef struct
+{
+ Size count; /* number of entries in items, as reported by receive_array() */
+ ProfileItem *items; /* array returned by receive_array() */
+} Profile;
+
+/*
+ * Fill *tag with the user-lock tag this extension uses for one of its
+ * heavyweight locks.  "lock" selects which lock is meant and ends up in
+ * locktag_field2; locktag_field1 always carries PG_WAIT_SAMPLING_MAGIC and
+ * the two remaining fields are zero.
+ */
+void
+pgws_init_lock_tag(LOCKTAG *tag, uint32 lock)
+{
+	/* Kind of lock: a user lock handled by the user lock method */
+	tag->locktag_type = LOCKTAG_USERLOCK;
+	tag->locktag_lockmethodid = USER_LOCKMETHOD;
+
+	/* Identity: extension-wide magic number plus the lock number */
+	tag->locktag_field1 = PG_WAIT_SAMPLING_MAGIC;
+	tag->locktag_field2 = lock;
+
+	/* Unused parts of the tag */
+	tag->locktag_field3 = 0;
+	tag->locktag_field4 = 0;
+}
+
+/*
+ * Get array (history or profile data) from the collector through the
+ * shared memory message queue.
+ *
+ * request   - request kind stored into pgws_collector_hdr->request before
+ *             the collector's latch is set
+ * item_size - size in bytes of one array element; every element message
+ *             must have exactly this length
+ * count     - output: number of elements, taken from the first message
+ *
+ * Returns a palloc'd buffer of item_size * (*count) bytes, allocated in the
+ * memory context that is current on entry.  Raises an ERROR if the collector
+ * latch pointer is NULL or if any queue read fails or has an unexpected
+ * length.
+ *
+ * The queue lock is taken first and released only after the whole reply has
+ * been read (or by pgws_cleanup_callback() on error inside the protected
+ * section).
+ */
+static void *
+receive_array(SHMRequest request, Size item_size, Size *count)
+{
+ LOCKTAG collectorTag;
+ shm_mq_result res;
+ Size len,
+ i;
+ void *data;
+ Pointer result,
+ ptr;
+ MemoryContext oldctx;
+
+ /* Ensure nobody else trying to send request to queue */
+ pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
+ LockAcquire(&queueTag, ExclusiveLock, false, false);
+
+ /* (Re)create the queue and post the request under the collector lock */
+ pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
+ LockAcquire(&collectorTag, ExclusiveLock, false, false);
+ recv_mq = shm_mq_create(pgws_collector_mq, COLLECTOR_QUEUE_SIZE);
+ pgws_collector_hdr->request = request;
+ LockRelease(&collectorTag, ExclusiveLock, false);
+
+ /*
+ * Check that the collector was started to avoid NULL
+ * pointer dereference.
+ */
+ if (!pgws_collector_hdr->latch)
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("pg_wait_sampling collector wasn't started")));
+
+ SetLatch(pgws_collector_hdr->latch);
+
+ shm_mq_set_receiver(recv_mq, MyProc);
+
+ /*
+ * We switch to TopMemoryContext, so that recv_mqh is allocated there and
+ * is guaranteed to survive until before_shmem_exit callbacks are fired.
+ * Anyway, shm_mq_detach() will free handler on its own.
+ *
+ * NB: we do not pass `seg` to shm_mq_attach(), so it won't set its own
+ * callback, i.e. we do not interfere here with shm_mq_detach_callback().
+ */
+ oldctx = MemoryContextSwitchTo(TopMemoryContext);
+ recv_mqh = shm_mq_attach(recv_mq, NULL, NULL);
+ MemoryContextSwitchTo(oldctx);
+
+ /*
+ * Now we surely attached to the shm_mq and got collector's attention. If
+ * anything went wrong (e.g. Ctrl+C received from the client) we have to
+ * cleanup some things, i.e. detach from the shm_mq, so collector was able
+ * to continue responding to other requests.
+ *
+ * PG_ENSURE_ERROR_CLEANUP() guarantees that cleanup callback will be
+ * fired for both ERROR and FATAL.
+ */
+ PG_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
+ {
+ /* The first message carries the element count */
+ res = shm_mq_receive(recv_mqh, &len, &data, false);
+ if (res != SHM_MQ_SUCCESS || len != sizeof(*count))
+ elog(ERROR, "error reading mq");
+
+ memcpy(count, data, sizeof(*count));
+
+ result = palloc(item_size * (*count));
+ ptr = result;
+
+ /* Then one message per element, copied back to back into result */
+ for (i = 0; i < *count; i++)
+ {
+ res = shm_mq_receive(recv_mqh, &len, &data, false);
+ if (res != SHM_MQ_SUCCESS || len != item_size)
+ elog(ERROR, "error reading mq");
+
+ memcpy(ptr, data, item_size);
+ ptr += item_size;
+ }
+ }
+ PG_END_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
+
+ /* We still have to detach and release lock during normal operation. */
+ shm_mq_detach(recv_mqh);
+ LockRelease(&queueTag, ExclusiveLock, false);
+
+ return result;
+}
+
+
+/*
+ * SQL-callable set-returning function: return the wait profile kept by the
+ * collector.
+ *
+ * During the first call the whole profile is fetched with
+ * receive_array(PROFILE_REQUEST, ...) into the multi-call memory context.
+ * Every call then emits one row (pid int4, type text, event text,
+ * queryid int8, count int8) until all items have been returned.  "type" and
+ * "event" are NULL when the wait event lookup returns NULL; queryid is 0
+ * when pgws_profileQueries is zero.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_profile);
+Datum
+pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Profile    *profile;
+	ProfileItem *item;
+	const char *type_name;
+	const char *event_name;
+	Datum		values[5];
+	bool		nulls[5];
+	HeapTuple	tuple;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		TupleDesc	tupdesc;
+		MemoryContext oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* Fetch the complete profile from the collector */
+		profile = (Profile *) palloc0(sizeof(Profile));
+		profile->items = (ProfileItem *) receive_array(PROFILE_REQUEST,
+													   sizeof(ProfileItem),
+													   &profile->count);
+
+		funcctx->user_fctx = profile;
+		funcctx->max_calls = profile->count;
+
+		/* Describe the result row */
+		tupdesc = CreateTemplateTupleDesc(5);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "type",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "event",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "queryid",
+						   INT8OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "count",
+						   INT8OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* Per-call part: pick up the state saved by the first call */
+	funcctx = SRF_PERCALL_SETUP();
+	profile = (Profile *) funcctx->user_fctx;
+
+	/* Every item already handed out: finish the result set */
+	if (funcctx->call_cntr >= funcctx->max_calls)
+		SRF_RETURN_DONE(funcctx);
+
+	item = &profile->items[funcctx->call_cntr];
+
+	MemSet(values, 0, sizeof(values));
+	MemSet(nulls, 0, sizeof(nulls));
+
+	type_name = pgstat_get_wait_event_type(item->wait_event_info);
+	event_name = pgstat_get_wait_event(item->wait_event_info);
+
+	values[0] = Int32GetDatum(item->pid);
+
+	if (type_name == NULL)
+		nulls[1] = true;
+	else
+		values[1] = PointerGetDatum(cstring_to_text(type_name));
+
+	if (event_name == NULL)
+		nulls[2] = true;
+	else
+		values[2] = PointerGetDatum(cstring_to_text(event_name));
+
+	/* The query id is reported only when per-query profiling is enabled */
+	values[3] = pgws_profileQueries ? UInt64GetDatum(item->queryId) : (Datum) 0;
+	values[4] = UInt64GetDatum(item->count);
+
+	tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+	SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+}
+
+/*
+ * SQL-callable: ask the collector to reset the profile.
+ *
+ * Stores PROFILE_RESET in the shared request field (under the collector
+ * lock) and sets the collector's latch, all while holding the queue lock.
+ * No reply is read.  Raises an ERROR if shared memory is not initialized or
+ * if the collector latch pointer is NULL.
+ *
+ * NOTE(review): the ERROR path below does not release queueTag explicitly;
+ * presumably the lock is released when the transaction aborts -- confirm.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_reset_profile);
+Datum
+pg_wait_sampling_reset_profile(PG_FUNCTION_ARGS)
+{
+ LOCKTAG collectorTag;
+
+ check_shmem();
+
+ /* Same queue lock that receive_array() takes around its requests */
+ pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
+
+ LockAcquire(&queueTag, ExclusiveLock, false, false);
+
+ /* Post the request while holding the collector lock */
+ pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
+ LockAcquire(&collectorTag, ExclusiveLock, false, false);
+ pgws_collector_hdr->request = PROFILE_RESET;
+ LockRelease(&collectorTag, ExclusiveLock, false);
+
+ /*
+ * Check that the collector was started to avoid NULL
+ * pointer dereference.
+ */
+ if (!pgws_collector_hdr->latch)
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("pg_wait_sampling collector wasn't started")));
+
+ SetLatch(pgws_collector_hdr->latch);
+
+ LockRelease(&queueTag, ExclusiveLock, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable set-returning function: return the wait history kept by the
+ * collector.
+ *
+ * During the first call the whole history is fetched with
+ * receive_array(HISTORY_REQUEST, ...) into the multi-call memory context.
+ * Every call then emits one row (pid int4, sample_ts timestamptz, type text,
+ * event text, queryid int8), using history->index as the read position,
+ * until history->count rows have been returned.  "type" and "event" are NULL
+ * when the wait event lookup returns NULL.
+ *
+ * Both outcomes of a call leave through SRF_RETURN_NEXT() or
+ * SRF_RETURN_DONE(), so no statement is needed after them.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_history);
+Datum
+pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
+{
+	History    *history;
+	FuncCallContext *funcctx;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext oldcontext;
+		TupleDesc	tupdesc;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* Receive history from shmq; palloc0 leaves history->index at 0 */
+		history = (History *) palloc0(sizeof(History));
+		history->items = (HistoryItem *) receive_array(HISTORY_REQUEST,
+													   sizeof(HistoryItem),
+													   &history->count);
+
+		funcctx->user_fctx = history;
+		funcctx->max_calls = history->count;
+
+		/* Make tuple descriptor */
+		tupdesc = CreateTemplateTupleDesc(5);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid",
+						   INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "sample_ts",
+						   TIMESTAMPTZOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "type",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "event",
+						   TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "queryid",
+						   INT8OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+
+	history = (History *) funcctx->user_fctx;
+
+	if (history->index < history->count)
+	{
+		HeapTuple	tuple;
+		HistoryItem *item;
+		Datum		values[5];
+		bool		nulls[5];
+		const char *event_type,
+				   *event;
+
+		item = &history->items[history->index];
+
+		/* Make and return next tuple to caller */
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		event_type = pgstat_get_wait_event_type(item->wait_event_info);
+		event = pgstat_get_wait_event(item->wait_event_info);
+		values[0] = Int32GetDatum(item->pid);
+		values[1] = TimestampTzGetDatum(item->ts);
+		if (event_type)
+			values[2] = PointerGetDatum(cstring_to_text(event_type));
+		else
+			nulls[2] = true;
+		if (event)
+			values[3] = PointerGetDatum(cstring_to_text(event));
+		else
+			nulls[3] = true;
+
+		values[4] = UInt64GetDatum(item->queryId);
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+		/* Advance the read position before handing the row out */
+		history->index++;
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+	else
+	{
+		/* nothing left */
+		SRF_RETURN_DONE(funcctx);
+	}
+}
+
+/*
+ * planner_hook hook, save queryId for collector
+ *
+ * When pgws_enabled(nesting_level) holds, the query's queryId is published
+ * in this backend's slot of pgws_proc_queryids for the duration of planning
+ * and the previous slot value is remembered.  Planning itself is delegated
+ * to the next hook in the chain or to standard_planner(), one nesting level
+ * deeper.  Afterwards (on success and on error alike) the slot is cleared
+ * when we are back at nesting level 0, or restored to the remembered value
+ * when pgws_enabled() holds for the outer level.
+ *
+ * The parameter list follows the planner_hook signature of the PostgreSQL
+ * version being compiled against.
+ */
+static PlannedStmt *
+pgws_planner_hook(Query *parse,
+#if PG_VERSION_NUM >= 130000
+ const char *query_string,
+#endif
+ int cursorOptions,
+ ParamListInfo boundParams
+#if PG_VERSION_NUM >= 190000
+ , ExplainState *es
+#endif
+ )
+{
+ PlannedStmt *result;
+ int i = MyProc - ProcGlobal->allProcs; /* this backend's slot */
+ uint64 save_queryId = 0;
+
+ if (pgws_enabled(nesting_level))
+ {
+ save_queryId = pgws_proc_queryids[i];
+ pgws_proc_queryids[i] = parse->queryId;
+ }
+
+ nesting_level++;
+ PG_TRY();
+ {
+ /* Invoke original hook if needed */
+ if (planner_hook_next)
+ result = planner_hook_next(parse,
+#if PG_VERSION_NUM >= 130000
+ query_string,
+#endif
+ cursorOptions, boundParams
+#if PG_VERSION_NUM >= 190000
+ , es
+#endif
+ );
+ else
+ result = standard_planner(parse,
+#if PG_VERSION_NUM >= 130000
+ query_string,
+#endif
+ cursorOptions, boundParams
+#if PG_VERSION_NUM >= 190000
+ , es
+#endif
+ );
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else if (pgws_enabled(nesting_level))
+ pgws_proc_queryids[i] = save_queryId;
+ }
+ PG_CATCH();
+ {
+ /* Same bookkeeping as on success, then let the error propagate */
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else if (pgws_enabled(nesting_level))
+ pgws_proc_queryids[i] = save_queryId;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ return result;
+}
+
+/*
+ * ExecutorStart hook: publish the statement's queryId in this backend's slot
+ * of pgws_proc_queryids when pgws_enabled(nesting_level) holds, then hand
+ * over to the next hook in the chain or to standard_ExecutorStart().
+ */
+static void
+pgws_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+	if (pgws_enabled(nesting_level))
+	{
+		int			slot = MyProc - ProcGlobal->allProcs;
+
+		pgws_proc_queryids[slot] = queryDesc->plannedstmt->queryId;
+	}
+
+	if (prev_ExecutorStart == NULL)
+		standard_ExecutorStart(queryDesc, eflags);
+	else
+		prev_ExecutorStart(queryDesc, eflags);
+}
+
+/*
+ * ExecutorRun hook: run the next hook in the chain (or
+ * standard_ExecutorRun()) one nesting level deeper, then put this backend's
+ * queryId slot back.
+ *
+ * On success and on error alike the slot is cleared when we are back at
+ * nesting level 0 and restored to its value on entry otherwise.  The
+ * execute_once parameter exists only for PostgreSQL 10 through 17.
+ */
+static void
+pgws_ExecutorRun(QueryDesc *queryDesc,
+ ScanDirection direction,
+ uint64 count
+#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 180000
+ ,bool execute_once
+#endif
+)
+{
+ int i = MyProc - ProcGlobal->allProcs; /* this backend's slot */
+ uint64 save_queryId = pgws_proc_queryids[i];
+
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorRun)
+#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 180000
+ prev_ExecutorRun(queryDesc, direction, count, execute_once);
+#else
+ prev_ExecutorRun(queryDesc, direction, count);
+#endif
+ else
+#if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 180000
+ standard_ExecutorRun(queryDesc, direction, count, execute_once);
+#else
+ standard_ExecutorRun(queryDesc, direction, count);
+#endif
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else
+ pgws_proc_queryids[i] = save_queryId;
+ }
+ PG_CATCH();
+ {
+ /* Same bookkeeping as on success, then let the error propagate */
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else
+ pgws_proc_queryids[i] = save_queryId;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+}
+
+/*
+ * ExecutorFinish hook: run the next hook in the chain (or
+ * standard_ExecutorFinish()) one nesting level deeper, then put this
+ * backend's queryId slot back.
+ *
+ * On success and on error alike the slot is cleared when we are back at
+ * nesting level 0 and restored to its value on entry otherwise, which is the
+ * same rule pgws_ExecutorRun() applies.  Clearing on the error path matters
+ * because a failed top-level statement would otherwise leave its queryId in
+ * the slot after the statement is gone.
+ */
+static void
+pgws_ExecutorFinish(QueryDesc *queryDesc)
+{
+	int			i = MyProc - ProcGlobal->allProcs;	/* this backend's slot */
+	uint64		save_queryId = pgws_proc_queryids[i];
+
+	nesting_level++;
+	PG_TRY();
+	{
+		if (prev_ExecutorFinish)
+			prev_ExecutorFinish(queryDesc);
+		else
+			standard_ExecutorFinish(queryDesc);
+		nesting_level--;
+		if (nesting_level == 0)
+			pgws_proc_queryids[i] = UINT64CONST(0);
+		else
+			pgws_proc_queryids[i] = save_queryId;
+	}
+	PG_CATCH();
+	{
+		/* Same bookkeeping as on success, then let the error propagate */
+		nesting_level--;
+		if (nesting_level == 0)
+			pgws_proc_queryids[i] = UINT64CONST(0);
+		else
+			pgws_proc_queryids[i] = save_queryId;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+}
+
+/*
+ * ExecutorEnd hook: clear this backend's queryId slot when running at
+ * nesting level 0, then hand over to the next hook in the chain or to
+ * standard_ExecutorEnd().
+ */
+static void
+pgws_ExecutorEnd(QueryDesc *queryDesc)
+{
+	/* Nested statements leave the slot alone */
+	if (nesting_level == 0)
+	{
+		int			slot = MyProc - ProcGlobal->allProcs;
+
+		pgws_proc_queryids[slot] = UINT64CONST(0);
+	}
+
+	if (prev_ExecutorEnd == NULL)
+		standard_ExecutorEnd(queryDesc);
+	else
+		prev_ExecutorEnd(queryDesc);
+}
+
+/*
+ * ProcessUtility hook: same queryId bookkeeping as pgws_planner_hook(), for
+ * utility statements.
+ *
+ * When pgws_enabled(nesting_level) holds, pstmt->queryId is published in
+ * this backend's slot of pgws_proc_queryids and the previous slot value is
+ * remembered.  The statement is then executed by the next hook in the chain
+ * or by standard_ProcessUtility(), one nesting level deeper.  Afterwards (on
+ * success and on error alike) the slot is cleared when we are back at
+ * nesting level 0, or restored to the remembered value when pgws_enabled()
+ * holds for the outer level.
+ *
+ * The parameter list follows the ProcessUtility_hook signature of the
+ * PostgreSQL version being compiled against.
+ */
+static void
+pgws_ProcessUtility(PlannedStmt *pstmt,
+ const char *queryString,
+#if PG_VERSION_NUM >= 140000
+ bool readOnlyTree,
+#endif
+ ProcessUtilityContext context,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv,
+ DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+ QueryCompletion *qc
+#else
+ char *completionTag
+#endif
+)
+{
+ int i = MyProc - ProcGlobal->allProcs; /* this backend's slot */
+ uint64 save_queryId = 0;
+
+ if (pgws_enabled(nesting_level))
+ {
+ save_queryId = pgws_proc_queryids[i];
+ pgws_proc_queryids[i] = pstmt->queryId;
+ }
+
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ProcessUtility)
+ prev_ProcessUtility(pstmt, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+ context, params, queryEnv,
+ dest,
+#if PG_VERSION_NUM >= 130000
+ qc
+#else
+ completionTag
+#endif
+ );
+ else
+ standard_ProcessUtility(pstmt, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+ context, params, queryEnv,
+ dest,
+#if PG_VERSION_NUM >= 130000
+ qc
+#else
+ completionTag
+#endif
+ );
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else if (pgws_enabled(nesting_level))
+ pgws_proc_queryids[i] = save_queryId;
+ }
+ PG_CATCH();
+ {
+ /* Same bookkeeping as on success, then let the error propagate */
+ nesting_level--;
+ if (nesting_level == 0)
+ pgws_proc_queryids[i] = UINT64CONST(0);
+ else if (pgws_enabled(nesting_level))
+ pgws_proc_queryids[i] = save_queryId;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+}
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.control b/contrib/pg_wait_sampling/pg_wait_sampling.control
new file mode 100644
index 0000000..97d9a34
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.control
@@ -0,0 +1,5 @@
+# pg_wait_sampling extension
+comment = 'sampling based statistics of wait events'
+default_version = '1.1'
+module_pathname = '$libdir/pg_wait_sampling'
+relocatable = true
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.h b/contrib/pg_wait_sampling/pg_wait_sampling.h
new file mode 100644
index 0000000..dab773c
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.h
@@ -0,0 +1,81 @@
+/*
+ * pg_wait_sampling.h
+ * Headers for pg_wait_sampling extension.
+ *
+ * Copyright (c) 2015-2025, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/pg_wait_sampling.h
+ */
+#ifndef __PG_WAIT_SAMPLING_H__
+#define __PG_WAIT_SAMPLING_H__
+
+#include "datatype/timestamp.h"
+#include "storage/latch.h"
+#include "storage/lock.h"
+#include "storage/shm_mq.h"
+
+/* Stored in locktag_field1 of every lock tag built by pgws_init_lock_tag() */
+#define PG_WAIT_SAMPLING_MAGIC 0xCA94B107
+/* Size passed to shm_mq_create() for the collector reply queue */
+#define COLLECTOR_QUEUE_SIZE (16 * 1024)
+#define HISTORY_TIME_MULTIPLIER 10
+/* Lock numbers accepted by pgws_init_lock_tag() */
+#define PGWS_QUEUE_LOCK 0
+#define PGWS_COLLECTOR_LOCK 1
+
+/* One profile entry, as returned by pg_wait_sampling_get_profile() */
+typedef struct
+{
+ int pid;
+ uint32 wait_event_info;
+ uint64 queryId;
+ uint64 count;
+} ProfileItem;
+
+/* One sample, as returned by the "current" and "history" functions */
+typedef struct
+{
+ int pid;
+ uint32 wait_event_info;
+ uint64 queryId;
+ TimestampTz ts; /* reported as sample_ts by the history function */
+} HistoryItem;
+
+/* A set of history samples */
+typedef struct
+{
+ bool wraparound; /* NOTE(review): presumably set once the buffer has wrapped -- confirm in collector.c */
+ Size index; /* current position in items */
+ Size count; /* number of entries in items */
+ HistoryItem *items;
+} History;
+
+/* Request kinds stored in CollectorShmqHeader.request */
+typedef enum
+{
+ NO_REQUEST,
+ HISTORY_REQUEST,
+ PROFILE_REQUEST,
+ PROFILE_RESET
+} SHMRequest;
+
+/* Shared header through which backends hand requests to the collector */
+typedef struct
+{
+ Latch *latch; /* set by requesters after storing a request; NULL if no collector */
+ SHMRequest request; /* pending request */
+} CollectorShmqHeader;
+
+/* GUC variables */
+extern int pgws_historySize;
+extern int pgws_historyPeriod;
+extern int pgws_profilePeriod;
+extern bool pgws_profilePid;
+extern int pgws_profileQueries;
+extern bool pgws_sampleCpu;
+
+/* pg_wait_sampling.c */
+extern CollectorShmqHeader *pgws_collector_hdr;
+extern shm_mq *pgws_collector_mq;
+extern uint64 *pgws_proc_queryids; /* indexed by position in ProcGlobal->allProcs */
+extern void pgws_init_lock_tag(LOCKTAG *tag, uint32 lock);
+extern bool pgws_should_sample_proc(PGPROC *proc, int *pid_p, uint32 *wait_event_info_p);
+
+/* collector.c */
+extern void pgws_register_wait_collector(void);
+extern PGDLLEXPORT void pgws_collector_main(Datum main_arg);
+
+#endif
diff --git a/contrib/pg_wait_sampling/run-tests.sh b/contrib/pg_wait_sampling/run-tests.sh
new file mode 100755
index 0000000..f42e999
--- /dev/null
+++ b/contrib/pg_wait_sampling/run-tests.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Build, install and regression-test the extension against the PostgreSQL
+# installation selected by $PG_MAJOR, which the caller must set.
+# -e: stop at the first failing command; -v: echo script lines as they are read.
+set -ev
+
+PATH=/usr/lib/postgresql/$PG_MAJOR/bin:$PATH
+export PGDATA=/var/lib/postgresql/$PG_MAJOR/test
+# Treat compiler warnings as errors and build through PGXS
+export COPT=-Werror
+export USE_PGXS=1
+
+# Make the data directory parent and the socket directory writable for the
+# unprivileged user running the cluster
+sudo mkdir -p /var/lib/postgresql/$PG_MAJOR
+sudo chmod 1777 /var/lib/postgresql/$PG_MAJOR
+sudo chmod 1777 /var/run/postgresql
+
+make clean
+make
+
+# Installation needs root; keep the environment and the adjusted PATH
+sudo -E env PATH=$PATH make install
+
+# Create a cluster with the extension preloaded, run the tests, shut down
+initdb
+echo "shared_preload_libraries = pg_wait_sampling" >> $PGDATA/postgresql.conf
+
+pg_ctl -l logfile start
+make installcheck
+pg_ctl stop
diff --git a/contrib/pg_wait_sampling/sql/load.sql b/contrib/pg_wait_sampling/sql/load.sql
new file mode 100644
index 0000000..d772e3e
--- /dev/null
+++ b/contrib/pg_wait_sampling/sql/load.sql
@@ -0,0 +1,7 @@
+CREATE EXTENSION pg_wait_sampling;
+
+\d pg_wait_sampling_current
+\d pg_wait_sampling_history
+\d pg_wait_sampling_profile
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/sql/queries.sql b/contrib/pg_wait_sampling/sql/queries.sql
new file mode 100644
index 0000000..de44c6d
--- /dev/null
+++ b/contrib/pg_wait_sampling/sql/queries.sql
@@ -0,0 +1,18 @@
+CREATE EXTENSION pg_wait_sampling;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_current)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_history)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+-- Some dummy checks just to be sure that all our functions work and return something.
+SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid());
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile();
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history();
+SELECT pg_wait_sampling_reset_profile();
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/plantuner/COPYRIGHT b/contrib/plantuner/COPYRIGHT
new file mode 100644
index 00000000000..6e4705bc561
--- /dev/null
+++ b/contrib/plantuner/COPYRIGHT
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2009 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
diff --git a/contrib/plantuner/Makefile b/contrib/plantuner/Makefile
new file mode 100644
index 00000000000..f2e8350e84c
--- /dev/null
+++ b/contrib/plantuner/Makefile
@@ -0,0 +1,15 @@
+# contrib/plantuner/Makefile
+#
+# Builds the plantuner shared library from plantuner.o, lists
+# README.plantuner as documentation and declares the "plantuner" regression
+# test.
+MODULE_big = plantuner
+DOCS = README.plantuner
+REGRESS = plantuner
+OBJS=plantuner.o
+
+# With USE_PGXS set, build against an installed PostgreSQL located through
+# pg_config; otherwise build inside the PostgreSQL source tree.
+ifdef USE_PGXS
+PGXS = $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/plantuner
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/plantuner/README.plantuner b/contrib/plantuner/README.plantuner
new file mode 100644
index 00000000000..17c8ba010b8
--- /dev/null
+++ b/contrib/plantuner/README.plantuner
@@ -0,0 +1,99 @@
+Plantuner - enable planner hints
+
+ contrib/plantuner is a contribution module for PostgreSQL 8.4+, which
+ enables planner hints.
+
+ All work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov
+ (oleg@sai.msu.su).
+
+ Sponsor: Nomao project (http://www.nomao.com)
+
+Motivation
+
+ Whether or not one considers it a good idea, it is sometimes very
+ useful to be able to control the planner (to provide hints that tell
+ the optimizer to ignore part of its algorithm), which is currently
+ impossible in PostgreSQL. Oracle, for example, has over 120 hints; SQL
+ Server also provides hints.
+
+ This first version of plantuner provides a possibility to hide
+ specified indexes from PostgreSQL planner, so it will not use them.
+
+ There are many situations when a developer wants to temporarily disable
+ specific index(es) without dropping them, or to instruct the planner to
+ use a specific index.
+
+ Next, for some workloads PostgreSQL can be too pessimistic about
+ newly created tables and assume many more rows in a table than
+ it actually has. If the plantuner.fix_empty_table GUC variable is set
+ to true, the module will set to zero the number of pages/tuples of
+ a table which has no blocks in its file.
+
+Installation
+
+ * Get latest source of plantuner from CVS Repository
+ * gmake && gmake install && gmake installcheck
+
+Syntax
+ plantuner.forbid_index (deprecated)
+ plantuner.disable_index
+ List of indexes invisible to planner
+ plantuner.enable_index
+ List of indexes visible to the planner even if they are hidden
+ by plantuner.disable_index.
+ plantuner.only_index
+ List of explicitly enabled indexes (overrides plantuner.disable_index
+ and plantuner.enable_index), so only indexes in this list are allowed.
+
+Usage
+
+ To enable the module you can either load shared library 'plantuner' in
+ psql session or specify 'shared_preload_libraries' option in
+ postgresql.conf.
+=# LOAD 'plantuner';
+=# create table test(id int);
+=# create index id_idx on test(id);
+=# create index id_idx2 on test(id);
+=# \d test
+ Table "public.test"
+ Column | Type | Modifiers
+--------+---------+-----------
+ id | integer |
+Indexes:
+ "id_idx" btree (id)
+ "id_idx2" btree (id)
+=# explain select id from test where id=1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx2 (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+=# set enable_seqscan=off;
+=# set plantuner.disable_index='id_idx2';
+=# explain select id from test where id=1;
+ QUERY PLAN
+----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+=# set plantuner.disable_index='id_idx2,id_idx';
+=# explain select id from test where id=1;
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Seq Scan on test (cost=10000000000.00..10000000040.00 rows=12 width=4)
+ Filter: (id = 1)
+(2 rows)
+=# set plantuner.enable_index='id_idx';
+=# explain select id from test where id=1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+
diff --git a/contrib/plantuner/expected/plantuner.out b/contrib/plantuner/expected/plantuner.out
new file mode 100644
index 00000000000..70d2bcaaef2
--- /dev/null
+++ b/contrib/plantuner/expected/plantuner.out
@@ -0,0 +1,96 @@
+LOAD 'plantuner';
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+-------------------------
+
+(1 row)
+
+CREATE TABLE wow (i int, j int);
+CREATE INDEX i_idx ON wow (i);
+CREATE INDEX j_idx ON wow (j);
+CREATE INDEX i1 ON WOW (i);
+CREATE INDEX i2 ON WOW (i);
+CREATE INDEX i3 ON WOW (i);
+SET enable_seqscan=off;
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+SET plantuner.disable_index="i_idx, j_idx";
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+----------------------------
+ public.i_idx, public.j_idx
+(1 row)
+
+SET plantuner.disable_index="i_idx, nonexistent, public.j_idx, wow";
+WARNING: 'nonexistent' does not exist
+WARNING: 'wow' is not an index
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+----------------------------
+ public.i_idx, public.j_idx
+(1 row)
+
+SET plantuner.enable_index="i_idx";
+SHOW plantuner.enable_index;
+ plantuner.enable_index
+------------------------
+ public.i_idx
+(1 row)
+
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+--test only index
+RESET plantuner.disable_index;
+RESET plantuner.enable_index;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+SET enable_indexonlyscan=off;
+SET plantuner.only_index="i1";
+SHOW plantuner.only_index;
+ plantuner.only_index
+----------------------
+ public.i1
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i1 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+SET plantuner.disable_index="i1,i2,i3";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i1 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+SET plantuner.only_index="i2";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i2 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+RESET plantuner.only_index;
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+-------------------------------
+ Index Scan using i_idx on wow
+ Index Cond: (i = 0)
+(2 rows)
+
diff --git a/contrib/plantuner/meson.build b/contrib/plantuner/meson.build
new file mode 100644
index 00000000000..cf1b1ae8cdb
--- /dev/null
+++ b/contrib/plantuner/meson.build
@@ -0,0 +1,34 @@
+# Build definition for the plantuner contrib module: a loadable shared
+# module built from plantuner.c, plus its regression test.
+plantuner_sources = files(
+ 'plantuner.c'
+)
+
+# On Windows, additionally compile a version resource carrying the module
+# name and file description.
+if host_system == 'windows'
+ plantuner_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'plantuner',
+ '--FILEDESC', 'plantuner',])
+endif
+
+# The 'dependencies' entry repeats the value already present in
+# contrib_mod_args, so the module is built with the default contrib settings.
+plantuner = shared_module('plantuner',
+ plantuner_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += plantuner
+
+# NOTE(review): no files are listed here, so this call appears to install
+# nothing -- confirm whether data files were meant to be added.
+install_data(
+ kwargs: contrib_data_args,
+)
+
+# Register the 'plantuner' regression test (sql/plantuner.sql).
+tests += {
+ 'name': 'plantuner',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'plantuner',
+ ]
+ },
+}
+
+# TODO: DOCS = README.plantuner
\ No newline at end of file
diff --git a/contrib/plantuner/plantuner.c b/contrib/plantuner/plantuner.c
new file mode 100644
index 00000000000..82996e3dc31
--- /dev/null
+++ b/contrib/plantuner/plantuner.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2009 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <postgres.h>
+
+#include <fmgr.h>
+#include <miscadmin.h>
+#include <access/heapam.h>
+#include <access/xact.h>
+#include <catalog/namespace.h>
+#include <catalog/pg_class.h>
+#if PG_VERSION_NUM >= 160000
+#include <nodes/miscnodes.h>
+#endif
+#include <nodes/pg_list.h>
+#include <optimizer/plancat.h>
+#include <storage/bufmgr.h>
+#include <utils/builtins.h>
+#include <utils/guc.h>
+#include <utils/lsyscache.h>
+#include <utils/rel.h>
+#if PG_VERSION_NUM >= 100000
+#include <utils/regproc.h>
+#include <utils/varlena.h>
+#endif
+
+PG_MODULE_MAGIC;
+
+/*
+ * From PostgreSQL 13 on, map the heap_open()/heap_close() calls used in this
+ * file onto table_open()/table_close().
+ */
+#if PG_VERSION_NUM >= 130000
+#define heap_open(r, l) table_open(r, l)
+#define heap_close(r, l) table_close(r, l)
+#endif
+
+/*
+ * State for each of the three index lists.  For every list there is:
+ *   - the number of resolved index OIDs,
+ *   - a malloc'ed array of those OIDs (owned and replaced by indexesAssign()),
+ *   - the string variable registered with the GUC machinery in _PG_init().
+ */
+static int nDisabledIndexes = 0;
+static Oid *disabledIndexes = NULL;
+static char *disableIndexesOutStr = "";
+
+static int nEnabledIndexes = 0;
+static Oid *enabledIndexes = NULL;
+static char *enableIndexesOutStr = "";
+
+static int nOnlyIndexes = 0;
+static Oid *onlyIndexes = NULL;
+static char *onlyIndexesOutStr = "";
+
+/* get_relation_info_hook value found by _PG_init(); chained from execPlantuner() */
+static get_relation_info_hook_type prevHook = NULL;
+/* backing variable of the plantuner.fix_empty_table GUC */
+static bool fix_empty_table = false;
+
+/*
+ * Per-list flags maintained by indexesAssign(): cleared when a value was
+ * accepted without catalog access, in which case lateInit() re-runs the
+ * assignment for that list later.
+ */
+static bool plantuner_enable_inited = false;
+static bool plantuner_only_inited = false;
+static bool plantuner_disable_inited = false;
+
+/* Selects which of the three index lists indexesAssign() operates on. */
+typedef enum IndexListKind {
+ EnabledKind,
+ DisabledKind,
+ OnlyKind
+} IndexListKind;
+
+/*
+ * Mark the OID array of the given list as out of date, so that lateInit()
+ * rebuilds it from the current GUC string on its next call.
+ */
+static void
+markIndexListStale(IndexListKind kind)
+{
+    switch (kind)
+    {
+        case EnabledKind:
+            plantuner_enable_inited = false;
+            break;
+        case DisabledKind:
+            plantuner_disable_inited = false;
+            break;
+        case OnlyKind:
+            plantuner_only_inited = false;
+            break;
+        default:
+            elog(ERROR, "wrong kind");
+    }
+}
+
+/*
+ * Make "oids" (malloc'ed, "nOids" valid entries) the active array of the
+ * given list, release the array it replaces and mark the list as built.
+ * Ownership of "oids" passes to the module-level state.
+ */
+static void
+installIndexList(IndexListKind kind, Oid *oids, int nOids)
+{
+    switch (kind)
+    {
+        case EnabledKind:
+            free(enabledIndexes);
+            enabledIndexes = oids;
+            nEnabledIndexes = nOids;
+            plantuner_enable_inited = true;
+            break;
+        case DisabledKind:
+            free(disabledIndexes);
+            disabledIndexes = oids;
+            nDisabledIndexes = nOids;
+            plantuner_disable_inited = true;
+            break;
+        case OnlyKind:
+            free(onlyIndexes);
+            onlyIndexes = oids;
+            nOnlyIndexes = nOids;
+            plantuner_only_inited = true;
+            break;
+        default:
+            free(oids);
+            elog(ERROR, "wrong kind");
+    }
+}
+
+/*
+ * Common worker behind the check hooks, the assign hooks and lateInit().
+ *
+ * newval  comma-separated list of (optionally schema-qualified) index names
+ * doit    false: only validate and emit warnings for names that do not
+ *         exist or are not indexes; true: resolve the names and install the
+ *         resulting OID array for "kind"
+ * source  unused
+ * kind    which of the three lists is being processed
+ *
+ * Returns newval when the list is syntactically valid, NULL otherwise.
+ *
+ * When the system catalogs cannot be consulted (no backend slot, not under
+ * the postmaster, or not inside a transaction) the value is accepted as is
+ * and the list is marked stale so that lateInit() resolves it later.
+ *
+ * The list is marked as built only when the OID array is really installed
+ * (doit == true).  A check-only call therefore no longer hides a pending
+ * rebuild from lateInit().
+ */
+static const char *
+indexesAssign(const char * newval, bool doit, GucSource source,
+              IndexListKind kind)
+{
+    char       *rawname;
+    List       *namelist;
+    ListCell   *l;
+    Oid        *newOids = NULL;
+    int         nOids = 0,
+                i = 0;
+
+    /* SplitIdentifierString() scribbles on its input, so work on a copy */
+    rawname = pstrdup(newval);
+
+    if (!SplitIdentifierString(rawname, ',', &namelist))
+        goto cleanup;
+
+    /*
+     * The following work can be done only in normal processing because it
+     * needs access to the system catalogs.
+     */
+    if (MyProcNumber == INVALID_PROC_NUMBER || !IsUnderPostmaster ||
+        !IsTransactionState())
+    {
+        markIndexListStale(kind);
+
+        /* the scratch copies are not needed on this path either */
+        pfree(rawname);
+        list_free(namelist);
+
+        return newval;
+    }
+
+    if (doit)
+    {
+        nOids = list_length(namelist);
+        newOids = malloc(sizeof(Oid) * (nOids+1));
+        if (!newOids)
+            elog(ERROR,"could not allocate %d bytes",
+                 (int)(sizeof(Oid) * (nOids+1)));
+    }
+
+    foreach(l, namelist)
+    {
+        char       *curname = (char *) lfirst(l);
+#if PG_VERSION_NUM >= 90200
+        List       *cur_namelist;
+        Oid         indexOid;
+
+#if PG_VERSION_NUM >= 160000
+        ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+        cur_namelist = stringToQualifiedNameList(curname, (Node *) &escontext);
+
+        /* bad name list syntax */
+        if (cur_namelist == NIL)
+            continue;
+#else
+        cur_namelist = stringToQualifiedNameList(curname);
+#endif
+
+        indexOid = RangeVarGetRelid(makeRangeVarFromNameList(cur_namelist),
+                                    NoLock, true);
+#else
+        Oid         indexOid = RangeVarGetRelid(
+                makeRangeVarFromNameList(stringToQualifiedNameList(curname)),
+                true);
+#endif
+
+        /* unknown names and non-indexes are skipped; warn in check mode only */
+        if (indexOid == InvalidOid)
+        {
+#if PG_VERSION_NUM >= 90100
+            if (doit == false)
+#endif
+                elog(WARNING,"'%s' does not exist", curname);
+            continue;
+        }
+        else if ( get_rel_relkind(indexOid) != RELKIND_INDEX )
+        {
+#if PG_VERSION_NUM >= 90100
+            if (doit == false)
+#endif
+                elog(WARNING,"'%s' is not an index", curname);
+            continue;
+        }
+        else if (doit)
+        {
+            newOids[i++] = indexOid;
+        }
+    }
+
+    if (doit)
+        installIndexList(kind, newOids, i);
+
+    pfree(rawname);
+    list_free(namelist);
+
+    return newval;
+
+cleanup:
+    /* syntax error in the list: nothing was installed */
+    free(newOids);
+    pfree(rawname);
+    list_free(namelist);
+    return NULL;
+}
+
+/*
+ * Run indexesAssign() for the disabled-index list.  Called by
+ * assignDisabledIndexesNew(), and registered directly as the assign hook in
+ * _PG_init() when PG_VERSION_NUM < 90100.
+ */
+static const char *
+assignDisabledIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, DisabledKind);
+}
+
+/*
+ * Run indexesAssign() for the enabled-index list.  Called by
+ * assignEnabledIndexesNew(), and registered directly as the assign hook in
+ * _PG_init() when PG_VERSION_NUM < 90100.
+ */
+static const char *
+assignEnabledIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, EnabledKind);
+}
+
+/*
+ * Run indexesAssign() for the only-index list.  Called by
+ * assignOnlyIndexesNew(), and registered directly as the assign hook in
+ * _PG_init() when PG_VERSION_NUM < 90100.
+ */
+static const char *
+assignOnlyIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, OnlyKind);
+}
+
+/*
+ * Rebuild every index list whose *_inited flag is cleared, using the current
+ * value of the corresponding GUC string.
+ *
+ * indexesAssign() clears a flag when it had to accept a value without being
+ * able to read the system catalogs.  If catalog access is still impossible
+ * when this function runs, indexesAssign() leaves the flag cleared and the
+ * rebuild is attempted again on the next call.
+ *
+ * The GucSource argument passed down is not used by indexesAssign().
+ */
+static void
+lateInit(void)
+{
+    if (!plantuner_only_inited)
+        indexesAssign(onlyIndexesOutStr, true, PGC_S_USER, OnlyKind);
+    if (!plantuner_enable_inited)
+        indexesAssign(enableIndexesOutStr, true, PGC_S_USER, EnabledKind);
+    if (!plantuner_disable_inited)
+        indexesAssign(disableIndexesOutStr, true, PGC_S_USER, DisabledKind);
+}
+
+#if PG_VERSION_NUM >= 90100
+
+/*
+ * GUC check hook for plantuner.only_index.
+ *
+ * Validates the proposed value by running indexesAssign() in check mode
+ * (doit = false).  On success the pointer handed back is stored in *newval
+ * and true is returned; a NULL result rejects the value.  "extra" is unused.
+ */
+static bool
+checkOnlyIndexes(char **newval, void **extra, GucSource source)
+{
+    const char *checked = indexesAssign(*newval, false, source, OnlyKind);
+
+    if (checked == NULL)
+        return false;
+
+    *newval = (char *) checked;
+    return true;
+}
+
+/*
+ * GUC check hook for the disabled-index list (plantuner.disable_index and
+ * plantuner.forbid_index).
+ *
+ * Validates the proposed value by running indexesAssign() in check mode
+ * (doit = false).  On success the pointer handed back is stored in *newval
+ * and true is returned; a NULL result rejects the value.  "extra" is unused.
+ */
+static bool
+checkDisabledIndexes(char **newval, void **extra, GucSource source)
+{
+    const char *checked = indexesAssign(*newval, false, source, DisabledKind);
+
+    if (checked == NULL)
+        return false;
+
+    *newval = (char *) checked;
+    return true;
+}
+
+/*
+ * GUC check hook for plantuner.enable_index.
+ *
+ * Validates the proposed value by running indexesAssign() in check mode
+ * (doit = false).  On success the pointer handed back is stored in *newval
+ * and true is returned; a NULL result rejects the value.  "extra" is unused.
+ */
+static bool
+checkEnabledIndexes(char **newval, void **extra, GucSource source)
+{
+    const char *checked = indexesAssign(*newval, false, source, EnabledKind);
+
+    if (checked == NULL)
+        return false;
+
+    *newval = (char *) checked;
+    return true;
+}
+
+/*
+ * GUC assign hook (PG_VERSION_NUM >= 90100 signature) for the disabled-index
+ * list: installs the new value through assignDisabledIndexes() with
+ * doit = true.  "extra" is unused and the result is discarded.
+ */
+static void
+assignDisabledIndexesNew(const char *newval, void *extra)
+{
+ assignDisabledIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+/*
+ * GUC assign hook (PG_VERSION_NUM >= 90100 signature) for the enabled-index
+ * list: installs the new value through assignEnabledIndexes() with
+ * doit = true.  "extra" is unused and the result is discarded.
+ */
+static void
+assignEnabledIndexesNew(const char *newval, void *extra)
+{
+ assignEnabledIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+/*
+ * GUC assign hook (PG_VERSION_NUM >= 90100 signature) for the only-index
+ * list: installs the new value through assignOnlyIndexes() with
+ * doit = true.  "extra" is unused and the result is discarded.
+ */
+static void
+assignOnlyIndexesNew(const char *newval, void *extra)
+{
+ assignOnlyIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+#endif
+
+/*
+ * Return true if "oid" is one of the first "n" entries of "oids".
+ */
+static bool
+oidIsListed(const Oid *oids, int n, Oid oid)
+{
+    int         i;
+
+    for (i = 0; i < n; i++)
+    {
+        if (oids[i] == oid)
+            return true;
+    }
+
+    return false;
+}
+
+/*
+ * Remove from rel->indexlist the indexes the planner must not consider.
+ *
+ *  - If the only-index list is non-empty, every index that is not on it is
+ *    removed and the other two lists are ignored.
+ *  - Otherwise an index is removed when it is on the disabled list and not
+ *    on the enabled list.
+ *
+ * Stale lists are rebuilt first via lateInit().  Each pass walks the index
+ * list once, deleting in place, so the surviving entries keep their order.
+ * root, relationObjectId and inhparent are unused.
+ */
+static void
+indexFilter(PlannerInfo *root, Oid relationObjectId, bool inhparent,
+            RelOptInfo *rel)
+{
+    ListCell   *l;
+
+    lateInit();
+
+    if (nOnlyIndexes > 0)
+    {
+        foreach(l, rel->indexlist)
+        {
+            IndexOptInfo *info = (IndexOptInfo *) lfirst(l);
+
+            if (!oidIsListed(onlyIndexes, nOnlyIndexes, info->indexoid))
+                rel->indexlist = foreach_delete_current(rel->indexlist, l);
+        }
+
+        return;
+    }
+
+    /* nothing disabled: leave the list untouched */
+    if (nDisabledIndexes <= 0)
+        return;
+
+    foreach(l, rel->indexlist)
+    {
+        IndexOptInfo *info = (IndexOptInfo *) lfirst(l);
+
+        /* plantuner.enable_index overrides plantuner.disable_index */
+        if (oidIsListed(disabledIndexes, nDisabledIndexes, info->indexoid) &&
+            !oidIsListed(enabledIndexes, nEnabledIndexes, info->indexoid))
+            rel->indexlist = foreach_delete_current(rel->indexlist, l);
+    }
+}
+
+/*
+ * get_relation_info_hook implementation installed by _PG_init().
+ *
+ * For relations of kind RELKIND_RELATION it optionally overrides the size
+ * estimate of a zero-block table (plantuner.fix_empty_table) and then
+ * filters the relation's index list through indexFilter().  Whatever hook
+ * was installed before this module is invoked afterwards with the same
+ * arguments.
+ */
+static void
+execPlantuner(PlannerInfo *root, Oid relationObjectId, bool inhparent,
+              RelOptInfo *rel)
+{
+    Relation    target = heap_open(relationObjectId, NoLock);
+
+    if (target->rd_rel->relkind == RELKIND_RELATION)
+    {
+        bool        treatAsEmpty;
+
+        /* the block count is looked up only when the GUC is switched on */
+        treatAsEmpty = fix_empty_table &&
+            RelationGetNumberOfBlocks(target) == 0;
+
+        if (treatAsEmpty)
+        {
+            /*
+             * The estimate produced by estimate_rel_size() can be too
+             * pessimistic for some workloads, so override it here.
+             */
+            rel->pages = 1.0;
+            rel->tuples = 0.0;
+        }
+
+        indexFilter(root, relationObjectId, inhparent, rel);
+    }
+
+    heap_close(target, NoLock);
+
+    /* hand over to the next hook in the chain, if there is one */
+    if (prevHook != NULL)
+        prevHook(root, relationObjectId, inhparent, rel);
+}
+
+/*
+ * Build the text shown for an index list: the schema-qualified names of the
+ * given OIDs, separated by ", ".  OIDs whose relation or namespace name can
+ * no longer be looked up are left out.
+ *
+ * The result is palloc'ed; for an empty list it is the empty string.
+ *
+ * The caller must run lateInit() BEFORE reading the array pointer and count
+ * it passes in: lateInit() may free and replace the module-level arrays, so
+ * values read earlier would be dangling.
+ */
+static const char*
+IndexFilterShow(Oid* indexes, int nIndexes)
+{
+    char       *val,
+               *ptr;
+    int         i,
+                len;
+
+    /* worst case per entry: "schema" + "." + "name" + ", " */
+    len = 1 /* \0 */ + nIndexes * (2 * NAMEDATALEN + 2 /* ', ' */ + 1 /* . */);
+    ptr = val = palloc(len);
+
+    *ptr = '\0';
+    for (i = 0; i < nIndexes; i++)
+    {
+        char       *relname = get_rel_name(indexes[i]);
+        Oid         nspOid = get_rel_namespace(indexes[i]);
+        char       *nspname = get_namespace_name(nspOid);
+
+        if (relname == NULL || nspOid == InvalidOid || nspname == NULL)
+            continue;
+
+        /*
+         * Emit the separator only when something was already written, so a
+         * skipped first entry does not leave a leading ", ".
+         */
+        ptr += snprintf(ptr, len - (ptr - val), "%s%s.%s",
+                        (ptr == val) ? "" : ", ",
+                        nspname,
+                        relname);
+    }
+
+    return val;
+}
+
+/*
+ * GUC show hook for plantuner.disable_index / plantuner.forbid_index.
+ * lateInit() runs first so that the array read below is the current one.
+ */
+static const char*
+disabledIndexFilterShow(void)
+{
+    lateInit();
+
+    return IndexFilterShow(disabledIndexes, nDisabledIndexes);
+}
+
+/*
+ * GUC show hook for plantuner.enable_index.
+ * lateInit() runs first so that the array read below is the current one.
+ */
+static const char*
+enabledIndexFilterShow(void)
+{
+    lateInit();
+
+    return IndexFilterShow(enabledIndexes, nEnabledIndexes);
+}
+
+/*
+ * GUC show hook for plantuner.only_index.
+ * lateInit() runs first so that the array read below is the current one.
+ */
+static const char*
+onlyIndexFilterShow(void)
+{
+    lateInit();
+
+    return IndexFilterShow(onlyIndexes, nOnlyIndexes);
+}
+
+/*
+ * Module load callback: registers the plantuner.* settings and installs
+ * execPlantuner() as get_relation_info_hook.
+ *
+ * For PG_VERSION_NUM >= 90100 each string setting gets a check hook and an
+ * assign hook; for older versions a single assign function is passed.
+ */
+void _PG_init(void);
+void
+_PG_init(void)
+{
+ /*
+ * Deprecated alias of plantuner.disable_index: it is registered with the
+ * same variable (disableIndexesOutStr) and the same hooks.
+ */
+ DefineCustomStringVariable(
+ "plantuner.forbid_index",
+ "List of forbidden indexes (deprecated)",
+ "Listed indexes will not be used in queries (deprecated, use plantuner.disable_index)",
+ &disableIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkDisabledIndexes,
+ assignDisabledIndexesNew,
+#else
+ assignDisabledIndexes,
+#endif
+ disabledIndexFilterShow
+ );
+
+ /* indexes removed from consideration by indexFilter() */
+ DefineCustomStringVariable(
+ "plantuner.disable_index",
+ "List of disabled indexes",
+ "Listed indexes will not be used in queries",
+ &disableIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkDisabledIndexes,
+ assignDisabledIndexesNew,
+#else
+ assignDisabledIndexes,
+#endif
+ disabledIndexFilterShow
+ );
+
+ /* indexes kept by indexFilter() even if they are on the disabled list */
+ DefineCustomStringVariable(
+ "plantuner.enable_index",
+ "List of enabled indexes (overload plantuner.disable_index)",
+ "Listed indexes which could be used in queries even they are listed in plantuner.disable_index",
+ &enableIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkEnabledIndexes,
+ assignEnabledIndexesNew,
+#else
+ assignEnabledIndexes,
+#endif
+ enabledIndexFilterShow
+ );
+
+ /* when non-empty, indexFilter() keeps only these and ignores the other lists */
+ DefineCustomStringVariable(
+ "plantuner.only_index",
+ "List of explicitly enabled indexes (overload plantuner.disable_index and plantuner.enable_index)",
+ "Only indexes in this list are allowed",
+ &onlyIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkOnlyIndexes,
+ assignOnlyIndexesNew,
+#else
+ assignOnlyIndexes,
+#endif
+ onlyIndexFilterShow
+ );
+
+ /*
+ * Switch read by execPlantuner().  The argument list differs between
+ * server versions, hence the nested #if blocks; the boot value passed is
+ * the variable's current value.
+ */
+ DefineCustomBoolVariable(
+ "plantuner.fix_empty_table",
+ "Sets to zero estimations for empty tables",
+ "Sets to zero estimations for empty or newly created tables",
+ &fix_empty_table,
+#if PG_VERSION_NUM >= 80400
+ fix_empty_table,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ /*
+ * Install the planner hook, remembering the previous one for chaining.
+ * The guard keeps prevHook from being set to execPlantuner itself if the
+ * hook is already installed.
+ */
+ if (get_relation_info_hook != execPlantuner )
+ {
+ prevHook = get_relation_info_hook;
+ get_relation_info_hook = execPlantuner;
+ }
+}
diff --git a/contrib/plantuner/sql/plantuner.sql b/contrib/plantuner/sql/plantuner.sql
new file mode 100644
index 00000000000..ddd6fcc94f1
--- /dev/null
+++ b/contrib/plantuner/sql/plantuner.sql
@@ -0,0 +1,51 @@
+LOAD 'plantuner';
+
+SHOW plantuner.disable_index;
+
+CREATE TABLE wow (i int, j int);
+CREATE INDEX i_idx ON wow (i);
+CREATE INDEX j_idx ON wow (j);
+CREATE INDEX i1 ON WOW (i);
+CREATE INDEX i2 ON WOW (i);
+CREATE INDEX i3 ON WOW (i);
+
+SET enable_seqscan=off;
+
+SELECT * FROM wow;
+
+SET plantuner.disable_index="i_idx, j_idx";
+
+SELECT * FROM wow;
+
+SHOW plantuner.disable_index;
+
+SET plantuner.disable_index="i_idx, nonexistent, public.j_idx, wow";
+
+SHOW plantuner.disable_index;
+
+SET plantuner.enable_index="i_idx";
+
+SHOW plantuner.enable_index;
+
+SELECT * FROM wow;
+--test only index
+RESET plantuner.disable_index;
+RESET plantuner.enable_index;
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+SET enable_indexonlyscan=off;
+
+SET plantuner.only_index="i1";
+SHOW plantuner.only_index;
+
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+SET plantuner.disable_index="i1,i2,i3";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+SET plantuner.only_index="i2";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+RESET plantuner.only_index;
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index eb255e74d15..cd603390153 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -1140,6 +1140,9 @@ pgfdw_xact_callback(XactEvent event, void *arg)
else
pgfdw_abort_cleanup(entry, true);
break;
+ case XACT_EVENT_PRE_ABORT:
+ case XACT_EVENT_PARALLEL_PRE_ABORT:
+ break;
}
}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 5fc2a5a9b07..bbbbbdebeea 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2568,65 +2568,68 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
- QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
LockRows
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft4.*, ft5.*, local_tbl.ctid
-> Merge Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft4.c1, ft4.c2, ft4.c3, ft5.c1, ft5.c2, ft5.c3, local_tbl.c1, local_tbl.c2, local_tbl.c3, ft1.*, ft4.*, ft5.*, local_tbl.ctid
- Merge Cond: (local_tbl.c1 = ft1.c2)
- -> Index Scan using local_tbl_pkey on public.local_tbl
- Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
+ Merge Cond: (ft1.c1 = ft4.c1)
+ -> Sort
+ Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
+ Sort Key: ft1.c1
+ -> Merge Join
+ Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
+ Merge Cond: (local_tbl.c1 = ft1.c2)
+ -> Index Scan using local_tbl_pkey on public.local_tbl
+ Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
+ -> Sort
+ Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
+ Sort Key: ft1.c2
+ -> Foreign Scan on public.ft1
+ Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
-> Sort
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
- Sort Key: ft1.c2
+ Output: ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+ Sort Key: ft4.c1
-> Foreign Scan
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
- Relations: ((public.ft1) INNER JOIN (public.ft4)) INNER JOIN (public.ft5)
- Remote SQL: SELECT r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8) END, r2.c1, r2.c2, r2.c3, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r3.c1, r3.c2, r3.c3, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 3" r2 ON (((r1."C 1" = r2.c1)) AND ((r1."C 1" < 100)))) INNER JOIN "S 1"."T 4" r3 ON (((r1."C 1" = r3.c1)) AND ((r3.c1 < 100)))) FOR UPDATE OF r1 FOR UPDATE OF r2 FOR UPDATE OF r3
+ Output: ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+ Relations: (public.ft4) INNER JOIN (public.ft5)
+ Remote SQL: SELECT r2.c1, r2.c2, r2.c3, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r3.c1, r3.c2, r3.c3, CASE WHEN (r3.*)::text IS NOT NULL THEN ROW(r3.c1, r3.c2, r3.c3) END FROM ("S 1"."T 3" r2 INNER JOIN "S 1"."T 4" r3 ON (((r2.c1 = r3.c1)) AND ((r3.c1 < 100)))) FOR UPDATE OF r2 FOR UPDATE OF r3
-> Merge Join
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
- Merge Cond: (ft1.c1 = ft5.c1)
- -> Merge Join
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft4.c1, ft4.c2, ft4.c3, ft4.*
- Merge Cond: (ft1.c1 = ft4.c1)
- -> Sort
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Sort Key: ft1.c1
- -> Foreign Scan on public.ft1
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
- -> Sort
+ Output: ft4.c1, ft4.c2, ft4.c3, ft4.*, ft5.c1, ft5.c2, ft5.c3, ft5.*
+ Merge Cond: (ft4.c1 = ft5.c1)
+ -> Sort
+ Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
+ Sort Key: ft4.c1
+ -> Foreign Scan on public.ft4
Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
- Sort Key: ft4.c1
- -> Foreign Scan on public.ft4
- Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
- Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" FOR UPDATE
+ Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3" FOR UPDATE
-> Sort
Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
Sort Key: ft5.c1
-> Foreign Scan on public.ft5
Output: ft5.c1, ft5.c2, ft5.c3, ft5.*
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" WHERE ((c1 < 100)) FOR UPDATE
-(38 rows)
+(41 rows)
SELECT * FROM ft1, ft4, ft5, local_tbl WHERE ft1.c1 = ft4.c1 AND ft1.c1 = ft5.c1
AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft5.c1 < 100 FOR UPDATE;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 | c1 | c2 | c3 | c1 | c2 | c3 | c1 | c2 | c3
----+----+-------+------------------------------+--------------------------+----+------------+-----+----+----+--------+----+----+--------+----+----+------
+ 6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
12 | 2 | 00012 | Tue Jan 13 00:00:00 1970 PST | Tue Jan 13 00:00:00 1970 | 2 | 2 | foo | 12 | 13 | AAA012 | 12 | 13 | AAA012 | 2 | 2 | 0002
- 42 | 2 | 00042 | Thu Feb 12 00:00:00 1970 PST | Thu Feb 12 00:00:00 1970 | 2 | 2 | foo | 42 | 43 | AAA042 | 42 | 43 | AAA042 | 2 | 2 | 0002
- 72 | 2 | 00072 | Sat Mar 14 00:00:00 1970 PST | Sat Mar 14 00:00:00 1970 | 2 | 2 | foo | 72 | 73 | AAA072 | 72 | 73 | | 2 | 2 | 0002
+ 18 | 8 | 00018 | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo | 18 | 19 | AAA018 | 18 | 19 | | 8 | 8 | 0008
24 | 4 | 00024 | Sun Jan 25 00:00:00 1970 PST | Sun Jan 25 00:00:00 1970 | 4 | 4 | foo | 24 | 25 | AAA024 | 24 | 25 | AAA024 | 4 | 4 | 0004
- 54 | 4 | 00054 | Tue Feb 24 00:00:00 1970 PST | Tue Feb 24 00:00:00 1970 | 4 | 4 | foo | 54 | 55 | AAA054 | 54 | 55 | | 4 | 4 | 0004
- 84 | 4 | 00084 | Thu Mar 26 00:00:00 1970 PST | Thu Mar 26 00:00:00 1970 | 4 | 4 | foo | 84 | 85 | AAA084 | 84 | 85 | AAA084 | 4 | 4 | 0004
- 96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 96 | 97 | AAA096 | 96 | 97 | AAA096 | 6 | 6 | 0006
36 | 6 | 00036 | Fri Feb 06 00:00:00 1970 PST | Fri Feb 06 00:00:00 1970 | 6 | 6 | foo | 36 | 37 | AAA036 | 36 | 37 | | 6 | 6 | 0006
- 66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 66 | 67 | AAA066 | 66 | 67 | AAA066 | 6 | 6 | 0006
- 6 | 6 | 00006 | Wed Jan 07 00:00:00 1970 PST | Wed Jan 07 00:00:00 1970 | 6 | 6 | foo | 6 | 7 | AAA006 | 6 | 7 | AAA006 | 6 | 6 | 0006
+ 42 | 2 | 00042 | Thu Feb 12 00:00:00 1970 PST | Thu Feb 12 00:00:00 1970 | 2 | 2 | foo | 42 | 43 | AAA042 | 42 | 43 | AAA042 | 2 | 2 | 0002
48 | 8 | 00048 | Wed Feb 18 00:00:00 1970 PST | Wed Feb 18 00:00:00 1970 | 8 | 8 | foo | 48 | 49 | AAA048 | 48 | 49 | AAA048 | 8 | 8 | 0008
- 18 | 8 | 00018 | Mon Jan 19 00:00:00 1970 PST | Mon Jan 19 00:00:00 1970 | 8 | 8 | foo | 18 | 19 | AAA018 | 18 | 19 | | 8 | 8 | 0008
+ 54 | 4 | 00054 | Tue Feb 24 00:00:00 1970 PST | Tue Feb 24 00:00:00 1970 | 4 | 4 | foo | 54 | 55 | AAA054 | 54 | 55 | | 4 | 4 | 0004
+ 66 | 6 | 00066 | Sun Mar 08 00:00:00 1970 PST | Sun Mar 08 00:00:00 1970 | 6 | 6 | foo | 66 | 67 | AAA066 | 66 | 67 | AAA066 | 6 | 6 | 0006
+ 72 | 2 | 00072 | Sat Mar 14 00:00:00 1970 PST | Sat Mar 14 00:00:00 1970 | 2 | 2 | foo | 72 | 73 | AAA072 | 72 | 73 | | 2 | 2 | 0002
78 | 8 | 00078 | Fri Mar 20 00:00:00 1970 PST | Fri Mar 20 00:00:00 1970 | 8 | 8 | foo | 78 | 79 | AAA078 | 78 | 79 | AAA078 | 8 | 8 | 0008
+ 84 | 4 | 00084 | Thu Mar 26 00:00:00 1970 PST | Thu Mar 26 00:00:00 1970 | 4 | 4 | foo | 84 | 85 | AAA084 | 84 | 85 | AAA084 | 4 | 4 | 0004
+ 96 | 6 | 00096 | Tue Apr 07 00:00:00 1970 PST | Tue Apr 07 00:00:00 1970 | 6 | 6 | foo | 96 | 97 | AAA096 | 96 | 97 | AAA096 | 6 | 6 | 0006
(13 rows)
RESET enable_nestloop;
@@ -2691,22 +2694,19 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
Sort Key: ft1.c3
- -> Merge Join
+ -> Hash Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
- Merge Cond: (ft1.c1 = ft2.c1)
+ Hash Cond: (ft1.c1 = ft2.c1)
Join Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)
- -> Sort
+ -> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Sort Key: ft1.c1
- -> Foreign Scan on public.ft1
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
- -> Materialize
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
+ -> Hash
Output: ft2.*, ft2.c1, ft2.c2
-> Foreign Scan on public.ft2
Output: ft2.*, ft2.c1, ft2.c2
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-(32 rows)
+(29 rows)
ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost);
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
@@ -6347,26 +6347,23 @@ BEGIN;
-> Nested Loop
Output: ft2.ctid, ft2.*, ft4.*, ft5.*, ft4.c1, ft4.c2, ft4.c3
Join Filter: (ft4.c1 = ft5.c1)
- -> Sort
+ -> Hash Join
Output: ft2.ctid, ft2.*, ft2.c2, ft4.*, ft4.c1, ft4.c2, ft4.c3
- Sort Key: ft2.c2
- -> Hash Join
- Output: ft2.ctid, ft2.*, ft2.c2, ft4.*, ft4.c1, ft4.c2, ft4.c3
- Hash Cond: (ft2.c2 = ft4.c1)
- -> Foreign Scan on public.ft2
- Output: ft2.ctid, ft2.*, ft2.c2
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" WHERE (("C 1" > 1200)) FOR UPDATE
- -> Hash
+ Hash Cond: (ft2.c2 = ft4.c1)
+ -> Foreign Scan on public.ft2
+ Output: ft2.ctid, ft2.*, ft2.c2
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8, ctid FROM "S 1"."T 1" WHERE (("C 1" > 1200)) FOR UPDATE
+ -> Hash
+ Output: ft4.*, ft4.c1, ft4.c2, ft4.c3
+ -> Foreign Scan on public.ft4
Output: ft4.*, ft4.c1, ft4.c2, ft4.c3
- -> Foreign Scan on public.ft4
- Output: ft4.*, ft4.c1, ft4.c2, ft4.c3
- Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
+ Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 3"
-> Materialize
Output: ft5.*, ft5.c1
-> Foreign Scan on public.ft5
Output: ft5.*, ft5.c1
Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4"
-(29 rows)
+(26 rows)
UPDATE ft2 SET c3 = 'bar'
FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index 302cb2092a9..8cebd71f31d 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -117,6 +117,7 @@ ginInitBA(BuildAccumulator *accum)
ginCombineData,
ginAllocEntryAccumulator,
NULL, /* no freefunc needed */
+ NULL, /* no fixfunc needed */
accum);
}
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index 394534172fa..e93eedb69ef 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -21,6 +21,8 @@ OBJS = \
hio.o \
pruneheap.o \
rewriteheap.o \
+ tempcat.o \
+ tupmap.o \
vacuumlazy.o \
visibilitymap.o
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 0dcd6ee817e..10511a537a2 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -37,6 +37,7 @@
#include "access/multixact.h"
#include "access/subtrans.h"
#include "access/syncscan.h"
+#include "access/tempcat.h"
#include "access/valid.h"
#include "access/visibilitymap.h"
#include "access/xloginsert.h"
@@ -54,6 +55,10 @@
#include "utils/spccache.h"
#include "utils/syscache.h"
+#include "nodes/execnodes.h"
+#include "catalog/index.h"
+#include "utils/memutils.h"
+#include "access/tempcat.h"
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
@@ -6494,6 +6499,13 @@ heap_inplace_update_and_unlock(Relation relation,
bool RelcacheInitFileInval = false;
Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update_inplace(relation, tuple);
+ return;
+ }
+
oldlen = oldtup->t_len - htup->t_hoff;
newlen = tuple->t_len - tuple->t_data->t_hoff;
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
diff --git a/src/backend/access/heap/meson.build b/src/backend/access/heap/meson.build
index 2637b24112f..59c9563e764 100644
--- a/src/backend/access/heap/meson.build
+++ b/src/backend/access/heap/meson.build
@@ -9,6 +9,8 @@ backend_sources += files(
'hio.c',
'pruneheap.c',
'rewriteheap.c',
+ 'tempcat.c',
+ 'tupmap.c',
'vacuumlazy.c',
'visibilitymap.c',
)
diff --git a/src/backend/access/heap/tempcat.c b/src/backend/access/heap/tempcat.c
new file mode 100644
index 00000000000..e3c0ecc76f0
--- /dev/null
+++ b/src/backend/access/heap/tempcat.c
@@ -0,0 +1,959 @@
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "access/table.h"
+#include "access/tempcat.h"
+#include "access/xact.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "common/hashfn.h"
+#include "lib/rbtree.h"
+#include "nodes/execnodes.h"
+#include "pgstat.h"
+#include "utils/catcache.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/typcache.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+/*
+ * One catalog tuple held by the in-memory temp catalog.
+ */
+typedef struct TupleItem{
+ /* link in RelationItem::allTuples */
+ dlist_node node;
+ /* link in one of the RelationItem created-tuples lists */
+ dlist_node stageCreatedNode;
+ /* link in one of the RelationItem deleted-tuples lists */
+ dlist_node stageDeletedNode;
+ /* private copy of the tuple, made with heap_copytuple() */
+ HeapTuple tuple;
+ /* per-attribute values extracted from tuple with heap_getattr() */
+ Datum *values;
+ /* per-attribute null flags, parallel to values */
+ bool *nulls;
+} TupleItem;
+
+
+/*
+ * Node of an IndexItem's red-black tree.  rbt_combine() appends to the
+ * tuples list when a key that already exists is inserted again, so one node
+ * carries every tuple pointer sharing that key.
+ */
+typedef struct TreeItem{
+ RBTNode node;
+ dlist_head tuples; /* TuplePointerItem::node */
+}TreeItem;
+
+/*
+ * Reference from one index tree node to one TupleItem.
+ */
+typedef struct TuplePointerItem{
+ /* link in TreeItem::tuples */
+ dlist_node node;
+ /* link in IndexItem::tuplePointersScheduledForDeletion; detached otherwise */
+ dlist_node deletionNode;
+ /* the tuple this pointer refers to */
+ TupleItem* tupleItem;
+ /* tree node whose tuples list holds this pointer */
+ TreeItem* owner;
+}TuplePointerItem;
+
+/*
+ * Per-relation state of the in-memory temp catalog: the live tuples, the
+ * indexes built over them, and the created/deleted lists consumed by the
+ * transaction and subtransaction callbacks.  "Unstaged" lists are moved
+ * into the "staged" ones when a subtransaction commits.
+ */
+typedef struct RelationItem{
+ dlist_node node;
+ Oid relid;
+ dlist_head indexes; /* IndexItem::node */
+ dlist_head indexesScheduledForDeletion; /* IndexItem::node */
+ dlist_head allTuples; /* TupleItem::node */
+ dlist_head unstagedCreatedTuples; /* TupleItem::stageCreatedNode */
+ dlist_head unstagedDeletedTuples; /* TupleItem::stageDeletedNode */
+ dlist_head stagedCreatedTuples; /* TupleItem::stageCreatedNode */
+ dlist_head stagedDeletedTuples; /* TupleItem::stageDeletedNode */
+} RelationItem;
+
+/*
+ * An ordered index over one relation's tuples, keyed on the first nkeys
+ * entries of attrNumbers.  The three arrays are parallel; only the first
+ * nkeys slots are filled in by get_index_entry().
+ */
+typedef struct IndexItem{
+ dlist_node node;
+ RelationItem* rel;
+ int nkeys;
+ AttrNumber attrNumbers[INDEX_MAX_KEYS];
+ FmgrInfo keyCmpFuncs[INDEX_MAX_KEYS];
+ Oid keyCollations[INDEX_MAX_KEYS];
+ RBTree* tree; /* TreeItem::node */
+ dlist_head scans; /* TempCatScanData::node */
+ dlist_head tuplePointersScheduledForDeletion; /* TuplePointerItem::deletionNode */
+} IndexItem;
+
+/*
+ * State of one scan over the in-memory temp catalog; created by
+ * temp_catalog_beginscan() and released by temp_catalog_endscan().
+ */
+struct TempCatScanData{
+ /* link in IndexItem::scans */
+ dlist_node node;
+ /* caller's scan keys; the pointer is stored, the array is not copied */
+ ScanKey key;
+ int nkeys;
+ /* set once temp_catalog_getnext() has returned a tuple */
+ bool started;
+ /* set once the scan is exhausted */
+ bool finished;
+ RelationItem* rel;
+ IndexItem* index;
+ /* position in index->tree */
+ RBTreeIterator treeIter;
+ /* position inside the current tree node's tuple list */
+ dlist_iter listIter;
+ /* set by temp_catalog_beginscan(); not read anywhere in this file */
+ RBTNode* endNode;
+ /* when true the scan ends at the first tuple that fails the scan keys */
+ bool continuous;
+ /* number of tuple pointers visited so far */
+ uint64_t count;
+};
+
+
+/* All relations tracked by the in-memory temp catalog in this process */
+static dlist_head temp_rels; /* RelationItem::node */
+/* Set by temp_catalog_init() after the list and callbacks are set up */
+static bool initialized = false;
+/* Switch tested by callers before they route catalog access through this module */
+bool enable_temp_memory_catalog = false;
+
+
+/*
+ * Tear down an index tree: free every TuplePointerItem hanging off each
+ * node (unlinking it from a pending-deletion list if it is on one), delete
+ * the nodes one at a time, and finally free the tree header itself.
+ */
+static void
+rbt_destroy(RBTree* tree)
+{
+	RBTNode    *leftmost;
+
+	while ((leftmost = rbt_leftmost(tree)) != NULL)
+	{
+		TreeItem   *treeItem = (TreeItem *) leftmost;
+
+		while (!dlist_is_empty(&treeItem->tuples))
+		{
+			dlist_node *link = dlist_pop_head_node(&treeItem->tuples);
+			TuplePointerItem *pointerItem = dlist_container(TuplePointerItem, node, link);
+
+			/* Still queued for deferred deletion?  Unlink it first. */
+			if (!dlist_node_is_detached(&pointerItem->deletionNode))
+				dlist_delete(&pointerItem->deletionNode);
+
+			pfree(pointerItem);
+		}
+
+		rbt_delete(tree, leftmost);
+	}
+
+	pfree(tree);
+}
+
+
+/*
+ * Callback handed to PrepareToInvalidateCacheTuple() by
+ * LocalInvalidateCatCacheTupleNow(): invalidates the matching syscache
+ * entries and then runs the syscache callbacks for them.  dbId and context
+ * are ignored.
+ */
+static void
+LocalInvalidateCatCache(int cacheId, uint32 hashValue, Oid dbId, void *context)
+{
+ SysCacheInvalidate(cacheId, hashValue);
+ CallSyscacheCallbacks(cacheId, hashValue);
+}
+
+/*
+ * Invalidate cached state for a tuple directly, without queueing an
+ * invalidation message.  Used by the revert paths that run from the abort
+ * callbacks.
+ *
+ * The results of GetTopTransactionId() and GetCurrentCommandId(true) are
+ * discarded -- presumably they are called for their side effects (XID
+ * assignment / marking the command id as used); confirm.
+ */
+static void
+LocalInvalidateCatCacheTupleNow(Oid relid, HeapTuple tuple, HeapTuple newTuple)
+{
+ GetTopTransactionId();
+ GetCurrentCommandId(true);
+ InvalidateCatalogSnapshot();
+
+ /* Catcache entries are touched only for relations that need more than a snapshot reset */
+ if (!RelationInvalidatesSnapshotsOnly(relid)){
+ PrepareToInvalidateCacheTuple(relid, tuple, newTuple, LocalInvalidateCatCache, NULL);
+ }
+}
+
+
+/*
+ * Register an invalidation for a changed tuple through the regular
+ * CacheInvalidateHeapTuple() path and then call
+ * AcceptInvalidationMessages().
+ *
+ * As in LocalInvalidateCatCacheTupleNow(), the first two calls discard
+ * their results -- presumably made for their side effects; confirm.
+ */
+static void
+LocalInvalidateCatCacheTuple(Relation rel, HeapTuple tuple, HeapTuple newTuple)
+{
+ GetTopTransactionId();
+ GetCurrentCommandId(true);
+ CacheInvalidateHeapTuple(rel, tuple, newTuple);
+ AcceptInvalidationMessages();
+}
+
+
+/*
+ * Probe an index tree using the key columns of tupleItem.
+ *
+ * cmp selects the kind of lookup: zero looks for the node with an equal
+ * key, a negative value for the nearest node strictly below the key, and a
+ * positive value for the nearest node strictly above it.  The result of the
+ * underlying rbtree lookup is returned as a TreeItem pointer.
+ */
+static TreeItem*
+find_index_tree_item(IndexItem* indexItem, TupleItem* tupleItem, int cmp)
+{
+	TuplePointerItem probePointer;
+	TreeItem	probe;
+	RBTNode    *found;
+
+	/* Throw-away tree item whose single list member refers to tupleItem */
+	probePointer.tupleItem = tupleItem;
+	dlist_init(&probe.tuples);
+	dlist_push_tail(&probe.tuples, &probePointer.node);
+
+	if (cmp == 0)
+		found = rbt_find(indexItem->tree, (RBTNode *) &probe);
+	else if (cmp < 0)
+		found = rbt_find_less(indexItem->tree, (RBTNode *) &probe, false);
+	else
+		found = rbt_find_great(indexItem->tree, (RBTNode *) &probe, false);
+
+	return (TreeItem *) found;
+}
+
+
+
+
+/*
+ * Physically remove every tuple pointer queued on
+ * idxItem->tuplePointersScheduledForDeletion, dropping a tree node as soon
+ * as its tuple list becomes empty.
+ */
+static void
+delete_pending_key_items(IndexItem* idxItem)
+{
+ while (!dlist_is_empty(&idxItem->tuplePointersScheduledForDeletion))
+ {
+ dlist_node* node = dlist_pop_head_node(&idxItem->tuplePointersScheduledForDeletion);
+ TuplePointerItem* tuplePointerItem = (TuplePointerItem*) dlist_container(TuplePointerItem, deletionNode, node);
+ /* Remember the owning tree node before the pointer item is freed */
+ TreeItem* owner = tuplePointerItem->owner;
+ dlist_delete(&tuplePointerItem->node);
+ pfree(tuplePointerItem);
+
+ if (dlist_is_empty(&owner->tuples))
+ rbt_delete(idxItem->tree, &owner->node);
+ }
+}
+
+
+/*
+ * Perform the deferred clean-up work for one relation.
+ *
+ * For each live index that has no scan attached, the tuple pointers queued
+ * for deletion are removed.  Indexes that were scheduled for deletion are
+ * destroyed, again only when no scan is attached to them; the rest are left
+ * for a later call.
+ */
+static void
+cleanup( RelationItem* relEntry )
+{
+	dlist_iter	liveIter;
+	dlist_mutable_iter doomedIter;
+
+	dlist_foreach(liveIter, &relEntry->indexes)
+	{
+		IndexItem  *live = (IndexItem *) dlist_container(IndexItem, node, liveIter.cur);
+
+		if (dlist_is_empty(&live->scans))
+			delete_pending_key_items(live);
+	}
+
+	dlist_foreach_modify(doomedIter, &relEntry->indexesScheduledForDeletion)
+	{
+		IndexItem  *doomed = (IndexItem *) dlist_container(IndexItem, node, doomedIter.cur);
+
+		/* An index that still serves a scan must survive for now */
+		if (dlist_is_empty(&doomed->scans))
+		{
+			rbt_destroy(doomed->tree);
+			dlist_delete(&doomed->node);
+			pfree(doomed);
+		}
+	}
+}
+
+
+/*
+ * Add a pointer to tupItem to one index tree.
+ *
+ * A TreeItem is built on the stack with the new pointer as its only list
+ * member and passed to rbt_insert().
+ */
+static void
+insert_tuple_entry_for_index(IndexItem* idxItem, TupleItem* tupItem)
+{
+ TuplePointerItem* tuplePointer;
+ TreeItem tempTreeItem;
+ bool isNew;
+
+ tuplePointer = palloc_object(TuplePointerItem);
+ dlist_node_init(&tuplePointer->deletionNode);
+ tuplePointer->tupleItem = tupItem;
+ tuplePointer->owner = NULL;
+
+ dlist_init(&tempTreeItem.tuples);
+ dlist_push_tail(&tempTreeItem.tuples, &tuplePointer->node);
+ rbt_insert(idxItem->tree, &tempTreeItem.node, &isNew);
+
+ /*
+ * isNew is not inspected.  When an equal key already exists,
+ * rbt_combine() re-links the pointer into the existing node and sets its
+ * owner.  For a brand-new node the list head and owner are presumably
+ * repaired by the rbt_fix() callback registered in get_index_entry() --
+ * confirm against the patched rbtree.c.
+ */
+}
+
+
+/*
+ * Build a TupleItem around a private copy of htup.
+ *
+ * The values/nulls arrays are sized by tupdesc->natts and filled from the
+ * copied tuple, one heap_getattr() call per attribute.  Both staging list
+ * links start out detached.  The ptr argument is not used.
+ */
+static TupleItem*
+create_tuple_item(HeapTuple htup, TupleDesc tupdesc, ItemPointer ptr)
+{
+	TupleItem  *item = palloc_object(TupleItem);
+
+	item->values = palloc_array(Datum, tupdesc->natts);
+	item->nulls = palloc_array(bool, tupdesc->natts);
+	item->tuple = heap_copytuple(htup);
+	dlist_node_init(&item->stageCreatedNode);
+	dlist_node_init(&item->stageDeletedNode);
+
+	for (int attno = 1; attno <= tupdesc->natts; attno++)
+	{
+		int			slot = attno - 1;
+
+		item->values[slot] = heap_getattr(item->tuple, attno, tupdesc, &item->nulls[slot]);
+	}
+
+	return item;
+}
+
+
+/*
+ * Make tupItem a live tuple of relItem: insert it into every live index,
+ * link it into allTuples and give it a freshly assigned TID.
+ *
+ * When addToUnstaged is true the tuple is also queued on
+ * unstagedCreatedTuples so that an abort can undo the insertion.
+ */
+static void
+add_tuple_entry(RelationItem* relItem, TupleItem* tupItem, bool addToUnstaged)
+{
+ dlist_iter iter;
+
+ dlist_foreach(iter, &relItem->indexes)
+ {
+ IndexItem* idxItem = dlist_container(IndexItem, node, iter.cur);
+ insert_tuple_entry_for_index(idxItem, tupItem);
+ }
+
+ dlist_push_head(&relItem->allTuples, &tupItem->node);
+
+ if (addToUnstaged)
+ dlist_push_tail(&relItem->unstagedCreatedTuples, &tupItem->stageCreatedNode);
+
+ /* NULL asks the TID map to hand out a new pointer from its counter */
+ tupItem->tuple->t_self = temp_catalog_tupmap_assign(NULL, tupItem);
+}
+
+
+/*
+ * Take tupItem out of the live set of relItem.
+ *
+ * In every live index the pointer to the tuple is queued for deletion.
+ * With staging == true the queue is left for cleanup() and the tuple is
+ * put on unstagedDeletedTuples so an abort can restore it; with
+ * staging == false the queued pointers are deleted right away.  In both
+ * cases the tuple is unlinked from allTuples and its TID mapping is
+ * released.  The TupleItem itself is not freed here.
+ */
+static void
+remove_tuple_entry(RelationItem* relItem, TupleItem* tupItem, bool staging)
+{
+ dlist_iter indexIter;
+
+ dlist_foreach(indexIter, &relItem->indexes)
+ {
+ dlist_iter tupIter;
+ IndexItem* idxItem = (IndexItem*) dlist_container(IndexItem, node, indexIter.cur);
+ TreeItem* node = find_index_tree_item(idxItem, tupItem, 0);
+ if (node)
+ {
+ /* Several tuples may share the key; pick the pointer for this one */
+ dlist_foreach(tupIter, &node->tuples)
+ {
+ TuplePointerItem* key = (TuplePointerItem*) dlist_container(TuplePointerItem, node, tupIter.cur);
+ if (key->tupleItem == tupItem)
+ {
+ /* Queue it once only */
+ if (dlist_node_is_detached(&key->deletionNode))
+ dlist_push_tail(&idxItem->tuplePointersScheduledForDeletion, &key->deletionNode);
+ break;
+ }
+ }
+ }
+
+ if (!staging)
+ delete_pending_key_items(idxItem);
+ }
+
+ /* Re-initialize the link so it reads as detached afterwards */
+ dlist_delete(&tupItem->node);
+ dlist_node_init(&tupItem->node);
+
+ if (staging)
+ dlist_push_tail(&relItem->unstagedDeletedTuples, &tupItem->stageDeletedNode);
+
+ temp_catalog_tupmap_unassign(&tupItem->tuple->t_self, tupItem);
+}
+
+
+
+/*
+ * Node allocator passed to rbt_create(): allocates a TreeItem and returns
+ * the address of its embedded RBTNode.  arg is unused.
+ */
+static RBTNode*
+rbt_alloc(void *arg)
+{
+	TreeItem   *fresh = palloc_object(TreeItem);
+
+	return &fresh->node;
+}
+
+
+/*
+ * Node deallocator passed to rbt_create(); the tuple list of the node is
+ * not touched here.  arg is unused.
+ */
+static void
+rbt_free(RBTNode *x, void *arg)
+{
+ pfree(x);
+}
+
+
+/*
+ * Tree comparator: orders two tree items by the index key columns of the
+ * first tuple on each item's list.  Returns the first non-zero per-column
+ * comparison result, or 0 when all index->nkeys columns compare equal.
+ *
+ * NOTE(review): the nulls[] arrays are not consulted, and all index->nkeys
+ * columns are read from both sides, so a probe item has to carry a value
+ * for every index column.
+ * NOTE(review): the comparison function is invoked through its bare
+ * fn_addr, i.e. without the FmgrInfo -- confirm this is fine for every key
+ * type that can occur.
+ */
+static int
+rbt_compare(TreeItem* a, TreeItem* b, IndexItem* index)
+{
+ TuplePointerItem* aKey = dlist_head_element(TuplePointerItem,node,&a->tuples);
+ TuplePointerItem* bKey = dlist_head_element(TuplePointerItem,node,&b->tuples);
+
+ for (int keyIndex=0; keyIndex < index->nkeys; keyIndex++)
+ {
+ /* attrNumbers are 1-based, values[] is 0-based */
+ int attributeIndex = index->attrNumbers[keyIndex] - 1;
+ int cmp;
+
+ cmp = DatumGetInt32(DirectFunctionCall2Coll(index->keyCmpFuncs[keyIndex].fn_addr, index->keyCollations[keyIndex], aKey->tupleItem->values[attributeIndex], bKey->tupleItem->values[attributeIndex]));
+ if (cmp)
+ return cmp;
+ }
+ return 0;
+}
+
+/*
+ * Tree combiner: moves every tuple pointer from newdata's list to the tail
+ * of the existing node's list and records the existing node as its owner.
+ * newdata's list is empty afterwards.  index is unused.
+ */
+static void
+rbt_combine(TreeItem* existing, TreeItem* newdata, IndexItem* index)
+{
+ while (!dlist_is_empty(&newdata->tuples))
+ {
+ dlist_node* newTuplePointerNode = dlist_pop_head_node(&newdata->tuples);
+ TuplePointerItem* newTuplePointer = dlist_container(TuplePointerItem, node, newTuplePointerNode);
+ dlist_push_tail(&existing->tuples, &newTuplePointer->node);
+ newTuplePointer->owner = existing;
+ }
+}
+
+
+/*
+ * Fix-up callback passed to rbt_create().  It repairs the embedded tuple
+ * list of a tree item whose bytes now live at address x: the list head is
+ * made consistent with its new location and every pointer on the list gets
+ * its owner set to this item.
+ *
+ * NOTE(review): when the tree invokes this callback is defined by the
+ * patched rbtree.c, which is not part of this file -- presumably after node
+ * contents have been copied to a different address.
+ */
+static void
+rbt_fix(RBTNode *x, void *arg)
+{
+ dlist_iter tupIter;
+ TreeItem* item = (TreeItem*)x;
+ dlist_node* head = &item->tuples.head;
+
+ /* Fix old head element address. */
+ /*
+ * If next and prev agree and that node links to itself, the copied head
+ * still describes an empty circular list at another address: reset it to
+ * be empty here.  Otherwise re-point the first and last members at the
+ * head's current address.
+ */
+ if (head->next == head->prev && head->next == head->next->next)
+ {
+ head->next = head;
+ head->prev = head;
+ }
+ else
+ {
+ head->next->prev = head;
+ head->prev->next = head;
+ }
+
+ dlist_foreach(tupIter, &item->tuples)
+ {
+ TuplePointerItem* key = (TuplePointerItem*) dlist_container(TuplePointerItem, node, tupIter.cur);
+ key->owner = item;
+ }
+}
+
+
+/*
+ * Test one indexed tuple against the scan keys.
+ *
+ * Scan key i is applied to index column i: the tuple's value for that
+ * column is compared with sk_argument using the index's comparison function
+ * and collation, and the result is judged by sk_strategy.  Returns true
+ * only if every key is satisfied; an unrecognised strategy never matches.
+ */
+static bool
+compare_keyItem_with_scanKey(IndexItem* indexItem, TuplePointerItem* keyItem, ScanKey keys, int nkeys)
+{
+	int			i = 0;
+
+	while (i < nkeys)
+	{
+		ScanKey		sk = &keys[i];
+		int			column = indexItem->attrNumbers[i] - 1;
+		int			order;
+		bool		satisfied;
+
+		order = DatumGetInt32(DirectFunctionCall2Coll(indexItem->keyCmpFuncs[i].fn_addr,
+													  indexItem->keyCollations[i],
+													  keyItem->tupleItem->values[column],
+													  sk->sk_argument));
+
+		switch (sk->sk_strategy)
+		{
+			case BTLessStrategyNumber:
+				satisfied = (order < 0);
+				break;
+			case BTLessEqualStrategyNumber:
+				satisfied = (order <= 0);
+				break;
+			case BTEqualStrategyNumber:
+				satisfied = (order == 0);
+				break;
+			case BTGreaterEqualStrategyNumber:
+				satisfied = (order >= 0);
+				break;
+			case BTGreaterStrategyNumber:
+				satisfied = (order > 0);
+				break;
+			default:
+				satisfied = false;
+				break;
+		}
+
+		if (!satisfied)
+			return false;
+
+		i++;
+	}
+
+	return true;
+}
+
+
+/*
+ * Return an index of relEntry usable for the given key columns, building
+ * one when none fits.
+ *
+ * An existing index is reused when its leading numKeys columns equal
+ * attrNumbers; such an index may have more columns than were asked for.
+ * Indexes whose whole column list is a shorter prefix of the request are
+ * moved to indexesScheduledForDeletion as they are encountered.  A newly
+ * built index takes its comparison function and collation per column from
+ * the type cache and the relation's tuple descriptor, and is populated from
+ * allTuples.
+ */
+static IndexItem*
+get_index_entry(RelationItem* relEntry, Relation relation, AttrNumber* attrNumbers, int numKeys)
+{
+ IndexItem* indexEntry = NULL;
+ int keyIndex;
+ dlist_iter iter;
+ dlist_mutable_iter indexIter;
+
+ dlist_foreach_modify(indexIter, &relEntry->indexes)
+ {
+ IndexItem* index = (IndexItem*) dlist_container(IndexItem, node, indexIter.cur);
+ /* Same leading columns, at least as wide: reuse */
+ if (index->nkeys >= numKeys && memcmp(index->attrNumbers, attrNumbers, sizeof(AttrNumber) * numKeys)==0)
+ return index;
+
+ /* Narrower index covering a prefix of the request: retire it */
+ if (index->nkeys < numKeys && memcmp(index->attrNumbers, attrNumbers, sizeof(AttrNumber) * index->nkeys)==0)
+ {
+ dlist_delete(&index->node);
+ dlist_push_tail(&relEntry->indexesScheduledForDeletion, &index->node);
+ }
+ }
+
+ indexEntry = palloc_object(IndexItem);
+ indexEntry->rel = relEntry;
+ indexEntry->nkeys = numKeys;
+ /* The IndexItem itself is passed as the tree's callback argument */
+ indexEntry->tree = rbt_create( sizeof(TreeItem), (rbt_comparator)rbt_compare, (rbt_combiner)rbt_combine, rbt_alloc, rbt_free, rbt_fix, indexEntry);
+ dlist_init(&indexEntry->scans);
+ dlist_init(&indexEntry->tuplePointersScheduledForDeletion);
+
+ for(keyIndex=0; keyIndex < numKeys; keyIndex++ )
+ {
+ TypeCacheEntry* typeEntry;
+ FormData_pg_attribute* attribute = TupleDescAttr(relation->rd_att, attrNumbers[keyIndex]-1);
+
+ typeEntry = lookup_type_cache(attribute->atttypid, TYPECACHE_CMP_PROC_FINFO);
+ /* Only checked in assert-enabled builds */
+ Assert(OidIsValid(typeEntry->cmp_proc_finfo.fn_oid));
+
+ indexEntry->keyCmpFuncs[keyIndex] = typeEntry->cmp_proc_finfo;
+ indexEntry->keyCollations[keyIndex] = attribute->attcollation;
+ indexEntry->attrNumbers[keyIndex] = attribute->attnum;
+ }
+
+ dlist_foreach(iter, &relEntry->allTuples)
+ {
+ TupleItem* tupleEntry = dlist_container(TupleItem, node, iter.cur);
+ insert_tuple_entry_for_index(indexEntry, tupleEntry);
+ }
+
+ dlist_push_tail(&relEntry->indexes, &indexEntry->node);
+
+ return indexEntry;
+}
+
+
+/*
+ * Look up the RelationItem tracking rel, matching on the OID stored in
+ * rel->rd_rel.  Returns NULL when the relation is not tracked.
+ */
+static RelationItem*
+find_relation_entry(Relation rel)
+{
+	Oid			wanted = rel->rd_rel->oid;
+	dlist_iter	cursor;
+
+	dlist_foreach(cursor, &temp_rels)
+	{
+		RelationItem *candidate = dlist_container(RelationItem, node, cursor.cur);
+
+		if (candidate->relid == wanted)
+			return candidate;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Return the RelationItem tracking the given relation, creating an empty
+ * one (all lists initialized, appended to temp_rels) on first use.  The new
+ * entry is allocated in the caller's current memory context.
+ */
+static RelationItem*
+get_relation_entry(Relation relation)
+{
+	RelationItem *found = find_relation_entry(relation);
+	RelationItem *created;
+
+	if (found)
+		return found;
+
+	created = palloc_object(RelationItem);
+	created->relid = relation->rd_rel->oid;
+	dlist_init(&created->indexes);
+	dlist_init(&created->allTuples);
+	dlist_init(&created->indexesScheduledForDeletion);
+	dlist_init(&created->unstagedCreatedTuples);
+	dlist_init(&created->unstagedDeletedTuples);
+	dlist_init(&created->stagedCreatedTuples);
+	dlist_init(&created->stagedDeletedTuples);
+	dlist_push_tail(&temp_rels, &created->node);
+
+	return created;
+}
+
+
+/*
+ * Store a copy of htup in the in-memory temp catalog for relation.
+ *
+ * The relation entry is created on first use.  The copy is inserted into
+ * all live indexes, queued as "created" for the current (sub)transaction
+ * and given a new TID, which is written back to htup->t_self.
+ *
+ * Only the long-lived structures are built in TopMemoryContext.  Cache
+ * invalidation runs in the caller's context again, so that whatever
+ * AcceptInvalidationMessages() allocates transiently does not accumulate
+ * for the life of the backend.
+ */
+void
+temp_catalog_insert(Relation relation, HeapTuple htup)
+{
+	RelationItem *relEntry;
+	TupleItem  *item;
+	MemoryContext oldctx;
+
+	oldctx = MemoryContextSwitchTo(TopMemoryContext);
+
+	relEntry = get_relation_entry(relation);
+	item = create_tuple_item(htup, relation->rd_att, NULL);
+	add_tuple_entry(relEntry, item, true);
+
+	MemoryContextSwitchTo(oldctx);
+
+	LocalInvalidateCatCacheTuple(relation, item->tuple, NULL);
+	htup->t_self = item->tuple->t_self;
+}
+
+
+/*
+ * Delete the tuple identified by ptr from the in-memory temp catalog.
+ *
+ * Does nothing when the relation is not tracked or the TID is unknown to
+ * the TID map.  The tuple is only staged as deleted; its memory is released
+ * by the transaction callbacks.
+ */
+void
+temp_catalog_delete(Relation relation, ItemPointer ptr)
+{
+ RelationItem* relEntry;
+ TupleItem* tupleEntry;
+
+ relEntry = find_relation_entry(relation);
+ if (!relEntry)
+ return;
+
+ tupleEntry = temp_catalog_tupmap_get(ptr);
+ if (!tupleEntry)
+ return;
+
+ remove_tuple_entry(relEntry, tupleEntry, true);
+
+ LocalInvalidateCatCacheTuple(relation, tupleEntry->tuple, NULL);
+
+ pgstat_count_heap_delete(relation);
+
+ cleanup(relEntry);
+}
+
+
+/*
+ * Replace the tuple identified by ptr with a copy of htup.
+ *
+ * The old version is staged as deleted and the new version as created, so
+ * that an abort restores the previous state.  The new version receives a
+ * new TID; htup->t_self is left untouched.
+ *
+ * Does nothing when the relation is not tracked or when ptr is unknown to
+ * the TID map -- the same guards temp_catalog_delete() applies.  Without
+ * the second guard an unknown TID would be dereferenced as a NULL
+ * TupleItem.
+ *
+ * Only the allocation of the new version runs in TopMemoryContext; cache
+ * invalidation and clean-up run in the caller's context so that their
+ * transient allocations are not kept for the life of the backend.
+ */
+void
+temp_catalog_update(Relation relation, ItemPointer ptr, HeapTuple htup)
+{
+	RelationItem *relEntry;
+	TupleItem  *oldTupleEntry;
+	TupleItem  *newTupleEntry;
+	MemoryContext oldctx;
+
+	relEntry = find_relation_entry(relation);
+	if (!relEntry)
+		return;
+
+	oldTupleEntry = (TupleItem *) temp_catalog_tupmap_get(ptr);
+	if (!oldTupleEntry)
+		return;
+
+	oldctx = MemoryContextSwitchTo(TopMemoryContext);
+
+	remove_tuple_entry(relEntry, oldTupleEntry, true);
+
+	newTupleEntry = create_tuple_item(htup, relation->rd_att, ptr);
+	add_tuple_entry(relEntry, newTupleEntry, true);
+
+	MemoryContextSwitchTo(oldctx);
+
+	LocalInvalidateCatCacheTuple(relation, oldTupleEntry->tuple, newTupleEntry->tuple);
+
+	cleanup(relEntry);
+}
+
+
+/*
+ * In-place update entry point: handled as an ordinary
+ * temp_catalog_update() keyed by htup's own t_self.
+ */
+void
+temp_catalog_update_inplace(Relation relation, HeapTuple htup)
+{
+ temp_catalog_update(relation, &htup->t_self, htup);
+}
+
+
+/*
+ * Find the tree node an ordered scan has to start *after*.
+ *
+ * For a forward (left-to-right) walk this is the greatest node below the
+ * probe key, for a backward walk the smallest node above it.  With
+ * equalMatch the node holding the probe key itself also qualifies, which is
+ * what a strict '<' or '>' scan needs in order to step over it.
+ */
+static TreeItem *
+temp_catalog_scan_boundary(IndexItem *index, TupleItem *probe, bool forward, bool equalMatch)
+{
+	TuplePointerItem probePointer;
+	TreeItem	probeNode;
+
+	probePointer.tupleItem = probe;
+	dlist_init(&probeNode.tuples);
+	dlist_push_tail(&probeNode.tuples, &probePointer.node);
+
+	if (forward)
+		return (TreeItem *) rbt_find_less(index->tree, &probeNode.node, equalMatch);
+
+	return (TreeItem *) rbt_find_great(index->tree, &probeNode.node, equalMatch);
+}
+
+/*
+ * Body of temp_catalog_beginscan(), run with the re-entrancy guard held.
+ */
+static TempCatScanData *
+temp_catalog_beginscan_guarded(Relation relation, int nkeys, ScanKey key)
+{
+	RelationItem *relEntry;
+	IndexItem  *indexEntry;
+	TempCatScanData *scan;
+	AttrNumber	attrNumbers[INDEX_MAX_KEYS];
+	MemoryContext oldctx;
+	int			strategy = InvalidStrategy;
+	RBTOrderControl walkDir = LeftRightWalk;
+	TreeItem   *lastItem = NULL;
+	TreeItem   *endItem = NULL;
+	bool		continuous = false;
+	bool		ordered;
+
+	relEntry = find_relation_entry(relation);
+	if (!relEntry)
+		return NULL;
+
+	Assert(nkeys >= 0 && nkeys <= INDEX_MAX_KEYS);
+	for (int c = 0; c < nkeys; c++)
+		attrNumbers[c] = key[c].sk_attno;
+
+	/* Index and scan state must outlive the current statement */
+	oldctx = MemoryContextSwitchTo(TopMemoryContext);
+
+	indexEntry = get_index_entry(relEntry, relation, attrNumbers, nkeys);
+
+	scan = palloc_object(TempCatScanData);
+	scan->rel = relEntry;
+	scan->index = indexEntry;
+	scan->key = key;
+	scan->nkeys = nkeys;
+	scan->started = false;
+	scan->finished = false;
+	scan->listIter.cur = NULL;
+	scan->listIter.end = NULL;
+	scan->count = 0;
+
+	/*
+	 * An ordered scan needs equality on all leading keys; the last key then
+	 * decides the direction.
+	 */
+	if (nkeys > 0)
+	{
+		strategy = key[nkeys - 1].sk_strategy;
+		for (int c = 0; c < nkeys - 1; c++)
+		{
+			if (key[c].sk_strategy != BTEqualStrategyNumber)
+			{
+				strategy = InvalidStrategy;
+				break;
+			}
+		}
+	}
+
+	/*
+	 * get_index_entry() may hand back an index with more columns than there
+	 * are scan keys.  The tree comparator reads every index column of the
+	 * probe, so a probe built from the scan keys alone would expose unset
+	 * (or out-of-bounds) values.  Fall back to a filtered full walk then.
+	 */
+	ordered = indexEntry->nkeys == nkeys &&
+		(strategy == BTEqualStrategyNumber ||
+		 strategy == BTGreaterStrategyNumber ||
+		 strategy == BTGreaterEqualStrategyNumber ||
+		 strategy == BTLessStrategyNumber ||
+		 strategy == BTLessEqualStrategyNumber);
+
+	if (ordered)
+	{
+		TupleItem	probe;
+		Datum	   *probeValues;
+		int			maxAtt = 0;
+		bool		forward;
+		bool		strict;
+
+		for (int c = 0; c < nkeys; c++)
+			maxAtt = Max(maxAtt, attrNumbers[c]);
+
+		probeValues = palloc_array(Datum, maxAtt);
+		for (int c = 0; c < nkeys; c++)
+			probeValues[attrNumbers[c] - 1] = key[c].sk_argument;
+
+		probe.values = probeValues;
+
+		forward = (strategy != BTLessStrategyNumber &&
+				   strategy != BTLessEqualStrategyNumber);
+		strict = (strategy == BTGreaterStrategyNumber ||
+				  strategy == BTLessStrategyNumber);
+
+		walkDir = forward ? LeftRightWalk : RightLeftWalk;
+
+		/*
+		 * For strict comparisons the node equal to the key must be skipped:
+		 * it fails the scan key, and a "continuous" scan stops at the first
+		 * failing tuple.
+		 */
+		lastItem = temp_catalog_scan_boundary(indexEntry, &probe, forward, strict);
+		if (strategy == BTEqualStrategyNumber)
+			endItem = temp_catalog_scan_boundary(indexEntry, &probe, false, false);
+
+		continuous = true;
+
+		pfree(probeValues);
+	}
+
+	rbt_begin_iterate(indexEntry->tree, walkDir, &scan->treeIter);
+	/* Resume point: the iterator continues after last_visited */
+	scan->treeIter.last_visited = lastItem ? &lastItem->node : NULL;
+	scan->endNode = endItem ? &endItem->node : NULL;
+	scan->continuous = continuous;
+
+	dlist_push_tail(&indexEntry->scans, &scan->node);
+
+	MemoryContextSwitchTo(oldctx);
+
+	return scan;
+}
+
+/*
+ * Start a scan of the in-memory temp catalog for relation.
+ *
+ * Returns NULL when the relation is not tracked, or when called while
+ * another invocation is still in progress (work done while setting up a
+ * scan can itself start catalog scans).  Otherwise returns scan state that
+ * must be released with temp_catalog_endscan().  The key array is
+ * referenced, not copied, and must stay valid for the scan's lifetime.
+ *
+ * The re-entrancy flag is reset even when setup throws; otherwise one
+ * failure would disable temp catalog scans for the rest of the session.
+ */
+struct TempCatScanData*
+temp_catalog_beginscan(Relation relation, int nkeys, ScanKey key)
+{
+	static bool nested = false;
+	TempCatScanData *volatile scan = NULL;
+
+	if (nested)
+		return NULL;
+
+	nested = true;
+
+	PG_TRY();
+	{
+		scan = temp_catalog_beginscan_guarded(relation, nkeys, key);
+	}
+	PG_FINALLY();
+	{
+		nested = false;
+	}
+	PG_END_TRY();
+
+	return scan;
+}
+
+
+/*
+ * Finish a scan started by temp_catalog_beginscan(); a NULL scan is
+ * accepted and ignored.  Detaching the scan from its index may unblock
+ * deferred deletions, so cleanup() is run for the relation before the scan
+ * state is freed.
+ */
+void
+temp_catalog_endscan(TempCatScanData* scan)
+{
+ if (!scan)
+ return;
+
+ dlist_delete(&scan->node);
+ cleanup(scan->rel);
+ pfree(scan);
+}
+
+
+/*
+ * Return the next tuple of the scan that satisfies the scan keys, or NULL
+ * when the scan is NULL or exhausted.
+ *
+ * The returned tuple is the catalog's own copy (not a fresh one); it is
+ * also stored into bslot->base.tuple.
+ */
+HeapTuple
+temp_catalog_getnext(TempCatScanData* scan, BufferHeapTupleTableSlot* bslot)
+{
+ if (!scan || scan->finished)
+ return NULL;
+
+ for(;;){
+ TuplePointerItem* key;
+
+ /* Current node's list is used up (or nothing visited yet): advance in the tree */
+ while (scan->listIter.cur == scan->listIter.end)
+ {
+ TreeItem* nextNode = (TreeItem*)rbt_iterate(&scan->treeIter);
+ if (!nextNode)
+ {
+ scan->finished = true;
+ return NULL;
+ }
+
+ /* A head whose next is NULL is treated as an empty list */
+ scan->listIter.end = &nextNode->tuples.head;
+ scan->listIter.cur = scan->listIter.end->next ? scan->listIter.end->next : scan->listIter.end;
+ }
+
+ key = dlist_container(TuplePointerItem,node,scan->listIter.cur);
+
+ scan->listIter.cur = scan->listIter.cur->next;
+
+ scan->count++;
+
+ /* Skip pointers that are queued for deletion */
+ if (!dlist_node_is_detached(&key->deletionNode))
+ continue;
+
+ if (!compare_keyItem_with_scanKey(scan->index, key, scan->key, scan->nkeys))
+ {
+ /* In a continuous scan the first mismatch ends the scan */
+ if (scan->continuous)
+ {
+ scan->finished = true;
+ return NULL;
+ }
+
+ continue;
+ }
+
+ scan->started = true;
+ bslot->base.tuple = key->tupleItem->tuple;
+ return key->tupleItem->tuple;
+ }
+}
+
+
+/*
+ * Report whether the scan has produced a tuple and has not yet run to its
+ * end.  A NULL scan yields false.
+ */
+bool
+temp_catalog_is_fetched(TempCatScanData* scan)
+{
+	if (!scan)
+		return false;
+
+	if (!scan->started)
+		return false;
+
+	return !scan->finished;
+}
+
+
+/*
+ * Append all members of src to the tail of dst, preserving their order, and
+ * leave src empty.
+ */
+static void
+dlist_move(dlist_head *dst, dlist_head *src)
+{
+ if (dst->head.next == NULL) /* convert NULL header to circular */
+ dlist_init(dst);
+
+ if (!dlist_is_empty(src))
+ {
+ dlist_node* head = dlist_head_node(src);
+ dlist_node* tail = dlist_tail_node(src);
+ /* Splice the chain head..tail between dst's current tail and dst's head */
+ tail->next = &dst->head;
+ head->prev = dst->head.prev;
+ dst->head.prev->next = head;
+ dst->head.prev = tail;
+ dlist_init(src);
+ }
+}
+
+
+/*
+ * Release a TupleItem together with its value/null arrays and its tuple
+ * copy.  The item is not unlinked from any list here.
+ */
+static void
+free_tuple(TupleItem* item)
+{
+ pfree(item->values);
+ pfree(item->nulls);
+ heap_freetuple(item->tuple);
+ pfree(item);
+}
+
+
+/*
+ * Free every tuple on a deleted-tuples list (linked through
+ * TupleItem::stageDeletedNode) and leave the list empty.  Called from the
+ * pre-commit callback.
+ */
+static void
+free_deleted_tuples(dlist_head* list)
+{
+ while (!dlist_is_empty(list))
+ {
+ dlist_node* node = dlist_pop_head_node(list);
+ TupleItem* tupleItem = dlist_container(TupleItem, stageDeletedNode, node);
+ free_tuple(tupleItem);
+ }
+
+ dlist_init(list);
+}
+
+
+/*
+ * Undo the insertions recorded on a created-tuples list (linked through
+ * TupleItem::stageCreatedNode): each tuple is taken out of the catalog,
+ * cached state derived from it is invalidated, and it is freed.
+ */
+static void
+revert_created_tuples(RelationItem* relEntry, dlist_head* list)
+{
+ while (!dlist_is_empty(list))
+ {
+ dlist_node* node = dlist_pop_head_node(list);
+ TupleItem* tupleItem = dlist_container(TupleItem, stageCreatedNode, node);
+
+ /*
+ * Not on a deleted list: the tuple is still live, remove it now.
+ * Otherwise it has already been removed; just take it off the deleted
+ * list so it is not handled again.
+ */
+ if (dlist_node_is_detached(&tupleItem->stageDeletedNode)){
+ remove_tuple_entry(relEntry, tupleItem, false);
+ }else{
+ dlist_delete(&tupleItem->stageDeletedNode);
+ }
+
+ LocalInvalidateCatCacheTupleNow(relEntry->relid, tupleItem->tuple, NULL);
+
+ free_tuple(tupleItem);
+ }
+}
+
+
+/*
+ * Undo the deletions recorded on a deleted-tuples list (linked through
+ * TupleItem::stageDeletedNode): each tuple is put back into the catalog
+ * without being recorded as created, and cached state is invalidated.
+ * add_tuple_entry() assigns the restored tuple a new TID.
+ */
+static void
+revert_deleted_tuples(RelationItem* relEntry, dlist_head* list)
+{
+ while (!dlist_is_empty(list))
+ {
+ dlist_node* node = dlist_pop_head_node(list);
+ TupleItem* tupleItem = dlist_container(TupleItem, stageDeletedNode, node);
+
+ /* Mark the link as detached again */
+ dlist_node_init(&tupleItem->stageDeletedNode);
+
+ add_tuple_entry(relEntry, tupleItem, false);
+
+ LocalInvalidateCatCacheTupleNow(relEntry->relid, tupleItem->tuple, NULL);
+ }
+}
+
+
+/*
+ * Empty a created-tuples list without touching the tuples themselves: each
+ * link is popped and reset so that it reads as detached.
+ */
+static void
+detach_created_tuples(dlist_head* list)
+{
+	while (!dlist_is_empty(list))
+	{
+		dlist_node *popped = dlist_pop_head_node(list);
+
+		dlist_node_init(popped);
+	}
+}
+
+
+/*
+ * Transaction callback.
+ *
+ * Before commit, the change tracking of every relation is discarded:
+ * created tuples simply stay, deleted tuples are freed.  Before abort,
+ * every tracked change is undone: created tuples are removed and deleted
+ * tuples are restored (with TopMemoryContext current, since restoring
+ * allocates long-lived structures).  Other events are ignored.
+ */
+static void
+temp_cat_xact_cb(XactEvent event, void *arg)
+{
+	dlist_iter	relIter;
+
+	switch (event)
+	{
+		case XACT_EVENT_PRE_COMMIT:
+		case XACT_EVENT_PARALLEL_PRE_COMMIT:
+			dlist_foreach(relIter, &temp_rels)
+			{
+				RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+
+				detach_created_tuples(&rel->unstagedCreatedTuples);
+				detach_created_tuples(&rel->stagedCreatedTuples);
+				free_deleted_tuples(&rel->unstagedDeletedTuples);
+				free_deleted_tuples(&rel->stagedDeletedTuples);
+				cleanup(rel);
+			}
+			break;
+
+		case XACT_EVENT_PRE_ABORT:
+		case XACT_EVENT_PARALLEL_PRE_ABORT:
+			dlist_foreach(relIter, &temp_rels)
+			{
+				RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+				MemoryContext saved = MemoryContextSwitchTo(TopMemoryContext);
+
+				revert_created_tuples(rel, &rel->unstagedCreatedTuples);
+				revert_created_tuples(rel, &rel->stagedCreatedTuples);
+				revert_deleted_tuples(rel, &rel->unstagedDeletedTuples);
+				revert_deleted_tuples(rel, &rel->stagedDeletedTuples);
+				cleanup(rel);
+				MemoryContextSwitchTo(saved);
+			}
+			break;
+
+		default:
+			break;
+	}
+}
+
+
+/*
+ * Subtransaction callback.
+ *
+ * On subtransaction commit the unstaged created/deleted lists of every
+ * relation are appended to the staged ones.  Before a subtransaction abort
+ * the unstaged changes are undone.  Other events are ignored.
+ *
+ * NOTE(review): mySubid and parentSubid are not consulted; an abort reverts
+ * everything currently on the unstaged lists.
+ */
+static void
+temp_cat_subxact_cb(SubXactEvent event, SubTransactionId mySubid,
+ SubTransactionId parentSubid, void *arg)
+{
+ dlist_iter iter;
+ if (event == SUBXACT_EVENT_COMMIT_SUB )
+ {
+ dlist_foreach(iter, &temp_rels)
+ {
+ RelationItem* item = dlist_container(RelationItem, node, iter.cur);
+ dlist_move(&item->stagedCreatedTuples, &item->unstagedCreatedTuples);
+ dlist_move(&item->stagedDeletedTuples, &item->unstagedDeletedTuples);
+ }
+ }
+ else if (event == SUBXACT_EVENT_PRE_ABORT_SUB)
+ {
+ dlist_foreach(iter, &temp_rels)
+ {
+ RelationItem* item = dlist_container(RelationItem, node, iter.cur);
+ /* Restoring deleted tuples allocates long-lived structures */
+ MemoryContext oldctx = MemoryContextSwitchTo(TopMemoryContext);
+ revert_created_tuples(item, &item->unstagedCreatedTuples);
+ revert_deleted_tuples(item, &item->unstagedDeletedTuples);
+ cleanup(item);
+ MemoryContextSwitchTo(oldctx);
+ }
+ }
+}
+
+
+/*
+ * One-time setup of the in-memory temp catalog: initializes the relation
+ * list and registers the subtransaction and transaction callbacks.  Calls
+ * after the first are no-ops.
+ */
+void
+temp_catalog_init(void)
+{
+	if (initialized)
+		return;
+
+	dlist_init(&temp_rels);
+	RegisterSubXactCallback(temp_cat_subxact_cb, NULL);
+	RegisterXactCallback(temp_cat_xact_cb, NULL);
+	initialized = true;
+}
diff --git a/src/backend/access/heap/tupmap.c b/src/backend/access/heap/tupmap.c
new file mode 100644
index 00000000000..a86c675e274
--- /dev/null
+++ b/src/backend/access/heap/tupmap.c
@@ -0,0 +1,136 @@
+#include "postgres.h"
+#include "storage/itemptr.h"
+#include "lib/rbtree.h"
+#include "access/tempcat.h"
+
+/*
+ * Entry of the TID map: associates an item pointer with an opaque payload.
+ */
+typedef struct MapItem{
+ RBTNode node;
+ /* key */
+ ItemPointerData pointer;
+ /* payload supplied by the caller of temp_catalog_tupmap_assign() */
+ void* data;
+}MapItem;
+
+/* The map itself; created lazily by temp_catalog_tupmap_assign() */
+static RBTree* tree;
+/* Source of new item pointers; 0 is skipped */
+static uint64_t counter;
+/* Tells tupmap_rbt_combine() whether an existing entry's payload may be replaced */
+static bool overwrite = false;
+
+/* Exclusive upper bound for counter: (2^16 - 1) * 2^32; on reaching it the counter restarts */
+#define COUNTER_MAX ( ((((uint64_t)1)<<16)-1) * (((uint64_t)1)<<32) )
+
+/*
+ * Pack an item pointer into a 64-bit integer: the block number occupies
+ * the low 32 bits and (offset - 1) the bits above them.  This is the
+ * inverse of IntToItemPointer().
+ *
+ * The arithmetic is done in unsigned 64-bit.  Shifting bi_hi as a plain
+ * int overflows for values >= 0x8000, and the sign-extended result would
+ * overwrite the offset bits.
+ */
+static int64_t
+ItemPointerToInt(ItemPointer ptr)
+{
+	uint64_t	block = ((uint64_t) ptr->ip_blkid.bi_hi << 16) | (uint64_t) ptr->ip_blkid.bi_lo;
+	uint64_t	offset = (uint64_t) (ptr->ip_posid - 1);
+
+	return (int64_t) ((offset << 32) | block);
+}
+
+/*
+ * Unpack a 64-bit integer into an item pointer: the low 32 bits become the
+ * block number (split into its two 16-bit halves) and the bits above them,
+ * plus one, the offset.  The assignments rely on implicit truncation to the
+ * 16-bit fields.
+ */
+static ItemPointerData
+IntToItemPointer(int64_t i)
+{
+ ItemPointerData ret;
+ ret.ip_posid = (i >> 32) + 1;
+ ret.ip_blkid.bi_hi = i >>16;
+ ret.ip_blkid.bi_lo = i;
+ return ret;
+}
+
+/*
+ * Tree comparator for the TID map: orders entries by their packed item
+ * pointer.  Returns -1, 0 or 1.
+ *
+ * The two 64-bit keys are compared rather than subtracted.  A difference
+ * truncated to int reports keys that differ by a multiple of 2^32 as equal
+ * and can flip the sign of large differences.
+ */
+static int
+tupmap_rbt_compare(const RBTNode *a, const RBTNode *b, void *arg)
+{
+	MapItem    *aItem = (MapItem *) a;
+	MapItem    *bItem = (MapItem *) b;
+	int64_t		aKey = ItemPointerToInt(&aItem->pointer);
+	int64_t		bKey = ItemPointerToInt(&bItem->pointer);
+
+	if (aKey < bKey)
+		return -1;
+	if (aKey > bKey)
+		return 1;
+	return 0;
+}
+
+/*
+ * Tree combiner for the TID map: when the file-level overwrite flag is set
+ * the existing entry takes over the new payload, otherwise the existing
+ * entry is left as it is.
+ */
+static void
+tupmap_rbt_combine(RBTNode *existing, const RBTNode *newdata, void *arg)
+{
+ if (overwrite)
+ ((MapItem*)existing)->data = ((MapItem*)newdata)->data;
+}
+
+/* Node allocator for the TID map tree; arg is unused. */
+static RBTNode*
+tupmap_rbt_alloc(void *arg)
+{
+ return (RBTNode*)palloc_object(MapItem);
+}
+
+/* Node deallocator for the TID map tree; arg is unused. */
+static void
+tupmap_rbt_free(RBTNode *x, void *arg)
+{
+ pfree(x);
+}
+
+
+/*
+ * Register data in the TID map and return the item pointer it is stored
+ * under.
+ *
+ * With a non-NULL ptr that pointer is used as the key, and the payload of
+ * an already existing entry is replaced.  With ptr == NULL a new pointer is
+ * generated from the counter; pointers that are already taken are skipped
+ * by trying the next counter value.
+ *
+ * The tree is created on first use, in the memory context current at that
+ * time.
+ */
+ItemPointerData
+temp_catalog_tupmap_assign(ItemPointer ptr, void* data)
+{
+ if (!tree)
+ tree = rbt_create( sizeof(MapItem), tupmap_rbt_compare, tupmap_rbt_combine, tupmap_rbt_alloc, tupmap_rbt_free, NULL, NULL);
+
+ for(;;){
+ bool isNew;
+ MapItem newItem;
+ MapItem* node;
+ newItem.data = data;
+
+ if (ptr){
+ newItem.pointer = *ptr;
+ overwrite = true;
+ }else{
+ /* Counter value 0 is never handed out */
+ if (unlikely(!counter))
+ counter = 1;
+ newItem.pointer = IntToItemPointer(counter);
+ counter++;
+ if (unlikely(counter >= COUNTER_MAX))
+ counter = 0;
+
+ overwrite = false;
+ }
+
+ node = (MapItem*)rbt_insert(tree, (RBTNode*)&newItem, &isNew);
+ /* Generated pointer collided with a live entry: try the next one */
+ if(!isNew && !overwrite){
+ continue;
+ }
+
+ return node->pointer;
+ }
+}
+
+
+/*
+ * Remove the entry stored under ptr, but only if its payload is data.
+ * Returns true when an entry was removed; false when the map does not exist
+ * yet, the pointer is unknown, or it maps to a different payload.
+ */
+bool
+temp_catalog_tupmap_unassign(ItemPointer ptr, void* data)
+{
+ MapItem searchItem;
+ MapItem* item;
+
+ if (!tree)
+ return false;
+
+ searchItem.pointer = *ptr;
+ item = (MapItem*)rbt_find(tree, (RBTNode*)&searchItem);
+ if (!item)
+ return false;
+
+ if (item->data != data)
+ return false;
+
+ rbt_delete(tree, (RBTNode*)item);
+ return true;
+}
+
+
+/*
+ * Look up the payload stored under ptr.  Returns NULL when the map does
+ * not exist yet or has no entry for the pointer.
+ */
+void*
+temp_catalog_tupmap_get(ItemPointer ptr)
+{
+	MapItem		searchItem;
+	MapItem    *item;
+
+	/* Nothing has been assigned yet */
+	if (!tree)
+		return NULL;
+
+	searchItem.pointer = *ptr;
+	item = (MapItem *) rbt_find(tree, (RBTNode *) &searchItem);
+	if (!item)
+		return NULL;
+
+	return item->data;
+}
\ No newline at end of file
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 745a04ef26e..c740ba29ba7 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -631,7 +631,8 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
* to keep checking for creation or extension of the file, which happens
* infrequently.
*/
- CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
+ if (!RELATION_IS_LOCAL(rel))
+ CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
return buf;
}
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 0cb27af1310..e222ee1c534 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -23,6 +23,7 @@
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "lib/stringinfo.h"
@@ -406,6 +407,7 @@ systable_beginscan(Relation heapRelation,
sysscan->heap_rel = heapRelation;
sysscan->irel = irel;
sysscan->slot = table_slot_create(heapRelation, NULL);
+ sysscan->tempscan = NULL;
if (snapshot == NULL)
{
@@ -420,6 +422,9 @@ systable_beginscan(Relation heapRelation,
sysscan->snapshot = NULL;
}
+ if (enable_temp_memory_catalog)
+ sysscan->tempscan = temp_catalog_beginscan(heapRelation, nkeys, key);
+
if (irel)
{
int i;
@@ -476,6 +481,7 @@ systable_beginscan(Relation heapRelation,
if (TransactionIdIsValid(CheckXidAlive))
bsysscan = true;
+
return sysscan;
}
@@ -515,6 +521,16 @@ systable_getnext(SysScanDesc sysscan)
{
HeapTuple htup = NULL;
+ if (sysscan->tempscan)
+ {
+ htup = temp_catalog_getnext(sysscan->tempscan, (BufferHeapTupleTableSlot *) sysscan->slot);
+ if(htup)
+ {
+ HandleConcurrentAbort();
+ return htup;
+ }
+ }
+
if (sysscan->irel)
{
if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
@@ -575,6 +591,9 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
Snapshot freshsnap;
bool result;
+ if (sysscan->tempscan && temp_catalog_is_fetched(sysscan->tempscan))
+ return true;
+
Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
@@ -602,6 +621,9 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
void
systable_endscan(SysScanDesc sysscan)
{
+ if (sysscan->tempscan)
+ temp_catalog_endscan(sysscan->tempscan);
+
if (sysscan->slot)
{
ExecDropSingleTupleTableSlot(sysscan->slot);
@@ -865,6 +887,10 @@ systable_inplace_update_begin(Relation relation,
slot = scan->slot;
Assert(TTS_IS_BUFFERTUPLE(slot));
bslot = (BufferHeapTupleTableSlot *) slot;
+
+ /* With the in-memory temp catalog the slot has no buffer (bslot->buffer is zero) */
+ if (!bslot->buffer)
+ break;
} while (!heap_inplace_lock(scan->heap_rel,
bslot->base.tuple, bslot->buffer,
(void (*) (void *)) systable_endscan, scan));
@@ -908,6 +934,8 @@ systable_inplace_update_cancel(void *state)
HeapTuple oldtup = bslot->base.tuple;
Buffer buffer = bslot->buffer;
- heap_inplace_unlock(relation, oldtup, buffer);
+ if (buffer)
+ heap_inplace_unlock(relation, oldtup, buffer);
+
systable_endscan(scan);
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..0717e93d432 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2877,6 +2877,9 @@ AbortTransaction(void)
TransStateAsString(s->state));
Assert(s->parent == NULL);
+ CallXactCallbacks(is_parallel_worker ? XACT_EVENT_PARALLEL_PRE_ABORT
+ : XACT_EVENT_PRE_ABORT);
+
/*
* set the current transaction state information appropriately during the
* abort processing
@@ -5308,6 +5311,9 @@ AbortSubTransaction(void)
AtEOSubXact_Parallel(false, s->subTransactionId);
s->parallelModeLevel = 0;
+ CallSubXactCallbacks(SUBXACT_EVENT_PRE_ABORT_SUB, s->subTransactionId,
+ s->parent->subTransactionId);
+
/*
* We can skip all this stuff if the subxact failed before creating a
* ResourceOwner...
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 596d2ca5836..19ee03086c5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -96,6 +96,7 @@
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
+#include "utils/inval.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
@@ -166,6 +167,8 @@ static double PrevCheckPointDistance = 0;
*/
static bool check_wal_consistency_checking_deferred = false;
+static bool have_non_temp_records = false;
+
/*
* GUC support
*/
@@ -779,6 +782,9 @@ XLogInsertRecord(XLogRecData *rdata,
if (!XLogInsertAllowed())
elog(ERROR, "cannot make new WAL entries during recovery");
+ if (!IsTempTableScope() && rechdr->xl_rmid != RM_XACT_ID)
+ have_non_temp_records = true;
+
/*
* Given that we're not in recovery, InsertTimeLineID is set and can't
* change, so we can read it without a lock.
@@ -2599,6 +2605,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Assert(Insert >= Write);
}
#endif
+
+ have_non_temp_records = false;
}
/*
@@ -8746,6 +8754,9 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Assert(tli != 0);
+ if (MyBackendType == B_BACKEND && !have_non_temp_records)
+ return;
+
/*
* Quick exit if fsync is disabled or write() has already synced the WAL
* file.
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 59caae8f1bc..aaf09850f60 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -606,6 +606,14 @@ GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
else
rlocator.locator.relNumber = GetNewObjectId();
+ /* There is no chance that temporary table name will collide, because
+ * name contains backend id which is unique among all backends */
+ if (rlocator.backend != INVALID_PROC_NUMBER)
+ {
+ collides = false;
+ break;
+ }
+
/* Check for existing file of same name */
rpath = relpath(rlocator, MAIN_FORKNUM);
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 18316a3968b..5f08f907f9c 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -276,6 +276,27 @@ performDeletion(const ObjectAddress *object,
Relation depRel;
ObjectAddresses *targetObjects;
+ if (flags & PERFORM_DELETION_CONCURRENTLY)
+ {
+ /*
+ * We must commit our transaction in order to make the first pg_index
+ * state update visible to other sessions. If the DROP machinery has
+ * already performed any other actions (removal of other objects,
+ * pg_depend entries, etc), the commit would make those actions
+ * permanent, which would leave us with inconsistent catalog state if
+ * we fail partway through the following sequence. Since DROP INDEX
+ * CONCURRENTLY is restricted to dropping just one index that has no
+ * dependencies, we should get here before anything's been done ---
+ * but let's check that to be sure. We can verify that the current
+ * transaction has not executed any transactional updates by checking
+ * that no XID has been assigned.
+ */
+ if (GetTopTransactionIdIfAny() != InvalidTransactionId)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
+ }
+
/*
* We save some cycles by opening pg_depend just once and passing the
* Relation pointer down to all the recursive deletion steps.
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index fd6537567ea..8a31e868075 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1301,6 +1301,11 @@ heap_create_with_catalog(const char *relname,
else
relacl = NULL;
+ /*
+ * This prevents sending cache invalidation messages for temporary tables.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Create the relcache entry (mostly dummy at this point) and the physical
* disk file. (If we fail further down, it's the smgr's responsibility to
@@ -1527,6 +1532,8 @@ heap_create_with_catalog(const char *relname,
table_close(new_rel_desc, NoLock); /* do not unlock till end of xact */
table_close(pg_class_desc, RowExclusiveLock);
+ END_TEMP_TABLE_SCOPE();
+
return relid;
}
@@ -3636,6 +3643,8 @@ heap_truncate_one_rel(Relation rel)
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
return;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/* Truncate the underlying relation */
table_relation_nontransactional_truncate(rel);
@@ -3653,6 +3662,8 @@ heap_truncate_one_rel(Relation rel)
/* keep the lock... */
table_close(toastrel, NoLock);
}
+
+ END_TEMP_TABLE_SCOPE();
}
/*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index aa216683b74..5cadb8f7886 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -28,6 +28,7 @@
#include "access/multixact.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/toast_compression.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
@@ -56,6 +57,7 @@
#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/trigger.h"
+#include "commands/typecmds.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -132,7 +134,7 @@ static void SetReindexProcessing(Oid heapOid, Oid indexOid);
static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
-
+static void IndexTypeCreate(Relation indexRelation);
/*
* relationHasPrimaryKey
@@ -674,6 +676,112 @@ UpdateIndexRelation(Oid indexoid,
heap_freetuple(tuple);
}
+/*
+ * A reltype is created only for multicolumn, non-system B-tree indexes that
+ * lack one; in binary upgrade, only when a pg_type OID has been preassigned.
+ */
+#define INDEX_NEEDS_RELTYPE(indexRelation, indexInfo, accessMethodOid) ( \
+	!IsSystemRelation(indexRelation) \
+	&& (indexInfo)->ii_NumIndexKeyAttrs > 1 \
+	&& (accessMethodOid) == BTREE_AM_OID \
+	&& (indexRelation)->rd_rel->reltype == InvalidOid \
+	&& (!IsBinaryUpgrade || binary_upgrade_next_pg_type_oid != InvalidOid))
+
+/*
+ * IndexTypeCreate
+ *		Create the composite rowtype and its array type for an index, and
+ *		store the rowtype OID in the relcache entry (rd_rel->reltype).
+ */
+static void
+IndexTypeCreate(Relation indexRelation)
+{
+	Oid			ownerId = GetUserId();
+	Oid			namespaceId = RelationGetNamespace(indexRelation);
+	Oid			new_array_oid = AssignTypeArrayOid();
+	ObjectAddress new_type_addr;
+	char	   *relarrayname;
+
+	/* Index must not have a reltype yet */
+	Assert(indexRelation->rd_rel->reltype == InvalidOid);
+
+	/*
+	 * Build a compound type for the compound index so it can be used in
+	 * statistics.  We collect statistics for compound indexes to better
+	 * predict the selectivity of multicolumn joins.
+	 */
+	new_type_addr = TypeCreate(InvalidOid,
+							   RelationGetRelationName(indexRelation),
+							   namespaceId,
+							   RelationGetRelid(indexRelation),
+							   RELKIND_INDEX,
+							   ownerId, /* owner's ID */
+							   -1,	/* internal size (varlena) */
+							   TYPTYPE_COMPOSITE,	/* type-type (composite) */
+							   TYPCATEGORY_COMPOSITE,	/* type-category (ditto) */
+							   false,	/* composite types are never preferred */
+							   DEFAULT_TYPDELIM,	/* default array delimiter */
+							   F_RECORD_IN, /* input procedure */
+							   F_RECORD_OUT,	/* output procedure */
+							   F_RECORD_RECV,	/* receive procedure */
+							   F_RECORD_SEND,	/* send procedure */
+							   InvalidOid,	/* typmodin procedure - none */
+							   InvalidOid,	/* typmodout procedure - none */
+							   InvalidOid,	/* analyze procedure - default */
+							   InvalidOid,	/* subscript procedure - default */
+							   InvalidOid,	/* array element type - irrelevant */
+							   false,	/* this is not an array type */
+							   new_array_oid,	/* array type if any */
+							   InvalidOid,	/* domain base type - irrelevant */
+							   NULL,	/* default value - none */
+							   NULL,	/* default binary representation */
+							   false,	/* passed by reference */
+							   'd', /* alignment - must be the largest! */
+							   'x', /* fully TOASTable */
+							   -1,	/* typmod */
+							   0,	/* array dimensions for typBaseType */
+							   false,	/* Type NOT NULL */
+							   InvalidOid); /* rowtypes never have a collation */
+
+	indexRelation->rd_rel->reltype = new_type_addr.objectId;
+
+	relarrayname = makeArrayTypeName(RelationGetRelationName(indexRelation),
+									 namespaceId);
+
+	TypeCreate(new_array_oid,	/* force the type's OID to this */
+			   relarrayname,	/* Array type name */
+			   namespaceId,		/* Same namespace as parent */
+			   InvalidOid,		/* Not composite, no relationOid */
+			   0,				/* relkind, also N/A here */
+			   ownerId,			/* owner's ID */
+			   -1,				/* Internal size (varlena) */
+			   TYPTYPE_BASE,	/* Not composite - typelem is */
+			   TYPCATEGORY_ARRAY,	/* type-category (array) */
+			   false,			/* array types are never preferred */
+			   DEFAULT_TYPDELIM,	/* default array delimiter */
+			   F_ARRAY_IN,		/* array input proc */
+			   F_ARRAY_OUT,		/* array output proc */
+			   F_ARRAY_RECV,	/* array recv (bin) proc */
+			   F_ARRAY_SEND,	/* array send (bin) proc */
+			   InvalidOid,		/* typmodin procedure - none */
+			   InvalidOid,		/* typmodout procedure - none */
+			   F_ARRAY_TYPANALYZE,	/* array analyze procedure */
+			   F_ARRAY_SUBSCRIPT_HANDLER,	/* array subscript procedure */
+			   indexRelation->rd_rel->reltype,	/* array element type - the rowtype */
+			   true,			/* yes, this is an array type */
+			   InvalidOid,		/* this has no array type */
+			   InvalidOid,		/* domain base type - irrelevant */
+			   NULL,			/* default value - none */
+			   NULL,			/* default binary representation */
+			   false,			/* passed by reference */
+			   'd',				/* alignment - must be the largest! */
+			   'x',				/* fully TOASTable */
+			   -1,				/* typmod */
+			   0,				/* array dimensions for typBaseType */
+			   false,			/* Type NOT NULL */
+			   InvalidOid);		/* rowtypes never have a collation */
+
+	pfree(relarrayname);
+}
/*
* index_create
@@ -759,6 +867,7 @@ index_create(Relation heapRelation,
bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
+ bool withoutType = (flags & INDEX_CREATE_WITHOUT_TYPE) != 0;
char relkind;
TransactionId relfrozenxid;
MultiXactId relminmxid;
@@ -916,6 +1025,11 @@ index_create(Relation heapRelation,
indexRelationName, RelationGetRelationName(heapRelation))));
}
+ /*
+ * Don't send cache invalidation messages for indexes on temp tables
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(heapRelation->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* construct tuple descriptor for index tuples
*/
@@ -993,6 +1107,11 @@ index_create(Relation heapRelation,
Assert(relminmxid == InvalidMultiXactId);
Assert(indexRelationId == RelationGetRelid(indexRelation));
+ /* Create a reltype for index if it is needed */
+ if (withoutType == false && INDEX_NEEDS_RELTYPE(indexRelation, indexInfo, accessMethodId)
+ && !is_internal)
+ IndexTypeCreate(indexRelation);
+
/*
* Obtain exclusive lock on it. Although no other transactions can see it
* until we commit, this prevents deadlock-risk complaints from lock
@@ -1284,6 +1403,8 @@ index_create(Relation heapRelation,
*/
index_close(indexRelation, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return indexRelationId;
}
@@ -1458,7 +1579,7 @@ index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId,
indcoloptions->values,
stattargets,
reloptionsDatum,
- INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
+ INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT | INDEX_CREATE_WITHOUT_TYPE,
0,
true, /* allow table to be a system catalog? */
false, /* is_internal? */
@@ -1600,6 +1721,32 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
newClassForm->relispartition = oldClassForm->relispartition;
oldClassForm->relispartition = isPartition;
+ /* copy index type to new index */
+ newClassForm->reltype = oldClassForm->reltype;
+
+ if (OidIsValid(oldClassForm->reltype))
+ {
+ Relation pg_type;
+ HeapTuple typeTuple;
+ Form_pg_type typeForm;
+
+ pg_type = table_open(TypeRelationId, RowExclusiveLock);
+
+ typeTuple = SearchSysCacheCopy1(TYPEOID,
+ ObjectIdGetDatum(oldClassForm->reltype));
+ if (!HeapTupleIsValid(typeTuple))
+ elog(ERROR, "could not find tuple for type %u", oldClassForm->reltype);
+
+ typeForm = (Form_pg_type) GETSTRUCT(typeTuple);
+
+ typeForm->typrelid = newIndexId;
+
+ CatalogTupleUpdate(pg_type, &typeTuple->t_self, typeTuple);
+
+ heap_freetuple(typeTuple);
+ table_close(pg_type, RowExclusiveLock);
+ }
+
CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
@@ -1788,8 +1935,9 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
* vice-versa. Note that a call to CommandCounterIncrement() would cause
* duplicate entries in pg_depend, so this should not be done.
*/
- changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
- changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
+ //changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
+ //changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
+ deleteDependencyRecordsFor(RelationRelationId, newIndexId, false);
changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
@@ -2127,6 +2275,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
Relation indexRelation;
HeapTuple tuple;
bool hasexprs;
+ bool remove_statistics;
LockRelId heaprelid,
indexrelid;
LOCKTAG heaplocktag;
@@ -2203,24 +2352,6 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
*/
if (concurrent)
{
- /*
- * We must commit our transaction in order to make the first pg_index
- * state update visible to other sessions. If the DROP machinery has
- * already performed any other actions (removal of other objects,
- * pg_depend entries, etc), the commit would make those actions
- * permanent, which would leave us with inconsistent catalog state if
- * we fail partway through the following sequence. Since DROP INDEX
- * CONCURRENTLY is restricted to dropping just one index that has no
- * dependencies, we should get here before anything's been done ---
- * but let's check that to be sure. We can verify that the current
- * transaction has not executed any transactional updates by checking
- * that no XID has been assigned.
- */
- if (GetTopTransactionIdIfAny() != InvalidTransactionId)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
-
/*
* Mark index invalid by updating its pg_index entry
*/
@@ -2328,6 +2459,16 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
/* ensure that stats are dropped if transaction commits */
pgstat_drop_relation(userIndexRelation);
+ /*
+ * We might have stored multicolumn statistics for btree indexes. They are
+ * created only for non-system and non-TOAST indexes, so check only for such
+ * indexes.
+ */
+ remove_statistics =
+ IndexRelationGetNumberOfKeyAttributes(userIndexRelation) > 1 &&
+ userIndexRelation->rd_rel->relam == BTREE_AM_OID &&
+ !IsSystemRelation(userIndexRelation);
+
/*
* Close and flush the index's relcache entry, to ensure relcache doesn't
* try to rebuild it while we're deleting catalog entries. We keep the
@@ -2363,10 +2504,10 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
PopActiveSnapshot();
/*
- * if it has any expression columns, we might have stored statistics about
- * them.
+ * if it has any expression columns or whole index stat, we might have
+ * stored statistics about them.
*/
- if (hasexprs)
+ if (hasexprs || remove_statistics)
RemoveStatistics(indexId, 0);
/*
@@ -2930,6 +3071,14 @@ index_update_stats(Relation rel,
dirty = true;
}
+ /* If index's reltype has been created, update it in pg_class. */
+ if (rel->rd_rel->relkind == RELKIND_INDEX &&
+ rd_rel->reltype != rel->rd_rel->reltype)
+ {
+ rd_rel->reltype = rel->rd_rel->reltype;
+ dirty = true;
+ }
+
if (update_stats)
{
if (rd_rel->relpages != (int32) relpages)
@@ -3767,6 +3916,8 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
*/
CheckTableNotInUse(iRel, "REINDEX INDEX");
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(iRel->rd_islocaltemp);
+
/* Set new tablespace, if requested */
if (set_tablespace)
{
@@ -3809,6 +3960,10 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
/* Create a new physical relation for the index */
RelationSetNewRelfilenumber(iRel, persistence);
+ /* Create a reltype for index if it is needed */
+ if (INDEX_NEEDS_RELTYPE(iRel, indexInfo, iRel->rd_rel->relam))
+ IndexTypeCreate(iRel);
+
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
index_build(heapRelation, iRel, indexInfo, true, true);
@@ -3901,6 +4056,8 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
+ END_TEMP_TABLE_SCOPE();
+
/* Close rels, but keep locks */
index_close(iRel, NoLock);
table_close(heapRelation, NoLock);
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index 25c4b6bdc87..4ee9ae145ad 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -18,10 +18,12 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/tempcat.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "executor/executor.h"
+#include "utils/inval.h"
#include "utils/rel.h"
@@ -234,6 +236,12 @@ CatalogTupleInsert(Relation heapRel, HeapTuple tup)
{
CatalogIndexState indstate;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_insert(heapRel, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
@@ -256,6 +264,12 @@ void
CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup,
CatalogIndexState indstate)
{
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_insert(heapRel, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
simple_heap_insert(heapRel, tup);
@@ -277,6 +291,14 @@ CatalogTuplesMultiInsertWithInfo(Relation heapRel, TupleTableSlot **slot,
if (ntuples <= 0)
return;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ for (int i = 0; i < ntuples; i++)
+ temp_catalog_insert(heapRel, ExecFetchSlotHeapTuple(slot[i], true, NULL));
+ return;
+ }
+
+
heap_multi_insert(heapRel, slot, ntuples,
GetCurrentCommandId(true), 0, NULL);
@@ -315,6 +337,12 @@ CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
CatalogIndexState indstate;
TU_UpdateIndexes updateIndexes = TU_All;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update(heapRel, otid, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
@@ -339,6 +367,12 @@ CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup,
{
TU_UpdateIndexes updateIndexes = TU_All;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update(heapRel, otid, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
simple_heap_update(heapRel, otid, tup, &updateIndexes);
@@ -364,5 +398,11 @@ CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup,
void
CatalogTupleDelete(Relation heapRel, ItemPointer tid)
{
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_delete(heapRel, tid);
+ return;
+ }
+
simple_heap_delete(heapRel, tid);
}
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index dd5d241dfe2..9ef7ddd9e13 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -4613,11 +4613,18 @@ RemoveTempRelations(Oid tempNamespaceId)
object.objectId = tempNamespaceId;
object.objectSubId = 0;
+ /*
+ * Don't bother sending invalidation messages when deleting temp relations
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(true);
+
performDeletion(&object, DROP_CASCADE,
PERFORM_DELETION_INTERNAL |
PERFORM_DELETION_QUIETLY |
PERFORM_DELETION_SKIP_ORIGINAL |
PERFORM_DELETION_SKIP_EXTENSIONS);
+
+ END_TEMP_TABLE_SCOPE();
}
/*
diff --git a/src/backend/catalog/pg_namespace.c b/src/backend/catalog/pg_namespace.c
index 6f5634a4de6..c90dacd310c 100644
--- a/src/backend/catalog/pg_namespace.c
+++ b/src/backend/catalog/pg_namespace.c
@@ -22,6 +22,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_namespace.h"
#include "utils/builtins.h"
+#include "utils/inval.h"
#include "utils/rel.h"
#include "utils/syscache.h"
@@ -69,6 +70,11 @@ NamespaceCreate(const char *nspName, Oid ownerId, bool isTemp)
else
nspacl = NULL;
+ /*
+ * Don't send invalidation messages related to temporary namespaces.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(isTemp);
+
nspdesc = table_open(NamespaceRelationId, RowExclusiveLock);
tupDesc = nspdesc->rd_att;
@@ -116,5 +122,7 @@ NamespaceCreate(const char *nspName, Oid ownerId, bool isTemp)
/* Post creation hook for new schema */
InvokeObjectPostCreateHook(NamespaceRelationId, nspoid, 0);
+ END_TEMP_TABLE_SCOPE();
+
return nspoid;
}
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index 85d1219b235..519e1c2371b 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -19,6 +19,7 @@
#include "postgres.h"
+#include "access/tempcat.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
@@ -31,6 +32,7 @@
#include "storage/bulk_write.h"
#include "storage/freespace.h"
#include "storage/proc.h"
+#include "storage/rd.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -437,6 +439,13 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
*/
if (need_fsm_vacuum)
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
+
+ if (enable_temp_rd_buffers
+ && nblocks == 0
+ && rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ rd_reset(RelationGetSmgr(rel));
+ }
}
/*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index 874a8fc89ad..1845af2ed8e 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -30,6 +30,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "utils/fmgroids.h"
+#include "utils/inval.h"
#include "utils/rel.h"
#include "utils/syscache.h"
@@ -200,6 +201,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
snprintf(toast_idxname, sizeof(toast_idxname),
"pg_toast_%u_index", relOid);
+ /*
+ * Don't send shared invalidation messages for TOAST tables created for temporary
+ * tables, because those TOAST tables cannot be accessed from other sessions anyway.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/* this is pretty painful... need a tuple descriptor */
tupdesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
@@ -391,6 +398,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
*/
CommandCounterIncrement();
+ END_TEMP_TABLE_SCOPE();
+
return true;
}
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e557..4f0494cf451 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -36,8 +36,11 @@
#include "common/pg_prng.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
+#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/makefuncs.h"
+#include "nodes/pg_list.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
@@ -48,6 +51,7 @@
#include "utils/attoptcache.h"
#include "utils/datum.h"
#include "utils/guc.h"
+#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
@@ -55,6 +59,7 @@
#include "utils/sortsupport.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
+#include "utils/typcache.h"
/* Per-index data for ANALYZE */
@@ -64,6 +69,7 @@ typedef struct AnlIndexData
double tupleFract; /* fraction of rows for partial index */
VacAttrStats **vacattrstats; /* index attrs to analyze */
int attr_cnt;
+ bool multicolumn; /* Collect compound row statistic for multicolumn index */
} AnlIndexData;
@@ -241,6 +247,8 @@ analyze_rel(Oid relid, RangeVar *relation,
pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
RelationGetRelid(onerel));
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(onerel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Do the normal non-recursive ANALYZE. We can skip this for partitioned
* tables, which don't contain any rows.
@@ -264,6 +272,8 @@ analyze_rel(Oid relid, RangeVar *relation,
*/
relation_close(onerel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
pgstat_progress_end_command();
}
@@ -308,6 +318,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
BufferUsage bufferusage;
PgStat_Counter startreadtime = 0;
PgStat_Counter startwritetime = 0;
+ int rowsAttrPitch;
+ Datum *rowsAttrValues;
+ bool *rowsAttrNulls;
verbose = (params->options & VACOPT_VERBOSE) != 0;
instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
@@ -323,6 +336,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel))));
+	BEGIN_TEMP_TABLE_SCOPE_LOCAL(RelationUsesLocalBuffers(onerel));
+
/*
* Set up a working context so that we can easily free whatever junk gets
* created.
@@ -484,6 +499,21 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
}
thisdata->attr_cnt = tcnt;
}
+ else if (indexInfo->ii_NumIndexAttrs > 1 && va_cols == NIL &&
+ Irel[ind]->rd_rel->reltype != InvalidOid)
+ {
+ /* Collect statistics on the whole index row to better predict the selectivity of multicolumn joins */
+ RowExpr* row = makeNode(RowExpr);
+ row->row_typeid = Irel[ind]->rd_rel->reltype;
+ row->row_format = COERCE_EXPLICIT_CAST;
+ row->location = -1;
+ row->colnames = NULL;
+ thisdata->vacattrstats = (VacAttrStats **)palloc(sizeof(VacAttrStats *));
+ thisdata->vacattrstats[0] = examine_attribute(Irel[ind], 1, (Node*)row);
+ thisdata->vacattrstats[0]->tupDesc = lookup_type_cache(row->row_typeid, TYPECACHE_TUPDESC)->tupDesc;
+ thisdata->attr_cnt = 1;
+ thisdata->multicolumn = true;
+ }
}
}
@@ -536,6 +566,25 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
rows, targrows,
&totalrows, &totaldeadrows);
+
+	/* Pre-deform sampled rows; use Size math since numrows * natts may exceed INT_MAX */
+	rowsAttrPitch = 0;
+	rowsAttrValues = NULL;
+	rowsAttrNulls = NULL;
+	if (va_cols == NIL && onerel->rd_att->natts > 0 &&
+		(Size) numrows <= MaxAllocSize / sizeof(Datum) / onerel->rd_att->natts)
+	{
+		Size		nvalues = (Size) numrows * onerel->rd_att->natts;
+
+		rowsAttrPitch = onerel->rd_att->natts;
+		rowsAttrValues = (Datum *) palloc(nvalues * sizeof(Datum));
+		rowsAttrNulls = (bool *) palloc(nvalues * sizeof(bool));
+		for (i = 0; i < numrows; i++)
+			heap_deform_tuple(rows[i], onerel->rd_att,
+							  rowsAttrValues + (Size) i * rowsAttrPitch,
+							  rowsAttrNulls + (Size) i * rowsAttrPitch);
+	}
+
/*
* Compute the statistics. Temporary results during the calculations for
* each column are stored in a child context. The calc routines are
@@ -561,6 +610,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
AttributeOpts *aopt;
stats->rows = rows;
+ stats->rowsAttrPitch = rowsAttrPitch;
+ stats->rowsAttrValues = rowsAttrValues + (stats->tupattnum - 1);
+ stats->rowsAttrNulls = rowsAttrNulls + (stats->tupattnum - 1);
+
stats->tupDesc = onerel->rd_att;
stats->compute_stats(stats,
std_fetch_func,
@@ -856,6 +909,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
MemoryContextSwitchTo(caller_context);
MemoryContextDelete(anl_context);
anl_context = NULL;
+
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -956,28 +1011,41 @@ compute_index_stats(Relation onerel, double totalrows,
values,
isnull);
- /*
- * Save just the columns we care about. We copy the values
- * into ind_context from the estate's per-tuple context.
- */
- for (i = 0; i < attr_cnt; i++)
+ if (thisdata->multicolumn)
{
- VacAttrStats *stats = thisdata->vacattrstats[i];
- int attnum = stats->tupattnum;
-
- if (isnull[attnum - 1])
- {
- exprvals[tcnt] = (Datum) 0;
- exprnulls[tcnt] = true;
- }
- else
+ /* For multicolumn index construct compound value */
+ VacAttrStats *stats = thisdata->vacattrstats[0];
+ exprvals[tcnt] = HeapTupleGetDatum(heap_form_tuple(stats->tupDesc,
+ values,
+ isnull));
+ exprnulls[tcnt] = false;
+ tcnt++;
+ }
+ else
+ {
+ /*
+ * Save just the columns we care about. We copy the values
+ * into ind_context from the estate's per-tuple context.
+ */
+ for (i = 0; i < attr_cnt; i++)
{
- exprvals[tcnt] = datumCopy(values[attnum - 1],
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
- exprnulls[tcnt] = false;
+ VacAttrStats *stats = thisdata->vacattrstats[i];
+ int attnum = stats->tupattnum;
+
+ if (isnull[attnum - 1])
+ {
+ exprvals[tcnt] = (Datum) 0;
+ exprnulls[tcnt] = true;
+ }
+ else
+ {
+ exprvals[tcnt] = datumCopy(values[attnum - 1],
+ stats->attrtype->typbyval,
+ stats->attrtype->typlen);
+ exprnulls[tcnt] = false;
+ }
+ tcnt++;
}
- tcnt++;
}
}
}
@@ -1797,11 +1865,22 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
- int attnum = stats->tupattnum;
- HeapTuple tuple = stats->rows[rownum];
- TupleDesc tupDesc = stats->tupDesc;
+	/* Fast path: arrays already point at this column; rows are rowsAttrPitch apart */
+	if (stats->rowsAttrPitch)
+	{
+		size_t		index = (size_t) rownum * stats->rowsAttrPitch;
+
+		*isNull = stats->rowsAttrNulls[index];
+		return stats->rowsAttrValues[index];
+	}
+	else
+	{
+		/* No pre-deformed arrays: extract the attribute from the heap tuple */
+		int			attnum = stats->tupattnum;
+
+		return heap_getattr(stats->rows[rownum], attnum, stats->tupDesc, isNull);
+	}
- return heap_getattr(tuple, attnum, tupDesc, isNull);
}
/*
@@ -2742,6 +2821,7 @@ compute_scalar_stats(VacAttrStatsP stats,
* histogram won't collapse to empty or a singleton.)
*/
num_hist = ndistinct - num_mcv;
+
if (num_hist > num_bins)
num_hist = num_bins + 1;
if (num_hist >= 2)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 7e2792ead71..83895910c36 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -174,7 +174,7 @@ static void ExplainFlushWorkersState(ExplainState *es);
*/
void
ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
- ParamListInfo params, DestReceiver *dest)
+ ParamListInfo params, DestReceiver *dest, uint64 *processed)
{
ExplainState *es = NewExplainState();
TupOutputState *tstate;
@@ -182,9 +182,13 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
Query *query;
List *rewritten;
+ if (processed)
+ *processed = 0;
+
/* Configure the ExplainState based on the provided options */
ParseExplainOptionList(es, stmt->options, pstate);
+
/* Extract the query and, if enabled, jumble it */
query = castNode(Query, stmt->query);
if (IsQueryIdEnabled())
@@ -244,6 +248,9 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
end_tup_output(tstate);
pfree(es->str->data);
+
+ if (processed)
+ *processed = es->es_processed;
}
/*
@@ -656,6 +663,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
*/
INSTR_TIME_SET_CURRENT(starttime);
+ es->es_processed += queryDesc->estate->es_processed;
+
ExecutorEnd(queryDesc);
FreeQueryDesc(queryDesc);
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 451ae6f7f69..c1d8d0dfce8 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -48,6 +48,7 @@
#include "utils/resowner.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
+#include "utils/inval.h"
/*
@@ -205,6 +206,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
stmt->tablespacename = NULL;
stmt->if_not_exists = seq->if_not_exists;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(stmt->relation->relpersistence == RELPERSISTENCE_TEMP);
+
address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
seqoid = address.objectId;
Assert(seqoid != InvalidOid);
@@ -243,6 +246,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
heap_freetuple(tuple);
table_close(rel, RowExclusiveLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
@@ -466,6 +471,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
init_sequence(relid, &elm, &seqrel);
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(seqrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
rel = table_open(SequenceRelationId, RowExclusiveLock);
seqtuple = SearchSysCacheCopy1(SEQRELID,
ObjectIdGetDatum(relid));
@@ -534,6 +541,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
table_close(rel, RowExclusiveLock);
sequence_close(seqrel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 3241060bd55..7adb477ece1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1046,6 +1046,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
accessMethodId = get_table_am_oid(default_table_access_method, false);
}
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(stmt->relation->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Create the relation. Inherited defaults and CHECK constraints are
* passed in for immediate handling --- since they don't need parsing,
@@ -1354,6 +1356,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
*/
relation_close(rel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
@@ -1532,6 +1536,7 @@ RemoveRelations(DropStmt *drop)
ListCell *cell;
int flags = 0;
LOCKMODE lockmode = AccessExclusiveLock;
+ bool haveNonTempRelations = false;
/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
if (drop->concurrent)
@@ -1675,10 +1680,18 @@ RemoveRelations(DropStmt *drop)
obj.objectSubId = 0;
add_exact_object_address(&obj, objects);
+
+ if (get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
+ haveNonTempRelations = true;
}
+ /* Don't send invalidation messages if and only if all relations being deleted are temporary */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(!haveNonTempRelations);
+
performMultipleDeletions(objects, drop->behavior, flags);
+ END_TEMP_TABLE_SCOPE();
+
free_object_addresses(objects);
}
@@ -2181,6 +2194,8 @@ ExecuteTruncateGuts(List *explicit_rels,
continue;
}
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Normally, we need a transaction-safe truncation here. However, if
* the table was either created in the current (sub)transaction or has
@@ -2241,6 +2256,8 @@ ExecuteTruncateGuts(List *explicit_rels,
}
pgstat_count_truncate(rel);
+
+ END_TEMP_TABLE_SCOPE();
}
/* Now go through the hash table, and truncate foreign tables */
@@ -4279,6 +4296,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo
targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock);
namespaceId = RelationGetNamespace(targetrelation);
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(targetrelation->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Find relation's pg_class tuple, and make sure newrelname isn't in use.
*/
@@ -4344,6 +4363,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo
* Close rel, but keep lock!
*/
relation_close(targetrelation, NoLock);
+
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -5370,6 +5391,8 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
ObjectAddress address = InvalidObjectAddress;
Relation rel = tab->rel;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
switch (cmd->subtype)
{
case AT_AddColumn: /* ADD COLUMN */
@@ -5671,6 +5694,8 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
break;
}
+ END_TEMP_TABLE_SCOPE();
+
/*
* Report the subcommand to interested event triggers.
*/
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
index 608f10d9412..34b87a51cbe 100644
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -1236,10 +1236,12 @@ check_default_with_oids(bool *newval, void **extra, GucSource source)
if (*newval)
{
/* check the GUC's definition for an explanation */
- GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tables declared WITH OIDS are not supported, ignored")));
GUC_check_errmsg("tables declared WITH OIDS are not supported");
- return false;
+ *newval = false;
}
return true;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index f1569879b52..d1a58e9a9b1 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -266,12 +266,32 @@ ExecInitQual(List *qual, PlanState *parent)
foreach_ptr(Expr, node, qual)
{
+ ExprEvalStep *lastStep;
/* first evaluate expression */
ExecInitExprRec(node, state, &state->resvalue, &state->resnull);
/* then emit EEOP_QUAL to detect if it's false (or null) */
scratch.d.qualexpr.jumpdone = -1;
+
+ lastStep = &state->steps[state->steps_len-1];
+ if (list_length(qual) == 1 &&
+ (lastStep->opcode == EEOP_BOOL_OR_STEP_LAST ||
+ lastStep->opcode == EEOP_BOOL_AND_STEP_LAST))
+ scratch.d.qualexpr.guaranteed_empty =
+ lastStep->d.boolexpr.guaranteed_empty;
+ else if (list_length(qual) == 1 &&
+ lastStep->opcode == EEOP_SUBPLAN)
+ {
+ scratch.d.qualexpr.guaranteed_empty =
+ lastStep->d.subplan.guaranteed_empty =
+ palloc(sizeof(bool));
+ *scratch.d.qualexpr.guaranteed_empty = false;
+ }
+ else
+ scratch.d.qualexpr.guaranteed_empty = NULL;
+
ExprEvalPushStep(state, &scratch);
+
adjust_jumps = lappend_int(adjust_jumps,
state->steps_len - 1);
}
@@ -1387,8 +1407,15 @@ ExecInitExprRec(Expr *node, ExprState *state,
ListCell *lc;
/* allocate scratch memory used by all steps of AND/OR */
+ scratch.d.boolexpr.guaranteed_empty = NULL;
if (boolexpr->boolop != NOT_EXPR)
+ {
scratch.d.boolexpr.anynull = (bool *) palloc(sizeof(bool));
+ scratch.d.boolexpr.guaranteed_empty = (bool *) palloc(sizeof(bool));
+ scratch.d.boolexpr.count_guaranteed_empty = (int *) palloc(sizeof(int));
+ *scratch.d.boolexpr.guaranteed_empty = false;
+ scratch.d.boolexpr.nargs = nargs;
+ }
/*
* For each argument evaluate the argument itself, then
@@ -1407,10 +1434,16 @@ ExecInitExprRec(Expr *node, ExprState *state,
foreach(lc, boolexpr->args)
{
Expr *arg = (Expr *) lfirst(lc);
+ ExprEvalStep *lastStep;
/* Evaluate argument into our output variable */
ExecInitExprRec(arg, state, resv, resnull);
+ lastStep = &state->steps[state->steps_len-1];
+ if (lastStep->opcode == EEOP_SUBPLAN)
+ lastStep->d.subplan.guaranteed_empty =
+ scratch.d.boolexpr.guaranteed_empty;
+
/* Perform the appropriate step type */
switch (boolexpr->boolop)
{
@@ -1484,6 +1517,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
}
ExecInitSubPlanExpr(subplan, state, resv, resnull);
+ scratch.d.subplan.guaranteed_empty = NULL; /* pointer field, so NULL rather than false; NOTE(review): set after ExecInitSubPlanExpr() returns -- confirm it reaches the pushed step */
break;
}
@@ -4577,6 +4611,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
/* then emit EEOP_QUAL to detect if result is false (or null) */
scratch.opcode = EEOP_QUAL;
+ scratch.d.qualexpr.guaranteed_empty = NULL;
scratch.d.qualexpr.jumpdone = -1;
scratch.resvalue = &state->resvalue;
scratch.resnull = &state->resnull;
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 1a37737d4a2..bbd5fba1dfd 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -1057,6 +1057,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_JUMP(op->d.boolexpr.jumpdone);
}
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
+
EEO_NEXT();
}
@@ -1065,6 +1068,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
if (*op->resnull)
{
/* result is already set to NULL, need not change it */
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
}
else if (!DatumGetBool(*op->resvalue))
{
@@ -1080,10 +1085,15 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
{
*op->resvalue = (Datum) 0;
*op->resnull = true;
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
}
else
{
/* result is already set to TRUE, need not change it */
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
+
}
EEO_NEXT();
@@ -1102,6 +1112,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP_FIRST)
{
*op->d.boolexpr.anynull = false;
+ *op->d.boolexpr.count_guaranteed_empty = 0;
/*
* EEOP_BOOL_OR_STEP_FIRST resets anynull, otherwise it's the same
@@ -1113,6 +1124,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP)
{
+ *op->d.boolexpr.count_guaranteed_empty +=
+ (int) (*op->d.boolexpr.guaranteed_empty);
+ *op->d.boolexpr.guaranteed_empty = false;
+
if (*op->resnull)
{
*op->d.boolexpr.anynull = true;
@@ -1129,6 +1144,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP_LAST)
{
+ *op->d.boolexpr.count_guaranteed_empty +=
+ (int) (*op->d.boolexpr.guaranteed_empty);
+ *op->d.boolexpr.guaranteed_empty = false;
+
if (*op->resnull)
{
/* result is already set to NULL, need not change it */
@@ -1150,6 +1169,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
}
else
{
+ if (*op->d.boolexpr.count_guaranteed_empty == op->d.boolexpr.nargs)
+ *op->d.boolexpr.guaranteed_empty = true;
+ else
+ *op->d.boolexpr.guaranteed_empty = false;
/* result is already set to FALSE, need not change it */
}
@@ -1180,6 +1203,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
/* ... bail out early, returning FALSE */
*op->resnull = false;
*op->resvalue = BoolGetDatum(false);
+ if (op->d.qualexpr.guaranteed_empty &&
+ op - state->steps == state->steps_len - 2 /* + EEOP_DONE */)
+ state->guaranteed_empty = *op->d.qualexpr.guaranteed_empty;
EEO_JUMP(op->d.qualexpr.jumpdone);
}
@@ -5313,7 +5339,16 @@ ExecEvalSubPlan(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
/* could potentially be nested, so make sure there's enough stack */
check_stack_depth();
- *op->resvalue = ExecSubPlan(sstate, econtext, op->resnull);
+ if (sstate->guaranteed_empty == false) /* subplan not flagged as empty: execute it */
+ *op->resvalue = ExecSubPlan(sstate, econtext, op->resnull);
+ else
+ { /* flagged empty: yield non-null FALSE without running the subplan */
+ *op->resvalue = BoolGetDatum(false);
+ *op->resnull = false;
+ }
+
+ if (op->opcode == EEOP_SUBPLAN && op->d.subplan.guaranteed_empty && sstate->guaranteed_empty) /* NOTE(review): op->opcode may hold a jump address, not the enum, at run time in computed-goto builds; ExecEvalStepOp(state, op) may be needed -- confirm */
+ *op->d.subplan.guaranteed_empty = sstate->guaranteed_empty; /* publish the flag through the bool attached to this step */
}
/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 27c9eec697b..16fdf6dbc3e 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -40,6 +40,7 @@
#include "access/sysattr.h"
#include "access/table.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
@@ -62,7 +63,7 @@
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
-
+#include "utils/syscache.h"
/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
@@ -621,6 +622,15 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos,
RTEPermissionInfo *perminfo = lfirst_node(RTEPermissionInfo, l);
Assert(OidIsValid(perminfo->relid));
+
+ if (enable_temp_memory_catalog && IsParallelWorker())
+ {
+ HeapTuple htup = SearchSysCache1(RELOID, ObjectIdGetDatum(perminfo->relid));
+ if (!htup)
+ continue;
+ ReleaseSysCache(htup);
+ }
+
result = ExecCheckOneRelPerms(perminfo);
if (!result)
{
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index 31ed4783c1d..29c50027681 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -55,6 +55,7 @@ ExecScan(ScanState *node,
epqstate = node->ps.state->es_epq_active;
qual = node->ps.qual;
projInfo = node->ps.ps_ProjInfo;
+ node->ps.guaranteed_empty = false;
return ExecScanExtended(node,
accessMtd,
diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c
index 9798bb75365..6f97df88c47 100644
--- a/src/backend/executor/nodeMaterial.c
+++ b/src/backend/executor/nodeMaterial.c
@@ -135,6 +135,8 @@ ExecMaterial(PlanState *pstate)
if (TupIsNull(outerslot))
{
node->eof_underlying = true;
+ if (tuplestore_tuple_count(tuplestorestate) == 0)
+ node->ss.ps.guaranteed_empty = true;
return NULL;
}
@@ -358,6 +360,9 @@ ExecReScanMaterial(MaterialState *node)
*/
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
+ else
+ node->ss.ps.guaranteed_empty = false;
+
node->eof_underlying = false;
}
}
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index 5cd1a251625..683f042c180 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -163,6 +163,11 @@ ExecNestLoop(PlanState *pstate)
{
ENL1_printf("no inner tuple, need new outer tuple");
+ if (innerPlan->guaranteed_empty &&
+ (node->js.jointype == JOIN_INNER ||
+ node->js.jointype == JOIN_SEMI))
+ return NULL;
+
node->nl_NeedNewOuter = true;
if (!node->nl_MatchedOuter &&
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 8e55dcc159b..8ac17ac8451 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -84,7 +84,11 @@ ExecSubPlan(SubPlanState *node,
/* Select appropriate evaluation strategy */
if (subplan->useHashTable)
+ {
retval = ExecHashSubPlan(node, econtext, isNull);
+ if (node->planstate->guaranteed_empty)
+ node->guaranteed_empty = true;
+ }
else
retval = ExecScanSubPlan(node, econtext, isNull);
@@ -106,6 +110,9 @@ ExecHashSubPlan(SubPlanState *node,
SubPlan *subplan = node->subplan;
PlanState *planstate = node->planstate;
TupleTableSlot *slot;
+ bool hasParam = (planstate->plan->extParam != NULL ||
+ subplan->setParam != NIL ||
+ planstate->chgParam != NULL);
/* Shouldn't have any direct correlation Vars */
if (subplan->parParam != NIL || subplan->args != NIL)
@@ -123,8 +130,11 @@ ExecHashSubPlan(SubPlanState *node,
* lefthand side.
*/
*isNull = false;
- if (!node->havehashrows && !node->havenullrows)
+ if (!node->havehashrows && !node->havenullrows) {
+ if (hasParam == false)
+ node->planstate->guaranteed_empty = true;
return BoolGetDatum(false);
+ }
/*
* Evaluate lefthand expressions and form a projection tuple. First we
diff --git a/src/backend/lib/rbtree.c b/src/backend/lib/rbtree.c
index 3b5e5faa9bf..8565b18a69c 100644
--- a/src/backend/lib/rbtree.c
+++ b/src/backend/lib/rbtree.c
@@ -50,6 +50,7 @@ struct RBTree
rbt_combiner combiner;
rbt_allocfunc allocfunc;
rbt_freefunc freefunc;
+ rbt_fixfunc fixfunc;
/* Passthrough arg passed to all manipulation functions */
void *arg;
};
@@ -104,6 +105,7 @@ rbt_create(Size node_size,
rbt_combiner combiner,
rbt_allocfunc allocfunc,
rbt_freefunc freefunc,
+ rbt_fixfunc fixfunc,
void *arg)
{
RBTree *tree = (RBTree *) palloc(sizeof(RBTree));
@@ -116,6 +118,7 @@ rbt_create(Size node_size,
tree->combiner = combiner;
tree->allocfunc = allocfunc;
tree->freefunc = freefunc;
+ tree->fixfunc = fixfunc;
tree->arg = arg;
@@ -127,6 +130,8 @@ static inline void
rbt_copy_data(RBTree *rbt, RBTNode *dest, const RBTNode *src)
{
memcpy(dest + 1, src + 1, rbt->node_size - sizeof(RBTNode));
+ if (rbt->fixfunc)
+ rbt->fixfunc(dest, rbt->arg);
}
/**********************************************************************
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 214d2c70d60..ee95063f921 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -108,7 +108,7 @@ my @nodetag_only_files = qw(
# ABI stability during development.
my $last_nodetag = 'WindowObjectData';
-my $last_nodetag_no = 479;
+my $last_nodetag_no = 480;
# output file names
my @output_files;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index ede838cd40c..4434bb62842 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2945,16 +2945,25 @@ range_table_entry_walker_impl(RangeTblEntry *rte,
Node *
expression_tree_mutator_impl(Node *node,
tree_mutator_callback mutator,
- void *context)
+ void *context, int flags)
{
/*
* The mutator has already decided not to modify the current node, but we
* must call the mutator for any sub-nodes.
*/
-#define FLATCOPY(newnode, node, nodetype) \
- ( (newnode) = (nodetype *) palloc(sizeof(nodetype)), \
- memcpy((newnode), (node), sizeof(nodetype)) )
+#define FLATCOPY(newnode, node, nodetype, flags) \
+ do { \
+ if (((flags) & QTW_DONT_COPY_DEFAULT) == 0) \
+ { /* default: shallow-copy the node into fresh memory */ \
+ (newnode) = (nodetype *) palloc(sizeof(nodetype)); \
+ memcpy((newnode), (node), sizeof(nodetype)); \
+ } \
+ else \
+ { /* no-copy mode: newnode aliases node, so later field writes hit the original */ \
+ (newnode) = (node); \
+ } \
+ } while(0)
#define MUTATE(newfield, oldfield, fieldtype) \
( (newfield) = (fieldtype) mutator((Node *) (oldfield), context) )
@@ -2977,7 +2986,7 @@ expression_tree_mutator_impl(Node *node,
Var *var = (Var *) node;
Var *newnode;
- FLATCOPY(newnode, var, Var);
+ FLATCOPY(newnode, var, Var, flags);
/* Assume we need not copy the varnullingrels bitmapset */
return (Node *) newnode;
}
@@ -2987,7 +2996,7 @@ expression_tree_mutator_impl(Node *node,
Const *oldnode = (Const *) node;
Const *newnode;
- FLATCOPY(newnode, oldnode, Const);
+ FLATCOPY(newnode, oldnode, Const, flags);
/* XXX we don't bother with datumCopy; should we? */
return (Node *) newnode;
}
@@ -3010,7 +3019,7 @@ expression_tree_mutator_impl(Node *node,
WithCheckOption *wco = (WithCheckOption *) node;
WithCheckOption *newnode;
- FLATCOPY(newnode, wco, WithCheckOption);
+ FLATCOPY(newnode, wco, WithCheckOption, flags);
MUTATE(newnode->qual, wco->qual, Node *);
return (Node *) newnode;
}
@@ -3019,7 +3028,7 @@ expression_tree_mutator_impl(Node *node,
Aggref *aggref = (Aggref *) node;
Aggref *newnode;
- FLATCOPY(newnode, aggref, Aggref);
+ FLATCOPY(newnode, aggref, Aggref, flags);
/* assume mutation doesn't change types of arguments */
newnode->aggargtypes = list_copy(aggref->aggargtypes);
MUTATE(newnode->aggdirectargs, aggref->aggdirectargs, List *);
@@ -3035,7 +3044,7 @@ expression_tree_mutator_impl(Node *node,
GroupingFunc *grouping = (GroupingFunc *) node;
GroupingFunc *newnode;
- FLATCOPY(newnode, grouping, GroupingFunc);
+ FLATCOPY(newnode, grouping, GroupingFunc, flags);
MUTATE(newnode->args, grouping->args, List *);
/*
@@ -3058,7 +3067,7 @@ expression_tree_mutator_impl(Node *node,
WindowFunc *wfunc = (WindowFunc *) node;
WindowFunc *newnode;
- FLATCOPY(newnode, wfunc, WindowFunc);
+ FLATCOPY(newnode, wfunc, WindowFunc, flags);
MUTATE(newnode->args, wfunc->args, List *);
MUTATE(newnode->aggfilter, wfunc->aggfilter, Expr *);
return (Node *) newnode;
@@ -3069,7 +3078,7 @@ expression_tree_mutator_impl(Node *node,
WindowFuncRunCondition *wfuncrc = (WindowFuncRunCondition *) node;
WindowFuncRunCondition *newnode;
- FLATCOPY(newnode, wfuncrc, WindowFuncRunCondition);
+ FLATCOPY(newnode, wfuncrc, WindowFuncRunCondition, flags);
MUTATE(newnode->arg, wfuncrc->arg, Expr *);
return (Node *) newnode;
}
@@ -3079,7 +3088,7 @@ expression_tree_mutator_impl(Node *node,
SubscriptingRef *sbsref = (SubscriptingRef *) node;
SubscriptingRef *newnode;
- FLATCOPY(newnode, sbsref, SubscriptingRef);
+ FLATCOPY(newnode, sbsref, SubscriptingRef, flags);
MUTATE(newnode->refupperindexpr, sbsref->refupperindexpr,
List *);
MUTATE(newnode->reflowerindexpr, sbsref->reflowerindexpr,
@@ -3097,7 +3106,7 @@ expression_tree_mutator_impl(Node *node,
FuncExpr *expr = (FuncExpr *) node;
FuncExpr *newnode;
- FLATCOPY(newnode, expr, FuncExpr);
+ FLATCOPY(newnode, expr, FuncExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3107,7 +3116,7 @@ expression_tree_mutator_impl(Node *node,
NamedArgExpr *nexpr = (NamedArgExpr *) node;
NamedArgExpr *newnode;
- FLATCOPY(newnode, nexpr, NamedArgExpr);
+ FLATCOPY(newnode, nexpr, NamedArgExpr, flags);
MUTATE(newnode->arg, nexpr->arg, Expr *);
return (Node *) newnode;
}
@@ -3117,7 +3126,7 @@ expression_tree_mutator_impl(Node *node,
OpExpr *expr = (OpExpr *) node;
OpExpr *newnode;
- FLATCOPY(newnode, expr, OpExpr);
+ FLATCOPY(newnode, expr, OpExpr, flags & ~QTW_DONT_COPY_DEFAULT);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3127,7 +3136,7 @@ expression_tree_mutator_impl(Node *node,
DistinctExpr *expr = (DistinctExpr *) node;
DistinctExpr *newnode;
- FLATCOPY(newnode, expr, DistinctExpr);
+ FLATCOPY(newnode, expr, DistinctExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3137,7 +3146,7 @@ expression_tree_mutator_impl(Node *node,
NullIfExpr *expr = (NullIfExpr *) node;
NullIfExpr *newnode;
- FLATCOPY(newnode, expr, NullIfExpr);
+ FLATCOPY(newnode, expr, NullIfExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3147,7 +3156,7 @@ expression_tree_mutator_impl(Node *node,
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
ScalarArrayOpExpr *newnode;
- FLATCOPY(newnode, expr, ScalarArrayOpExpr);
+ FLATCOPY(newnode, expr, ScalarArrayOpExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3157,7 +3166,7 @@ expression_tree_mutator_impl(Node *node,
BoolExpr *expr = (BoolExpr *) node;
BoolExpr *newnode;
- FLATCOPY(newnode, expr, BoolExpr);
+ FLATCOPY(newnode, expr, BoolExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3167,7 +3176,7 @@ expression_tree_mutator_impl(Node *node,
SubLink *sublink = (SubLink *) node;
SubLink *newnode;
- FLATCOPY(newnode, sublink, SubLink);
+ FLATCOPY(newnode, sublink, SubLink, flags);
MUTATE(newnode->testexpr, sublink->testexpr, Node *);
/*
@@ -3183,7 +3192,7 @@ expression_tree_mutator_impl(Node *node,
SubPlan *subplan = (SubPlan *) node;
SubPlan *newnode;
- FLATCOPY(newnode, subplan, SubPlan);
+ FLATCOPY(newnode, subplan, SubPlan, flags);
/* transform testexpr */
MUTATE(newnode->testexpr, subplan->testexpr, Node *);
/* transform args list (params to be passed to subplan) */
@@ -3197,7 +3206,7 @@ expression_tree_mutator_impl(Node *node,
AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
AlternativeSubPlan *newnode;
- FLATCOPY(newnode, asplan, AlternativeSubPlan);
+ FLATCOPY(newnode, asplan, AlternativeSubPlan, flags);
MUTATE(newnode->subplans, asplan->subplans, List *);
return (Node *) newnode;
}
@@ -3207,7 +3216,7 @@ expression_tree_mutator_impl(Node *node,
FieldSelect *fselect = (FieldSelect *) node;
FieldSelect *newnode;
- FLATCOPY(newnode, fselect, FieldSelect);
+ FLATCOPY(newnode, fselect, FieldSelect, flags);
MUTATE(newnode->arg, fselect->arg, Expr *);
return (Node *) newnode;
}
@@ -3217,7 +3226,7 @@ expression_tree_mutator_impl(Node *node,
FieldStore *fstore = (FieldStore *) node;
FieldStore *newnode;
- FLATCOPY(newnode, fstore, FieldStore);
+ FLATCOPY(newnode, fstore, FieldStore, flags);
MUTATE(newnode->arg, fstore->arg, Expr *);
MUTATE(newnode->newvals, fstore->newvals, List *);
newnode->fieldnums = list_copy(fstore->fieldnums);
@@ -3229,7 +3238,7 @@ expression_tree_mutator_impl(Node *node,
RelabelType *relabel = (RelabelType *) node;
RelabelType *newnode;
- FLATCOPY(newnode, relabel, RelabelType);
+ FLATCOPY(newnode, relabel, RelabelType, flags);
MUTATE(newnode->arg, relabel->arg, Expr *);
return (Node *) newnode;
}
@@ -3239,7 +3248,7 @@ expression_tree_mutator_impl(Node *node,
CoerceViaIO *iocoerce = (CoerceViaIO *) node;
CoerceViaIO *newnode;
- FLATCOPY(newnode, iocoerce, CoerceViaIO);
+ FLATCOPY(newnode, iocoerce, CoerceViaIO, flags);
MUTATE(newnode->arg, iocoerce->arg, Expr *);
return (Node *) newnode;
}
@@ -3249,7 +3258,7 @@ expression_tree_mutator_impl(Node *node,
ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
ArrayCoerceExpr *newnode;
- FLATCOPY(newnode, acoerce, ArrayCoerceExpr);
+ FLATCOPY(newnode, acoerce, ArrayCoerceExpr, flags);
MUTATE(newnode->arg, acoerce->arg, Expr *);
MUTATE(newnode->elemexpr, acoerce->elemexpr, Expr *);
return (Node *) newnode;
@@ -3260,7 +3269,7 @@ expression_tree_mutator_impl(Node *node,
ConvertRowtypeExpr *convexpr = (ConvertRowtypeExpr *) node;
ConvertRowtypeExpr *newnode;
- FLATCOPY(newnode, convexpr, ConvertRowtypeExpr);
+ FLATCOPY(newnode, convexpr, ConvertRowtypeExpr, flags);
MUTATE(newnode->arg, convexpr->arg, Expr *);
return (Node *) newnode;
}
@@ -3270,7 +3279,7 @@ expression_tree_mutator_impl(Node *node,
CollateExpr *collate = (CollateExpr *) node;
CollateExpr *newnode;
- FLATCOPY(newnode, collate, CollateExpr);
+ FLATCOPY(newnode, collate, CollateExpr, flags);
MUTATE(newnode->arg, collate->arg, Expr *);
return (Node *) newnode;
}
@@ -3280,7 +3289,7 @@ expression_tree_mutator_impl(Node *node,
CaseExpr *caseexpr = (CaseExpr *) node;
CaseExpr *newnode;
- FLATCOPY(newnode, caseexpr, CaseExpr);
+ FLATCOPY(newnode, caseexpr, CaseExpr, flags);
MUTATE(newnode->arg, caseexpr->arg, Expr *);
MUTATE(newnode->args, caseexpr->args, List *);
MUTATE(newnode->defresult, caseexpr->defresult, Expr *);
@@ -3292,7 +3301,7 @@ expression_tree_mutator_impl(Node *node,
CaseWhen *casewhen = (CaseWhen *) node;
CaseWhen *newnode;
- FLATCOPY(newnode, casewhen, CaseWhen);
+ FLATCOPY(newnode, casewhen, CaseWhen, flags);
MUTATE(newnode->expr, casewhen->expr, Expr *);
MUTATE(newnode->result, casewhen->result, Expr *);
return (Node *) newnode;
@@ -3303,7 +3312,7 @@ expression_tree_mutator_impl(Node *node,
ArrayExpr *arrayexpr = (ArrayExpr *) node;
ArrayExpr *newnode;
- FLATCOPY(newnode, arrayexpr, ArrayExpr);
+ FLATCOPY(newnode, arrayexpr, ArrayExpr, flags);
MUTATE(newnode->elements, arrayexpr->elements, List *);
return (Node *) newnode;
}
@@ -3313,7 +3322,7 @@ expression_tree_mutator_impl(Node *node,
RowExpr *rowexpr = (RowExpr *) node;
RowExpr *newnode;
- FLATCOPY(newnode, rowexpr, RowExpr);
+ FLATCOPY(newnode, rowexpr, RowExpr, flags);
MUTATE(newnode->args, rowexpr->args, List *);
/* Assume colnames needn't be duplicated */
return (Node *) newnode;
@@ -3324,7 +3333,7 @@ expression_tree_mutator_impl(Node *node,
RowCompareExpr *rcexpr = (RowCompareExpr *) node;
RowCompareExpr *newnode;
- FLATCOPY(newnode, rcexpr, RowCompareExpr);
+ FLATCOPY(newnode, rcexpr, RowCompareExpr, flags);
MUTATE(newnode->largs, rcexpr->largs, List *);
MUTATE(newnode->rargs, rcexpr->rargs, List *);
return (Node *) newnode;
@@ -3335,7 +3344,7 @@ expression_tree_mutator_impl(Node *node,
CoalesceExpr *coalesceexpr = (CoalesceExpr *) node;
CoalesceExpr *newnode;
- FLATCOPY(newnode, coalesceexpr, CoalesceExpr);
+ FLATCOPY(newnode, coalesceexpr, CoalesceExpr, flags);
MUTATE(newnode->args, coalesceexpr->args, List *);
return (Node *) newnode;
}
@@ -3345,7 +3354,7 @@ expression_tree_mutator_impl(Node *node,
MinMaxExpr *minmaxexpr = (MinMaxExpr *) node;
MinMaxExpr *newnode;
- FLATCOPY(newnode, minmaxexpr, MinMaxExpr);
+ FLATCOPY(newnode, minmaxexpr, MinMaxExpr, flags);
MUTATE(newnode->args, minmaxexpr->args, List *);
return (Node *) newnode;
}
@@ -3355,7 +3364,7 @@ expression_tree_mutator_impl(Node *node,
XmlExpr *xexpr = (XmlExpr *) node;
XmlExpr *newnode;
- FLATCOPY(newnode, xexpr, XmlExpr);
+ FLATCOPY(newnode, xexpr, XmlExpr, flags);
MUTATE(newnode->named_args, xexpr->named_args, List *);
/* assume mutator does not care about arg_names */
MUTATE(newnode->args, xexpr->args, List *);
@@ -3367,7 +3376,7 @@ expression_tree_mutator_impl(Node *node,
JsonReturning *jr = (JsonReturning *) node;
JsonReturning *newnode;
- FLATCOPY(newnode, jr, JsonReturning);
+ FLATCOPY(newnode, jr, JsonReturning, flags);
MUTATE(newnode->format, jr->format, JsonFormat *);
return (Node *) newnode;
@@ -3377,7 +3386,7 @@ expression_tree_mutator_impl(Node *node,
JsonValueExpr *jve = (JsonValueExpr *) node;
JsonValueExpr *newnode;
- FLATCOPY(newnode, jve, JsonValueExpr);
+ FLATCOPY(newnode, jve, JsonValueExpr, flags);
MUTATE(newnode->raw_expr, jve->raw_expr, Expr *);
MUTATE(newnode->formatted_expr, jve->formatted_expr, Expr *);
MUTATE(newnode->format, jve->format, JsonFormat *);
@@ -3389,7 +3398,7 @@ expression_tree_mutator_impl(Node *node,
JsonConstructorExpr *jce = (JsonConstructorExpr *) node;
JsonConstructorExpr *newnode;
- FLATCOPY(newnode, jce, JsonConstructorExpr);
+ FLATCOPY(newnode, jce, JsonConstructorExpr, flags);
MUTATE(newnode->args, jce->args, List *);
MUTATE(newnode->func, jce->func, Expr *);
MUTATE(newnode->coercion, jce->coercion, Expr *);
@@ -3402,7 +3411,7 @@ expression_tree_mutator_impl(Node *node,
JsonIsPredicate *pred = (JsonIsPredicate *) node;
JsonIsPredicate *newnode;
- FLATCOPY(newnode, pred, JsonIsPredicate);
+ FLATCOPY(newnode, pred, JsonIsPredicate, flags);
MUTATE(newnode->expr, pred->expr, Node *);
MUTATE(newnode->format, pred->format, JsonFormat *);
@@ -3413,7 +3422,7 @@ expression_tree_mutator_impl(Node *node,
JsonExpr *jexpr = (JsonExpr *) node;
JsonExpr *newnode;
- FLATCOPY(newnode, jexpr, JsonExpr);
+ FLATCOPY(newnode, jexpr, JsonExpr, flags);
MUTATE(newnode->formatted_expr, jexpr->formatted_expr, Node *);
MUTATE(newnode->path_spec, jexpr->path_spec, Node *);
MUTATE(newnode->passing_values, jexpr->passing_values, List *);
@@ -3428,7 +3437,7 @@ expression_tree_mutator_impl(Node *node,
JsonBehavior *behavior = (JsonBehavior *) node;
JsonBehavior *newnode;
- FLATCOPY(newnode, behavior, JsonBehavior);
+ FLATCOPY(newnode, behavior, JsonBehavior, flags);
MUTATE(newnode->expr, behavior->expr, Node *);
return (Node *) newnode;
}
@@ -3438,7 +3447,7 @@ expression_tree_mutator_impl(Node *node,
NullTest *ntest = (NullTest *) node;
NullTest *newnode;
- FLATCOPY(newnode, ntest, NullTest);
+ FLATCOPY(newnode, ntest, NullTest, flags);
MUTATE(newnode->arg, ntest->arg, Expr *);
return (Node *) newnode;
}
@@ -3448,7 +3457,7 @@ expression_tree_mutator_impl(Node *node,
BooleanTest *btest = (BooleanTest *) node;
BooleanTest *newnode;
- FLATCOPY(newnode, btest, BooleanTest);
+ FLATCOPY(newnode, btest, BooleanTest, flags);
MUTATE(newnode->arg, btest->arg, Expr *);
return (Node *) newnode;
}
@@ -3458,7 +3467,7 @@ expression_tree_mutator_impl(Node *node,
CoerceToDomain *ctest = (CoerceToDomain *) node;
CoerceToDomain *newnode;
- FLATCOPY(newnode, ctest, CoerceToDomain);
+ FLATCOPY(newnode, ctest, CoerceToDomain, flags);
MUTATE(newnode->arg, ctest->arg, Expr *);
return (Node *) newnode;
}
@@ -3468,7 +3477,7 @@ expression_tree_mutator_impl(Node *node,
ReturningExpr *rexpr = (ReturningExpr *) node;
ReturningExpr *newnode;
- FLATCOPY(newnode, rexpr, ReturningExpr);
+ FLATCOPY(newnode, rexpr, ReturningExpr, flags);
MUTATE(newnode->retexpr, rexpr->retexpr, Expr *);
return (Node *) newnode;
}
@@ -3478,7 +3487,7 @@ expression_tree_mutator_impl(Node *node,
TargetEntry *targetentry = (TargetEntry *) node;
TargetEntry *newnode;
- FLATCOPY(newnode, targetentry, TargetEntry);
+ FLATCOPY(newnode, targetentry, TargetEntry, flags & ~QTW_DONT_COPY_DEFAULT);
MUTATE(newnode->expr, targetentry->expr, Expr *);
return (Node *) newnode;
}
@@ -3491,7 +3500,7 @@ expression_tree_mutator_impl(Node *node,
WindowClause *wc = (WindowClause *) node;
WindowClause *newnode;
- FLATCOPY(newnode, wc, WindowClause);
+ FLATCOPY(newnode, wc, WindowClause, flags);
MUTATE(newnode->partitionClause, wc->partitionClause, List *);
MUTATE(newnode->orderClause, wc->orderClause, List *);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
@@ -3504,7 +3513,7 @@ expression_tree_mutator_impl(Node *node,
CTECycleClause *cc = (CTECycleClause *) node;
CTECycleClause *newnode;
- FLATCOPY(newnode, cc, CTECycleClause);
+ FLATCOPY(newnode, cc, CTECycleClause, flags);
MUTATE(newnode->cycle_mark_value, cc->cycle_mark_value, Node *);
MUTATE(newnode->cycle_mark_default, cc->cycle_mark_default, Node *);
return (Node *) newnode;
@@ -3515,7 +3524,7 @@ expression_tree_mutator_impl(Node *node,
CommonTableExpr *cte = (CommonTableExpr *) node;
CommonTableExpr *newnode;
- FLATCOPY(newnode, cte, CommonTableExpr);
+ FLATCOPY(newnode, cte, CommonTableExpr, flags);
/*
* Also invoke the mutator on the CTE's Query node, so it can
@@ -3534,7 +3543,7 @@ expression_tree_mutator_impl(Node *node,
PartitionBoundSpec *pbs = (PartitionBoundSpec *) node;
PartitionBoundSpec *newnode;
- FLATCOPY(newnode, pbs, PartitionBoundSpec);
+ FLATCOPY(newnode, pbs, PartitionBoundSpec, flags);
MUTATE(newnode->listdatums, pbs->listdatums, List *);
MUTATE(newnode->lowerdatums, pbs->lowerdatums, List *);
MUTATE(newnode->upperdatums, pbs->upperdatums, List *);
@@ -3546,7 +3555,7 @@ expression_tree_mutator_impl(Node *node,
PartitionRangeDatum *prd = (PartitionRangeDatum *) node;
PartitionRangeDatum *newnode;
- FLATCOPY(newnode, prd, PartitionRangeDatum);
+ FLATCOPY(newnode, prd, PartitionRangeDatum, flags);
MUTATE(newnode->value, prd->value, Node *);
return (Node *) newnode;
}
@@ -3576,7 +3585,7 @@ expression_tree_mutator_impl(Node *node,
FromExpr *from = (FromExpr *) node;
FromExpr *newnode;
- FLATCOPY(newnode, from, FromExpr);
+ FLATCOPY(newnode, from, FromExpr, flags);
MUTATE(newnode->fromlist, from->fromlist, List *);
MUTATE(newnode->quals, from->quals, Node *);
return (Node *) newnode;
@@ -3587,7 +3596,7 @@ expression_tree_mutator_impl(Node *node,
OnConflictExpr *oc = (OnConflictExpr *) node;
OnConflictExpr *newnode;
- FLATCOPY(newnode, oc, OnConflictExpr);
+ FLATCOPY(newnode, oc, OnConflictExpr, flags);
MUTATE(newnode->arbiterElems, oc->arbiterElems, List *);
MUTATE(newnode->arbiterWhere, oc->arbiterWhere, Node *);
MUTATE(newnode->onConflictSet, oc->onConflictSet, List *);
@@ -3602,7 +3611,7 @@ expression_tree_mutator_impl(Node *node,
MergeAction *action = (MergeAction *) node;
MergeAction *newnode;
- FLATCOPY(newnode, action, MergeAction);
+ FLATCOPY(newnode, action, MergeAction, flags);
MUTATE(newnode->qual, action->qual, Node *);
MUTATE(newnode->targetList, action->targetList, List *);
@@ -3614,7 +3623,7 @@ expression_tree_mutator_impl(Node *node,
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
PartitionPruneStepOp *newnode;
- FLATCOPY(newnode, opstep, PartitionPruneStepOp);
+ FLATCOPY(newnode, opstep, PartitionPruneStepOp, flags);
MUTATE(newnode->exprs, opstep->exprs, List *);
return (Node *) newnode;
@@ -3628,7 +3637,7 @@ expression_tree_mutator_impl(Node *node,
JoinExpr *join = (JoinExpr *) node;
JoinExpr *newnode;
- FLATCOPY(newnode, join, JoinExpr);
+ FLATCOPY(newnode, join, JoinExpr, flags);
MUTATE(newnode->larg, join->larg, Node *);
MUTATE(newnode->rarg, join->rarg, Node *);
MUTATE(newnode->quals, join->quals, Node *);
@@ -3641,7 +3650,7 @@ expression_tree_mutator_impl(Node *node,
SetOperationStmt *setop = (SetOperationStmt *) node;
SetOperationStmt *newnode;
- FLATCOPY(newnode, setop, SetOperationStmt);
+ FLATCOPY(newnode, setop, SetOperationStmt, flags);
MUTATE(newnode->larg, setop->larg, Node *);
MUTATE(newnode->rarg, setop->rarg, Node *);
/* We do not mutate groupClauses by default */
@@ -3653,7 +3662,7 @@ expression_tree_mutator_impl(Node *node,
IndexClause *iclause = (IndexClause *) node;
IndexClause *newnode;
- FLATCOPY(newnode, iclause, IndexClause);
+ FLATCOPY(newnode, iclause, IndexClause, flags);
MUTATE(newnode->rinfo, iclause->rinfo, RestrictInfo *);
MUTATE(newnode->indexquals, iclause->indexquals, List *);
return (Node *) newnode;
@@ -3664,7 +3673,7 @@ expression_tree_mutator_impl(Node *node,
PlaceHolderVar *phv = (PlaceHolderVar *) node;
PlaceHolderVar *newnode;
- FLATCOPY(newnode, phv, PlaceHolderVar);
+ FLATCOPY(newnode, phv, PlaceHolderVar, flags);
MUTATE(newnode->phexpr, phv->phexpr, Expr *);
/* Assume we need not copy the relids bitmapsets */
return (Node *) newnode;
@@ -3675,7 +3684,7 @@ expression_tree_mutator_impl(Node *node,
InferenceElem *inferenceelemdexpr = (InferenceElem *) node;
InferenceElem *newnode;
- FLATCOPY(newnode, inferenceelemdexpr, InferenceElem);
+ FLATCOPY(newnode, inferenceelemdexpr, InferenceElem, flags);
MUTATE(newnode->expr, newnode->expr, Node *);
return (Node *) newnode;
}
@@ -3685,7 +3694,7 @@ expression_tree_mutator_impl(Node *node,
AppendRelInfo *appinfo = (AppendRelInfo *) node;
AppendRelInfo *newnode;
- FLATCOPY(newnode, appinfo, AppendRelInfo);
+ FLATCOPY(newnode, appinfo, AppendRelInfo, flags);
MUTATE(newnode->translated_vars, appinfo->translated_vars, List *);
/* Assume nothing need be done with parent_colnos[] */
return (Node *) newnode;
@@ -3696,7 +3705,7 @@ expression_tree_mutator_impl(Node *node,
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) node;
PlaceHolderInfo *newnode;
- FLATCOPY(newnode, phinfo, PlaceHolderInfo);
+ FLATCOPY(newnode, phinfo, PlaceHolderInfo, flags);
MUTATE(newnode->ph_var, phinfo->ph_var, PlaceHolderVar *);
/* Assume we need not copy the relids bitmapsets */
return (Node *) newnode;
@@ -3707,7 +3716,7 @@ expression_tree_mutator_impl(Node *node,
RangeTblFunction *rtfunc = (RangeTblFunction *) node;
RangeTblFunction *newnode;
- FLATCOPY(newnode, rtfunc, RangeTblFunction);
+ FLATCOPY(newnode, rtfunc, RangeTblFunction, flags);
MUTATE(newnode->funcexpr, rtfunc->funcexpr, Node *);
/* Assume we need not copy the coldef info lists */
return (Node *) newnode;
@@ -3718,7 +3727,7 @@ expression_tree_mutator_impl(Node *node,
TableSampleClause *tsc = (TableSampleClause *) node;
TableSampleClause *newnode;
- FLATCOPY(newnode, tsc, TableSampleClause);
+ FLATCOPY(newnode, tsc, TableSampleClause, flags);
MUTATE(newnode->args, tsc->args, List *);
MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
return (Node *) newnode;
@@ -3729,7 +3738,7 @@ expression_tree_mutator_impl(Node *node,
TableFunc *tf = (TableFunc *) node;
TableFunc *newnode;
- FLATCOPY(newnode, tf, TableFunc);
+ FLATCOPY(newnode, tf, TableFunc, flags);
MUTATE(newnode->ns_uris, tf->ns_uris, List *);
MUTATE(newnode->docexpr, tf->docexpr, Node *);
MUTATE(newnode->rowexpr, tf->rowexpr, Node *);
@@ -3781,7 +3790,7 @@ query_tree_mutator_impl(Query *query,
{
Query *newquery;
- FLATCOPY(newquery, query, Query);
+ FLATCOPY(newquery, query, Query, flags);
query = newquery;
}
@@ -3825,7 +3834,7 @@ query_tree_mutator_impl(Query *query,
WindowClause *wc = lfirst_node(WindowClause, temp);
WindowClause *newnode;
- FLATCOPY(newnode, wc, WindowClause);
+ FLATCOPY(newnode, wc, WindowClause, flags);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
MUTATE(newnode->endOffset, wc->endOffset, Node *);
@@ -3874,7 +3883,7 @@ range_table_mutator_impl(List *rtable,
RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
RangeTblEntry *newrte;
- FLATCOPY(newrte, rte, RangeTblEntry);
+ FLATCOPY(newrte, rte, RangeTblEntry, flags);
switch (rte->rtekind)
{
case RTE_RELATION:
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f7..06dd07f3270 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = \
allpaths.o \
+ appendorpath.o \
clausesel.o \
costsize.o \
equivclass.o \
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index a20eb2c44e2..d1d69727d56 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -796,6 +796,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/* Consider index scans */
create_index_paths(root, rel);
+ /* Consider index scans with rewritten quals */
+ keybased_rewrite_index_paths(root, rel);
+
}
/*
diff --git a/src/backend/optimizer/path/appendorpath.c b/src/backend/optimizer/path/appendorpath.c
new file mode 100644
index 00000000000..8bdfc1c4549
--- /dev/null
+++ b/src/backend/optimizer/path/appendorpath.c
@@ -0,0 +1,1069 @@
+/*
+ * support Append plan for ORed clauses
+ * Teodor Sigaev <teodor@sigaev.ru>
+ */
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "catalog/pg_am.h"
+#include "optimizer/cost.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/paths.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "utils/lsyscache.h"
+
+typedef struct CKey { /* one index qual of a BitmapOr arm, normalized for comparison with other arms */
+ RestrictInfo *rinfo; /* original rinfo */
+ int n; /* number of the bitmapquals entry (BitmapOr arm) the qual came from */
+ OpExpr *normalizedexpr; /* expression with Var on left */
+ Var *var; /* left operand as returned by fix_indexqual_operand(), varattno reset to varattnosyn */
+ Node *value; /* right operand of normalizedexpr */
+ Oid opfamily; /* opfamily of the index column the qual was matched to */
+ int strategy; /* strategy number of the operator within opfamily */
+ uint8 strategyMask; /* BTMASK() bits of the strategies collected for this key */
+} CKey;
+#define BTMASK(x) ( 1<<(x) ) /* bit representing strategy number x in strategyMask */
+
+static List* find_common_quals( BitmapOrPath *path );
+static RestrictInfo* unionOperation(PlannerInfo *root, CKey *key);
+static BitmapOrPath* cleanup_nested_quals( PlannerInfo *root, RelOptInfo *rel, BitmapOrPath *path );
+static List* sortIndexScans( List* ipaths );
+static List* reverseScanDirIdxPaths(List *indexPaths);
+static IndexPath* reverseScanDirIdxPath(IndexPath *ipath);
+static bool checkSameIndex(Path *path, Oid *indexoid);
+
+#define IS_LESS(a) ( (a) == BTLessStrategyNumber || (a)== BTLessEqualStrategyNumber ) /* strategy is < or <= */
+#define IS_GREATER(a) ( (a) == BTGreaterStrategyNumber || (a) == BTGreaterEqualStrategyNumber ) /* strategy is > or >= */
+#define IS_ONE_DIRECTION(a,b) ( \
+ ( IS_LESS(a) && IS_LESS(b) ) \
+ || \
+ ( IS_GREATER(a) && IS_GREATER(b) ) \
+) /* true when both strategies are "less" ones or both are "greater" ones */
+
+typedef struct ExExpr { /* one index qual prepared by prepareQuals() for ordering index scans */
+ OpExpr *expr; /* copy of the clause; left operand replaced by fix_indexqual_operand() result */
+ Oid opfamily; /* index->opfamily[attno - 1] */
+ Oid lefttype; /* left type reported by get_op_opfamily_properties() */
+ Oid righttype; /* right type reported by get_op_opfamily_properties() */
+ int strategy; /* strategy number of expr's operator within opfamily */
+ int attno; /* varattno of expr's left operand */
+} ExExpr;
+
+
+typedef struct IndexPathEx { /* element of the array sorted by sortIndexScans() */
+ IndexPath *path; /* the index scan itself */
+ List *preparedquals; /* list of ExExpr */
+} IndexPathEx;
+
+/* Flatten a list of IndexClause nodes into a list of their bare clauses. */
+static List*
+clauses_get_exprs(List *listIndexClause) {
+	List *result = NIL;
+	ListCell *clauseCell;
+
+	foreach(clauseCell, listIndexClause)
+	{
+		IndexClause *iclause = (IndexClause *) lfirst(clauseCell);
+		ListCell *qualCell;
+		/* take the clause out of every RestrictInfo wrapper, in order */
+		foreach(qualCell, iclause->indexquals)
+		{
+			RestrictInfo *ri = (RestrictInfo *) lfirst(qualCell);
+
+			result = lappend(result, ri->clause);
+		}
+	}
+	return result;
+}
+
+
+/*----------
+ * keybased_rewrite_index_paths
+ * Examine OR-of-AND restriction quals to see if any useful common
+ * restriction clauses can be extracted. If so, use them to build new index paths.
+ *
+ * For example consider
+ * WHERE ( a.x=5 and a.y>10 ) OR a.x>5
+ * and there is an index on a.x or (a.x, a.y). Normally the plan
+ * will be a seqscan or BitmapOr(IndexPath,IndexPath).
+ * We can add an extra restriction:
+ * WHERE (( a.x=5 and a.y>10 ) OR a.x>5) AND a.x>=5
+ * and the plan may become
+ * Index Scan (a.x>=5)
+ * Filter( (( a.x=5 and a.y>10 ) OR a.x>5) )
+ *
+ * We don't want to add new clauses to baserestrictinfo permanently; we
+ * only use them as index quals.
+ *
+ * The next thing that is possible to try is an Append of
+ * index searches instead of the OR.
+ * For example consider
+ * WHERE ( a.x=5 and a.y>10 ) OR a.x>6
+ * and there is an index on (a.x) or (a.x, a.y)
+ * So, we can suggest the following plan:
+ * Append
+ * Filter ( a.x=5 and a.y>10 ) OR (a.x>6)
+ * Index Scan (a.x=5) --in case of index on (a.x)
+ * Index Scan (a.x>6)
+ * For that we must prove that the index quals do not overlap;
+ * also, some index quals may be contained in others, so they can be eliminated.
+ */
+
+void
+keybased_rewrite_index_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ BitmapOrPath *bestpath = NULL;
+ ListCell *i;
+ List *commonquals;
+ AppendPath *appendidxpath;
+ List *indexPaths;
+ IndexOptInfo *index;
+
+ foreach(i, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+
+ if (restriction_is_or_clause(rinfo))
+ {
+ /*
+ * Use the generate_bitmap_or_paths() machinery to estimate the
+ * value of each OR clause. We can use regular restriction
+ * clauses along with the OR clause contents to generate
+ * indexquals: the OR clause is passed alone as the clause list
+ * and the whole baserestrictinfo as the other clauses.
+ */
+ List *orpaths;
+ ListCell *k;
+
+ orpaths = generate_bitmap_or_paths(root, rel,
+ list_make1(rinfo),
+ rel->baserestrictinfo);
+
+ /* Locate the cheapest OR path whose index scans all use one btree index */
+ foreach(k, orpaths)
+ {
+ BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
+ Oid indexoid = InvalidOid;
+
+ Assert(IsA(path, BitmapOrPath));
+
+ if (checkSameIndex((Path*)path, &indexoid) == false)
+ continue;
+
+ if (bestpath == NULL ||
+ path->path.total_cost < bestpath->path.total_cost)
+ {
+ bestpath = path;
+ }
+ }
+ }
+ }
+
+ /* Fail if no suitable clauses found */
+ if (bestpath == NULL)
+ return;
+
+ commonquals = find_common_quals(bestpath);
+ /* Found quals with the same args but, maybe, different
+ operations */
+ if ( commonquals != NULL ) {
+ List *origBaseRestrictInfo=NIL;
+
+ foreach(i, commonquals) {
+ CKey *key = (CKey*)lfirst(i);
+ RestrictInfo *rinfo;
+
+ /*
+ * get 'union' of operation for key
+ */
+ rinfo = unionOperation(root, key);
+ if ( rinfo )
+ {
+ /*
+ * No check is made for rinfo duplicating a clause that
+ * is already in rel->baserestrictinfo: it is appended
+ * unconditionally.
+ *
+ * The first time through, a copy of the restriction
+ * list as it is before any addition is remembered in
+ * origBaseRestrictInfo, so that it can be put back
+ * once the additional index paths have been
+ * generated.
+ */
+
+ if (!origBaseRestrictInfo)
+ {
+ origBaseRestrictInfo = list_copy(rel->baserestrictinfo);
+ /* the copy is what gets reinstated below; the list that */
+ /* is appended to is simply abandoned afterwards */
+ }
+
+ rel->baserestrictinfo = lappend(rel->baserestrictinfo, rinfo);
+ /* rinfo is now visible to create_index_paths() below */
+
+ }
+ }
+
+ /*
+ * Ok, we found common quals and made a union of them, so we will
+ * try to create new possible index paths
+ */
+ if (origBaseRestrictInfo)
+ {
+ create_index_paths(root, rel);
+ /* the extended list is not freed, just replaced by the saved copy */
+ rel->baserestrictinfo = origBaseRestrictInfo;
+ }
+ }
+
+ /*
+ * Check that the indexquals do not overlap and that all index scans
+ * are on the same index.
+ */
+ if ( (bestpath = cleanup_nested_quals( root, rel, bestpath )) == NULL )
+ return;
+
+ if (IsA(bestpath, IndexPath)) { /* cleanup_nested_quals() may hand back a lone IndexPath */
+ IndexPath *ipath = (IndexPath*)bestpath;
+
+ /*
+ * It's possible to do only one index scan :)
+ */
+ index = ipath->indexinfo;
+
+ if ( root->query_pathkeys != NIL && index->sortopfamily && OidIsValid(index->sortopfamily[0]) )
+ {
+ List *pathkeys;
+
+ pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ ipath->path.pathkeys = pathkeys;
+ add_path(rel, (Path *) ipath);
+
+ /*
+ * add a path ordered in the backward direction if our pathkeys
+ * are still unusable...
+ */
+ if ( pathkeys == NULL || pathkeys_useful_for_ordering(root, pathkeys) == 0 )
+ {
+ pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ ipath = reverseScanDirIdxPath( ipath ); /* works on a copy of the path */
+
+ ipath->path.pathkeys = pathkeys;
+ add_path(rel, (Path *) ipath);
+ }
+ } else
+ add_path(rel, (Path *) ipath);
+ return;
+ }
+
+ /* re-estimate rows and cost of every index scan from its current index clauses */
+ foreach(i, bestpath->bitmapquals ) {
+ IndexPath *ipath = (IndexPath*)lfirst(i);
+
+ Assert( IsA(ipath, IndexPath) );
+ ipath->path.rows = rel->tuples * clauselist_selectivity(root,
+ clauses_get_exprs(ipath->indexclauses),
+ rel->relid,
+ JOIN_INNER,
+ NULL);
+ ipath->path.rows = clamp_row_est(ipath->path.rows);
+ cost_index(ipath, root, 1, false);
+ }
+
+ /*
+ * Check if the append of index scans can suggest an ordering of the result
+ *
+ * Also, we should tell AppendPath about the targetlist:
+ * the target list will be taken from the index scan
+ */
+ index = ((IndexPath*)linitial(bestpath->bitmapquals))->indexinfo;
+ if ( root->query_pathkeys != NIL && index->sortopfamily && OidIsValid(index->sortopfamily[0]) &&
+ (indexPaths = sortIndexScans( bestpath->bitmapquals )) !=NULL ) {
+ List *pathkeys;
+
+ pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ appendidxpath = create_append_path_ext(root, rel, indexPaths, NIL, pathkeys, NULL, 0,
+ false, -1.0, true);
+ add_path(rel, (Path *) appendidxpath);
+
+ /*
+ * add a path ordered in the backward direction if our pathkeys
+ * are still unusable...
+ */
+ if ( pathkeys == NULL || pathkeys_useful_for_ordering(root, pathkeys) == 0 ) {
+
+ pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ indexPaths = reverseScanDirIdxPaths(indexPaths); /* backward copies, in reverse order */
+ appendidxpath = create_append_path_ext(root, rel, indexPaths, NIL,
+ pathkeys, NULL,
+ 0, false, -1.0,
+ true);
+ add_path(rel, (Path *) appendidxpath);
+ }
+ } else {
+ appendidxpath = create_append_path_ext(root, rel, bestpath->bitmapquals,
+ NIL, NIL, NULL,
+ 0, false, -1.0, true);
+ add_path(rel, (Path *) appendidxpath);
+ }
+}
+
+/*
+ * returns true if all index scans below path use one and the same btree index; *indexoid (InvalidOid at first) receives its OID
+ */
+static bool
+checkSameIndex(Path *path, Oid *indexoid) {
+ ListCell *i;
+ List *subpaths;
+
+ if (IsA(path, IndexPath))
+ {
+ IndexPath *indpath = (IndexPath*)path;
+
+ if (indpath->indexinfo->relam != BTREE_AM_OID)
+ return false;
+
+ if (*indexoid == InvalidOid) /* first index scan met: remember its index */
+ *indexoid = indpath->indexinfo->indexoid;
+ else if (*indexoid != indpath->indexinfo->indexoid)
+ return false;
+
+ return true;
+ }
+ else if (IsA(path, BitmapOrPath))
+ {
+ BitmapOrPath *orpath = (BitmapOrPath*)path;
+
+ subpaths = orpath->bitmapquals;
+
+ }
+ else if (IsA(path, BitmapAndPath))
+ {
+ BitmapAndPath *andpath = (BitmapAndPath*)path;
+
+ subpaths = andpath->bitmapquals;
+ }
+ else
+ {
+ elog(ERROR, "unexpected path type: %d", nodeTag(path));
+ }
+
+ Assert(list_length(subpaths) > 0);
+
+ foreach(i, subpaths) /* recurse into the children of the BitmapOr/BitmapAnd */
+ {
+ Path *subpath = (Path *) lfirst(i);
+
+ if (checkSameIndex(subpath, indexoid) == false)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * transformToCkey - transform a RestrictInfo into a CKey struct.
+ * The function checks that the clause is usable as a common key (a binary
+ * OpExpr without mutable functions; otherwise NULL is returned), normalizes
+ * the expression so the index column is on the left and "caches" some
+ * information. Note, the original RestrictInfo isn't touched.
+ */
+
+static CKey*
+transformToCkey( IndexOptInfo *index, RestrictInfo* rinfo, int indexcol) {
+ CKey *key;
+ OpExpr *expr = (OpExpr*)rinfo->clause;
+
+ if ( !IsA(expr, OpExpr) )
+ return NULL;
+
+ if ( contain_mutable_functions((Node*)expr) )
+ return NULL;
+
+ if ( list_length( expr->args ) != 2 )
+ return NULL;
+
+ key = (CKey*)palloc(sizeof(CKey));
+ key->rinfo = rinfo;
+
+ key->normalizedexpr = (OpExpr*)copyObject( expr ); /* work on a copy */
+ if (!bms_equal(rinfo->left_relids, index->rel->relids))
+ CommuteOpExpr(key->normalizedexpr); /* left side is not the indexed rel: swap operands */
+
+ /*
+ * fix_indexqual_operand returns copy of object
+ */
+ key->var = (Var*)fix_indexqual_operand(linitial(key->normalizedexpr->args), index, indexcol);
+ Assert( IsA(key->var, Var) );
+
+ key->opfamily = index->opfamily[ key->var->varattno - 1 ];
+
+ /* restore varattno, because it may be different in different index */
+ key->var->varattno = key->var->varattnosyn;
+
+ key->value = (Node*)lsecond(key->normalizedexpr->args);
+
+ key->strategy = get_op_opfamily_strategy( key->normalizedexpr->opno, key->opfamily);
+ Assert( key->strategy != InvalidStrategy );
+
+ key->strategyMask = BTMASK(key->strategy); /* count_entry() may OR more bits in later */
+
+ return key;
+}
+
+/*
+ * get_index_quals - convert the index quals of one IndexPath to CKey form.
+ * Quals that transformToCkey() rejects are skipped; every CKey produced is
+ * tagged with cnt in its n field.
+ */
+static List*
+get_index_quals(IndexPath *path, int cnt) {
+	List *ckeys = NIL;
+	ListCell *icCell;
+
+	foreach(icCell, path->indexclauses) {
+		IndexClause *iclause = lfirst(icCell);
+		ListCell *qualCell;
+
+		foreach(qualCell, iclause->indexquals) {
+			RestrictInfo *qual = (RestrictInfo*)lfirst(qualCell);
+			CKey *ckey = transformToCkey(path->indexinfo, qual, iclause->indexcol);
+			if ( ckey == NULL )
+				continue;	/* not usable as a common key */
+			ckey->n = cnt;
+			ckeys = lappend(ckeys, ckey);
+		}
+	}
+	return ckeys;
+}
+
+/*
+ * extract all index quals found under path->bitmapquals as a list of CKeys; NIL if a subpath is not a btree index scan (or an AND of them)
+ */
+static List*
+find_all_quals( BitmapOrPath *path, int *counter ) {
+ ListCell *i,*j;
+ List *allquals = NIL;
+
+ *counter = 0; /* counts the entries of path->bitmapquals processed so far */
+
+ foreach(i, path->bitmapquals )
+ {
+ Path *subpath = (Path *) lfirst(i);
+
+ if ( IsA(subpath, BitmapAndPath) ) {
+ foreach(j, ((BitmapAndPath*)subpath)->bitmapquals) {
+ Path *subsubpath = (Path *) lfirst(j);
+
+ if ( IsA(subsubpath, IndexPath) ) {
+ if ( ((IndexPath*)subsubpath)->indexinfo->relam != BTREE_AM_OID )
+ return NIL;
+ allquals = list_concat(allquals, get_index_quals( (IndexPath*)subsubpath, *counter )); /* same number for every scan of this AND */
+ } else
+ return NIL;
+ }
+ } else if ( IsA(subpath, IndexPath) ) {
+ if ( ((IndexPath*)subpath)->indexinfo->relam != BTREE_AM_OID )
+ return NIL;
+ allquals = list_concat(allquals, get_index_quals( (IndexPath*)subpath, *counter ));
+ } else
+ return NIL;
+
+ (*counter)++;
+ }
+
+ return allquals;
+}
+
+/*
+ * iseqCKeyArgs - do two CKeys have the same operator arguments?
+ *
+ * True only when both keys use the same opfamily and their value and var
+ * nodes compare equal(); the operator itself is not compared.
+ */
+static bool
+iseqCKeyArgs( CKey *a, CKey *b ) {
+	bool sameFamily = ( a->opfamily == b->opfamily );
+
+	/*
+	 * Evaluation order (opfamily, then value, then var) is kept so the
+	 * cheap Oid test still short-circuits the equal() calls.
+	 */
+	return sameFamily && equal( a->value, b->value ) && equal( a->var, b->var );
+}
+
+/*
+ * Count how many consecutive entry numbers (CKey.n), starting from 0, have a CKey with the same arguments as tocmp
+ */
+static int
+count_entry( List *allquals, CKey *tocmp ) {
+ ListCell *i;
+ int curcnt=0; /* highest entry number matched so far */
+
+ foreach(i, allquals) {
+ CKey *key = lfirst(i);
+
+ if ( key->n == curcnt ) {
+ continue; /* this entry number is already accounted for */
+ } else if ( key->n == curcnt+1 ) {
+ if ( iseqCKeyArgs( key, tocmp ) ) {
+ tocmp->strategyMask |= key->strategyMask; /* collect the operators used with these args */
+ curcnt++;
+ }
+ } else
+ return -1; /* key->n is neither curcnt nor curcnt+1 */
+ }
+
+ return curcnt+1;
+}
+
+/*
+ * Finds the CKeys of entry 0 whose arguments are matched in every entry of path->bitmapquals
+ */
+static List*
+find_common_quals( BitmapOrPath *path ) {
+ List *allquals;
+ List *commonquals = NIL;
+ ListCell *i;
+ int counter; /* set by find_all_quals() to the number of bitmapquals entries */
+
+ if ( (allquals = find_all_quals( path, &counter ))==NIL )
+ return NIL;
+
+ foreach(i, allquals) {
+ CKey *key = lfirst(i);
+
+ if ( key->n != 0 )
+ break; /* only keys numbered 0 are candidates */
+
+ if ( counter == count_entry(allquals, key) )
+ commonquals = lappend( commonquals, key );
+ }
+
+ return commonquals;
+}
+
+/*
+ * unionOperation - make RestrictInfo with combined operation, or return NULL if the collected strategies cannot be combined
+ */
+
+static RestrictInfo*
+unionOperation(PlannerInfo *root, CKey *key) {
+ RestrictInfo *rinfo;
+ Oid lefttype, righttype;
+ int strategy;
+
+ switch( key->strategyMask ) {
+ case BTMASK(BTLessStrategyNumber):
+ case BTMASK(BTLessEqualStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTGreaterEqualStrategyNumber):
+ case BTMASK(BTGreaterStrategyNumber):
+ /* trivial case: a single strategy, key->strategy is kept */
+ break;
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTLessEqualStrategyNumber):
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTLessEqualStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTLessEqualStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ /* any subset of <, <=, = can be unioned with <= */
+ key->strategy = BTLessEqualStrategyNumber;
+ break;
+ case BTMASK(BTGreaterEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterEqualStrategyNumber):
+ /* any subset of >, >=, = can be unioned with >= */
+ key->strategy = BTGreaterEqualStrategyNumber;
+ break;
+ default:
+ /*
+ * Can't make common restrict qual
+ */
+ return NULL;
+ }
+
+ get_op_opfamily_properties(key->normalizedexpr->opno, key->opfamily, false,
+ &strategy, &lefttype, &righttype);
+
+ if ( strategy != key->strategy ) { /* the current operator is not the one chosen above */
+ /*
+ * We should check because it's possible to have "strange"
+ * opfamilies - without some strategies...
+ */
+ key->normalizedexpr->opno = get_opfamily_member(key->opfamily, lefttype, righttype, key->strategy);
+
+ if ( key->normalizedexpr->opno == InvalidOid )
+ return NULL;
+
+ key->normalizedexpr->opfuncid = get_opcode( key->normalizedexpr->opno );
+ Assert ( key->normalizedexpr->opfuncid != InvalidOid );
+ }
+
+ rinfo = make_simple_restrictinfo(root, (Expr*)key->normalizedexpr);
+
+ return rinfo;
+}
+
+/*
+ * Collect into *preds only those IndexClauses all of whose indexquals are
+ * binary OpExprs; the other IndexClauses are left out
+ */
+static void
+make_predicate(List *indexclauses, List **preds) {
+ ListCell *i, *c;
+
+ *preds = NIL;
+
+ foreach(i, indexclauses)
+ {
+ IndexClause *ic = lfirst(i);
+
+ foreach(c, ic->indexquals)
+ {
+ RestrictInfo *rinfoq = lfirst(c);
+ OpExpr *expr = (OpExpr*)rinfoq->clause;
+
+ if ( !IsA(expr, OpExpr) )
+ goto end; /* skip this IndexClause as a whole */
+
+ if ( list_length( expr->args ) != 2 )
+ goto end; /* skip this IndexClause as a whole */
+ }
+
+ *preds = lappend(*preds, ic);
+
+end:
+ continue;
+ }
+}
+
+#define CELL_GET_CLAUSES(x) ( ((IndexPath*)lfirst(x))->indexclauses ) /* x: ListCell holding an IndexPath */
+
+/*
+ * returns nested extended with the members of quals whose clauses imply the clauses of check
+ */
+static List*
+contained_quals(List *nested, List* quals, ListCell *check) {
+ ListCell *i;
+ List *checkpred;
+
+ if ( list_member_ptr( nested, lfirst(check) ) )
+ return nested; /* check itself has already been found nested */
+
+ checkpred = clauses_get_exprs(CELL_GET_CLAUSES(check));
+
+ if ( contain_mutable_functions((Node*)checkpred) )
+ return nested;
+
+ foreach(i, quals )
+ {
+ if ( check == i )
+ continue;
+
+ if ( list_member_ptr( nested, lfirst(i) ) )
+ continue;
+
+ if (predicate_implied_by( checkpred,
+ clauses_get_exprs(CELL_GET_CLAUSES(i)),
+ false ) )
+ nested = lappend( nested, lfirst(i) );
+ }
+ return nested;
+}
+
+/*
+ * Returns true when check may share rows with a later member of quals,
+ * i.e. when predicate_refuted_by() cannot prove them mutually exclusive
+ */
+static bool
+is_intersect(List *quals, ListCell *check) {
+ ListCell *i;
+ List *checkpred=NULL;
+
+ checkpred=clauses_get_exprs(CELL_GET_CLAUSES(check));
+ Assert( checkpred != NULL );
+
+ for_each_cell(i, quals, check) { /* the walk starts at check itself */
+ if ( i==check )
+ continue;
+
+ if ( predicate_refuted_by( checkpred,
+ clauses_get_exprs(CELL_GET_CLAUSES(i)),
+ false ) == false )
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Removes nested quals and guarantees that the remaining quals do not
+ * intersect, i.e. one row can't satisfy several of them; that makes an Append
+ * usable instead of the BitmapOr. Returns NULL on failure, else path or the single IndexPath left
+ */
+static BitmapOrPath*
+cleanup_nested_quals( PlannerInfo *root, RelOptInfo *rel, BitmapOrPath *path ) {
+ ListCell *i;
+ IndexOptInfo *index=NULL;
+ List *nested = NULL;
+
+ /*
+ * check that all paths are scans of one and the same btree index
+ */
+ foreach(i, path->bitmapquals )
+ {
+
+ if ( IsA(lfirst(i), IndexPath) ) {
+ List *preds;
+ IndexPath *subpath = (IndexPath *) lfirst(i);
+
+ if ( subpath->indexinfo->relam != BTREE_AM_OID )
+ return NULL;
+
+ if ( index == NULL )
+ index = subpath->indexinfo;
+ else if ( index->indexoid != subpath->indexinfo->indexoid )
+ return NULL;
+
+ /*
+ * work only with optimizable quals
+ */
+ make_predicate(subpath->indexclauses, &preds);
+ if (preds == NIL)
+ return NULL;
+ subpath->indexclauses = preds; /* note: the IndexPath is modified in place */
+ } else
+ return NULL;
+ }
+
+ /*
+ * eliminate nested quals
+ */
+ foreach(i, path->bitmapquals ) {
+ nested = contained_quals(nested, path->bitmapquals, i);
+ }
+
+ if ( nested != NIL ) {
+ path->bitmapquals = list_difference_ptr( path->bitmapquals, nested );
+
+ Assert( list_length( path->bitmapquals )>0 );
+
+ /*
+ * All quals became only one after eliminating nested quals
+ */
+ if (list_length( path->bitmapquals ) == 1)
+ return (BitmapOrPath*)linitial(path->bitmapquals); /* really an IndexPath; callers test with IsA() */
+ }
+
+ /*
+ * Checks for intersection
+ */
+ foreach(i, path->bitmapquals ) {
+ if ( is_intersect( path->bitmapquals, i ) )
+ return NULL;
+ }
+
+ return path;
+}
+
+/*
+ * Checks if whole result of one simple operation is contained in another.
+ * Returns 1 when b implies a (b's rows are a subset of a's), -1 when a
+ * implies b, and 0 when neither implication can be proven.
+ */
+static int
+simpleCmpExpr( ExExpr *a, ExExpr *b ) {
+	/*
+	 * predicate_implied_by() takes Lists of clauses, so each OpExpr is
+	 * wrapped in a one-element List rather than cast to List*; the cast
+	 * made list_length() read a field of the OpExpr as the list length.
+	 */
+	if ( predicate_implied_by(list_make1(a->expr), list_make1(b->expr), false) )
+		return 1;	/* a:( Var < 15 ) > b:( Var <= 10 ) */
+	else if ( predicate_implied_by(list_make1(b->expr), list_make1(a->expr), false) )
+		return -1;	/* a:( Var <= 10 ) < b:( Var < 15 ) */
+	else
+		return 0;
+}
+
+/*
+ * Tries to define where the equation is - on the left or right side
+ * a(< 10) b(=11) - on right
+ * a(> 10) b(=9) - on left
+ * a(= 10) b(=11) - on right
+ * a(= 10) b(=9) - on left
+ * Any other - result is 0; b's operator is replaced temporarily and restored before returning
+ */
+static int
+cmpEqExpr( ExExpr *a, ExExpr *b ) {
+ Oid oldop = b->expr->opno;
+ int res=0;
+
+ b->expr->opno = get_opfamily_member(b->opfamily, b->lefttype, b->righttype, BTLessStrategyNumber); /* try b with "<" */
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = simpleCmpExpr(a,b);
+ }
+
+ if ( res == 0 ) {
+ b->expr->opno = get_opfamily_member(b->opfamily, b->lefttype, b->righttype, BTGreaterStrategyNumber); /* then b with ">" */
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = -simpleCmpExpr(a,b);
+ }
+ }
+
+ b->expr->opno = oldop; /* put the original operator back */
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+
+ return res;
+}
+
+/*
+ * Is the result of a contained in the result of the negation of b, or vice versa? The sign is flipped unless a is a "less" operation
+ */
+static int
+cmpNegCmp( ExExpr *a, ExExpr *b ) {
+ Oid oldop = b->expr->opno;
+ int res = 0;
+
+ b->expr->opno = get_negator( b->expr->opno ); /* temporarily negate b's operator */
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = simpleCmpExpr(a,b);
+ }
+
+ b->expr->opno = oldop; /* put the original operator back */
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+
+ return ( IS_LESS(a->strategy) ) ? res : -res;
+}
+
+/*
+ * Returns 1 if the whole result of a is on the left compared with the result of b
+ * Returns -1 if the whole result of a is on the right compared with the result of b
+ * Returns 0 if it's impossible to decide or the results overlap
+ * Expressions should use the same attribute of the index and should be
+ * simple: just one operation on the index column.
+ */
+static int
+cmpExpr( ExExpr *a, ExExpr *b ) {
+	int res;
+
+	/*
+	 * If a and b overlap, we can't decide which one is further left or
+	 * right. The OpExprs are passed as proper one-element Lists.
+	 */
+	if ( IS_ONE_DIRECTION(a->strategy, b->strategy) ||
+		 predicate_refuted_by(list_make1(a->expr), list_make1(b->expr), false) == false )
+		return 0;
+
+	/*
+	 * At this point it's impossible to have a row which satisfies both
+	 * a and b, so we try to find the relative position of their results
+	 */
+	if (a->strategy == BTEqualStrategyNumber &&
+		b->strategy == BTEqualStrategyNumber) {
+		return cmpEqExpr(a, b);
+	} else if ( b->strategy == BTEqualStrategyNumber ) {
+		return -cmpEqExpr(a, b); /* Covers cases with any operations in a */
+	} else if ( a->strategy == BTEqualStrategyNumber ) {
+		return cmpEqExpr(b, a);
+	} else if ( (res = cmpNegCmp(a, b)) == 0 ) { /* so, a(<10) b(>20) */
+		res = -cmpNegCmp(b, a);
+	}
+
+	return res;
+}
+
+static IndexOptInfo *sortingIndex = NULL; /* index whose scans are being sorted; set by sortIndexScans() */
+static bool volatile unableToDefine = false; /* set before jumping out of qsort() when no order can be defined */
+
+/*
+ * Try to define the relative position of the results which satisfy indexquals
+ * a and b (lists of ExExpr) for one index attribute; 0 if nothing could be decided.
+ */
+static int
+cmpColumnQuals( List *a, List *b, int attno ) {
+ int res = 0;
+ ListCell *ai, *bi;
+
+ foreach(ai, a) {
+ ExExpr *ae = (ExExpr*)lfirst(ai);
+
+ if ( attno != ae->attno )
+ continue;
+
+ foreach(bi, b) {
+ ExExpr *be = (ExExpr*)lfirst(bi);
+
+ if ( attno != be->attno )
+ continue;
+
+ if ((res=cmpExpr(ae, be))!=0)
+ return res; /* the first decisive pair of expressions wins */
+
+ if (res == 0 && ae->strategy == be->strategy &&
+ be->strategy != BTEqualStrategyNumber &&
+ equal(ae->expr, be->expr))
+ {
+ /*
+ * It's impossible to get a defined order for identical non-equality clauses
+ */
+ unableToDefine = true;
+ PG_RE_THROW(); /* it should be PG_THROW(), but it's the same */
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * qsort() comparator: compare the results of two index scans (IndexPathEx).
+ * Warning: it uses PG_RE_THROW(), so any call should be wrapped with
+ * PG_TRY(). The try/catch construction is used here to minimize unneeded
+ * actions when sorting is impossible
+ */
+static int
+cmpIndexPathEx(const void *a, const void *b) {
+ IndexPathEx *aipe = (IndexPathEx*)a;
+ IndexPathEx *bipe = (IndexPathEx*)b;
+ int attno, res = 0;
+
+ for(attno=1; res==0 && attno<=sortingIndex->ncolumns; attno++) /* stop at the first column that decides */
+ res=cmpColumnQuals(aipe->preparedquals, bipe->preparedquals, attno);
+
+ if ( res==0 ) { /* no column gave an order: give up on sorting */
+ unableToDefine = true;
+ PG_RE_THROW(); /* it should be PG_THROW(), but it's the same */
+ }
+
+ return res;
+}
+
+/*
+ * Build the list of ExExpr for the given index clauses; NULL if some qual is not a binary OpExpr or has mutable functions
+ */
+static List*
+prepareQuals(IndexOptInfo *index, List *indexclauses) {
+ ListCell *i, *c;
+ List *res=NULL;
+ ExExpr *ex;
+
+ foreach(i, indexclauses)
+ {
+ IndexClause *ic = lfirst(i);
+
+ foreach(c, ic->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst(c);
+ OpExpr *expr = (OpExpr*)rinfo->clause;
+
+ if ( !IsA(expr, OpExpr) )
+ return NULL;
+
+ if ( list_length( expr->args ) != 2 )
+ return NULL;
+
+ if ( contain_mutable_functions((Node*)expr) )
+ return NULL;
+
+ ex = (ExExpr*)palloc(sizeof(ExExpr));
+ ex->expr = (OpExpr*)copyObject( expr ); /* the copy is modified, not the original clause */
+ if (!bms_equal(rinfo->left_relids, index->rel->relids))
+ CommuteOpExpr(ex->expr); /* left side is not the indexed rel: swap operands */
+ linitial(ex->expr->args) = fix_indexqual_operand(linitial(ex->expr->args), index, ic->indexcol);
+ ex->attno = ((Var*)linitial(ex->expr->args))->varattno;
+ ex->opfamily = index->opfamily[ ex->attno - 1 ];
+ get_op_opfamily_properties( ex->expr->opno, ex->opfamily, false,
+ &ex->strategy, &ex->lefttype, &ex->righttype);
+
+ res = lappend(res, ex);
+ }
+ }
+
+ return res;
+}
+
+/*
+ * sortIndexScans - sorts index scans to get sorted results.
+ * The function supposes that the index is the same for all
+ * index scans. Returns NULL when no order could be defined
+ */
+static List*
+sortIndexScans( List* ipaths ) {
+ ListCell *i;
+ int j=0;
+ IndexPathEx *ipe = (IndexPathEx*)palloc( sizeof(IndexPathEx)*list_length(ipaths) );
+ List *orderedPaths = NIL;
+ IndexOptInfo *index = ((IndexPath*)linitial(ipaths))->indexinfo;
+
+ foreach(i, ipaths) {
+ ipe[j].path = (IndexPath*)lfirst(i);
+ ipe[j].preparedquals = prepareQuals(index, ipe[j].path->indexclauses);
+
+ if (ipe[j].preparedquals == NULL)
+ return NULL;
+ j++;
+ }
+
+ sortingIndex = index; /* read by cmpIndexPathEx() */
+ unableToDefine = false;
+ PG_TRY(); {
+ qsort(ipe, list_length(ipaths), sizeof(IndexPathEx), cmpIndexPathEx);
+ } PG_CATCH(); {
+ if ( unableToDefine == false )
+ PG_RE_THROW(); /* not our problem */
+ } PG_END_TRY();
+
+ if ( unableToDefine == true ) /* the comparator gave up */
+ return NULL;
+
+ for(j=0;j<list_length(ipaths);j++)
+ orderedPaths = lappend(orderedPaths, ipe[j].path);
+
+ return orderedPaths;
+}
+
+/* Return a shallow copy of ipath that scans the index backward. */
+static IndexPath*
+reverseScanDirIdxPath(IndexPath *ipath) {
+	IndexPath *reversed = makeNode(IndexPath);
+
+	/* copy every field of ipath, then flip only the scan direction */
+	memcpy(reversed, ipath, sizeof(IndexPath));
+	reversed->indexscandir = BackwardScanDirection;
+	return reversed;
+}
+
+/* Build backward-scan copies of indexPaths, listed in reverse order. */
+static List*
+reverseScanDirIdxPaths(List *indexPaths) {
+	List *reversedPaths = NIL;
+	ListCell *cell;
+
+	/* lcons() prepends, so the result order is the reverse of the input */
+	foreach(cell, indexPaths)
+		reversedPaths = lcons(reverseScanDirIdxPath((IndexPath*)lfirst(cell)), reversedPaths);
+	return reversedPaths;
+}
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 5d51f97f219..560553eecd5 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -12,8 +12,16 @@
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include <math.h>
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "common/pg_prng.h"
+#include "commands/vacuum.h"
+#include "funcapi.h"
+#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
@@ -24,6 +32,17 @@
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
+#include "parser/parsetree.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+/*
+ * Thresholds of the compound-index selectivity estimator. All of them are
+ * derived from default_statistics_target.
+ *
+ * EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD: when the product of IN-list lengths
+ * exceeds it, initTupleIterator() limits the number of probed tuples to this
+ * value and getTupleIterator() picks IN-list elements at random.
+ *
+ * RANGE_IN_SELECTIVITY_THRESHOLD: when more tuples than this have to be
+ * probed against a key prefix, estimate_selectivity_by_index() first tries
+ * one min..max range probe.
+ *
+ * MULTICOLUMN_STATISTIC_FALLBACK_DISTINCT_THRESHOLD: distinct-count limit
+ * above which estimate_selectivity_by_index() may flag its result as weak.
+ */
+#define EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD (default_statistics_target/4)
+#define RANGE_IN_SELECTIVITY_THRESHOLD (default_statistics_target/20)
+#define MULTICOLUMN_STATISTIC_FALLBACK_DISTINCT_THRESHOLD (default_statistics_target)
+
/*
* Data structure for accumulating info about possible range-query
* clause pairs in clauselist_selectivity.
@@ -49,6 +68,1095 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
bool use_extended_stats);
+static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
+ int varRelid, SpecialJoinInfo *sjinfo);
+
+/*
+ * Classification of a clause selectivity, used to decide how it is folded
+ * into the overall result. The values are also used as indexes into the
+ * Selectivity s[5] accumulator handled by appendSelectivityRes() and
+ * finalizeSelectivityRes(), so they must stay dense and start at 0.
+ */
+typedef enum CorrelationKind {
+ CKRestrict = 0, /* clause is not treated as a join clause */
+ CKIndepend, /* join clause with unknown correlation */
+ CKLikelySelf, /* probably close to fully correlated, e.g. an aggregate
+ joined with its own table */
+ CKSelf, /* 100% correlation because of self join */
+ CKMul /* accumulator slot only: product of all CKLikelySelf and
+ CKSelf selectivities */
+} CorrelationKind;
+static CorrelationKind get_correlation_kind(PlannerInfo *root, int varRelid,
+ OpExpr* expr);
+
+/*
+ * get_var - return the Var behind "node", or NULL.
+ *
+ * One RelabelType wrapper is looked through; any node that is not a plain
+ * Var after that yields NULL.
+ */
+static inline Var*
+get_var(Node* node)
+{
+	Node	   *inner = node;
+
+	if (IsA(inner, RelabelType))
+		inner = (Node *) ((RelabelType *) inner)->arg;
+
+	if (!IsA(inner, Var))
+		return NULL;
+
+	return (Var *) inner;
+}
+
+/*
+ * Locate compound index which can be used for multicolumn clauses/join.
+ *
+ * Every index of base relation "varno" is examined. Each Var of "vars" is
+ * mapped to the first not yet taken key column with the same attribute
+ * number. If the matched columns do not cover all key columns of the index,
+ * only the gap-free leading run of matched columns is kept; an index whose
+ * first key column is unmatched is skipped.
+ *
+ * Inputs:
+ *   vars       - list of Var nodes (at most n_clauses of them)
+ *   n_clauses  - size used for the permutation array
+ *
+ * Outputs (only meaningful when an index is returned):
+ *   *permutation - palloc'd array of n_clauses ints; entry i is the key
+ *                  column matched by the i-th Var, or -1. The caller frees it.
+ *   *missed_vars - Vars that are not covered by the usable key prefix
+ *   *n_keys      - number of leading key columns that can be used
+ *
+ * An index that is matched completely (all key columns, no missed Vars) is
+ * returned immediately. Otherwise the candidate with the most usable columns
+ * wins, ties going to the index with fewer key columns. Returns NULL when no
+ * index qualifies; *n_keys is 0 and *missed_vars is NIL in that case.
+ */
+static IndexOptInfo*
+locate_inner_multicolumn_index(PlannerInfo *root, Index varno, List* vars,
+ int n_clauses,
+ int **permutation, List **missed_vars, int* n_keys)
+{
+ ListCell *ilist;
+ RelOptInfo *rel = find_base_rel(root, varno);
+ IndexOptInfo *index_opt = NULL;
+ List *missed_vars_opt = NIL;
+ int *permutation_opt = NULL;
+ int n_index_cols_opt = 0;
+ bool used[INDEX_MAX_KEYS];
+ int posvars[INDEX_MAX_KEYS];
+
+ /* note: whatever the caller had in *missed_vars is discarded here */
+ *n_keys = 0;
+ *missed_vars = NIL;
+
+ Assert(list_length(vars) >= 1);
+ Assert(list_length(vars) <= n_clauses);
+
+ foreach(ilist, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+ ListCell *vlist;
+ int i, n_index_cols = 0;
+ List *missed = NIL;
+ int *perm = NULL;
+ int last_idx = 0;
+
+ /* used[pos]: key column pos is taken; posvars[pos]: by which Var */
+ memset(used, 0, sizeof(used));
+ perm = palloc(n_clauses * sizeof(*perm));
+ for(i=0; i<n_clauses; i++)
+ perm[i] = -1;
+
+ i = 0;
+ foreach (vlist, vars)
+ {
+ Var* var = lfirst(vlist);
+ int pos;
+
+ for (pos = 0; pos < index->nkeycolumns; pos++)
+ {
+ if (index->indexkeys[pos] == var->varattno)
+ {
+ /*
+ * A column already taken by another Var makes this Var
+ * "missed"; the scan goes on in case the same attribute
+ * appears again in a later key column.
+ */
+ if (used[pos])
+ missed = lappend(missed, var);
+ else
+ {
+ used[pos] = true;
+ posvars[pos] = i;
+ perm[i] = pos;
+ n_index_cols++;
+ last_idx = Max(last_idx, pos);
+ break;
+ }
+ }
+ }
+
+ /* var isn't found in index columns */
+ if (pos == index->nkeycolumns && !list_member_ptr(missed, var))
+ missed = lappend(missed, var);
+
+ i += 1;
+ }
+
+ if (n_index_cols == 0)
+ continue;
+
+ /* check that found columns are first columns in index */
+ if (index->nkeycolumns != n_index_cols)
+ {
+ int old_n_index_cols = n_index_cols;
+
+ for (i = 0; i <= last_idx; i++)
+ {
+ if (n_index_cols != old_n_index_cols)
+ {
+ /*
+ * We will use only first n_index_cols columns instead of
+ * found old_n_index_cols, so, all other columns should be
+ * added to missed list
+ */
+ /*
+ * NOTE(review): perm[] of such a Var keeps pointing at its
+ * column; only the missed list records that it is unused.
+ */
+ if (used[i])
+ {
+ Var *var = list_nth(vars, posvars[i]);
+
+ missed = lappend(missed, var);
+ }
+ }
+ else if (!used[i])
+ {
+ if (i==0)
+ /* there isn't useful prefix */
+ goto TryNextIndex;
+
+ /* we will use only first i columns, save as new n_index_cols */
+ n_index_cols = i;
+ }
+ }
+ }
+
+ /* found exact match vars - index, immediately return */
+ /*
+ * vlist is NULL here because the foreach over vars above always runs
+ * to completion.
+ * NOTE(review): a previously saved permutation_opt is not freed on
+ * this return path.
+ */
+ if (vlist == NULL && list_length(missed) == 0 && n_index_cols == index->nkeycolumns)
+ {
+ *permutation = perm;
+ *n_keys = n_index_cols;
+ return index;
+ }
+
+ /* save partially matched index */
+ if (index_opt == NULL ||
+ n_index_cols > n_index_cols_opt ||
+ (n_index_cols == n_index_cols_opt && index->nkeycolumns < index_opt->nkeycolumns))
+ {
+ index_opt = index;
+ missed_vars_opt = missed;
+ if (permutation_opt)
+ pfree(permutation_opt);
+ permutation_opt = perm;
+ /* ownership moved to permutation_opt: keep the pfree below away */
+ perm = NULL;
+ n_index_cols_opt = n_index_cols;
+ }
+TryNextIndex:
+ if (perm)
+ pfree(perm);
+ }
+
+ if (index_opt)
+ {
+ /* *missed_vars is NIL at this point, so this yields missed_vars_opt */
+ *missed_vars = list_concat_unique(*missed_vars, missed_vars_opt);
+ *permutation = permutation_opt;
+ *n_keys = n_index_cols_opt;
+ }
+
+ return index_opt;
+}
+
+/*
+ * Check that the first n_vars entries of "used" are all set.
+ *
+ * An index with exactly n_vars key columns is accepted without looking at
+ * the flags.
+ */
+static bool
+check_leading_vars_index(IndexOptInfo *index, int n_vars,
+						 bool used[INDEX_MAX_KEYS])
+{
+	int			pos = 0;
+
+	if (index->nkeycolumns == n_vars)
+		return true;
+
+	while (pos < n_vars)
+	{
+		if (!used[pos])
+			return false;
+		pos++;
+	}
+
+	return true;
+}
+
+
+/*
+ * Locate an index of relation "varno" that matches the join Vars under an
+ * already chosen column mapping.
+ *
+ * permutation[i] is the key column the i-th Var of "vars" has to occupy. An
+ * index qualifies only when every Var has a non-negative mapping that lies
+ * inside the index and refers to the Var's attribute.
+ *
+ * An index with exactly list_length(vars) key columns is returned at once;
+ * otherwise the qualifying index with the fewest key columns is returned,
+ * or NULL if there is none.
+ *
+ * NOTE(review): used[] is filled by Var position, not by key column, so for
+ * a fully matched index check_leading_vars_index() always succeeds - verify
+ * whether used[permutation[i]] was intended.
+ */
+static IndexOptInfo*
+locate_outer_multicolumn_index(PlannerInfo *root, Index varno, List* vars,
+							   int *permutation)
+{
+	RelOptInfo *rel = find_base_rel(root, varno);
+	int			n_vars = list_length(vars);
+	IndexOptInfo *best = NULL;
+	ListCell   *icell;
+
+	Assert(n_vars >= 1);
+
+	foreach(icell, rel->indexlist)
+	{
+		IndexOptInfo *candidate = (IndexOptInfo *) lfirst(icell);
+		bool		used[INDEX_MAX_KEYS];
+		bool		all_matched = true;
+		int			nth = 0;
+		ListCell   *vcell;
+
+		/* too short to hold all join columns */
+		if (candidate->nkeycolumns < n_vars)
+			continue;
+
+		memset(used, 0, sizeof(used));
+
+		foreach(vcell, vars)
+		{
+			Var		   *var = lfirst(vcell);
+			int			col = permutation[nth];
+
+			if (col < 0 ||
+				col >= candidate->nkeycolumns ||
+				candidate->indexkeys[col] != var->varattno)
+			{
+				all_matched = false;
+				break;
+			}
+
+			used[nth] = true;
+			nth++;
+		}
+
+		if (!all_matched ||
+			!check_leading_vars_index(candidate, n_vars, used))
+			continue;
+
+		/* found exact match vars - index, immediately return */
+		if (candidate->nkeycolumns == n_vars)
+			return candidate;
+
+		/* otherwise remember the narrowest candidate seen so far */
+		if (best == NULL || best->nkeycolumns > candidate->nkeycolumns)
+			best = candidate;
+	}
+
+	return best;
+}
+
+/*
+ * One "var = ANY (array constant)" restriction taking part in tuple
+ * generation.
+ */
+typedef struct InArrayClause
+{
+ ArrayType* array; /* the array constant of the clause */
+ Datum* elems; /* array elements, filled by initTupleIterator() */
+ bool* nulls; /* per-element null flags, filled together with elems */
+ int index; /* position of the clause's constant in the restriction
+ constants list; used to look up the permutation */
+ int n_elems; /* number of entries in elems/nulls */
+ int curr_elem; /* element currently selected by the iterator */
+} InArrayClause;
+
+/*
+ * State for enumerating the index tuples described by a set of constants and
+ * IN-lists. values/isnull are indexed by index key column.
+ */
+typedef struct TupleIterator
+{
+ Datum values [INDEX_MAX_KEYS]; /* current tuple, by key column */
+ bool isnull[INDEX_MAX_KEYS]; /* true for columns without a value */
+ int n_variants; /* number of tuples one full pass produces */
+ int i_variant; /* tuples still to be produced in this pass */
+ int *permutation; /* constant position -> key column, or -1 */
+ List *in_clauses; /* list of InArrayClause */
+ bool isExhaustive; /* true when IN-list elements are picked at random
+ because there are too many combinations */
+} TupleIterator;
+
+/*
+ * Prepare "it" for producing index tuples.
+ *
+ * consts     - list of Const nodes; the i-th one goes to key column
+ *              permutation[i] unless that entry is negative
+ * in_clauses - list of InArrayClause; their arrays are deconstructed here
+ *
+ * The number of tuples to produce is the product of all IN-list lengths,
+ * capped at EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD; when the cap applies the
+ * iterator is switched to random element selection.
+ */
+static void
+initTupleIterator(TupleIterator *it, List *consts, int *permutation,
+				  List *in_clauses)
+{
+	ListCell   *cell;
+	int			pos;
+	double		total = 1;
+
+	it->n_variants = 1;
+	it->permutation = permutation;
+	it->in_clauses = in_clauses;
+	it->isExhaustive = false;
+
+	/* every key column starts out as NULL */
+	for (pos = 0; pos < INDEX_MAX_KEYS; pos++)
+		it->isnull[pos] = true;
+
+	/* place plain constants into their key columns */
+	pos = 0;
+	foreach(cell, consts)
+	{
+		Const	   *c = (Const *) lfirst(cell);
+		int			col = permutation[pos];
+
+		pos++;
+		if (col >= 0)
+		{
+			it->values[col] = c->constvalue;
+			it->isnull[col] = c->constisnull;
+		}
+	}
+
+	/* split every IN-list array into separate elements */
+	foreach(cell, in_clauses)
+	{
+		InArrayClause *iac = (InArrayClause *) lfirst(cell);
+		Oid			elemtype = iac->array->elemtype;
+		int16		typlen;
+		bool		typbyval;
+		char		typalign;
+
+		get_typlenbyvalalign(elemtype, &typlen, &typbyval, &typalign);
+		deconstruct_array(iac->array, elemtype,
+						  typlen, typbyval, typalign,
+						  &iac->elems, &iac->nulls, &iac->n_elems);
+		iac->curr_elem = 0;
+		total *= (double) iac->n_elems;
+	}
+
+	if (total <= EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD)
+		it->n_variants = total;
+	else
+	{
+		it->isExhaustive = true;
+		it->n_variants = EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD;
+	}
+
+	it->i_variant = it->n_variants;
+}
+
+/*
+ * Rewind the iterator: restore the remaining-tuples counter and move every
+ * IN-list back to its first element.
+ */
+static void
+resetTupleIterator(TupleIterator *it)
+{
+	ListCell   *cell;
+
+	foreach(cell, it->in_clauses)
+		((InArrayClause *) lfirst(cell))->curr_elem = 0;
+
+	it->i_variant = it->n_variants;
+}
+
+/*
+ * Advance to the next tuple and store it in it->values / it->isnull.
+ *
+ * Returns false once the pass is finished. In the regular mode the IN-lists
+ * are stepped like the digits of an odometer (the first list moves fastest);
+ * in exhaustive mode each list gets a randomly chosen element instead.
+ * IN-lists whose column is not mapped (negative permutation entry) are
+ * skipped and do not take part in the carry.
+ */
+static bool
+getTupleIterator(TupleIterator *it)
+{
+	ListCell   *cell;
+	int			carry = 1;
+
+	if (it->i_variant == 0)
+		return false;
+
+	it->i_variant--;
+
+	foreach(cell, it->in_clauses)
+	{
+		InArrayClause *iac = (InArrayClause *) lfirst(cell);
+		int			col = it->permutation[iac->index];
+
+		if (col < 0)
+			continue;
+
+		if (it->isExhaustive)
+		{
+			/* use random subset of IN list(s) */
+			iac->curr_elem = pg_prng_uint64(&pg_global_prng_state) % iac->n_elems;
+		}
+		else
+		{
+			iac->curr_elem += carry;
+			if (iac->curr_elem >= iac->n_elems)
+			{
+				/* wrapped around: propagate to the next list */
+				iac->curr_elem = 0;
+				carry = 1;
+			}
+			else
+				carry = 0;
+		}
+
+		it->values[col] = iac->elems[iac->curr_elem];
+		it->isnull[col] = iac->nulls[iac->curr_elem];
+	}
+
+	return true;
+}
+
+/*
+ * Estimate the number of distinct values of the first n_keys columns of an
+ * index as the product of the per-column distinct estimates, limited by the
+ * number of index tuples.
+ */
+static double
+get_numdistinct(PlannerInfo *root, IndexOptInfo* index, int n_keys)
+{
+	double		product = 1.0;
+	int			seen = 0;
+	ListCell   *cell;
+
+	foreach(cell, index->indextlist)
+	{
+		TargetEntry *tle = lfirst(cell);
+		VariableStatData colstats;
+		bool		isdefault;
+		double		coldistinct;
+
+		examine_variable(root, (Node *) tle->expr, 0, &colstats);
+		coldistinct = get_variable_numdistinct(&colstats, &isdefault);
+		product *= coldistinct;
+		ReleaseVariableStats(colstats);
+
+		seen++;
+		if (seen >= n_keys)
+			break;
+	}
+
+	/* cannot have more distinct values than tuples */
+	if (product > index->tuples)
+		product = index->tuples;
+
+	return product;
+}
+
+/*
+ * Estimate the selectivity of equality/IN restrictions on the leading
+ * n_keys columns of a compound index, using the statistics in "vardata".
+ *
+ * consts/permutation/in_clauses describe the tuples to probe (see
+ * initTupleIterator). Each probe tuple is formed with the tuple descriptor
+ * of vardata->atttype. When only a prefix of the index is covered
+ * (n_keys != nkeycolumns) the histogram-based prefix estimator is used,
+ * falling back to eqconst_selectivity() when it returns a negative value;
+ * a fully covered index always uses eqconst_selectivity().
+ *
+ * *usedEqSel (may be NULL) is set to true when the eqconst fallback was taken
+ * for a prefix probe; it is never reset here. *isWeakSelectivity is set when
+ * a prefix probe reported zero bins while the prefix has many distinct values.
+ *
+ * NOTE(review): the missed_vars parameter is not referenced in the body.
+ */
+static Selectivity
+estimate_selectivity_by_index(PlannerInfo *root, IndexOptInfo* index,
+ VariableStatData *vardata,
+ List *consts, List** missed_vars, int *permutation,
+ List *in_clauses, int n_keys,
+ bool *usedEqSel,
+ bool *isWeakSelectivity)
+{
+ TupleIterator it;
+ Selectivity sum = 0.0;
+ TypeCacheEntry *typentry;
+ Datum constant;
+ int nBins;
+ double nDistinct = 0.0;
+
+ *isWeakSelectivity = false;
+
+ if (n_keys < index->nkeycolumns )
+ {
+ double nd;
+ bool isdefault;
+
+ /*
+ * Distinct estimate for the prefix; when it exceeds the estimate taken
+ * from vardata, the geometric mean of both is used.
+ */
+ nDistinct = get_numdistinct(root, index, n_keys);
+ nd = get_variable_numdistinct(vardata, &isdefault);
+
+ if (isdefault == false && nDistinct > nd)
+ nDistinct = sqrt(nDistinct * nd);
+ }
+
+ /*
+ * Assume that two compound types are coherent, so we can use equality
+ * function from one type to compare it with other type. Use >= and <= range
+ * definition.
+ */
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ initTupleIterator(&it, consts, permutation, in_clauses);
+
+ /*
+ * Try to simplify the calculation: if all variants fall into a small number
+ * of histogram bins we don't need to check the tuples separately; it's
+ * enough to check the min and max tuples and compute the selectivity from
+ * the range of bins.
+ */
+
+ if (n_keys != index->nkeycolumns &&
+ it.n_variants > RANGE_IN_SELECTIVITY_THRESHOLD)
+ {
+ /* Datum 0 serves as "not set yet" marker for both bounds */
+ Datum constantMax = 0,
+ constantMin = 0;
+ FmgrInfo opprocLT, opprocGT;
+
+ fmgr_info(F_RECORD_GT, &opprocGT);
+ fmgr_info(F_RECORD_LT, &opprocLT);
+
+ /*
+ * Find min and max tuples
+ */
+ while(getTupleIterator(&it))
+ {
+ /* the tuple descriptor may have been dropped by cache invalidation */
+ if (typentry->tupDesc == NULL)
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ constant = HeapTupleGetDatum(heap_form_tuple(typentry->tupDesc,
+ it.values, it.isnull));
+
+ if (constantMax == 0 ||
+ DatumGetBool(FunctionCall2Coll(&opprocGT,
+ DEFAULT_COLLATION_OID,
+ constant, constantMax)))
+ {
+ constantMax = constant;
+ /* a new maximum cannot be a new minimum once min is set */
+ if (constantMin != 0)
+ continue;
+ }
+ if (constantMin == 0 ||
+ DatumGetBool(FunctionCall2Coll(&opprocLT,
+ DEFAULT_COLLATION_OID,
+ constant, constantMin)))
+ {
+ constantMin = constant;
+ }
+ }
+
+ sum = prefix_record_histogram_selectivity(vardata,
+ constantMin, constantMax,
+ n_keys, nDistinct,
+ &nBins);
+
+ if (sum > 0 && (nBins == it.n_variants || nBins <=2))
+ /*
+ * conclude that all tuples are in the same, rather small, range of
+ * bins
+ */
+ goto finish;
+
+ /*
+ * otherwise try the tuples one by one
+ */
+ sum = 0.0;
+ resetTupleIterator(&it);
+ }
+
+ while(getTupleIterator(&it))
+ {
+ Selectivity s;
+
+ /* the tuple descriptor may have been dropped by cache invalidation */
+ if (typentry->tupDesc == NULL)
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ constant = HeapTupleGetDatum(heap_form_tuple(typentry->tupDesc,
+ it.values, it.isnull));
+
+ if (n_keys != index->nkeycolumns)
+ {
+ s = prefix_record_histogram_selectivity(vardata,
+ constant, constant,
+ n_keys,
+ nDistinct,
+ &nBins);
+
+ if (nBins == 0 && n_keys < index->nkeycolumns && nDistinct > MULTICOLUMN_STATISTIC_FALLBACK_DISTINCT_THRESHOLD)
+ *isWeakSelectivity = true;
+
+ if (s < 0)
+ {
+ /*
+ * There is no histogram, fallback to single available option
+ */
+ s = eqconst_selectivity(typentry->eq_opr, DEFAULT_COLLATION_OID, vardata,
+ constant, false, true, false,
+ n_keys);
+
+ if (usedEqSel)
+ *usedEqSel = true;
+ }
+ }
+ else
+ {
+ s = eqconst_selectivity(typentry->eq_opr, DEFAULT_COLLATION_OID, vardata,
+ constant, false, true, false,
+ -1);
+ }
+
+ /* OR the probes together: sum = sum + s - sum*s */
+ sum += s - s*sum;
+ }
+
+finish:
+ /*
+ * NOTE(review): initTupleIterator() stores the threshold itself in
+ * it.n_variants when isExhaustive is set, so this factor evaluates to 1
+ * (and divides by zero when the threshold is 0). Confirm whether the real
+ * number of combinations was meant to be used here.
+ */
+ if (it.isExhaustive)
+ sum *= ((double)(it.n_variants))/EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD;
+
+ return sum;
+}
+
+/*
+ * Associates a Var node with the 0-based position of the clause it was
+ * taken from. Lookups compare the Var pointer, not its contents.
+ */
+typedef struct ClauseVarPair
+{
+ Var *var; /* Var node found in the clause */
+ int idx; /* position of that clause in the clause list */
+} ClauseVarPair;
+
+/*
+ * Append a new (var, idx) pair to the list pointed to by "cvp".
+ */
+static void
+appendCVP(List **cvp, Var *var, int idx)
+{
+	ClauseVarPair *pair = (ClauseVarPair *) palloc(sizeof(ClauseVarPair));
+
+	pair->idx = idx;
+	pair->var = var;
+
+	*cvp = lappend(*cvp, pair);
+}
+
+/*
+ * Return the clause position recorded for "var" (matched by pointer) in the
+ * first matching pair of "cvp", or -1 when the Var is not in the list.
+ */
+static int
+findCVP(List* cvp, Var* var)
+{
+	int			found = -1;
+	ListCell   *cell;
+
+	foreach(cell, cvp)
+	{
+		ClauseVarPair *pair = (ClauseVarPair *) lfirst(cell);
+
+		if (pair->var != var)
+			continue;
+
+		found = pair->idx;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Fill "vardata" with the statistics stored for the first attribute of the
+ * given index, typed as the index's row type.
+ *
+ * Returns false - leaving nothing to release - when the index has no row
+ * type or no statistics tuple exists. On success the caller must call
+ * ReleaseVariableStats() on the result.
+ */
+static bool
+initVarData(IndexOptInfo *index, VariableStatData *vardata)
+{
+	Relation	indexRel = index_open(index->indexoid, AccessShareLock);
+	Oid			rowtype = indexRel->rd_rel->reltype;
+
+	if (!OidIsValid(rowtype))
+	{
+		index_close(indexRel, AccessShareLock);
+
+		return false;
+	}
+
+	memset(vardata, 0, sizeof(*vardata));
+	vardata->rel = index->rel;
+	vardata->atttype = rowtype;
+	vardata->isunique = index->unique;
+	vardata->acl_ok = true;
+	vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+										  ObjectIdGetDatum(index->indexoid),
+										  Int16GetDatum(1),
+										  BoolGetDatum(false));
+	vardata->freefunc = ReleaseSysCache;
+
+	index_close(indexRel, AccessShareLock);
+
+	if (HeapTupleIsValid(vardata->statsTuple))
+	{
+		vardata->sslots = index->sslots;
+		return true;
+	}
+
+	ReleaseVariableStats(*vardata);
+	return false;
+}
+
+/*
+ * Mark the clauses behind "vars" as estimated.
+ *
+ * For every Var of "vars" that is not in "missed_vars", the clause position
+ * recorded in "pairs" (first pair with the same Var pointer) is added to
+ * *estimatedclauses. Every such Var is expected to have a pair.
+ *
+ * Returns the number of Vars handled that way.
+ */
+static int
+markEstimatedColumns(Bitmapset **estimatedclauses, List *pairs,
+					 List *vars, List *missed_vars)
+{
+	int			n_marked = 0;
+	ListCell   *vcell;
+
+	foreach(vcell, vars)
+	{
+		Var		   *var = (Var *) lfirst(vcell);
+		bool		matched = false;
+		ListCell   *pcell;
+
+		if (list_member_ptr(missed_vars, var))
+			continue;
+
+		foreach(pcell, pairs)
+		{
+			ClauseVarPair *pair = (ClauseVarPair *) lfirst(pcell);
+
+			if (pair->var != var)
+				continue;
+
+			*estimatedclauses = bms_add_member(*estimatedclauses, pair->idx);
+			n_marked++;
+			matched = true;
+			break;
+		}
+
+		Assert(matched);
+	}
+
+	return n_marked;
+}
+
+/*
+ * Helper macros for use_multicolumn_statistic(). They rely on that
+ * function's local variables: the two-element array "data" and the pointer
+ * "cur".
+ *
+ * SET_VARNOS(vn): remember relation number vn in the first free slot of
+ * data[] (at most two different relations are tracked; 0 is ignored).
+ * The argument is evaluated more than once.
+ */
+#define SET_VARNOS(vn) do { \
+ if ((vn) != 0) \
+ { \
+ if (data[0].varno == 0) \
+ data[0].varno = (vn); \
+ else if (data[1].varno == 0 && data[0].varno != (vn)) \
+ data[1].varno = (vn); \
+ } \
+} while(0)
+
+/* GET_RELBY_NO(vn): pointer to the data[] slot tracking vn, or NULL */
+#define GET_RELBY_NO(vn) \
+((data[0].varno == (vn) && (vn) != 0) ? &data[0] : ((data[1].varno == (vn) && (vn) != 0) ? &data[1] : NULL))
+
+/* SET_CURDATA(vn): point "cur" at the slot of vn; true if there is one */
+#define SET_CURDATA(vn) ((cur = GET_RELBY_NO(vn)) != NULL)
+
+/*
+ * Report whether any clause that is not yet in "estimatedclauses" is a
+ * ScalarArrayOpExpr.
+ *
+ * A RestrictInfo is unwrapped to its clause unless it carries an orclause.
+ */
+static bool
+hasSAOPRestriction(List *clauses, Bitmapset *estimatedclauses)
+{
+	ListCell   *cell;
+	int			next_idx = 0;
+
+	foreach(cell, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(cell);
+		int			idx = next_idx++;
+
+		if (bms_is_member(idx, estimatedclauses))
+			continue;
+
+		if (IsA(clause, RestrictInfo) &&
+			!((RestrictInfo *) clause)->orclause)
+			clause = (Node *) ((RestrictInfo *) clause)->clause;
+
+		if (IsA(clause, ScalarArrayOpExpr))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * use_multicolumn_statistic
+ *
+ * Check whether the not yet estimated clauses form
+ *  - equality / IN restrictions of several columns of one relation against
+ *    constants, covered by a compound index, and/or
+ *  - a multicolumn equi-join for which both sides have matching compound
+ *    indexes,
+ * and if so estimate them from the statistics of those indexes.
+ *
+ * At most two relations are considered per call (the first two relation
+ * numbers met while scanning the clauses).
+ *
+ * Outputs, valid only when true is returned:
+ *   *restrict_selectivity - product of the restriction estimates (1.0 if none)
+ *   *join_selectivity     - join estimate (1.0 if none)
+ *   *estimatedclauses     - gets the positions of all clauses covered here
+ *   *correlationKind      - CKSelf when both join sides are the same index
+ *                           or the same table, CKIndepend otherwise
+ *
+ * Returns true when at least one clause was estimated. The caller may call
+ * again to pick up clauses of further relations.
+ */
+static bool
+use_multicolumn_statistic(PlannerInfo *root, List *clauses, int varRelid,
+						  JoinType jointype, SpecialJoinInfo *sjinfo,
+						  Selectivity* restrict_selectivity, Selectivity *join_selectivity,
+						  Bitmapset **estimatedclauses, CorrelationKind
+						  *correlationKind)
+{
+	ListCell   *l;
+	List	   *var_clause_map = NIL;
+	List	   *missed_vars = NIL;
+	int			i;
+	int		   *permutation = NULL;
+	int			n_estimated = 0;
+	int			n_keys;
+	TypeCacheEntry *typentry;
+
+	/* per-relation working data; also used by SET_VARNOS and friends */
+	struct {
+		Index		varno;
+
+		List	   *restrictionColumns;
+		List	   *restrictionConsts;
+		List	   *in_clauses;
+		List	   *ineqRestrictionClauses;
+
+		List	   *joinColumns;
+
+		IndexOptInfo *index;
+		VariableStatData vardata;
+	} data[2], *cur;
+
+	if (list_length(clauses) < 1)
+		return false;
+
+	/*
+	 * For simple queries the default estimator is good enough, while the
+	 * multicolumn statistic could be too expensive because it has to look up
+	 * and decompress a lot of stat data (histograms of multicolumn indexes).
+	 */
+	if (root->join_rel_list == NIL &&
+		root->simple_rel_array_size <= 2 /* 0th is always empty */ &&
+		/* list_length(clauses) < 4 && */
+		hasSAOPRestriction(clauses, *estimatedclauses) == false)
+		return false;
+
+	*correlationKind = CKIndepend;
+	memset(data, 0, sizeof(data));
+
+	/*
+	 * Pass 1: sort the clauses into per-relation restriction and join
+	 * column lists.
+	 */
+	i = -1;
+	foreach(l, clauses)
+	{
+		Node	   *clause = (Node *) lfirst(l);
+		RestrictInfo *rinfo = NULL;
+		OpExpr	   *opclause = NULL;
+
+		i++;
+
+		/* do not use already estimated clauses */
+		if (bms_is_member(i, *estimatedclauses))
+			continue;
+
+		if (IsA(clause, RestrictInfo))
+		{
+			rinfo = (RestrictInfo *) clause;
+			if (!rinfo->orclause)
+				clause = (Node *) rinfo->clause;
+		}
+		if (IsA(clause, OpExpr))
+			opclause = (OpExpr *) clause;
+
+		if (IsA(clause, Var))	/* boolean variable */
+		{
+			Var		   *var1 = (Var *) clause;
+
+			SET_VARNOS(var1->varno);
+			if (SET_CURDATA(var1->varno))
+			{
+				cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+				appendCVP(&var_clause_map, var1, i);
+				cur->restrictionConsts = lappend(cur->restrictionConsts,
+												 makeBoolConst(true, false));
+			}
+		}
+		else if (IsA(clause, BoolExpr) && ((BoolExpr *) clause)->boolop == NOT_EXPR)	/* (NOT bool_expr) */
+		{
+			Node	   *arg1 = (Node *) linitial(((BoolExpr *) clause)->args);
+			Var		   *var1 = get_var(arg1);
+
+			if (var1 == NULL)
+				continue;
+
+			SET_VARNOS(var1->varno);
+			if (SET_CURDATA(var1->varno))
+			{
+				cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+				appendCVP(&var_clause_map, var1, i);
+				cur->restrictionConsts = lappend(cur->restrictionConsts,
+												 makeBoolConst(false, false));
+			}
+		}
+		else if (IsA(clause, ScalarArrayOpExpr))
+		{
+			ScalarArrayOpExpr *in = (ScalarArrayOpExpr *) clause;
+			Var		   *var1;
+			Node	   *arg2;
+			InArrayClause *iac;
+
+			/* check the argument count before touching the second one */
+			if (!in->useOr || list_length(in->args) != 2)
+				continue;
+
+			var1 = get_var((Node *) linitial(in->args));
+			arg2 = (Node *) lsecond(in->args);
+
+			/*
+			 * Only "var = ANY (non-null array constant)" is usable. A NULL
+			 * array constant has no array to take apart, so leave such a
+			 * clause to the generic estimator.
+			 */
+			if (get_oprrest(in->opno) != F_EQSEL
+				|| var1 == NULL
+				|| !IsA(arg2, Const)
+				|| ((Const *) arg2)->constisnull)
+			{
+				continue;
+			}
+
+			SET_VARNOS(var1->varno);
+			if (SET_CURDATA(var1->varno))
+			{
+				cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+				appendCVP(&var_clause_map, var1, i);
+				cur->restrictionConsts = lappend(cur->restrictionConsts, arg2);
+
+				iac = (InArrayClause *) palloc(sizeof(InArrayClause));
+				/* detoast, the datum may be stored in compressed form */
+				iac->array = DatumGetArrayTypeP(((Const *) arg2)->constvalue);
+				iac->index = list_length(cur->restrictionConsts) - 1;
+
+				cur->in_clauses = lappend(cur->in_clauses, iac);
+			}
+		}
+		else if (opclause
+				 && list_length(opclause->args) == 2)
+		{
+			int			oprrest = get_oprrest(opclause->opno);
+			Node	   *arg1 = (Node *) linitial(opclause->args);
+			Node	   *arg2 = (Node *) lsecond(opclause->args);
+			Var		   *var1 = get_var(arg1);
+			Var		   *var2 = get_var(arg2);
+
+			if (oprrest == F_EQSEL && treat_as_join_clause(root, (Node *) opclause, NULL, varRelid, sjinfo))
+			{
+				/* join clause: need Var = Var of the same type */
+				if (var1 == NULL || var2 == NULL || var1->vartype != var2->vartype)
+					continue;
+
+				SET_VARNOS(var1->varno);
+				SET_VARNOS(var2->varno);
+
+				/* keep the column lists of both sides in the same order */
+				if (var1->varno == data[0].varno && var2->varno == data[1].varno)
+				{
+					data[0].joinColumns = lappend(data[0].joinColumns, var1);
+					appendCVP(&var_clause_map, var1, i);
+					data[1].joinColumns = lappend(data[1].joinColumns, var2);
+					appendCVP(&var_clause_map, var2, i);
+				}
+				else if (var1->varno == data[1].varno && var2->varno == data[0].varno)
+				{
+					data[0].joinColumns = lappend(data[0].joinColumns, var2);
+					appendCVP(&var_clause_map, var2, i);
+					data[1].joinColumns = lappend(data[1].joinColumns, var1);
+					appendCVP(&var_clause_map, var1, i);
+				}
+			}
+			else	/* Estimate selectivity for a restriction clause. */
+			{
+				/*
+				 * Give up unless exactly one side is a Var, i.e. skip
+				 * Var-op-Var and expr-op-expr clauses.
+				 */
+				if (((var1 == NULL) == (var2 == NULL)))
+					continue;
+
+				if (var1 == NULL)
+				{
+					/* swap var1 and var2 */
+					var1 = var2;
+					arg2 = arg1;
+				}
+
+				SET_VARNOS(var1->varno);
+
+				if (SET_CURDATA(var1->varno))
+				{
+					if ((rinfo && is_pseudo_constant_clause_relids(arg2, rinfo->right_relids))
+						|| (!rinfo && NumRelids(root, clause) == 1 && is_pseudo_constant_clause(arg2)))
+					{
+						/* Restriction clause with a pseudoconstant. */
+						Node	   *const_val = estimate_expression_value(root, arg2);
+
+						if (IsA(const_val, Const))
+						{
+							switch (oprrest)
+							{
+								case F_EQSEL:
+									cur->restrictionColumns =
+										lappend(cur->restrictionColumns, var1);
+									cur->restrictionConsts =
+										lappend(cur->restrictionConsts, const_val);
+									appendCVP(&var_clause_map, var1, i);
+									break;
+								case F_SCALARGTSEL:
+								case F_SCALARGESEL:
+								case F_SCALARLTSEL:
+								case F_SCALARLESEL:
+									/*
+									 * We do not consider range predicates now,
+									 * but we can mark them as estimated
+									 * if their variables are covered by index.
+									 */
+									appendCVP(&var_clause_map, var1, i);
+									cur->ineqRestrictionClauses =
+										lappend(cur->ineqRestrictionClauses, var1);
+									break;
+								default:
+									break;
+							}
+						}
+					}
+
+				}
+			}
+		}
+		/* else just skip clause to work with it later in caller */
+	}
+
+	*restrict_selectivity = 1.0;
+	*join_selectivity = 1.0;
+
+	/*
+	 * First, try to estimate selectivity by restrictions
+	 */
+	for (i = 0; i < lengthof(data); i++)
+	{
+		cur = &data[i];
+
+		/* compute restriction clauses if applicable */
+		if (cur->varno == 0 || list_length(cur->restrictionColumns) < 1)
+			continue;
+
+		cur->index = locate_inner_multicolumn_index(
+			root, cur->varno, cur->restrictionColumns,
+			list_length(clauses), &permutation, &missed_vars, &n_keys);
+
+		if (cur->index && n_keys > 1 &&
+			initVarData(cur->index, &cur->vardata))
+		{
+			bool		usedEqSel = false;
+			double		s;
+			bool		usedWeakSelectivity;
+
+			s = estimate_selectivity_by_index(
+				root, cur->index, &cur->vardata,
+				cur->restrictionConsts, &missed_vars, permutation,
+				cur->in_clauses, n_keys, &usedEqSel, &usedWeakSelectivity);
+
+			if (usedWeakSelectivity)
+			{
+				/*
+				 * The index based estimate is not trustworthy: compare it
+				 * with the plain product of the per-clause selectivities and
+				 * take the smaller one.
+				 */
+				double		simpleSelectivity = 1.0;
+
+				foreach(l, cur->restrictionColumns)
+				{
+					Node	   *clause;
+					int			clauseIndex;
+					double		clauseSelectivity;
+
+					clauseIndex = findCVP(var_clause_map, (Var *) lfirst(l));
+					if (clauseIndex < 0)
+					{
+						simpleSelectivity = -1;
+						break;
+					}
+
+					clause = (Node *) list_nth(clauses, clauseIndex);
+
+					clauseSelectivity = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo, false);
+
+					simpleSelectivity *= clauseSelectivity;
+				}
+
+				if (simpleSelectivity > 0 && simpleSelectivity < s)
+				{
+					s = simpleSelectivity;
+				}
+			}
+
+			*restrict_selectivity *= s;
+
+			ReleaseVariableStats(cur->vardata);
+
+			/*
+			 * mark inequality clauses as used, see estimate_selectivity_by_index()
+			 */
+			if (usedEqSel)
+			{
+				foreach(l, cur->ineqRestrictionClauses)
+				{
+					Var		   *var = (Var *) lfirst(l);
+					int			k;
+
+					/*
+					 * Note, restrictionColumns will contain extra columns!
+					 *
+					 * A separate counter is required here: "i" is the
+					 * counter of the enclosing loop over data[] and must
+					 * not be modified, or the remaining relation would be
+					 * skipped.
+					 */
+					for (k = 0; k < cur->index->nkeycolumns; k++)
+						if (cur->index->indexkeys[k] == var->varattno)
+							cur->restrictionColumns =
+								lappend(cur->restrictionColumns, var);
+				}
+			}
+
+			n_estimated +=
+				markEstimatedColumns(estimatedclauses, var_clause_map,
+									 cur->restrictionColumns, missed_vars);
+		}
+
+		if (permutation)
+		{
+			pfree(permutation);
+			permutation = NULL;
+		}
+	}
+
+	/* Deal with join clauses, if possible */
+	if (list_length(data[0].joinColumns) < 1)
+		goto cleanup;
+
+	data[0].index = locate_inner_multicolumn_index(
+		root,
+		data[0].varno, data[0].joinColumns,
+		list_length(clauses), &permutation, &missed_vars, &n_keys);
+
+	if (!data[0].index || n_keys < 1)
+		goto cleanup;
+
+	Assert(permutation != NULL);
+	Assert(data[1].varno != 0);
+	Assert(list_length(data[0].joinColumns) == list_length(data[1].joinColumns));
+
+	/* the other side must have an index with the same column layout */
+	data[1].index = locate_outer_multicolumn_index(
+		root,
+		data[1].varno, data[1].joinColumns,
+		permutation);
+
+	if (!data[1].index)
+		goto cleanup;
+
+	if (!initVarData(data[0].index, &data[0].vardata))
+		goto cleanup;
+
+	if (!initVarData(data[1].index, &data[1].vardata))
+	{
+		ReleaseVariableStats(data[0].vardata);
+		goto cleanup;
+	}
+
+	typentry = lookup_type_cache(data[0].vardata.atttype, TYPECACHE_EQ_OPR);
+	*join_selectivity *= eqjoin_selectivity(root, typentry->eq_opr,
+											DEFAULT_COLLATION_OID,
+											&data[0].vardata, &data[1].vardata,
+											sjinfo, n_keys);
+
+	/* for self join */
+	if (data[0].index->indexoid == data[1].index->indexoid)
+		*correlationKind = CKSelf;
+	else
+	{
+		RangeTblEntry *lrte = planner_rt_fetch(data[0].index->rel->relid, root),
+				   *rrte = planner_rt_fetch(data[1].index->rel->relid, root);
+
+		if (lrte->relid == rrte->relid)
+			*correlationKind = CKSelf;
+	}
+
+	for (i = 0; i < lengthof(data); i++)
+		ReleaseVariableStats(data[i].vardata);
+
+	n_estimated +=
+		markEstimatedColumns(estimatedclauses, var_clause_map,
+							 data[0].joinColumns, missed_vars);
+
+cleanup:
+	if (permutation)
+		pfree(permutation);
+
+	return n_estimated != 0;
+}
+
/****************************************************************************
* ROUTINES TO COMPUTE SELECTIVITIES
****************************************************************************/
@@ -96,6 +1204,54 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
* Of course this is all very dependent on the behavior of the inequality
* selectivity functions; perhaps some day we can generalize the approach.
*/
+
+/*
+ * Fold selectivity "sel" of kind "ck" into the accumulator array "s", which
+ * is indexed by CorrelationKind:
+ *  - CKRestrict:            multiplied into s[CKRestrict] only
+ *  - CKIndepend:            multiplied into s[CKIndepend]
+ *  - CKSelf / CKLikelySelf: multiplied into s[CKMul] and s[CKIndepend];
+ *                           s[ck] keeps the smallest value seen
+ * Any other kind raises an error.
+ */
+static void
+appendSelectivityRes(Selectivity s[5], Selectivity sel, CorrelationKind ck)
+{
+	if (ck == CKRestrict)
+	{
+		s[CKRestrict] *= sel;
+		return;
+	}
+
+	if (ck == CKSelf || ck == CKLikelySelf)
+	{
+		s[CKMul] *= sel;
+		if (s[ck] > sel)
+			s[ck] = sel;
+		/* correlated clauses are counted in the independent product too */
+		s[CKIndepend] *= sel;
+		return;
+	}
+
+	if (ck == CKIndepend)
+	{
+		s[CKIndepend] *= sel;
+		return;
+	}
+
+	elog(ERROR, "unknown selectivity kind: %d", ck);
+}
+
+/*
+ * Combine the accumulators filled by appendSelectivityRes() into the final
+ * selectivity.
+ *
+ * The base value is s[CKRestrict] * s[CKIndepend]. If s[CKIndepend] differs
+ * from s[CKMul], i.e. independent join clauses were seen besides correlated
+ * ones, the base value is additionally multiplied by s[CKMul]. Otherwise the
+ * result is moved towards the smallest correlated selectivity: a quarter of
+ * the distance for CKLikelySelf, the whole distance for CKSelf.
+ */
+static Selectivity
+finalizeSelectivityRes(Selectivity s[5])
+{
+	Selectivity result = s[CKRestrict] * s[CKIndepend];
+
+	/* we have both independ and correlated - fallback */
+	if (s[CKIndepend] != s[CKMul])
+		return result * s[CKMul];
+
+	/* we have only correlated join clauses */
+	if (s[CKLikelySelf] != 1.0 && result < s[CKLikelySelf])
+		result = result + (s[CKLikelySelf] - result) * 0.25;
+
+	if (s[CKSelf] != 1.0 && result < s[CKSelf])
+		result = result + (s[CKSelf] - result) * 1.0;
+
+	return result;
+}
+
Selectivity
clauselist_selectivity(PlannerInfo *root,
List *clauses,
@@ -121,12 +1277,14 @@ clauselist_selectivity_ext(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
bool use_extended_stats)
{
- Selectivity s1 = 1.0;
+ Selectivity s[5 /* per CorrelationKind */] = {1.0, 1.0, 1.0, 1.0, 1.0};
+ Selectivity s2 = 1.0, s3 = 1.0;
RelOptInfo *rel;
Bitmapset *estimatedclauses = NULL;
RangeQueryClause *rqlist = NULL;
ListCell *l;
int listidx;
+ CorrelationKind ck;
/*
* If there's exactly one clause, just go directly to
@@ -150,9 +1308,23 @@ clauselist_selectivity_ext(PlannerInfo *root,
* 'estimatedclauses' is populated with the 0-based list position
* index of clauses estimated here, and that should be ignored below.
*/
- s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+ s2 = statext_clauselist_selectivity(root, clauses, varRelid,
jointype, sjinfo, rel,
&estimatedclauses, false);
+ appendSelectivityRes(s, s2, CKRestrict);
+ }
+
+ /*
+ * Check if join conjuncts corresponds to some compound indexes on left and
+ * right joined relations or indexed columns of one relation is compared
+ * with constant values. In this case selectivity of join can be calculated
+ * based on statistic of this compound index.
+ */
+ while(use_multicolumn_statistic(root, clauses, varRelid, jointype, sjinfo,
+ &s2, &s3, &estimatedclauses, &ck))
+ {
+ appendSelectivityRes(s, s2, CKRestrict);
+ appendSelectivityRes(s, s3, ck);
}
/*
@@ -168,7 +1340,6 @@ clauselist_selectivity_ext(PlannerInfo *root,
{
Node *clause = (Node *) lfirst(l);
RestrictInfo *rinfo;
- Selectivity s2;
listidx++;
@@ -194,7 +1365,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
rinfo = (RestrictInfo *) clause;
if (rinfo->pseudoconstant)
{
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, CKRestrict);
continue;
}
clause = (Node *) rinfo->clause;
@@ -208,12 +1379,17 @@ clauselist_selectivity_ext(PlannerInfo *root,
* the simple way we are expecting.) Most of the tests here can be
* done more efficiently with rinfo than without.
*/
+ ck = treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo) ?
+ CKIndepend : CKRestrict;
if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
{
OpExpr *expr = (OpExpr *) clause;
bool varonleft = true;
bool ok;
+ if (ck == CKIndepend)
+ ck = get_correlation_kind(root, varRelid, expr);
+
if (rinfo)
{
ok = (rinfo->num_base_rels == 1) &&
@@ -252,7 +1428,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
break;
default:
/* Just merge the selectivity in generically */
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, ck);
break;
}
continue; /* drop to loop bottom */
@@ -260,7 +1436,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
}
/* Not the right form, so treat it generically. */
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, ck);
}
/*
@@ -273,7 +1449,6 @@ clauselist_selectivity_ext(PlannerInfo *root,
if (rqlist->have_lobound && rqlist->have_hibound)
{
/* Successfully matched a pair of range clauses */
- Selectivity s2;
/*
* Exact equality to the default value probably means the
@@ -322,15 +1497,13 @@ clauselist_selectivity_ext(PlannerInfo *root,
}
}
/* Merge in the selectivity of the pair of clauses */
- s1 *= s2;
+ appendSelectivityRes(s, s2, CKRestrict);
}
else
{
/* Only found one of a pair, merge it in generically */
- if (rqlist->have_lobound)
- s1 *= rqlist->lobound;
- else
- s1 *= rqlist->hibound;
+ appendSelectivityRes(s, (rqlist->have_lobound) ? rqlist->lobound :
+ rqlist->hibound, CKRestrict);
}
/* release storage and advance */
rqnext = rqlist->next;
@@ -338,7 +1511,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
rqlist = rqnext;
}
- return s1;
+ return finalizeSelectivityRes(s);
}
/*
@@ -624,6 +1797,137 @@ treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
}
}
+typedef struct RangeTblEntryContext { /* walker state shared by find_rte_walker and find_single_rte */
+ RangeTblEntry *rte; /* first RTE_RELATION entry seen by find_rte_walker */
+ int count; /* incremented by find_rte_walker for the first relation, for a
+             * relation with a different relid, and for an unsupported RTE
+             * kind */
+} RangeTblEntryContext;
+/*
+ * find_rte_walker
+ *	  Tree walker that looks for the relation(s) underneath a range table
+ *	  entry.
+ *
+ * The first RTE_RELATION entry met is stored in context->rte and
+ * context->count becomes 1.  A later RTE_RELATION entry with a different
+ * relid, or any entry whose kind is not relation, subquery, join or CTE,
+ * increments the counter and stops the walk by returning true.  Query nodes
+ * are descended into with QTW_EXAMINE_RTES_BEFORE; everything else goes
+ * through expression_tree_walker.
+ */
+static bool
+find_rte_walker(Node *node, RangeTblEntryContext *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (context->count > 1)
+ return true; /* skip rest */
+
+ if (IsA(node, RangeTblEntry)) {
+ RangeTblEntry *rte = (RangeTblEntry*)node;
+
+ if (rte->rtekind == RTE_RELATION)
+ {
+ if (context->count == 0)
+ {
+ context->count++;
+ context->rte=rte;
+ }
+ else if (rte->relid != context->rte->relid)
+ {
+ context->count++;
+ return true; /* more than one relation in subtree */
+ }
+ }
+ else if (!(rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_JOIN ||
+ rte->rtekind == RTE_CTE))
+ {
+ context->count++;
+ return true; /* unsupported RTE kind: stop the walk */
+ }
+
+ return false; /* allow range_table_walker to continue */
+ }
+
+ if (IsA(node, Query))
+ return query_tree_walker((Query *) node, find_rte_walker,
+ (void *) context, QTW_EXAMINE_RTES_BEFORE);
+
+ return expression_tree_walker(node, find_rte_walker, (void *) context);
+}
+
+/*
+ * find_single_rte
+ *	  Walk 'node' with find_rte_walker and hand back the relation entry it
+ *	  recorded, provided the walk ended with a count of exactly one;
+ *	  otherwise return NULL.
+ */
+static RangeTblEntry *
+find_single_rte(RangeTblEntry *node)
+{
+	RangeTblEntryContext found = {NULL, 0};
+	List	   *one_entry = list_make1(node);
+
+	/* the walker's boolean result is not needed, only what it recorded */
+	(void) range_table_walker(one_entry, find_rte_walker,
+							  (void *) &found, QTW_EXAMINE_RTES_BEFORE);
+
+	if (found.count != 1)
+		return NULL;
+
+	return found.rte;
+}
+
+/* True when both RTEs are of kind RTE_RELATION and carry the same relid */
+#define IsSameRelationRTE(a, b) ( \
+	(a)->rtekind == RTE_RELATION && \
+	(b)->rtekind == RTE_RELATION && \
+	(a)->relid == (b)->relid \
+)
+
+
+/*
+ * get_correlation_kind
+ *	  Classify a two-argument operator clause: is it any self join, or a join
+ *	  with an aggregation (or other subquery) over the same table?
+ *
+ * Returns CKIndepend when varRelid is nonzero, when the operator's
+ * restriction estimator is not F_EQSEL, when either argument does not
+ * reference exactly one range table entry, or when both arguments reference
+ * the same entry.
+ *
+ * Returns CKSelf when both entries are plain relations with the same relid
+ * and get_var() yields Vars with equal varattno on both sides; CKLikelySelf
+ * when the relids match but the Vars differ or could not be extracted, or
+ * when find_single_rte() reduces both entries to the same relation.
+ * Everything else is CKIndepend.
+ */
+
+static CorrelationKind
+get_correlation_kind(PlannerInfo *root, int varRelid, OpExpr* expr)
+{
+ Node *left_arg, *right_arg;
+ Relids left_varnos, right_varnos;
+ int left_varno, right_varno;
+ RangeTblEntry *left_rte, *right_rte;
+
+ if (varRelid != 0)
+ /* We consider only the case of joins, not restriction mode */
+ return CKIndepend;
+
+ /* Check if it is an equality comparison */
+ if (get_oprrest(expr->opno) != F_EQSEL)
+ return CKIndepend;
+
+ left_arg = linitial(expr->args);
+ right_arg = lsecond(expr->args);
+
+ /*
+ * Check that each side references exactly one range table entry and
+ * that the two entries differ
+ */
+ left_varnos = pull_varnos(root, left_arg);
+ right_varnos = pull_varnos(root, right_arg);
+ if (!bms_get_singleton_member(left_varnos, &left_varno) ||
+ !bms_get_singleton_member(right_varnos, &right_varno) ||
+ left_varno == right_varno)
+ return CKIndepend;
+
+ left_rte = planner_rt_fetch(left_varno, root);
+ right_rte = planner_rt_fetch(right_varno, root);
+
+ if (IsSameRelationRTE(left_rte, right_rte))
+ {
+ Var *lvar = get_var(left_arg),
+ *rvar = get_var(right_arg);
+
+ /* self join detected, check if it is a simple a = b clause */
+ if (lvar == NULL || rvar == NULL)
+ return CKLikelySelf;
+ return (lvar->varattno == rvar->varattno) ?
+ CKSelf : CKLikelySelf;
+ }
+
+ /* Not two plain relations: look for a single relation beneath each entry */
+ if ((left_rte = find_single_rte(left_rte)) == NULL)
+ return CKIndepend;
+ if ((right_rte = find_single_rte(right_rte)) == NULL)
+ return CKIndepend;
+
+ if (IsSameRelationRTE(left_rte, right_rte))
+ {
+ /* self join detected, but over some transformation which cannot be
+ * flattened */
+ return CKLikelySelf;
+ }
+
+ return CKIndepend;
+}
/*
* clause_selectivity -
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a290a3e66b6..45f334f75a9 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -168,6 +168,7 @@ typedef struct
{
PlannerInfo *root;
QualCost total;
+ bool calccoalesce;
} cost_qual_eval_context;
static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses);
@@ -330,7 +331,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
cpu_run_cost = cpu_per_tuple * baserel->tuples;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;
- cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
+ cpu_run_cost += path->pathtarget->cost.per_tuple * ((!param_info && baserel->rowsUnclamped > 0 && baserel->rows <= 1) ? baserel->rowsUnclamped : path->rows);
/* Adjust costing for parallelism, if used. */
if (path->parallel_workers > 0)
@@ -805,7 +806,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
cost_qual_eval(&qpqual_cost, qpquals, root);
startup_cost += qpqual_cost.startup;
- cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + 2.0*qpqual_cost.per_tuple;
cpu_run_cost += cpu_per_tuple * tuples_fetched;
@@ -1036,6 +1037,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
double spc_seq_page_cost,
spc_random_page_cost;
double T;
+ double rows_est;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo));
@@ -1088,7 +1090,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
- cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + 2.0*qpqual_cost.per_tuple;
cpu_run_cost = cpu_per_tuple * tuples_fetched;
/* Adjust costing for parallelism, if used. */
@@ -1102,12 +1104,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
path->rows = clamp_row_est(path->rows / parallel_divisor);
}
-
run_cost += cpu_run_cost;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;
- run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ if (!param_info && baserel->rowsUnclamped > 0 && baserel->rows <= 1)
+ {
+ rows_est = baserel->rowsUnclamped;
+ if (path->parallel_workers > 0)
+ rows_est /= get_parallel_divisor(path);
+ }
+ else
+ rows_est = path->rows;
+
+ run_cost += path->pathtarget->cost.per_tuple * rows_est;
path->disabled_nodes = enable_bitmapscan ? 0 : 1;
path->startup_cost = startup_cost;
@@ -1858,6 +1869,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
rterm->pathtarget->width);
}
+/*
+ * is_fake_var
+ *	  Report whether 'expr', after looking through one RelabelType node, is
+ *	  a Var whose varno is 0.
+ *
+ * Such "fake" Vars are said to be produced by generate_append_tlist(); the
+ * group-count estimation used by the sort costing is not expected to cope
+ * with them, so callers test for them first.
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	Expr	   *inner = expr;
+
+	if (IsA(inner, RelabelType))
+		inner = (Expr *) ((RelabelType *) inner)->arg;
+
+	if (!IsA(inner, Var))
+		return false;
+
+	return ((Var *) inner)->varno == 0;
+}
+
+/*
+ * get_width_cost_multiplier
+ *	  Scale factor, in cpu_operator_cost units, expressing how much more
+ *	  expensive a comparison gets as the compared value gets wider.
+ *
+ * The width comes from the per-attribute widths stored in the Var's
+ * RelOptInfo when a positive one is available there, and from
+ * get_typavgwidth() for the expression's type otherwise.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+	Node	   *arg = (Node *) expr;
+	double		avg_width = -1.0;	/* negative means "not known yet" */
+
+	/* look through one level of relabeling */
+	if (IsA(arg, RelabelType))
+		arg = (Node *) ((RelabelType *) arg)->arg;
+
+	/* Prefer the width recorded for the column in its relation, if any */
+	if (IsA(arg, Var))
+	{
+		Var		   *var = (Var *) arg;
+		RelOptInfo *rel = NULL;
+
+		if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+			rel = root->simple_rel_array[var->varno];
+
+		if (rel != NULL &&
+			rel->min_attr <= var->varattno &&
+			var->varattno <= rel->max_attr &&
+			rel->attr_widths[var->varattno - rel->min_attr] > 0)
+			avg_width = rel->attr_widths[var->varattno - rel->min_attr];
+	}
+
+	/* Nothing usable found above: settle for the type's average width */
+	if (avg_width < 0.0)
+		avg_width = get_typavgwidth(exprType(arg), exprTypmod(arg));
+
+	/*
+	 * Values travel as Datums, so anything no wider than a Datum is charged
+	 * the baseline cost of one unit.
+	 */
+	if (avg_width <= sizeof(Datum))
+		return 1.0;
+
+	/*
+	 * For wider values the cost grows only logarithmically, since all we
+	 * know is the average width of the column: one eighth of log2 of the
+	 * width measured in Datum-sized units is added to the baseline.
+	 */
+	return 1.0 + 0.125 * LOG2(avg_width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ * compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which are data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimate of the number of
+ * comparisons is:
+ *
+ * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same because we don't have any estimate of the
+ * individual group sizes; we only know the estimated number of groups
+ * (distinct values). In that case, the formula becomes:
+ *
+ * N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparison cost associated with column k; the total
+ * cost is then
+ *
+ * N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, we fall back to a
+ * simple comparison cost estimate.
+ *
+ * 'pathkeys' lists the sort keys; NIL selects the fallback estimate.
+ * 'nPresortedKeys' is the number of leading pathkeys for which no
+ * comparison-count term is added (their per-key cost is still accumulated).
+ * 'comparison_cost' is added once to the per-tuple cost.
+ * 'tuples' is the number of input tuples; the result is the per-tuple cost
+ * multiplied by it.
+ * 'output_tuples' is only consulted when 'heapSort' is true.
+ * 'heapSort' selects bounded heap-sort costing.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+ Cost comparison_cost, double tuples, double output_tuples,
+ bool heapSort)
+{
+ Cost per_tuple_cost = 0.0;
+ ListCell *lc;
+ List *pathkeyExprs = NIL;
+ double tuplesPerPrevGroup = tuples;
+ double totalFuncCost = 1.0;
+ bool has_fake_var = false;
+ int i = 0;
+ Oid prev_datatype = InvalidOid;
+ List *cache_varinfos = NIL;
+
+ /* fallback if pathkeys is unknown */
+ if (list_length(pathkeys) == 0)
+ {
+ /*
+ * If we use a bounded heap-sort keeping just K tuples in memory, the
+ * total number of tuple comparisons is N log2 K; but the constant
+ * factor is a bit higher than for quicksort. Tweak it so that the
+ * cost curve is continuous at the crossover point.
+ */
+ output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+ per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+ /* add cost provided by caller */
+ per_tuple_cost += comparison_cost;
+
+ return per_tuple_cost * tuples;
+ }
+
+ /*
+ * Computing total cost of sorting takes into account the per-column
+ * comparison function cost. We try to compute the needed number of
+ * comparisons per column.
+ */
+ foreach(lc, pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ EquivalenceMember *em;
+ double nGroups,
+ correctedNGroups;
+ Cost funcCost = 1.0;
+
+ /*
+ * We believe that equivalence members aren't very different, so, to
+ * estimate cost we consider just the first member.
+ */
+ em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+ if (em->em_datatype != InvalidOid)
+ {
+ /* do not lookup funcCost if the data type is the same */
+ if (prev_datatype != em->em_datatype)
+ {
+ Oid sortop;
+ QualCost cost;
+
+ sortop = get_opfamily_member_for_cmptype(pathkey->pk_opfamily,
+ em->em_datatype, em->em_datatype,
+ pathkey->pk_cmptype);
+
+ cost.startup = 0;
+ cost.per_tuple = 0;
+ add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+ /*
+ * add_function_cost returns the product of cpu_operator_cost
+ * and procost, but we need just procost, so undo that.
+ */
+ funcCost = cost.per_tuple / cpu_operator_cost;
+
+ prev_datatype = em->em_datatype;
+ }
+ }
+
+ /* factor in the width of the values in this column */
+ funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+ /* now we have per-key cost, so add to the running total */
+ totalFuncCost += funcCost;
+
+ /* remember if we have found a fake Var in pathkeys */
+ has_fake_var |= is_fake_var(em->em_expr);
+ pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+ /*
+ * We need to calculate the number of comparisons for this column,
+ * which requires knowing the group size. So we estimate the number of
+ * groups by calling estimate_num_groups_incremental(), which
+ * estimates the group size for "new" pathkeys.
+ *
+ * Note: estimate_num_groups_incremental does not handle fake Vars, so
+ * use a default estimate otherwise.
+ */
+ if (!has_fake_var)
+ nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+ tuplesPerPrevGroup, NULL, NULL,
+ &cache_varinfos,
+ list_length(pathkeyExprs) - 1);
+ else if (tuples > 4.0)
+
+ /*
+ * Use geometric mean as estimation if there are no stats.
+ *
+ * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+ * a single column, but here we're dealing with multiple columns.
+ */
+ nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+ else
+ nGroups = tuples;
+
+ /*
+ * Presorted keys are not considered in the cost above, but we still
+ * do have to compare them in the qsort comparator. So make sure to
+ * factor in the cost in that case.
+ */
+ if (i >= nPresortedKeys)
+ {
+ if (heapSort)
+ {
+ /*
+ * have to keep at least one group, and a multiple of group
+ * size
+ */
+ correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+ }
+ else
+ /* all groups in the input */
+ correctedNGroups = nGroups;
+
+ correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+ per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+ }
+
+ i++;
+
+ /*
+ * Once we get a single-row group, it means tuples in the group are
+ * unique and we can skip all remaining columns.
+ */
+ if (tuplesPerPrevGroup <= nGroups)
+ break;
+
+ /*
+ * Uniform distributions with all groups being of the same size are
+ * the best case, with nice smooth behavior. Real-world distributions
+ * tend not to be uniform, though, and we don't have any reliable
+ * easy-to-use information. As a basic defense against skewed
+ * distributions, we use a 1.5 factor to make the expected group a bit
+ * larger, but we need to be careful not to make the group larger than
+ * in the preceding step.
+ */
+ tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+ ceil(1.5 * tuplesPerPrevGroup / nGroups));
+ }
+
+ list_free(pathkeyExprs);
+
+ /* per_tuple_cost is in cpu_operator_cost units */
+ per_tuple_cost *= cpu_operator_cost;
+
+ /*
+ * According to "Introduction to algorithms", Thomas H. Cormen, Charles
+ * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, the quicksort
+ * estimation formula has an additional term proportional to the number of
+ * tuples (see Chapter 8.2 and Theorem 4.1). That affects cases with a low
+ * number of tuples, approximately less than 1e4. We could implement it as
+ * an additional multiplier under the logarithm, but we use a bit more
+ * complex formula which takes into account the number of unique tuples
+ * and it's not clear how to combine the multiplier with the number of
+ * groups. Estimate it as 10 cpu_operator_cost units.
+ */
+ per_tuple_cost += 10 * cpu_operator_cost;
+
+ per_tuple_cost += comparison_cost;
+
+ return tuples * per_tuple_cost;
+}
+
+/*
+ * cost_sort_estimate
+ *	  Convenience entry point: CPU sort cost from compute_cpu_sort_cost()
+ *	  with no extra per-comparison cost, output size equal to input size and
+ *	  heap-sort costing turned off.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	Cost		cpu_cost;
+
+	cpu_cost = compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+									 0, /* comparison_cost */
+									 tuples,	/* input tuples */
+									 tuples,	/* output_tuples */
+									 false);	/* heapSort */
+
+	return cpu_cost;
+}
+
/*
* cost_tuplesort
* Determines and returns the cost of sorting a relation using tuplesort,
@@ -1874,7 +2206,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- * cpu = comparison_cost * t * log2(t)
+ * and cpu cost (computed by compute_cpu_sort_cost()).
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1893,9 +2225,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* 'comparison_cost' is the extra cost per comparison, if any
* 'sort_mem' is the number of kilobytes of work memory allowed for the sort
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
+ * be added by caller later
*/
static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
double tuples, int width,
Cost comparison_cost, int sort_mem,
double limit_tuples)
@@ -1912,9 +2246,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
if (tuples < 2.0)
tuples = 2.0;
- /* Include the default cost-per-comparison */
- comparison_cost += 2.0 * cpu_operator_cost;
-
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
@@ -1938,12 +2269,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double log_runs;
double npageaccesses;
- /*
- * CPU costs
- *
- * Assume about N log2 N comparisons
- */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ /* CPU costs */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
/* Disk costs */
@@ -1959,18 +2288,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
}
else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
{
- /*
- * We'll use a bounded heap-sort keeping just K tuples in memory, for
- * a total number of tuple comparisons of N log2 K; but the constant
- * factor is a bit higher than for quicksort. Tweak it so that the
- * cost curve is continuous at the crossover point.
- */
- *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+ /* We'll use a bounded heap-sort keeping just K tuples in memory. */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ output_tuples, true);
}
else
{
/* We'll use plain quicksort on all the input tuples */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
}
/*
@@ -2085,7 +2413,7 @@ cost_incremental_sort(Path *path,
* Estimate the average cost of sorting of one group where presorted keys
* are equal.
*/
- cost_tuplesort(&group_startup_cost, &group_run_cost,
+ cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
group_tuples, width, comparison_cost, sort_mem,
limit_tuples);
@@ -2151,7 +2479,7 @@ cost_sort(Path *path, PlannerInfo *root,
Cost startup_cost;
Cost run_cost;
- cost_tuplesort(&startup_cost, &run_cost,
+ cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
tuples, width,
comparison_cost, sort_mem,
limit_tuples);
@@ -2247,7 +2575,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append_ext(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2320,7 +2648,7 @@ cost_append(AppendPath *apath)
* any child.
*/
cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
+ root,
pathkeys,
subpath->disabled_nodes,
subpath->total_cost,
@@ -3795,8 +4123,9 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
/* Public result fields */
workspace->disabled_nodes = disabled_nodes;
- workspace->startup_cost = startup_cost;
- workspace->total_cost = startup_cost + run_cost + inner_run_cost;
+ workspace->startup_cost = startup_cost + outer_path->total_cost/outer_rows +
+ inner_path->total_cost/inner_rows;
+ workspace->total_cost = workspace->startup_cost + run_cost + inner_run_cost;
/* Save private data for final_cost_mergejoin */
workspace->run_cost = run_cost;
workspace->inner_run_cost = inner_run_cost;
@@ -4761,6 +5090,7 @@ cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
context.root = root;
context.total.startup = 0;
context.total.per_tuple = 0;
+ context.calccoalesce = true;
/* We don't charge any cost for the implicit ANDing at top level ... */
@@ -4786,6 +5116,22 @@ cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
context.root = root;
context.total.startup = 0;
context.total.per_tuple = 0;
+ context.calccoalesce = true;
+
+ cost_qual_eval_walker(qual, &context);
+
+ *cost = context.total;
+}
+
+void
+cost_qual_eval_node_index(QualCost *cost, Node *qual, PlannerInfo *root)
+{
+ cost_qual_eval_context context;
+
+ context.root = root;
+ context.total.startup = 0;
+ context.total.per_tuple = 0;
+ context.calccoalesce = false;
cost_qual_eval_walker(qual, &context);
@@ -4815,6 +5161,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
locContext.root = context->root;
locContext.total.startup = 0;
locContext.total.per_tuple = 0;
+ locContext.calccoalesce = context->calccoalesce;
/*
* For an OR clause, recurse into the marked-up tree so that we
@@ -5051,6 +5398,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
*/
return false;
}
+ else if (IsA(node, CoalesceExpr) && context->calccoalesce)
+ {
+ context->total.per_tuple += cpu_operator_cost *
+ list_length(((CoalesceExpr *) node)->args);
+ }
/* recurse into children */
return expression_tree_walker(node, cost_qual_eval_walker, context);
@@ -5362,6 +5714,13 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
rel->rows = clamp_row_est(nrows);
+ if (isnan(nrows) || nrows > MAXIMUM_ROWCOUNT)
+ rel->rowsUnclamped = rel->rows;
+ else if (nrows < 0.001)
+ rel->rowsUnclamped = 0.001;
+ else
+ rel->rowsUnclamped = nrows;
+
cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
set_rel_width(root, rel);
@@ -5512,6 +5871,7 @@ calc_joinrel_size_estimate(PlannerInfo *root,
Selectivity jselec;
Selectivity pselec;
double nrows;
+ bool apply_righthand = false;
/*
* Compute joinclause selectivity. Note that we are only considering
@@ -5550,9 +5910,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
- if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) {
pushedquals = lappend(pushedquals, rinfo);
- else
+ apply_righthand |= bms_overlap(rinfo->clause_relids,
+ sjinfo->min_righthand);
+ } else
joinquals = lappend(joinquals, rinfo);
}
@@ -5604,6 +5966,8 @@ calc_joinrel_size_estimate(PlannerInfo *root,
nrows = outer_rows * inner_rows * fkselec * jselec;
if (nrows < outer_rows)
nrows = outer_rows;
+ if (apply_righthand && inner_rows < outer_rows)
+ pselec *= inner_rows / outer_rows;
nrows *= pselec;
break;
case JOIN_FULL:
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 441f12f6c50..a989d8ca949 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -801,7 +801,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
- return cur_ec; /* Match! */
+ {
+ /*
+ * Match!
+ *
+ * Copy the sortref if it wasn't set yet. That may happen if
+ * the ec was constructed from WHERE clause, i.e. it doesn't
+ * have a target reference at all.
+ */
+ if (cur_ec->ec_sortref == 0 && sortref > 0)
+ cur_ec->ec_sortref = sortref;
+ return cur_ec;
+ }
}
}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 4d891aedeca..b9ebf19e467 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -110,8 +110,6 @@ static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
bool *skip_nonnative_saop);
static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *other_clauses);
-static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *other_clauses);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
List *paths);
static int path_usage_comparator(const void *a, const void *b);
@@ -1624,7 +1622,7 @@ make_bitmap_paths_for_or_group(PlannerInfo *root, RelOptInfo *rel,
* for the purpose of generating indexquals, but are not to be searched for
* ORs. (See build_paths_for_OR() for motivation.)
*/
-static List *
+List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *other_clauses)
{
@@ -3936,7 +3934,6 @@ match_clause_to_ordering_op(IndexOptInfo *index,
return clause;
}
-
/****************************************************************************
* ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ----
****************************************************************************/
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index af5aa8aea84..266af3121d6 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -1100,6 +1100,12 @@ try_mergejoin_path(PlannerInfo *root,
pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innersortkeys = NIL;
+ if ((innersortkeys != NIL || outersortkeys != NIL) && jointype != JOIN_FULL)
+ {
+ bms_free(required_outer);
+ return;
+ }
+
/*
* See comments in try_nestloop_path().
*/
diff --git a/src/backend/optimizer/path/meson.build b/src/backend/optimizer/path/meson.build
index 12f36d85cb6..2e831dff34b 100644
--- a/src/backend/optimizer/path/meson.build
+++ b/src/backend/optimizer/path/meson.build
@@ -2,6 +2,7 @@
backend_sources += files(
'allpaths.c',
+ 'appendorpath.c',
'clausesel.c',
'costsize.c',
'equivclass.c',
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 8b04d40d36d..031271e87c3 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,16 +17,21 @@
*/
#include "postgres.h"
+#include <float.h>
+
+#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
+#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
/* Consider reordering of GROUP BY keys? */
bool enable_group_by_reordering = true;
@@ -366,7 +371,7 @@ pathkeys_contained_in(List *keys1, List *keys2)
*
* Returns the number of GROUP BY keys with a matching pathkey.
*/
-static int
+int
group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
List **group_clauses,
int num_groupby_pathkeys)
@@ -2156,7 +2161,7 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
* ordering. Thus we return 0, if no valuable keys are found, or the number
* of leading keys shared by the list and the requested ordering..
*/
-static int
+int
pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
{
int n_common_pathkeys;
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 79598f62ca4..aea18117202 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -76,6 +76,9 @@ static bool is_innerrel_unique_for(PlannerInfo *root,
static int self_join_candidates_cmp(const void *a, const void *b);
static bool replace_relid_callback(Node *node,
ChangeVarNodes_context *context);
+// static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
+// static void replace_varno(Node *node, int from, int to);
+// static bool replace_varno_walker(Node *node, ReplaceVarnoContext *ctx);
/*
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index d721471e2af..f3251102891 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -172,7 +172,6 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat
static Node *fix_indexqual_clause(PlannerInfo *root,
IndexOptInfo *index, int indexcol,
Node *clause, List *indexcolnos);
-static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_generic_path_info(Plan *dest, Path *src);
@@ -1245,6 +1244,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
/* Generate a Result plan with constant-FALSE gating qual */
Plan *plan;
+ tlist = build_path_tlist(root, &best_path->path);
plan = (Plan *) make_result(tlist,
(Node *) list_make1(makeBoolConst(false,
false)),
@@ -1273,7 +1273,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
plan->plan.righttree = NULL;
plan->apprelids = rel->relids;
- if (pathkeys != NIL)
+ if (pathkeys != NIL && best_path->pull_tlist == false)
{
/*
* Compute sort column info, and adjust the Append's tlist as needed.
@@ -1307,11 +1307,17 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
/* Must insist that all children return the same tlist */
subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST);
+ if (tlist == NIL && best_path->pull_tlist)
+ plan->plan.targetlist = tlist = copyObject(subplan->targetlist);
+
/*
* For ordered Appends, we must insert a Sort node if subplan isn't
* sufficiently ordered.
+ * If best_path->pull_tlist is set, the plan came from
+ * keybased_rewrite_index_paths(), which guarantees correct sorting in
+ * the subplan.
*/
- if (pathkeys != NIL)
+ if (pathkeys != NIL && best_path->pull_tlist == false)
{
int numsortkeys;
AttrNumber *sortColIdx;
@@ -5258,7 +5264,7 @@ fix_indexqual_clause(PlannerInfo *root, IndexOptInfo *index, int indexcol,
* Most of the code here is just for sanity cross-checking that the given
* expression actually matches the index column it's claimed to.
*/
-static Node *
+Node *
fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol)
{
Var *result;
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index d706546f332..f9eacc4fa27 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -3133,6 +3133,10 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{
Var *var = (Var *) node;
+ /* join_references_mutator already checks this node */
+ if (var->varno == OUTER_VAR)
+ return (Node*)copyObject(var);
+
/*
* Verify that Vars with non-default varreturningtype only appear in
* the RETURNING list, and refer to the target relation.
@@ -3163,6 +3167,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
/* then in the inner. */
if (context->inner_itlist)
{
+ if (var->varno == INNER_VAR)
+ return (Node*)copyObject(var);
+
newvar = search_indexed_tlist_for_var(var,
context->inner_itlist,
INNER_VAR,
diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c
index f26e38c6552..ffee0a0d1ad 100644
--- a/src/backend/optimizer/util/joininfo.c
+++ b/src/backend/optimizer/util/joininfo.c
@@ -177,7 +177,6 @@ remove_join_clause_from_rels(PlannerInfo *root,
* Remove the restrictinfo from the list. Pointer comparison is
* sufficient.
*/
- Assert(list_member_ptr(rel->joininfo, restrictinfo));
rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo);
}
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e8d8a537061..cf39b89a42e 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -235,6 +235,18 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
/* ... but path2 fuzzily worse on startup, so path1 wins */
return COSTS_BETTER1;
}
+
+ if (IsA(path1, IndexPath) && IsA(path2, IndexPath))
+ {
+ IndexPath *ipath1 = (IndexPath*)path1;
+ IndexPath *ipath2 = (IndexPath*)path2;
+
+ if (ipath1->indexselectivity < ipath2->indexselectivity)
+ return COSTS_BETTER1;
+ else if (ipath1->indexselectivity > ipath2->indexselectivity)
+ return COSTS_BETTER2;
+ }
+
/* fuzzily the same on both costs */
return COSTS_EQUAL;
@@ -1288,7 +1300,7 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
}
/*
- * create_append_path
+ * create_append_path_ext
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*
@@ -1300,12 +1312,12 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
* by totalling the row estimates from the 'subpaths' list.
*/
AppendPath *
-create_append_path(PlannerInfo *root,
+create_append_path_ext(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
- double rows)
+ double rows, bool pull_tlist)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
@@ -1315,6 +1327,7 @@ create_append_path(PlannerInfo *root,
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathtarget = rel->reltarget;
+ pathnode->pull_tlist = pull_tlist;
/*
* If this is for a baserel (not a join or non-leaf partition), we prefer
@@ -1407,12 +1420,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append_ext(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append_ext(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -4211,9 +4224,13 @@ adjust_limit_rows_costs(double *rows, /* in/out parameter */
if (count_rows > *rows)
count_rows = *rows;
if (input_rows > 0)
+ {
+ *startup_cost = *startup_cost +
+ 2*(input_total_cost - input_startup_cost) / input_rows;
*total_cost = *startup_cost +
(input_total_cost - input_startup_cost)
* count_rows / input_rows;
+ }
*rows = count_rows;
if (*rows < 1)
*rows = 1;
@@ -4338,11 +4355,12 @@ reparameterize_path(PlannerInfo *root, Path *path,
i++;
}
return (Path *)
- create_append_path(root, rel, childpaths, partialpaths,
+ create_append_path_ext(root, rel, childpaths, partialpaths,
apath->path.pathkeys, required_outer,
apath->path.parallel_workers,
apath->path.parallel_aware,
- -1);
+ -1,
+ apath->pull_tlist);
}
case T_Material:
{
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 1d9aaf9146f..714460e96ec 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -508,6 +508,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
index_close(indexRelation, NoLock);
+ info->sslots = palloc0(
+ (STATISTIC_NUM_SLOTS + 1) * sizeof(AttStatsSlot));
+
/*
* We've historically used lcons() here. It'd make more sense to
* use lappend(), but that causes the planner to change behavior
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ff507331a06..06b1ed73bb8 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -1336,6 +1336,54 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
joinrel->joininfo = result;
}
+typedef struct UniquePtrList {
+	List	   *unique_list;	/* distinct pointers, in insertion order */
+	HTAB	   *h;				/* pointer set; NULL until the list grows past 32 */
+} UniquePtrList;
+
+/*
+ * Append v to upl->unique_list unless the same pointer is already there.
+ * Short lists use list_append_unique_ptr(); longer ones switch to a hash.
+ */
+static void
+addUniquePtrList(UniquePtrList *upl, void *v)
+{
+	bool		found;
+
+	if (upl->h == NULL && list_length(upl->unique_list) <= 32)
+	{
+		upl->unique_list = list_append_unique_ptr(upl->unique_list, v);
+		return;
+	}
+
+	if (upl->h == NULL)
+	{
+		HASHCTL		hash_ctl;
+		ListCell   *l;
+
+		MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+		hash_ctl.keysize = sizeof(void *);
+		hash_ctl.entrysize = sizeof(void *);
+		/* keep the table in the caller's context so an ERROR cannot leak it */
+		hash_ctl.hcxt = CurrentMemoryContext;
+		upl->h = hash_create("UniquePtrList storage", 64, &hash_ctl,
+							 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+		/* seed the hash with the entries collected while still list-only */
+		foreach(l, upl->unique_list)
+		{
+			void	   *k = lfirst(l);
+
+			hash_search(upl->h, &k, HASH_ENTER, &found);
+			Assert(found == false);
+		}
+	}
+
+	hash_search(upl->h, &v, HASH_ENTER, &found);
+	if (found == false)
+		upl->unique_list = lappend(upl->unique_list, v);
+}
+
static List *
subbuild_joinrel_restrictlist(PlannerInfo *root,
RelOptInfo *joinrel,
@@ -1344,6 +1392,10 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
List *new_restrictlist)
{
ListCell *l;
+ UniquePtrList upl;
+
+ memset(&upl, 0, sizeof(upl));
+ upl.unique_list = new_restrictlist;
foreach(l, input_rel->joininfo)
{
@@ -1388,7 +1440,7 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
* will have been multiply-linked rather than copied, pointer
* equality should be a sufficient test.)
*/
- new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo);
+ addUniquePtrList(&upl, rinfo);
}
else
{
@@ -1399,7 +1451,8 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
}
}
- return new_restrictlist;
+ hash_destroy(upl.h);
+ return upl.unique_list;
}
static List *
@@ -1408,6 +1461,10 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
List *new_joininfo)
{
ListCell *l;
+ UniquePtrList upl;
+
+ memset(&upl, 0, sizeof(upl));
+ upl.unique_list = new_joininfo;
/* Expected to be called only for join between parent relations. */
Assert(joinrel->reloptkind == RELOPT_JOINREL);
@@ -1433,11 +1490,12 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
* multiply-linked rather than copied, pointer equality should be
* a sufficient test.)
*/
- new_joininfo = list_append_unique_ptr(new_joininfo, rinfo);
+ addUniquePtrList(&upl, rinfo);
}
}
- return new_joininfo;
+ hash_destroy(upl.h);
+ return upl.unique_list;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 03c80eaaf22..f852ad50827 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -15027,7 +15027,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr LIKE a_expr ESCAPE a_expr %prec LIKE
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($3, $5),
COERCE_EXPLICIT_CALL,
@2);
@@ -15041,7 +15041,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA LIKE a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -15055,7 +15055,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr ILIKE a_expr ESCAPE a_expr %prec ILIKE
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($3, $5),
COERCE_EXPLICIT_CALL,
@2);
@@ -15069,7 +15069,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA ILIKE a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -15079,7 +15079,7 @@ a_expr: c_expr { $$ = $1; }
| a_expr SIMILAR TO a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make1($4),
COERCE_EXPLICIT_CALL,
@2);
@@ -15088,7 +15088,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr SIMILAR TO a_expr ESCAPE a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -15097,7 +15097,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA SIMILAR TO a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make1($5),
COERCE_EXPLICIT_CALL,
@2);
@@ -15106,7 +15106,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA SIMILAR TO a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make2($5, $7),
COERCE_EXPLICIT_CALL,
@2);
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 9f20a70ce13..9b0f5c95fef 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -1202,6 +1202,7 @@ transformFromClauseItem(ParseState *pstate, Node *n,
&r_namespace);
/* Remove the left-side RTEs from the namespace list again */
+
pstate->p_namespace = list_truncate(pstate->p_namespace,
sv_namespace_length);
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 04ecf64b1fc..ac7163d6b6c 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -2244,7 +2244,6 @@ addRangeTableEntryForJoin(ParseState *pstate,
{
RangeTblEntry *rte = makeNode(RangeTblEntry);
Alias *eref;
- int numaliases;
ParseNamespaceItem *nsitem;
Assert(pstate != NULL);
@@ -2270,19 +2269,37 @@ addRangeTableEntryForJoin(ParseState *pstate,
rte->join_using_alias = join_using_alias;
rte->alias = alias;
- eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL);
- numaliases = list_length(eref->colnames);
-
/* fill in any unspecified alias columns */
- if (numaliases < list_length(colnames))
- eref->colnames = list_concat(eref->colnames,
- list_copy_tail(colnames, numaliases));
+ if (alias)
+ {
+ int numaliases;
- if (numaliases > list_length(colnames))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
- errmsg("join expression \"%s\" has %d columns available but %d columns specified",
- eref->aliasname, list_length(colnames), numaliases)));
+ eref = copyObject(alias);
+
+ numaliases = list_length(eref->colnames);
+
+ if (numaliases == 0)
+ {
+ eref->colnames = colnames;
+ }
+ else if (numaliases > 0 && numaliases < list_length(colnames))
+ {
+ eref->colnames = list_concat(eref->colnames,
+ list_copy_tail(colnames, numaliases));
+ list_free(colnames);
+ }
+
+ if (numaliases > list_length(colnames))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+ errmsg("join expression \"%s\" has %d columns available but %d columns specified",
+ eref->aliasname, list_length(colnames), numaliases)));
+ }
+ else
+ {
+ eref = makeAlias("unnamed_join", NIL);
+ eref->colnames = colnames;
+ }
rte->eref = eref;
@@ -2999,8 +3016,11 @@ expandRTE(RangeTblEntry *rte, int rtindex, int sublevels_up,
{
char *label = strVal(lfirst(colname));
- *colnames = lappend(*colnames,
- makeString(pstrdup(label)));
+ /*
+ * Assume label is already pstrdup'ed somewhere, so
+ * don't duplicate it again
+ */
+ *colnames = lappend(*colnames, makeString(label));
}
if (colvars)
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 0fcd1fbd14e..eaf5476222d 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -1463,7 +1463,7 @@ replace_rte_variables(Node *node, int target_varno, int sublevels_up,
result = query_or_expression_tree_mutator(node,
replace_rte_variables_mutator,
&context,
- 0);
+ QTW_DONT_COPY_DEFAULT);
if (context.inserted_sublink)
{
@@ -1533,13 +1533,13 @@ replace_rte_variables_mutator(Node *node,
newnode = query_tree_mutator((Query *) node,
replace_rte_variables_mutator,
context,
- 0);
+ QTW_DONT_COPY_DEFAULT);
newnode->hasSubLinks |= context->inserted_sublink;
context->inserted_sublink = save_inserted_sublink;
context->sublevels_up--;
return (Node *) newnode;
}
- return expression_tree_mutator(node, replace_rte_variables_mutator, context);
+ return expression_tree_mutator_ext(node, replace_rte_variables_mutator, context, QTW_DONT_COPY_DEFAULT);
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a4ec7959f31..7defe4b1929 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -965,6 +965,7 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
int used = 0;
int highestfd = 0;
int j;
+ int fdTest = 0;
#ifdef HAVE_GETRLIMIT
struct rlimit rlim;
@@ -980,6 +981,15 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
ereport(WARNING, (errmsg("getrlimit failed: %m")));
#endif /* HAVE_GETRLIMIT */
+#ifdef WIN32
+ /*
+ * On Windows 7, dup(0) (stdin) fails when max_files_per_process > 1200,
+ * so probe with a descriptor opened on the postgresql.conf file instead.
+ */
+ fdTest = _open(ConfigFileName, _O_RDONLY);
+ if (fdTest < 0)
+ fdTest = 0; /* fallback to stdin */
+#endif
/* dup until failure or probe limit reached */
for (;;)
{
@@ -995,7 +1005,7 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
break;
#endif
- thisfd = dup(2);
+ thisfd = dup(fdTest);
if (thisfd < 0)
{
/* Expect EMFILE or ENFILE, else it's fishy */
@@ -1022,6 +1032,10 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
for (j = 0; j < used; j++)
close(fd[j]);
+#ifdef WIN32
+ if (fdTest>0)
+ _close(fdTest);
+#endif
pfree(fd);
/*
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c
index 6eea8e87169..75c05b22970 100644
--- a/src/backend/storage/ipc/sinval.c
+++ b/src/backend/storage/ipc/sinval.c
@@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/xact.h"
+#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/latch.h"
#include "storage/sinvaladt.h"
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index c5748b690f4..ea557e0dc62 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -17,6 +17,7 @@
#include <signal.h>
#include <unistd.h>
+#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/proc.h"
@@ -126,8 +127,8 @@
* per iteration.
*/
-#define MAXNUMMESSAGES 4096
-#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 262144)
+#define MAXNUMMESSAGES 16384
+#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 65536)
#define CLEANUP_MIN (MAXNUMMESSAGES / 2)
#define CLEANUP_QUANTUM (MAXNUMMESSAGES / 16)
#define SIG_THRESHOLD (MAXNUMMESSAGES / 2)
@@ -171,8 +172,6 @@ typedef struct SISeg
int maxMsgNum; /* next message number to be assigned */
int nextThreshold; /* # of messages to call SICleanupQueue */
- slock_t msgnumLock; /* spinlock protecting maxMsgNum */
-
/*
* Circular buffer holding shared-inval messages
*/
@@ -246,7 +245,6 @@ SharedInvalShmemInit(void)
shmInvalBuffer->minMsgNum = 0;
shmInvalBuffer->maxMsgNum = 0;
shmInvalBuffer->nextThreshold = CLEANUP_MIN;
- SpinLockInit(&shmInvalBuffer->msgnumLock);
/* The buffer[] array is initially all unused, so we need not fill it */
@@ -362,6 +360,13 @@ CleanupInvalidationState(int status, Datum arg)
LWLockRelease(SInvalWriteLock);
}
+
+#define MAXNUMLOCALMESSAGES 8192 /* number of slots in the ring below */
+static SharedInvalidationMessage localInvalBuffer[MAXNUMLOCALMESSAGES]; /* ring of isLocal messages; NOTE(review): the writer does not check for overrunning the read position */
+static int localInvalRPos = 0; /* next slot SIGetDataEntries() reads */
+static int localInvalWPos = 0; /* next slot SIInsertDataEntries() writes */
+
+
/*
* SIInsertDataEntries
* Add new invalidation message(s) to the buffer.
@@ -370,6 +375,21 @@ void
SIInsertDataEntries(const SharedInvalidationMessage *data, int n)
{
SISeg *segP = shmInvalBuffer;
+
+ /* Place local messages to local-only buffer */
+ if (MyDatabaseId != InvalidOid)
+ {
+ int count = n;
+ for (int i=0; i<count; i++)
+ {
+ if (data[i].isLocal)
+ {
+ localInvalBuffer[localInvalWPos] = data[i];
+ localInvalWPos = (localInvalWPos + 1 ) % MAXNUMLOCALMESSAGES;
+ n--;
+ }
+ }
+ }
/*
* N can be arbitrarily large. We divide the work into groups of no more
@@ -412,16 +432,21 @@ SIInsertDataEntries(const SharedInvalidationMessage *data, int n)
* Insert new message(s) into proper slot of circular buffer
*/
max = segP->maxMsgNum;
- while (nthistime-- > 0)
+ while (nthistime)
{
- segP->buffer[max % MAXNUMMESSAGES] = *data++;
- max++;
+ if ((MyDatabaseId == InvalidOid) || !data->isLocal)
+ {
+ segP->buffer[max % MAXNUMMESSAGES] = *data;
+ max++;
+ nthistime--;
+ }
+
+ data++;
}
/* Update current value of maxMsgNum using spinlock */
- SpinLockAcquire(&segP->msgnumLock);
+ pg_memory_barrier();
segP->maxMsgNum = max;
- SpinLockRelease(&segP->msgnumLock);
/*
* Now that the maxMsgNum change is globally visible, we give everyone
@@ -475,7 +500,16 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
SISeg *segP;
ProcState *stateP;
int max;
- int n;
+ int n = 0;
+
+ if (localInvalRPos != localInvalWPos)
+ {
+ while (n < datasize && localInvalRPos != localInvalWPos)
+ {
+ data[n++] = localInvalBuffer[localInvalRPos];
+ localInvalRPos = (localInvalRPos + 1) % MAXNUMLOCALMESSAGES;
+ }
+ }
segP = shmInvalBuffer;
stateP = &segP->procState[MyProcNumber];
@@ -492,7 +526,7 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
* invalidation had arrived slightly later in the first place.
*/
if (!stateP->hasMessages)
- return 0;
+ return n;
LWLockAcquire(SInvalReadLock, LW_SHARED);
@@ -507,10 +541,9 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
*/
stateP->hasMessages = false;
- /* Fetch current value of maxMsgNum using spinlock */
- SpinLockAcquire(&segP->msgnumLock);
+ /* Fetch current value of maxMsgNum */
+ pg_memory_barrier();
max = segP->maxMsgNum;
- SpinLockRelease(&segP->msgnumLock);
if (stateP->resetState)
{
@@ -534,7 +567,6 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
* cannot delete them here. SICleanupQueue() will eventually remove them
* from the queue.
*/
- n = 0;
while (n < datasize && stateP->nextMsgNum < max)
{
data[n++] = segP->buffer[stateP->nextMsgNum % MAXNUMMESSAGES];
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 2776ceb295b..73ca8dc01bd 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -32,6 +32,7 @@
#include <signal.h>
#include <unistd.h>
+#include "access/tempcat.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
@@ -44,10 +45,13 @@
#include "storage/procarray.h"
#include "storage/spin.h"
#include "storage/standby.h"
+#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/resowner.h"
-
+#include "catalog/pg_class.h"
+#include "utils/syscache.h"
+#include "access/htup_details.h"
/* GUC variables */
int max_locks_per_xact; /* used to set the lock table size */
@@ -428,6 +432,68 @@ static void LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
static void GetSingleProcBlockerStatusData(PGPROC *blocked_proc,
BlockedProcsData *data);
+/*
+ * IsLockCanBeSkipped
+ *		Tell whether a lock request (or release) may bypass the lock manager.
+ *
+ * Only transaction-level relation locks qualify, and only when the relation
+ * is found to be temporary.  Every doubtful case answers "do not skip".
+ */
+static bool IsLockCanBeSkipped(const LOCKTAG *locktag, bool sessionLock)
+{
+	/* true while our own syscache lookup (below) is in progress */
+	static bool inRecursion = false;
+	HeapTuple	classTuple;
+	bool		relIsTemp;
+
+	/*
+	 * Quick rejections, tested in this order:
+	 *
+	 * - anything other than a relation lock is never skipped;
+	 *
+	 * - outside a transaction the cache search below would fail;
+	 *
+	 * - session locks on relations are always taken, because at unlock time
+	 *   we cannot verify here whether the lock had been skipped;
+	 *
+	 * - the syscache search re-enters this function; locks needed by the
+	 *   syscache are never skipped, so don't test for temporary tables
+	 *   inside that recursion.
+	 */
+	if (locktag->locktag_type != LOCKTAG_RELATION ||
+		!IsTransactionState() ||
+		sessionLock ||
+		inRecursion)
+		return false;
+
+	/*
+	 * NOTE(review): presumably no catalog lookup is needed in this mode
+	 * because the whole scope is temporary -- confirm with IsTempTableScope().
+	 */
+	if (enable_temp_memory_catalog && IsTempTableScope())
+		return true;
+
+	/*
+	 * Fetch the relation's pg_class tuple if possible, flagging the lookup
+	 * so that nested calls take the early exit above.
+	 */
+	inRecursion = true;
+	classTuple = TryGetSysCacheRelationClassTuple(locktag->locktag_field2);
+	inRecursion = false;
+
+	/* a failed lookup is treated as "relation is not temporary" */
+	if (!classTuple)
+		return false;
+
+	relIsTemp =
+		((Form_pg_class) GETSTRUCT(classTuple))->relpersistence == RELPERSISTENCE_TEMP;
+
+	/* done with the tuple; give the cache reference back */
+	ReleaseSysCache(classTuple);
+
+	return relIsTemp;
+}
+
/*
* Initialize the lock manager's shmem data structures.
@@ -656,7 +722,7 @@ LockHeldByMe(const LOCKTAG *locktag,
&localtag,
HASH_FIND, NULL);
- if (locallock && locallock->nLocks > 0)
+ if ((locallock && locallock->nLocks > 0) || IsLockCanBeSkipped(locktag, false))
return true;
if (orstronger)
@@ -870,6 +936,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
lockMethodTable->lockModeNames[lockmode]),
errhint("Only RowExclusiveLock or less can be acquired on database objects during recovery.")));
+ /* Don't lock if it's not required. Treat as already locked. */
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return LOCKACQUIRE_ALREADY_CLEAR;
+
#ifdef LOCK_DEBUG
if (LOCK_DEBUG_ENABLED(locktag))
elog(LOG, "LockAcquire: lock [%u,%u] %s",
@@ -2107,6 +2177,9 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
*/
if (!locallock || locallock->nLocks <= 0)
{
+ /* Treat skipped locks (they aren't actually locked) as unlocked */
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return true;
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
return false;
@@ -2145,6 +2218,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
}
if (i < 0)
{
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return true;
/* don't release a lock belonging to another owner */
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile
index 1d0b98764f9..428e67336d7 100644
--- a/src/backend/storage/smgr/Makefile
+++ b/src/backend/storage/smgr/Makefile
@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = \
bulk_write.o \
md.o \
+ rd.o \
smgr.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/smgr/meson.build b/src/backend/storage/smgr/meson.build
index 9288e35a852..7eb71fb9d3d 100644
--- a/src/backend/storage/smgr/meson.build
+++ b/src/backend/storage/smgr/meson.build
@@ -3,5 +3,6 @@
backend_sources += files(
'bulk_write.c',
'md.c',
+ 'rd.c',
'smgr.c',
)
diff --git a/src/backend/storage/smgr/rd.c b/src/backend/storage/smgr/rd.c
new file mode 100644
index 00000000000..6e1e649f32e
--- /dev/null
+++ b/src/backend/storage/smgr/rd.c
@@ -0,0 +1,363 @@
+#include "postgres.h"
+
+#include "storage/md.h"
+#include "storage/rd.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+typedef struct _RdBuffer
+{
+ dlist_node node; /* link in the file-level "buffers" list */
+ RelFileLocatorBackend rlocator; /* relation this buffer belongs to */
+ ForkNumber forknum; /* fork this buffer belongs to */
+ BlockNumber ballocated; /* capacity of data, in blocks */
+ BlockNumber bsize; /* blocks currently in use; reported by rd_nblocks() */
+ char *data; /* block images, allocated as ballocated*BLCKSZ bytes */
+} _RdBuffer;
+
+/*
+ * Capacity of each rd buffer, in blocks. Once a relation needs more than
+ * that, its storage switches to 'md' and all buffered data is flushed to disk.
+ */
+int temp_rd_buffers = 4;
+
+bool enable_temp_rd_buffers = false; /* consulted by smgropen() when choosing rd for a temp relation */
+
+static MemoryContext mctx; /* created by rd_init(); every _RdBuffer and its data live here */
+static dlist_head buffers; /* all live _RdBuffer entries */
+
+/* Linear search of the live-buffer list for (rlocator, forknum); NULL when absent. */
+static _RdBuffer*
+_find_buffer(RelFileLocatorBackend* rlocator, ForkNumber forknum)
+{
+	dlist_iter	it;
+	dlist_foreach(it, &buffers)
+	{
+		_RdBuffer  *cand = dlist_container(_RdBuffer, node, it.cur);
+		if (cand->forknum == forknum && RelFileLocatorBackendEquals(cand->rlocator, *rlocator))
+			return cand;
+	}
+	return NULL;
+}
+
+/* Return the buffer backing (reln, forknum); raises ERROR when none exists. */
+static _RdBuffer*
+_open_buffer(SMgrRelation reln, ForkNumber forknum)
+{
+	/* fast path: pointer remembered in the SMgrRelation by rd_create() */
+	_RdBuffer  *hit = reln->rd_bufs[forknum];
+
+	/* slow path: search the global list by locator and fork */
+	if (hit == NULL)
+		hit = _find_buffer(&reln->smgr_rlocator, forknum);
+
+	if (hit == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("temporary page doesn't exists")));
+
+	return hit;
+}
+
+/* Move every rd buffer of reln into md files and make md the relation's storage manager. */
+static void
+switch_to_md(SMgrRelation reln)
+{
+ dlist_mutable_iter iter;
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ reln->rd_bufs[forknum] = 0; /* drop cached pointers; the buffers are freed below */
+
+ reln->smgr_which = 0; /* index 0 of smgrsw is md, so the smgr* calls below go to md */
+
+ mdopen(reln);
+
+ dlist_foreach_modify(iter, &buffers)
+ {
+ _RdBuffer* buffer = dlist_container(_RdBuffer, node, iter.cur);
+ if (RelFileLocatorBackendEquals(buffer->rlocator, reln->smgr_rlocator))
+ {
+ smgrcreate(reln, buffer->forknum, false);
+ for(BlockNumber bn=0; bn < buffer->bsize; bn++) /* replay the used blocks in order */
+ smgrextend(reln, buffer->forknum, bn, buffer->data + bn*BLCKSZ, true);
+
+ dlist_delete(&buffer->node); /* the _modify iterator permits removing the current node */
+ pfree(buffer->data);
+ pfree(buffer);
+ }
+ }
+}
+
+/* Move reln's storage into rd buffers, but only if every existing fork fits in temp_rd_buffers blocks. */
+void
+rd_reset(SMgrRelation reln)
+{
+ BlockNumber nblocks[MAX_FORKNUM+1]; /* per-fork size; InvalidBlockNumber marks an absent fork */
+ char* buf; /* one-block bounce buffer */
+
+ if (reln->smgr_which == 1) /* already on rd: nothing to do */
+ return;
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ {
+ if (smgrexists(reln, forknum))
+ {
+ nblocks[forknum] = smgrnblocks(reln, forknum);
+
+ if (nblocks[forknum] > temp_rd_buffers) /* too large for an rd buffer: leave reln untouched */
+ return;
+ }
+ else
+ nblocks[forknum] = InvalidBlockNumber;
+ }
+
+ /* every fork fits: copy them block by block into freshly created rd buffers */
+ buf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ {
+ if (nblocks[forknum] == InvalidBlockNumber)
+ continue;
+
+ rd_create(reln, forknum, false); /* raises ERROR if a buffer for this fork already exists */
+ for (BlockNumber bn=0; bn < nblocks[forknum]; bn++)
+ {
+ smgrread(reln, forknum, bn, buf);
+ rd_extend(reln, forknum, bn, buf, true);
+ }
+ }
+
+ pfree(buf);
+
+ smgrdounlinkall(&reln, 1, false); /* smgr_which is not yet 1, so this goes through the previous manager */
+
+ reln->smgr_which = 1; /* subsequent smgr* calls on reln dispatch to rd */
+}
+
+/* Set up module state: an empty buffer list and the memory context buffers are allocated in. */
+void
+rd_init(void)
+{
+	dlist_init(&buffers);
+	mctx = AllocSetContextCreate(TopMemoryContext, "RdSmgr", ALLOCSET_DEFAULT_SIZES);
+}
+
+/* Shutdown hook of the rd storage manager; does nothing. */
+void
+rd_shutdown(void)
+{
+}
+
+/* Open hook: run mdopen() on the relation and clear its cached rd buffer pointers. */
+void
+rd_open(SMgrRelation reln)
+{
+	mdopen(reln);
+
+	for (int fork = MAX_FORKNUM; fork >= 0; fork--)
+		reln->rd_bufs[fork] = NULL;
+}
+
+/* Close hook: forget the cached buffer pointer so a later rd_unlink() cannot leave it dangling. */
+void
+rd_close(SMgrRelation reln, ForkNumber forknum)
+{
+	/* the buffer itself stays on the list; _open_buffer() finds it again via _find_buffer() */
+	reln->rd_bufs[forknum] = NULL;
+}
+
+/* Create an empty rd buffer for (reln, forknum); ERROR if one already exists. isRedo is unused. */
+void
+rd_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+{
+	_RdBuffer* tbuf = _find_buffer(&reln->smgr_rlocator, forknum);
+	if (tbuf)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("temporary page already exists")));
+
+	tbuf = MemoryContextAlloc(mctx, sizeof(_RdBuffer));
+	tbuf->rlocator = reln->smgr_rlocator;
+	tbuf->forknum = forknum;
+	tbuf->ballocated = temp_rd_buffers;
+	tbuf->bsize = 0;
+	/* multiply as Size: BlockNumber * BLCKSZ is a 32-bit product that could wrap and under-allocate */
+	tbuf->data = MemoryContextAllocAligned(mctx, (Size) tbuf->ballocated * BLCKSZ, PG_IO_ALIGN_SIZE, 0);
+
+	dlist_push_tail(&buffers, &tbuf->node);
+	reln->rd_bufs[forknum] = tbuf;	/* cache for _open_buffer()'s fast path */
+}
+
+/* True when an rd buffer exists for (reln, forknum). */
+bool
+rd_exists(SMgrRelation reln, ForkNumber forknum)
+{
+	return _find_buffer(&reln->smgr_rlocator, forknum) != NULL;
+}
+
+/* Drop and free the rd buffer of (rlocator, forknum); a no-op when none exists. */
+void
+rd_unlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
+{
+	_RdBuffer  *victim = _find_buffer(&rlocator, forknum);
+
+	if (victim == NULL)
+		return;
+	dlist_delete(&victim->node);
+	pfree(victim->data);
+	pfree(victim);
+}
+
+/* Overwrite nblocks already-existing blocks, starting at blocknum, with the given page images. */
+void
+rd_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
+{
+	_RdBuffer  *rdbuf = _open_buffer(reln, forknum);
+
+	/* one block per iteration; the three cursors advance together */
+	for (; nblocks > 0; buffers++, nblocks--, blocknum++)
+	{
+		/* only blocks below the current size may be rewritten */
+		if (blocknum >= rdbuf->bsize)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("temporary page write beyond size")));
+
+		/* past the in-memory capacity: hand the rest of the request to md */
+		if (blocknum >= rdbuf->ballocated)
+		{
+			switch_to_md(reln);
+			smgrwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
+			return;
+		}
+
+		/* store the page image in its slot */
+		memcpy(rdbuf->data + blocknum*BLCKSZ, *buffers, BLCKSZ);
+	}
+}
+
+/* Store one block at blocknum, switching the relation to md when it does not fit. */
+void
+rd_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
+{
+	_RdBuffer  *rdbuf = _open_buffer(reln, forknum);
+
+	if (blocknum < rdbuf->ballocated)
+	{
+		memcpy(rdbuf->data + blocknum*BLCKSZ, buffer, BLCKSZ);
+		if (rdbuf->bsize < blocknum+1)
+			rdbuf->bsize = blocknum+1;
+		return;
+	}
+	/* capacity exhausted: flush everything to md and extend there */
+	switch_to_md(reln);
+	smgrextend(reln, forknum, blocknum, buffer, skipFsync);
+}
+
+/* Add nblocks zero-filled blocks at blocknum, switching the relation to md when they do not fit. */
+void
+rd_zeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
+{
+	_RdBuffer  *rdbuf = _open_buffer(reln, forknum);
+	BlockNumber	endblock = blocknum + nblocks;	/* first block after the new range */
+
+	if (endblock > rdbuf->ballocated)
+	{
+		switch_to_md(reln);
+		smgrzeroextend(reln, forknum, blocknum, nblocks, skipFsync);
+		return;
+	}
+
+	memset(rdbuf->data + blocknum*BLCKSZ, 0, BLCKSZ*nblocks);
+	rdbuf->bsize = Max(rdbuf->bsize, endblock);
+}
+
+/* Copy nblocks blocks, starting at blocknum, into the caller's page buffers. */
+void
+rd_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
+{
+	_RdBuffer  *rdbuf = _open_buffer(reln, forknum);
+
+	/* one block per iteration; the three cursors advance together */
+	for (; nblocks > 0; buffers++, nblocks--, blocknum++)
+	{
+		/* reading at or beyond the current size is an error */
+		if (blocknum >= rdbuf->bsize)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read block %u in rd of size %u",
+							blocknum, rdbuf->bsize)));
+
+		/* hand out a copy of the stored page image */
+		memcpy(*buffers, rdbuf->data + blocknum*BLCKSZ, BLCKSZ);
+	}
+}
+
+/* Number of blocks currently stored for (reln, forknum). */
+BlockNumber
+rd_nblocks(SMgrRelation reln, ForkNumber forknum)
+{
+	/* _open_buffer() raises ERROR when the fork has no buffer */
+	return _open_buffer(reln, forknum)->bsize;
+}
+
+/* Set the fork's size to nblocks; the backing memory is kept and old_blocks is ignored. */
+void
+rd_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
+{
+	(void) old_blocks;
+	/* NOTE(review): nblocks is not checked against the current size or the capacity */
+	_open_buffer(reln, forknum)->bsize = nblocks;
+}
+
+/* Prefetch hook of the rd storage manager: performs no work and always returns true. */
+bool
+rd_prefetch(SMgrRelation reln,
+ ForkNumber forknum,
+ BlockNumber blocknum,
+ int nblocks)
+{
+ (void) reln; /* all arguments are unused */
+ (void) forknum;
+ (void) blocknum;
+ (void) nblocks;
+ return true;
+}
+
+/* Writeback hook of the rd storage manager: does nothing. */
+void
+rd_writeback(SMgrRelation reln,
+ ForkNumber forknum,
+ BlockNumber blocknum,
+ BlockNumber nblocks)
+{
+ (void) reln; /* all arguments are unused */
+ (void) forknum;
+ (void) blocknum;
+ (void) nblocks;
+}
+
+/* Immediate-sync hook of the rd storage manager: does nothing. */
+void
+rd_immedsync(SMgrRelation reln,
+ ForkNumber forknum)
+{
+ (void) reln; /* both arguments are unused */
+ (void) forknum;
+}
+
+/* Register-sync hook of the rd storage manager: does nothing. */
+void
+rd_registersync(SMgrRelation reln, ForkNumber forknum)
+{
+ (void) reln; /* both arguments are unused */
+ (void) forknum;
+}
+
+/* File-descriptor hook of the rd storage manager: always returns -1 and never writes *off. */
+int
+rd_fd(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, uint32 *off)
+{
+ return -1;
+}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index bce37a36d51..e6ae9011e2f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -71,6 +71,7 @@
#include "storage/ipc.h"
#include "storage/md.h"
#include "storage/smgr.h"
+#include "storage/rd.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
@@ -148,7 +149,29 @@ static const f_smgr smgrsw[] = {
.smgr_immedsync = mdimmedsync,
.smgr_registersync = mdregistersync,
.smgr_fd = mdfd,
- }
+ },
+
+ /* ram disk */
+ {
+ .smgr_init = rd_init,
+ .smgr_shutdown = rd_shutdown,
+ .smgr_open = rd_open,
+ .smgr_close = rd_close,
+ .smgr_create = rd_create,
+ .smgr_exists = rd_exists,
+ .smgr_unlink = rd_unlink,
+ .smgr_extend = rd_extend,
+ .smgr_zeroextend = rd_zeroextend,
+ .smgr_prefetch = rd_prefetch,
+ .smgr_readv = rd_readv,
+ .smgr_writev = rd_writev,
+ .smgr_writeback = rd_writeback,
+ .smgr_nblocks = rd_nblocks,
+ .smgr_truncate = rd_truncate,
+ .smgr_immedsync = rd_immedsync,
+ .smgr_registersync = rd_registersync,
+ .smgr_fd = rd_fd,
+ },
};
static const int NSmgr = lengthof(smgrsw);
@@ -272,8 +295,19 @@ smgropen(RelFileLocator rlocator, ProcNumber backend)
/* hash_search already filled in the lookup key */
reln->smgr_targblock = InvalidBlockNumber;
for (int i = 0; i <= MAX_FORKNUM; ++i)
+ {
reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
- reln->smgr_which = 0; /* we only have md.c at present */
+ reln->rd_bufs[i] = NULL;
+ reln->md_num_open_segs[i] = 0;
+ }
+
+ if (RelFileLocatorBackendIsTemp(reln->smgr_rlocator)
+ && !smgrsw[0].smgr_exists(reln, MAIN_FORKNUM)
+ && enable_temp_rd_buffers)
+ /* use rd structure until we switch to md after threshold */
+ reln->smgr_which = 1;
+ else
+ reln->smgr_which = 0;
/* it is not pinned yet */
reln->pincount = 0;
@@ -583,7 +617,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
* closed our own smgr rel.
*/
for (i = 0; i < nrels; i++)
- CacheInvalidateSmgr(rlocators[i]);
+ if (!SmgrIsTemp(rels[i]))
+ CacheInvalidateSmgr(rlocators[i]);
/*
* Delete the physical file(s).
@@ -893,7 +928,8 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks,
* is a performance-critical path.) As in the unlink code, we want to be
* sure the message is sent before we start changing things on-disk.
*/
- CacheInvalidateSmgr(reln->smgr_rlocator);
+ if (!SmgrIsTemp(reln))
+ CacheInvalidateSmgr(reln->smgr_rlocator);
/* Do the truncation */
for (i = 0; i < nforks; i++)
diff --git a/src/backend/tcop/cmdtag.c b/src/backend/tcop/cmdtag.c
index fa556187eec..7d43ca087e0 100644
--- a/src/backend/tcop/cmdtag.c
+++ b/src/backend/tcop/cmdtag.c
@@ -145,7 +145,7 @@ BuildQueryCompletionString(char *buff, const QueryCompletion *qc,
*/
if (command_tag_display_rowcount(tag) && !nameonly)
{
- if (tag == CMDTAG_INSERT)
+ if (tag == CMDTAG_INSERT || tag == CMDTAG_EXPLAIN_INSERT)
{
*bufp++ = ' ';
*bufp++ = '0';
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2f8c3d5f918..edab0eaf728 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -71,6 +71,7 @@
#include "tcop/pquery.h"
#include "tcop/tcopprot.h"
#include "tcop/utility.h"
+#include "utils/builtins.h"
#include "utils/guc_hooks.h"
#include "utils/injection_point.h"
#include "utils/lsyscache.h"
@@ -3296,6 +3297,15 @@ ProcessRecoveryConflictInterrupts(void)
*/
void
ProcessInterrupts(void)
+{
+	if (likely(!ProcessInterrupts_hook))	/* hinted as the expected case: no hook is set */
+		standard_ProcessInterrupts();
+	else
+		ProcessInterrupts_hook();	/* a set hook is called instead of the standard routine */
+}
+
+void
+standard_ProcessInterrupts(void)
{
/* OK to accept any interrupts now? */
if (InterruptHoldoffCount != 0 || CritSectionCount != 0)
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 08791b8f75e..fe5752666d0 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -772,7 +772,13 @@ PortalRun(Portal portal, long count, bool isTopLevel,
if (qc && portal->qc.commandTag != CMDTAG_UNKNOWN)
{
CopyQueryCompletion(qc, &portal->qc);
- qc->nprocessed = nprocessed;
+ if (portal->qc.commandTag == CMDTAG_EXPLAIN ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_INSERT ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_UPDATE ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_DELETE)
+ qc->nprocessed = portal->qc.nprocessed;
+ else
+ qc->nprocessed = nprocessed;
}
/* Mark portal not active */
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 4b8d877842b..83f2780861d 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -863,7 +863,32 @@ standard_ProcessUtility(PlannedStmt *pstmt,
break;
case T_ExplainStmt:
- ExplainQuery(pstate, (ExplainStmt *) parsetree, params, dest);
+ {
+ Query *query;
+ uint64 processed;
+ int explainTag;
+
+ ExplainQuery(pstate, (ExplainStmt *) parsetree, params, dest, &processed);
+
+ query = castNode(Query, ((ExplainStmt *) parsetree)->query);
+ switch (query->commandType)
+ {
+ case CMD_INSERT:
+ explainTag = CMDTAG_EXPLAIN_INSERT;
+ break;
+ case CMD_UPDATE:
+ explainTag = CMDTAG_EXPLAIN_UPDATE;
+ break;
+ case CMD_DELETE:
+ explainTag = CMDTAG_EXPLAIN_DELETE;
+ break;
+ default:
+ explainTag = CMDTAG_EXPLAIN;
+ break;
+ }
+ if (qc)
+ SetQueryCompletion(qc, explainTag, processed);
+ }
break;
case T_AlterSystemStmt:
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..9ecb57ebe23 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -54,20 +54,9 @@
#include "utils/selfuncs.h"
#include "utils/varlena.h"
-
-typedef enum
-{
- Pattern_Type_Like,
- Pattern_Type_Like_IC,
- Pattern_Type_Regex,
- Pattern_Type_Regex_IC,
- Pattern_Type_Prefix,
-} Pattern_Type;
-
-typedef enum
-{
- Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact,
-} Pattern_Prefix_Status;
+#include "catalog/pg_proc.h"
+#include "utils/catcache.h"
+#include "utils/syscache.h"
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
static List *match_pattern_prefix(Node *leftop,
@@ -107,6 +96,119 @@ static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
+/****************************************************************************
+ * ---- ROUTINES FOR "SPECIAL" INDEXABLE OPERATORS FOR
+ * SPECIAL USER_DEFINED TYPES ----
+ * -- teodor
+ ****************************************************************************/
+
+static Oid mmPFPOid = InvalidOid;
+static Oid mmGTOid = InvalidOid;
+static Oid mcharOid = InvalidOid;
+static Oid mvarcharOid = InvalidOid;
+
+#define HeapTupleGetOid(type, tuple) (((type)GETSTRUCT(tuple))->oid)
+
+static Oid
+findTypeOid(char *typname)
+{
+	CatCList	   *catlist;
+	HeapTuple		tup;
+	int				n_members;
+	Oid				typoid;
+	/* Look the type up by name alone: only the first cache key is supplied. */
+	catlist = SearchSysCacheList(TYPENAMENSP, 1,
+								 CStringGetDatum(typname), 0, 0);
+	/* Keep the match count in a local; it is read after the list is released. */
+	n_members = catlist->n_members;
+	/* Exactly one match is required: none returns InvalidOid, several raise ERROR. */
+	if (n_members != 1)
+	{
+		ReleaseSysCacheList(catlist);
+		if (n_members > 1)
+			elog(ERROR,"There are %d candidates for '%s' type",
+				 n_members, typname);
+		return InvalidOid;
+	}
+
+	tup = &catlist->members[0]->tuple;
+
+	typoid = HeapTupleGetOid(Form_pg_type, tup);
+	/* The OID has been copied out, so the list can be released before returning. */
+	ReleaseSysCacheList(catlist);
+
+	return typoid;
+}
+
+/*
+ * Look up, and cache in file-level statics, the OIDs of the mchar support
+ * functions and of the mchar/mvarchar types.  Returns false when they are
+ * not available; raises ERROR if a function name matches more than once.
+ */
+static bool
+fillMCharOIDS(void) {
+	CatCList	*catlist;
+	HeapTuple	tup;
+	char		*funcname = "mchar_pattern_fixed_prefix";
+	int			n_members;
+
+	catlist = SearchSysCacheList(PROCNAMEARGSNSP, 1,
+								 CStringGetDatum(funcname), 0, 0);
+	n_members = catlist->n_members;
+	if (n_members != 1) {
+		ReleaseSysCacheList(catlist);
+		if (n_members > 1)
+			elog(ERROR,"There are %d candidates for '%s' function", n_members, funcname);
+		return false;
+	}
+
+	tup = &catlist->members[0]->tuple;
+	/* Refresh the cached OIDs only if the function's OID differs from the cached one. */
+	if ( HeapTupleGetOid(Form_pg_proc, tup) != mmPFPOid ) {
+		char	*quals_funcname = "mchar_greaterstring";
+		Oid		tmp_mmPFPOid = HeapTupleGetOid(Form_pg_proc, tup);
+
+		ReleaseSysCacheList(catlist);
+		mcharOid = findTypeOid("mchar");
+		mvarcharOid = findTypeOid("mvarchar");
+		if ( mcharOid == InvalidOid || mvarcharOid == InvalidOid ) {
+			elog(LOG,"Can't find mchar/mvarchar types: mchar=%u mvarchar=%u",
+				 mcharOid, mvarcharOid);
+			return false;
+		}
+
+		catlist = SearchSysCacheList(PROCNAMEARGSNSP, 1,
+									 CStringGetDatum(quals_funcname), 0, 0);
+		n_members = catlist->n_members;
+		if ( n_members != 1 ) {
+			ReleaseSysCacheList(catlist);
+			if ( n_members > 1 )
+				elog(ERROR,"There are %d candidates for '%s' function", n_members, quals_funcname);
+			return false;
+		}
+		/* mmPFPOid is stored last, so a failed refresh is attempted again on the next call. */
+		tup = &catlist->members[0]->tuple;
+		mmGTOid = HeapTupleGetOid(Form_pg_proc, tup);
+		mmPFPOid = tmp_mmPFPOid;
+	}
+
+	ReleaseSysCacheList(catlist);
+	return true;
+}
+
+static Pattern_Prefix_Status
+mchar_pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Const **prefix)
+{
+	if (!fillMCharOIDS())		/* OIDs could not be resolved: report no prefix */
+		return Pattern_Prefix_None;
+	/* Call the function whose OID is in mmPFPOid; its int32 result is cast to the status enum. */
+	return (Pattern_Prefix_Status)DatumGetInt32( OidFunctionCall3(
+		mmPFPOid,
+		PointerGetDatum(patt),
+		Int32GetDatum(ptype),
+		PointerGetDatum(prefix)
+	) );
+}
/*
* Planner support functions for LIKE, regex, and related operators
@@ -259,6 +361,7 @@ match_pattern_prefix(Node *leftop,
Expr *expr;
FmgrInfo ltproc;
Const *greaterstr;
+ bool isMchar = false;
/*
* Can't do anything with a non-constant or NULL pattern argument.
@@ -275,8 +378,16 @@ match_pattern_prefix(Node *leftop,
/*
* Try to extract a fixed prefix from the pattern.
*/
- pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
- &prefix, NULL);
+ ldatatype = exprType(leftop);
+ if (fillMCharOIDS() && (ldatatype == mcharOid ||
+ ldatatype == mvarcharOid))
+ {
+ pstatus = mchar_pattern_fixed_prefix(patt, ptype, &prefix);
+ isMchar = true;
+ }
+ else
+ pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
+ &prefix, NULL);
/* fail if no fixed prefix */
if (pstatus == Pattern_Prefix_None)
@@ -291,7 +402,6 @@ match_pattern_prefix(Node *leftop,
* selected operators also determine the needed type of the prefix
* constant.
*/
- ldatatype = exprType(leftop);
switch (ldatatype)
{
case TEXTOID:
@@ -358,7 +468,16 @@ match_pattern_prefix(Node *leftop,
break;
default:
/* Can't get here unless we're attached to the wrong operator */
- return NIL;
+ if (!isMchar)
+ return NIL;
+ collation_aware = false;
+ rdatatype = mvarcharOid;
+ ltopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTLessStrategyNumber);
+ eqopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTEqualStrategyNumber);
+ geopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTGreaterEqualStrategyNumber);
}
/*
@@ -370,9 +489,10 @@ match_pattern_prefix(Node *leftop,
*/
if (prefix->consttype != rdatatype)
{
- Assert(prefix->consttype == TEXTOID &&
- rdatatype == BPCHAROID);
- prefix->consttype = rdatatype;
+ Assert(isMchar || (prefix->consttype == TEXTOID &&
+ rdatatype == BPCHAROID));
+ if (!isMchar)
+ prefix->consttype = rdatatype;
}
/*
@@ -454,7 +574,12 @@ match_pattern_prefix(Node *leftop,
if (!op_in_opfamily(ltopr, opfamily))
return result;
 	fmgr_info(get_opcode(ltopr), &ltproc);
-	greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
+ if (isMchar)
+ greaterstr = (Const*)DatumGetPointer(OidFunctionCall1(
+ mmGTOid,
+ PointerGetDatum(prefix)));
+ else
+		greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
if (greaterstr)
{
expr = make_opclause(ltopr, BOOLOID, false,
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index fe5edc0027d..1fd077b404f 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -27,7 +27,6 @@
#include "utils/lsyscache.h"
#include "utils/typcache.h"
-
/*
* structure to cache metadata needed for record I/O
*/
@@ -824,6 +823,9 @@ record_cmp(FunctionCallInfo fcinfo)
{
HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int record_cmp_prefix =
+ (PG_NARGS() == 3 && PG_GETARG_INT32(2) > 0) ?
+ PG_GETARG_INT32(2) : INT_MAX;
int result = 0;
Oid tupType1;
Oid tupType2;
@@ -908,6 +910,9 @@ record_cmp(FunctionCallInfo fcinfo)
nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+ ncolumns1 = Min(ncolumns1, record_cmp_prefix);
+ ncolumns2 = Min(ncolumns2, record_cmp_prefix);
+
/*
* Scan corresponding columns, allowing for dropped columns in different
* places in the two rows. i1 and i2 are physical column indexes, j is
@@ -1068,6 +1073,9 @@ record_eq(PG_FUNCTION_ARGS)
{
HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int record_cmp_prefix =
+ (PG_NARGS() == 3 && PG_GETARG_INT32(2) > 0) ?
+ PG_GETARG_INT32(2) : INT_MAX;
bool result = true;
Oid tupType1;
Oid tupType2;
@@ -1152,6 +1160,9 @@ record_eq(PG_FUNCTION_ARGS)
nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+ ncolumns1 = Min(ncolumns1, record_cmp_prefix);
+ ncolumns2 = Min(ncolumns2, record_cmp_prefix);
+
/*
* Scan corresponding columns, allowing for dropped columns in different
* places in the two rows. i1 and i2 are physical column indexes, j is
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 3d6e6bdbfd2..00bbb6666c3 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -39,6 +39,7 @@
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
+#include "common/hashfn.h"
#include "common/keywords.h"
#include "executor/spi.h"
#include "funcapi.h"
@@ -4923,6 +4924,8 @@ static char *
make_colname_unique(char *colname, deparse_namespace *dpns,
deparse_columns *colinfo)
{
+ CHECK_FOR_INTERRUPTS();
+
/*
* If the selected name isn't unique, append digits to make it so. For a
* very long input name, we might have to truncate to stay within
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c82bacd7c15..f8523fb7e43 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -149,13 +149,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
+static double eqjoinsel_inner(Oid operator, Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2);
+ bool have_mcvs1, bool have_mcvs2,
+ int record_cmp_prefix);
static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
@@ -163,7 +164,8 @@ static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
- RelOptInfo *inner_rel);
+ RelOptInfo *inner_rel,
+ int record_cmp_prefix);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
@@ -219,6 +221,20 @@ static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
static double btcost_correlation(IndexOptInfo *index,
VariableStatData *vardata);
+static bool
+join_is_reversed_variables(SpecialJoinInfo *sjinfo,
+						   VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	/* var1 belongs to the syntactic right-hand side */
+	if (vardata1->rel != NULL &&
+		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+		return true;
+
+	/* otherwise reversed only when var2 belongs to the syntactic left-hand side */
+	return vardata2->rel != NULL &&
+		bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand);
+}
+
/*
* eqsel - Selectivity of "=" for any data types.
@@ -283,14 +299,64 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
((Const *) other)->constisnull,
varonleft, negate);
else
+ {
selec = var_eq_non_const(&vardata, operator, collation, other,
varonleft, negate);
+ if (IsA(other, Var) || (IsA(other, RelabelType) &&
+ IsA(((RelabelType *) other)->arg, Var)))
+ {
+ VariableStatData rightvardata;
+ double varselec;
+ examine_variable(root, other, 0, &rightvardata);
+ varselec = eqjoin_selectivity(root, operator, collation, &vardata,
+ &rightvardata, NULL, -1);
+ ReleaseVariableStats(rightvardata);
+
+			/*
+			 * If 'other' is a variable, also estimate the clause as an equi-join
+			 * against it and keep the larger of the two selectivities. This
+			 * avoids overly small row estimates, which typically cause nested
+			 * loop joins in plan with very bad estimations.
+			 */
+ if (varselec > selec)
+ selec = varselec;
+ }
+ }
+
ReleaseVariableStats(vardata);
return selec;
}
+static bool
+get_cached_attstatsslot(AttStatsSlot *sslot, VariableStatData *vardata,
+						int reqkind, Oid reqop, int flags)
+{
+	/*
+	 * Try the slot cache first when vardata carries one: a hit is handed
+	 * back to the caller as a struct copy of the cached slot.
+	 */
+	if (vardata->sslots != NULL)
+	{
+		AttStatsSlot *cached = fill_attstatsslot(vardata->sslots,
+												 vardata->statsTuple,
+												 reqkind, reqop, flags);
+
+		if (cached != NULL)
+		{
+			*sslot = *cached;
+			return true;
+		}
+	}
+
+	/* No cache, or fill_attstatsslot() returned NULL: do a regular lookup. */
+	return get_attstatsslot(sslot, vardata->statsTuple,
+							reqkind, reqop,
+							flags);
+}
+
+
/*
* var_eq_const --- eqsel for var = const case
*
@@ -300,6 +366,15 @@ double
var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
Datum constval, bool constisnull,
bool varonleft, bool negate)
+{
+ return eqconst_selectivity(oproid, collation, vardata, constval,
+ constisnull, varonleft, negate, -1);
+}
+
+Selectivity
+eqconst_selectivity(Oid oproid, Oid collation,
+ VariableStatData *vardata, Datum constval, bool constisnull,
+ bool varonleft, bool negate, int record_cmp_prefix)
{
double selec;
double nullfrac = 0.0;
@@ -332,7 +407,8 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* be different from ours, but it's much more likely to be right than
* ignoring the information.)
*/
- if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0 &&
+ record_cmp_prefix <= 0)
{
selec = 1.0 / vardata->rel->tuples;
}
@@ -351,11 +427,11 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* don't like this, maybe you shouldn't be using eqsel for your
* operator...)
*/
- if (get_attstatsslot(&sslot, vardata->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ if (get_cached_attstatsslot(&sslot, vardata,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
fmgr_info(opfuncoid, &eqproc);
@@ -366,15 +442,17 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* eqproc returns NULL, though really equality functions should
* never do that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
/* be careful to apply operator right way 'round */
if (varonleft)
fcinfo->args[1].value = constval;
else
fcinfo->args[0].value = constval;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
for (i = 0; i < sslot.nvalues; i++)
{
@@ -518,16 +596,32 @@ var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
if (ndistinct > 1)
selec /= ndistinct;
- /*
- * Cross-check: selectivity should never be estimated as more than the
- * most common value's.
- */
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_NUMBERS))
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
- if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
- selec = sslot.numbers[0];
+ int i;
+ double sum_selec = 0.0;
+
+ /*
+ * Compute quadratic mean, walk on array in reverse direction to
+ * do not lose accuracy. We don't bother about sslot.nnumbers
+ * equality to zero, because in this case we just get the same
+ * result. But equality to zero is unlikely.
+ */
+ for(i=sslot.nnumbers - 1; i>=0; i--)
+ sum_selec += sslot.numbers[i] * sslot.numbers[i];
+
+ selec = sqrt((selec * selec + sum_selec) /
+ ((double)sslot.nnumbers + 1.0));
+
+ /*
+ * Cross-check: selectivity should never be estimated as
+ * more than the most common value's.
+ */
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+ selec = sslot.numbers[0];
+
free_attstatsslot(&sslot);
}
}
@@ -1024,6 +1118,138 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
return selec;
}
+/*
+ * Binary search of bound constval in histogram values sslot->values[start .. nvalues-1]; returns the final lower bound
+ */
+static int
+prefix_record_histogram_search(AttStatsSlot *sslot, int start,
+							   Datum constval, int record_cmp_prefix,
+							   FmgrInfo *opproc, bool isgt)
+{
+	int			lobound = start;	/* first possible slot to search */
+	int			hibound = sslot->nvalues;	/* last+1 slot to search */
+	/* Narrow the half-open range [lobound, hibound) until it is empty. */
+	while (lobound < hibound)
+	{
+		int			probe = (lobound + hibound) / 2;
+		bool		ltcmp;
+		/* Evaluate opproc(values[probe], constval, record_cmp_prefix). */
+		ltcmp = DatumGetBool(FunctionCall3Coll(opproc,
+											   DEFAULT_COLLATION_OID,
+											   sslot->values[probe],
+											   constval,
+											   Int32GetDatum(record_cmp_prefix)));
+		if (isgt)		/* isgt inverts the outcome of the test */
+			ltcmp = !ltcmp;
+		if (ltcmp)
+			lobound = probe + 1;	/* continue in the upper half */
+		else
+			hibound = probe;		/* continue in the lower half, probe included */
+	}
+
+	return lobound;
+}
+
+/*
+ * Estimate selectivity for a record-prefix range by counting the histogram
+ * bins lying between constvalLeft (compared on record_cmp_prefix columns)
+ * and constvalRight (compared with prefix -1) - similar to
+ * histogram_selectivity() but it knows about sortability of record.
+ * Returns -1.0, leaving *n_bins untouched, when no histogram is available;
+ * otherwise stores the number of matched bins in *n_bins and returns that
+ * number (or half a bin if none matched) divided by the histogram size.
+ */
+double
+prefix_record_histogram_selectivity(VariableStatData *vardata,
+									Datum constvalLeft, Datum constvalRight,
+									int record_cmp_prefix,
+									double ndistinct,int *n_bins)
+{
+	double			result = -1.0;
+	AttStatsSlot	sslot;
+
+	if (HeapTupleIsValid(vardata->statsTuple) &&
+		get_cached_attstatsslot(&sslot, vardata,
+								STATISTIC_KIND_HISTOGRAM, InvalidOid,
+								ATTSTATSSLOT_VALUES))
+	{
+		FmgrInfo	opprocLT, opprocGT;
+		int			start = -1, end = -1;
+
+		if (sslot.nvalues > 2)
+		{
+			fmgr_info(F_RECORD_GE, &opprocGT);
+			fmgr_info(F_RECORD_LE, &opprocLT);
+
+			start = prefix_record_histogram_search(&sslot, 0, constvalLeft,
+												   record_cmp_prefix,
+												   &opprocGT, true);
+			if (start < 0)
+				start = 0;
+			end = prefix_record_histogram_search(&sslot, start, constvalRight,
+												 -1,
+												 &opprocLT, false);
+			if (end >= sslot.nvalues)
+				end = sslot.nvalues - 1;
+		}
+		else
+		{
+			fmgr_info(F_RECORD_GT, &opprocGT);
+			fmgr_info(F_RECORD_LE, &opprocLT);
+
+			/* Find first bin which start border is less than constant */
+			for (start = sslot.nvalues - 1; start >= 0; start--)
+			{
+				if (DatumGetBool(FunctionCall3Coll(&opprocGT,
+												   DEFAULT_COLLATION_OID,
+												   constvalLeft,
+												   sslot.values[start],
+												   Int32GetDatum(record_cmp_prefix))))
+					break;
+			}
+
+			if (start < 0)
+				start=0;
+
+			/* Find last bin which end border is less than constant */
+			for (end = start; end <= sslot.nvalues - 2; end ++)
+			{
+				if (DatumGetBool(FunctionCall3Coll(&opprocLT,
+												   DEFAULT_COLLATION_OID,
+												   constvalRight,
+												   sslot.values[end + 1],
+												   Int32GetDatum(-1))))
+					break;
+			}
+		}
+
+		if (opprocGT.fn_extra)
+			pfree(opprocGT.fn_extra);
+		if (opprocLT.fn_extra)
+			pfree(opprocLT.fn_extra);
+
+		*n_bins = (start >= end) ? 0 : end - start;
+		result = (start >= end) ? 0.5 : end - start;
+		result /= ((double) (sslot.nvalues));
+
+		/* No bin matched: scale the half-bin guess down; needs a rel with tuples > 0. */
+		if (*n_bins == 0 && ndistinct > 1 &&
+			vardata->rel && vardata->rel->tuples > 0)
+		{
+			double	ntuples = vardata->rel->tuples;
+			double	ntuplesbin = ntuples / sslot.nvalues;
+
+			result *= (1 - pow((ntuples - ntuplesbin) / ntuples,
+							   ntuples / ndistinct));
+		}
+
+		/* Release the slot only now: sslot.nvalues was still needed above. */
+		free_attstatsslot(&sslot);
+	}
+
+	return result;
+}
+
/*
* ineq_histogram_selectivity - Examine the histogram for scalarineqsel
*
@@ -2284,11 +2510,32 @@ eqjoinsel(PG_FUNCTION_ARGS)
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
- Oid collation = PG_GET_COLLATION();
+ VariableStatData vardata1;
+ VariableStatData vardata2;
+ Selectivity s;
+ Oid collation = PG_GET_COLLATION();
+
+ get_join_variables(root, args, sjinfo,
+ &vardata1, &vardata2, NULL);
+
+
+ s = eqjoin_selectivity(root, operator, collation, &vardata1, &vardata2,
+ sjinfo, -1);
+
+ ReleaseVariableStats(vardata1);
+ ReleaseVariableStats(vardata2);
+
+ PG_RETURN_FLOAT8((float8)s);
+}
+
+Selectivity
+eqjoin_selectivity(PlannerInfo *root, Oid operator, Oid collation,
+ VariableStatData* vardata1,
+ VariableStatData* vardata2, SpecialJoinInfo *sjinfo,
+ int record_cmp_prefix)
+{
double selec;
double selec_inner;
- VariableStatData vardata1;
- VariableStatData vardata2;
double nd1;
double nd2;
bool isdefault1;
@@ -2304,11 +2551,14 @@ eqjoinsel(PG_FUNCTION_ARGS)
bool join_is_reversed;
RelOptInfo *inner_rel;
- get_join_variables(root, args, sjinfo,
- &vardata1, &vardata2, &join_is_reversed);
+ join_is_reversed = sjinfo && join_is_reversed_variables(sjinfo, vardata1, vardata2);
- nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
- nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+
+ if ((isdefault1 && vardata1->rel && vardata1->rel->tuples <= 0.0) ||
+ (isdefault2 && vardata2->rel && vardata2->rel->tuples <= 0.0))
+ return 0.0;
opfuncoid = get_opcode(operator);
@@ -2319,47 +2569,48 @@ eqjoinsel(PG_FUNCTION_ARGS)
* There is no use in fetching one side's MCVs if we lack MCVs for the
* other side, so do a quick check to verify that both stats exist.
*/
- get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) &&
- HeapTupleIsValid(vardata2.statsTuple) &&
- get_attstatsslot(&sslot1, vardata1.statsTuple,
+ get_mcv_stats = (HeapTupleIsValid(vardata1->statsTuple) &&
+ HeapTupleIsValid(vardata2->statsTuple) &&
+ get_attstatsslot(&sslot1, vardata1->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
0) &&
- get_attstatsslot(&sslot2, vardata2.statsTuple,
+ get_attstatsslot(&sslot2, vardata2->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
0));
- if (HeapTupleIsValid(vardata1.statsTuple))
+ if (HeapTupleIsValid(vardata1->statsTuple))
{
/* note we allow use of nullfrac regardless of security check */
- stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+ stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
if (get_mcv_stats &&
- statistic_proc_security_check(&vardata1, opfuncoid))
- have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
+ statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
- if (HeapTupleIsValid(vardata2.statsTuple))
+ if (HeapTupleIsValid(vardata2->statsTuple))
{
/* note we allow use of nullfrac regardless of security check */
- stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+ stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
if (get_mcv_stats &&
- statistic_proc_security_check(&vardata2, opfuncoid))
- have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
+ statistic_proc_security_check(vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
/* We need to compute the inner-join selectivity in all cases */
- selec_inner = eqjoinsel_inner(opfuncoid, collation,
- &vardata1, &vardata2,
+ selec_inner = eqjoinsel_inner(operator, opfuncoid, collation,
+ vardata1, vardata2,
nd1, nd2,
isdefault1, isdefault2,
&sslot1, &sslot2,
stats1, stats2,
- have_mcvs1, have_mcvs2);
+ have_mcvs1, have_mcvs2,
+ record_cmp_prefix);
- switch (sjinfo->jointype)
+ switch (sjinfo ? sjinfo->jointype : JOIN_INNER)
{
case JOIN_INNER:
case JOIN_LEFT:
@@ -2379,26 +2630,28 @@ eqjoinsel(PG_FUNCTION_ARGS)
if (!join_is_reversed)
selec = eqjoinsel_semi(opfuncoid, collation,
- &vardata1, &vardata2,
+ vardata1, vardata2,
nd1, nd2,
isdefault1, isdefault2,
&sslot1, &sslot2,
stats1, stats2,
have_mcvs1, have_mcvs2,
- inner_rel);
+ inner_rel,
+ record_cmp_prefix);
else
{
Oid commop = get_commutator(operator);
Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
selec = eqjoinsel_semi(commopfuncoid, collation,
- &vardata2, &vardata1,
+ vardata2, vardata1,
nd2, nd1,
isdefault2, isdefault1,
&sslot2, &sslot1,
stats2, stats1,
have_mcvs2, have_mcvs1,
- inner_rel);
+ inner_rel,
+ record_cmp_prefix);
}
/*
@@ -2424,12 +2677,132 @@ eqjoinsel(PG_FUNCTION_ARGS)
free_attstatsslot(&sslot1);
free_attstatsslot(&sslot2);
- ReleaseVariableStats(vardata1);
- ReleaseVariableStats(vardata2);
-
CLAMP_PROBABILITY(selec);
- PG_RETURN_FLOAT8((float8) selec);
+ return selec;
+}
+
+static int
+cmp_vardata(FmgrInfo *eqproc, FmgrInfo *ltproc,
+			Datum v1, Datum v2, int record_cmp_prefix)
+{
+	Datum		prefix = Int32GetDatum(record_cmp_prefix);
+
+	/*
+	 * Three-way compare built from two boolean operators: -1 when ltproc
+	 * reports v1 < v2, 0 when eqproc reports equality, 1 otherwise.
+	 */
+	if (DatumGetBool(FunctionCall3Coll(ltproc, DEFAULT_COLLATION_OID,
+									   v1, v2, prefix)))
+		return -1;
+
+	if (DatumGetBool(FunctionCall3Coll(eqproc, DEFAULT_COLLATION_OID,
+									   v1, v2, prefix)))
+		return 0;
+
+	/* neither less nor equal */
+	return 1;
+}
+
+/*
+ * Estimate equi-join selectivity by merge-walking the two histograms with
+ * cmp_vardata().  Returns -1.0 if either side lacks stats or a histogram, or
+ * if no mergejoin opfamily of eqop supplies a "<" operator for the two types.
+ */
+static double
+eqjoinsel_histogram(Oid eqop,
+					VariableStatData *vardata1, VariableStatData *vardata2,
+					int record_cmp_prefix, double nd1, double nd2)
+{
+	bool		have_hist1 = false, have_hist2 = false;
+	AttStatsSlot sslot1, sslot2;
+	int			i1 = 0, i2 = 0;
+	double		n1 = 0.0, n2 = 0.0;
+	double		result = -1.0;
+	FmgrInfo	eqproc, ltproc;
+	Oid			orderop = InvalidOid;
+	List		*opfamilies;
+	ListCell	*lc;
+
+	if (!(HeapTupleIsValid(vardata1->statsTuple) &&
+		  HeapTupleIsValid(vardata2->statsTuple)))
+		return result;
+
+	memset(&sslot1, 0, sizeof(sslot1));
+	memset(&sslot2, 0, sizeof(sslot2));
+
+	have_hist1 = get_cached_attstatsslot(&sslot1, vardata1,
+										 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+										 ATTSTATSSLOT_VALUES);
+	have_hist2 = get_cached_attstatsslot(&sslot2, vardata2,
+										 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+										 ATTSTATSSLOT_VALUES);
+
+	if (!(have_hist1 && have_hist2))
+		goto out;
+
+	opfamilies = get_mergejoin_opfamilies(eqop);
+	foreach(lc, opfamilies) {
+		Oid		opf = lfirst_oid(lc);
+
+		orderop = get_opfamily_member(opf, vardata1->vartype, vardata2->vartype,
+									  BTLessStrategyNumber);
+
+		if (OidIsValid(orderop))
+			break;
+	}
+
+	/* == from fulleq, for example */
+	if (!OidIsValid(orderop))
+		goto out;
+
+	fmgr_info(get_opcode(eqop), &eqproc);
+	fmgr_info(get_opcode(orderop), &ltproc);
+
+	result = 0.0;
+	while(i1 < sslot1.nvalues && i2 < sslot2.nvalues)
+	{
+		int		cmp = cmp_vardata(&eqproc, &ltproc, sslot1.values[i1],
+								  sslot2.values[i2], record_cmp_prefix);
+
+		if (cmp < 0)
+		{
+			i1++;
+			n1++;
+			if (n2 > 0)
+			{
+				result += 0.5 / (sslot1.nvalues*sslot2.nvalues);
+				n2=0.0;
+			}
+		}
+		else if (cmp > 0)
+		{
+			i2++;
+			n2++;
+			if (n1 > 0)
+			{
+				result += 0.5 / (sslot1.nvalues*sslot2.nvalues);
+				n1=0.0;
+			}
+		}
+		else
+		{
+			i1++; i2++;
+			n1++; n2++;
+			result += (n1/sslot1.nvalues)*(n2/sslot2.nvalues);
+			n1 = 0.0; n2 = 0.0;
+		}
+	}
+
+	nd1 /= sslot1.nvalues;
+	nd2 /= sslot2.nvalues;
+	result /= (nd1 > nd2) ? nd1 : nd2;
+
+out:
+	free_attstatsslot(&sslot1);
+	free_attstatsslot(&sslot2);
+
+	return result;
+}
/*
@@ -2439,13 +2812,14 @@ eqjoinsel(PG_FUNCTION_ARGS)
* that it's worth trying to distinguish them here.
*/
static double
-eqjoinsel_inner(Oid opfuncoid, Oid collation,
+eqjoinsel_inner(Oid operator, Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2)
+ bool have_mcvs1, bool have_mcvs2,
+ int record_cmp_prefix)
{
double selec;
@@ -2463,7 +2837,7 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* results", Technical Report 1018, Computer Science Dept., University
* of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
*/
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
@@ -2489,10 +2863,12 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
@@ -2618,11 +2994,34 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
- selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
- if (nd1 > nd2)
- selec /= nd1;
- else
- selec /= nd2;
+ if (isdefault1 && vardata1->rel && nd1 > vardata1->rel->rows)
+ {
+ nd1 = vardata1->rel->rows;
+ if (nd1 == 0.0)
+ nd1 = 1.0;
+ }
+
+ if (isdefault2 && vardata2->rel && nd2 > vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ if (nd2 == 0.0)
+ nd2 = 1.0;
+ }
+
+ selec = eqjoinsel_histogram(operator, vardata1, vardata2,
+ record_cmp_prefix, nd1, nd2);
+
+ if (selec < 0)
+ {
+ selec = 1.0;
+
+ if (nd1 > nd2)
+ selec /= nd1;
+ else
+ selec /= nd2;
+ }
+
+ selec *= (1.0 - nullfrac1) * (1.0 - nullfrac2);
}
return selec;
@@ -2643,7 +3042,8 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
- RelOptInfo *inner_rel)
+ RelOptInfo *inner_rel,
+ int record_cmp_prefix)
{
double selec;
@@ -2690,7 +3090,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
*/
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
@@ -2719,10 +3119,12 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
@@ -3446,11 +3848,29 @@ double
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List **pgset, EstimationInfo *estinfo)
{
- List *varinfos = NIL;
+ return estimate_num_groups_incremental(root, groupExprs,
+ input_rows, pgset, estinfo,
+ NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ * An estimate_num_groups variant, optimized for cases that are adding the
+ * expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+ double input_rows,
+ List **pgset, EstimationInfo *estinfo,
+ List **cache_varinfos, int prevNExprs)
+{
+ List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
double srf_multiplier = 1.0;
double numdistinct;
ListCell *l;
- int i;
+ int i,
+ j,
+ k;
/* Zero the estinfo output parameter, if non-NULL */
if (estinfo != NULL)
@@ -3481,7 +3901,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
*/
numdistinct = 1.0;
- i = 0;
+ i = j = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
@@ -3490,6 +3910,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List *varshere;
ListCell *l2;
+ /* was done on previous call */
+ if (cache_varinfos && j++ < prevNExprs)
+ {
+ if (pgset)
+ i++; /* to keep in sync with lines below */
+ continue;
+ }
+
/* is expression in this grouping set? */
if (pgset && !list_member_int(*pgset, i++))
continue;
@@ -3539,6 +3967,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
ReleaseVariableStats(vardata);
+ if (list_length(varinfos) > 2*list_length(groupExprs))
+ continue;
+
/*
* Else pull out the component Vars. Handle PlaceHolderVars by
* recursing into their arguments (effectively assuming that the
@@ -3559,13 +3990,21 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
if (varshere == NIL)
{
if (contain_volatile_functions(groupexpr))
+ {
+ if (cache_varinfos)
+ *cache_varinfos = varinfos;
return input_rows;
+ }
continue;
}
+ if (list_length(varshere) >= 8)
+ continue;
+
/*
* Else add variables to varinfos list
*/
+ k = 0;
foreach(l2, varshere)
{
Node *var = (Node *) lfirst(l2);
@@ -3573,9 +4012,15 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
examine_variable(root, var, 0, &vardata);
varinfos = add_unique_group_var(root, varinfos, var, &vardata);
ReleaseVariableStats(vardata);
+
+ if (++k > 4)
+ break;
}
}
+ if (cache_varinfos)
+ *cache_varinfos = varinfos;
+
/*
* If now no Vars, we must have an all-constant or all-boolean GROUP BY
* list.
@@ -5233,14 +5678,8 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
examine_variable(root, left, 0, vardata1);
examine_variable(root, right, 0, vardata2);
- if (vardata1->rel &&
- bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
- *join_is_reversed = true; /* var1 is on RHS */
- else if (vardata2->rel &&
- bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
- *join_is_reversed = true; /* var2 is on LHS */
- else
- *join_is_reversed = false;
+ if (join_is_reversed)
+ *join_is_reversed = join_is_reversed_variables(sjinfo, vardata1, vardata2);
}
/* statext_expressions_load copies the tuple, so just pfree it. */
@@ -6972,7 +7411,7 @@ index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
other_operand = NULL; /* keep compiler quiet */
}
- cost_qual_eval_node(&index_qual_cost, other_operand, root);
+ cost_qual_eval_node_index(&index_qual_cost, other_operand, root);
qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
}
return qual_arg_cost;
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 657648996c2..22a30166f34 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -20,6 +20,7 @@
#include "access/table.h"
#include "access/xact.h"
#include "catalog/catalog.h"
+#include "catalog/namespace.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
@@ -2373,27 +2374,25 @@ CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos,
* system relation.
*/
void
-PrepareToInvalidateCacheTuple(Relation relation,
+PrepareToInvalidateCacheTuple(Oid reloid,
HeapTuple tuple,
HeapTuple newtuple,
void (*function) (int, uint32, Oid, void *),
void *context)
{
slist_iter iter;
- Oid reloid;
+ char prevTempScope = temp_table_scope;
CACHE_elog(DEBUG2, "PrepareToInvalidateCacheTuple: called");
/*
* sanity checks
*/
- Assert(RelationIsValid(relation));
+ Assert(OidIsValid(reloid));
Assert(HeapTupleIsValid(tuple));
Assert(PointerIsValid(function));
Assert(CacheHdr != NULL);
- reloid = RelationGetRelid(relation);
-
/* ----------------
* for each cache
* if the cache contains tuples from the specified relation
@@ -2407,6 +2406,9 @@ PrepareToInvalidateCacheTuple(Relation relation,
CatCache *ccp = slist_container(CatCache, cc_next, iter.cur);
uint32 hashvalue;
Oid dbid;
+ bool isLocal = false;
+ Oid relationId = InvalidOid;
+ bool checkTemp = false;
if (ccp->cc_reloid != reloid)
continue;
@@ -2417,6 +2419,47 @@ PrepareToInvalidateCacheTuple(Relation relation,
hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
+ if (reloid == RelationRelationId)
+ {
+ Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
+
+ isLocal = (classtup->relpersistence == RELPERSISTENCE_TEMP) ?
+ true : false;
+ }
+ else if (reloid == AttributeRelationId)
+ {
+ Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
+
+ relationId = atttup->attrelid;
+ checkTemp = true;
+ }
+ else if (reloid == IndexRelationId)
+ {
+ Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple);
+
+ relationId = indextup->indexrelid;
+ checkTemp = true;
+ }
+
+ if (checkTemp)
+ {
+ HeapTuple htup = SearchSysCache1(RELOID,
+ ObjectIdGetDatum(relationId));
+
+ if (HeapTupleIsValid(htup))
+ {
+ Form_pg_class c = (Form_pg_class)GETSTRUCT(htup);
+
+ isLocal = (c->relisshared == false &&
+ c->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(c->relnamespace)) ?
+ true : false;
+ ReleaseSysCache(htup);
+ }
+ }
+
+ temp_table_scope = isLocal ? TEMP_TABLE_SCOPE_LOCAL : prevTempScope;
+
(*function) (ccp->id, hashvalue, dbid, context);
if (newtuple)
@@ -2429,6 +2472,8 @@ PrepareToInvalidateCacheTuple(Relation relation,
(*function) (ccp->id, newhashvalue, dbid, context);
}
}
+
+ temp_table_scope = prevTempScope;
}
/* ResourceOwner callbacks */
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 02505c88b8e..c37d47fccaf 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -118,6 +118,7 @@
#include "access/xact.h"
#include "access/xloginsert.h"
#include "catalog/catalog.h"
+#include "catalog/namespace.h"
#include "catalog/pg_constraint.h"
#include "miscadmin.h"
#include "storage/procnumber.h"
@@ -293,6 +294,11 @@ static struct RELCACHECALLBACK
static int relcache_callback_count = 0;
+
+
+char temp_table_scope = TEMP_TABLE_SCOPE_NOTEMP;
+
+
static struct RELSYNCCALLBACK
{
RelSyncCallbackFunction function;
@@ -349,6 +355,14 @@ AddInvalidationMessage(InvalidationMsgsGroup *group, int subgroup,
}
/* Okay, add message to current group */
ima->msgs[nextindex] = *msg;
+
+ /* Mark the message as local-only when it relates to temporary tables.
+ Don't mark snapshot invalidations, nor any message at an isolation
+ level higher than READ COMMITTED, because that causes trouble. */
+ ima->msgs[nextindex].isLocal = IsLocalTempTableScope() &&
+ msg->id != SHAREDINVALSNAPSHOT_ID &&
+ XactIsoLevel == XACT_READ_COMMITTED;
+
group->nextmsg[subgroup]++;
}
@@ -1442,6 +1456,8 @@ CacheInvalidateHeapTupleCommon(Relation relation,
Oid tupleRelId;
Oid databaseId;
Oid relationId;
+ bool tempRel = false;
+ bool checkTemp = false;
/* PrepareToInvalidateCacheTuple() needs relcache */
AssertCouldGetRelation();
@@ -1478,7 +1494,7 @@ CacheInvalidateHeapTupleCommon(Relation relation,
RegisterSnapshotInvalidation(info, databaseId, tupleRelId);
}
else
- PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
+ PrepareToInvalidateCacheTuple(tupleRelId, tuple, newtuple,
RegisterCatcacheInvalidation,
(void *) info);
@@ -1498,6 +1514,9 @@ CacheInvalidateHeapTupleCommon(Relation relation,
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
+
+ tempRel = (classtup->relpersistence == RELPERSISTENCE_TEMP) ?
+ true : false;
}
else if (tupleRelId == AttributeRelationId)
{
@@ -1516,6 +1535,7 @@ CacheInvalidateHeapTupleCommon(Relation relation,
* never come here for a shared rel anyway.)
*/
databaseId = MyDatabaseId;
+ checkTemp = true;
}
else if (tupleRelId == IndexRelationId)
{
@@ -1529,6 +1549,7 @@ CacheInvalidateHeapTupleCommon(Relation relation,
*/
relationId = indextup->indexrelid;
databaseId = MyDatabaseId;
+ checkTemp = true;
}
else if (tupleRelId == ConstraintRelationId)
{
@@ -1550,10 +1571,29 @@ CacheInvalidateHeapTupleCommon(Relation relation,
else
return;
+ if (checkTemp)
+ {
+ HeapTuple htup = SearchSysCache1(RELOID, ObjectIdGetDatum(relationId));
+
+ if (HeapTupleIsValid(htup))
+ {
+ Form_pg_class c = (Form_pg_class)GETSTRUCT(htup);
+
+ tempRel = (c->relisshared == false &&
+ c->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(c->relnamespace)) ?
+ true : false;
+
+ ReleaseSysCache(htup);
+ }
+ }
+
/*
* Yes. We need to register a relcache invalidation event.
*/
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(tempRel);
RegisterRelcacheInvalidation(info, databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1639,8 +1679,12 @@ CacheInvalidateRelcache(Relation relation)
else
databaseId = MyDatabaseId;
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(relation->rd_rel->relisshared == false &&
+ RELATION_IS_LOCAL(relation) &&
+ !RELATION_IS_OTHER_TEMP(relation));
RegisterRelcacheInvalidation(PrepareInvalidationState(),
databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1673,8 +1717,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
+
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(classtup->relisshared == false &&
+ classtup->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(classtup->relnamespace));
RegisterRelcacheInvalidation(PrepareInvalidationState(),
databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1760,6 +1809,8 @@ CacheInvalidateSmgr(RelFileLocatorBackend rlocator)
msg.sm.backend_hi = rlocator.backend >> 16;
msg.sm.backend_lo = rlocator.backend & 0xffff;
msg.sm.rlocator = rlocator.locator;
+ msg.isLocal = false;
+
/* check AddCatcacheInvalidationMessage() for an explanation */
VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
@@ -1788,6 +1839,8 @@ CacheInvalidateRelmap(Oid databaseId)
msg.rm.id = SHAREDINVALRELMAP_ID;
msg.rm.dbId = databaseId;
+ msg.isLocal = false;
+
/* check AddCatcacheInvalidationMessage() for an explanation */
VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index c460a72b75d..0b8eb787086 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -47,6 +47,7 @@
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -3476,6 +3477,52 @@ get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
return true;
}
+AttStatsSlot*
+fill_attstatsslot(AttStatsSlot *sslots, HeapTuple statstuple,
+ int reqkind, Oid reqop, int flags)
+{
+ int add_flags = 0, has_flags = 0;
+ AttStatsSlot *sslot;
+ MemoryContext oldctx;
+
+ if (reqkind >= STATISTIC_NUM_SLOTS)
+ return NULL; /* kind is outside the sslots array */
+
+ sslot = sslots + reqkind; /* sslots holds one slot per statistic kind */
+
+ if (sslot->values != NULL)
+ has_flags |= ATTSTATSSLOT_VALUES; /* values[] already loaded */
+ if (sslot->numbers != NULL)
+ has_flags |= ATTSTATSSLOT_NUMBERS; /* numbers[] already loaded */
+
+ if ((flags & ATTSTATSSLOT_VALUES) && !(has_flags & ATTSTATSSLOT_VALUES))
+ add_flags |= ATTSTATSSLOT_VALUES;
+
+ if ((flags & ATTSTATSSLOT_NUMBERS) && !(has_flags & ATTSTATSSLOT_NUMBERS))
+ add_flags |= ATTSTATSSLOT_NUMBERS;
+
+ if (add_flags == 0 && (reqop == InvalidOid || sslot->staop == reqop))
+ return sslot; /* cached slot already covers the request */
+
+ sslot->incache = false; /* clear the flag so free_attstatsslot() does not skip this slot */
+ free_attstatsslot(sslot);
+
+ oldctx = MemoryContextSwitchTo(GetMemoryChunkContext(sslots)); /* allocate in the context that owns sslots */
+
+ if (get_attstatsslot(sslot, statstuple, reqkind, reqop,
+ add_flags | has_flags)) /* reload what was held plus what is newly requested */
+ {
+ sslot->incache = true; /* mark as cache-owned: free_attstatsslot() will leave it alone */
+ MemoryContextSwitchTo(oldctx);
+ return sslot;
+ }
+ else
+ {
+ MemoryContextSwitchTo(oldctx);
+ return NULL;
+ }
+}
+
/*
* free_attstatsslot
* Free data allocated by get_attstatsslot
@@ -3483,6 +3530,10 @@ get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
void
free_attstatsslot(AttStatsSlot *sslot)
{
+ /* do not free cached slot */
+ if (sslot->incache)
+ return;
+
/* The values[] array was separately palloc'd by deconstruct_array */
if (sslot->values)
pfree(sslot->values);
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index f944453a1d8..4056c3a7691 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -21,6 +21,7 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/tempcat.h"
#include "catalog/pg_db_role_setting_d.h"
#include "catalog/pg_depend_d.h"
#include "catalog/pg_description_d.h"
@@ -162,6 +163,8 @@ InitCatalogCache(void)
sizeof(Oid), oid_compare);
CacheInitialized = true;
+
+ temp_catalog_init();
}
/*
@@ -799,3 +802,20 @@ oid_compare(const void *a, const void *b)
return pg_cmp_u32(oa, ob);
}
+
+HeapTuple TryGetSysCacheRelationClassTuple(Oid relid)
+{
+ HeapTuple tuple;
+
+ /* Fail if the RELOID cache has not been initialized */
+ if (!SysCache[RELOID])
+ return NULL;
+
+ /* Fail if the cache has not yet populated its tuple descriptor.
+ * We would rather fail than miss the cache. */
+ if (!SysCache[RELOID]->cc_tupdesc)
+ return NULL;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); /* pg_class row for relid, if any */
+ return tuple;
+}
\ No newline at end of file
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index d31cb45a058..4ff54d293a9 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -44,6 +44,8 @@ volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
+ProcessInterrupts_hook_type ProcessInterrupts_hook = NULL;
+
int MyProcPid;
pg_time_t MyStartTime;
TimestampTz MyStartTimestamp;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index d14b1678e7f..590bcf58645 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -85,6 +85,7 @@
#include "storage/pg_shmem.h"
#include "storage/predicate.h"
#include "storage/procnumber.h"
+#include "storage/rd.h"
#include "storage/standby.h"
#include "tcop/backend_startup.h"
#include "tcop/tcopprot.h"
@@ -101,6 +102,7 @@
#include "utils/ps_status.h"
#include "utils/rls.h"
#include "utils/xml.h"
+#include "access/tempcat.h"
#ifdef TRACE_SYNCSCAN
#include "access/syncscan.h"
@@ -1036,6 +1038,26 @@ struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
+ {
+ {"enable_temp_memory_catalog", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enable in-memory system catalog for temporary tables."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_temp_memory_catalog,
+ false,
+ NULL, NULL, NULL
+ },
+ {
+ {"enable_temp_rd_buffers", PGC_USERSET, RESOURCES_MEM,
+ gettext_noop("Enable in-memory page buffers for temporary tables."),
+ NULL,
+ GUC_EXPLAIN
+ },
+ &enable_temp_rd_buffers,
+ false,
+ NULL, NULL, NULL
+ },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
@@ -3860,6 +3882,17 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ // {
+ // {"temp_rd_buffers", PGC_USERSET, RESOURCES_MEM,
+ // gettext_noop("Sets the default number of buffers for each temporary table."),
+ // NULL,
+ // GUC_UNIT_BLOCKS | GUC_EXPLAIN
+ // },
+ // &temp_rd_buffers,
+ // 4, 1, 128<<10,
+ // NULL, NULL, NULL
+ // },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index a9d8293474a..cf39d9ab9f9 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -140,6 +140,7 @@
# (change requires restart)
# Caution: it is not advisable to set max_prepared_transactions nonzero unless
# you actively intend to use prepared transactions.
+#enable_temp_memory_catalog = off
#work_mem = 4MB # min 64kB
#hash_mem_multiplier = 2.0 # 1-1000.0 multiplier on hash table work_mem
#maintenance_work_mem = 64MB # min 64kB
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index c9aecab8d66..62e8d854b04 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -579,7 +579,7 @@ tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
int64
tuplestore_tuple_count(Tuplestorestate *state)
{
- return state->tuples;
+ return (state) ? state->tuples : 0;
}
/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index e816cf58101..d6580106d8c 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -97,6 +97,7 @@ usage(void)
printf(_(" -d, --dbname=CONNSTR connection string\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port number\n"));
+ printf(_(" -u, --umask set files mode according to umask (might break security!)\n"));
printf(_(" -U, --username=NAME connect as specified database user\n"));
printf(_(" -w, --no-password never prompt for password\n"));
printf(_(" -W, --password force password prompt (should happen automatically)\n"));
@@ -631,6 +632,7 @@ main(int argc, char **argv)
{"endpos", required_argument, NULL, 'E'},
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
+ {"umask", no_argument, NULL, 'u'},
{"username", required_argument, NULL, 'U'},
{"no-loop", no_argument, NULL, 'n'},
{"no-password", no_argument, NULL, 'w'},
@@ -677,7 +679,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "d:D:E:h:np:s:S:U:vwWZ:",
+ while ((c = getopt_long(argc, argv, "d:D:E:h:np:s:S:U:vuwWZ:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -712,6 +714,9 @@ main(int argc, char **argv)
case 'S':
replication_slot = pg_strdup(optarg);
break;
+ case 'u':
+ useumask = 1;
+ break;
case 'U':
dbuser = pg_strdup(optarg);
break;
diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c
index fb7a6a1d05d..1a377beba6c 100644
--- a/src/bin/pg_basebackup/pg_recvlogical.c
+++ b/src/bin/pg_basebackup/pg_recvlogical.c
@@ -338,11 +338,14 @@ StreamLogicalLog(void)
{
struct stat statbuf;
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
+
if (strcmp(outfile, "-") == 0)
outfd = fileno(stdout);
else
outfd = open(outfile, O_CREAT | O_APPEND | O_WRONLY | PG_BINARY,
- S_IRUSR | S_IWUSR);
+ mode);
if (outfd == -1)
{
pg_log_error("could not open log file \"%s\": %m", outfile);
diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c
index c7b8a4c3a4b..78783a24ade 100644
--- a/src/bin/pg_basebackup/streamutil.c
+++ b/src/bin/pg_basebackup/streamutil.c
@@ -47,6 +47,7 @@ char *dbhost = NULL;
char *dbuser = NULL;
char *dbport = NULL;
char *dbname = NULL;
+int useumask = 0; /* set to 1 by -u/--umask: create files group/other-readable, subject to umask */
int dbgetpassword = 0; /* 0=auto, -1=never, 1=always */
static char *password = NULL;
PGconn *conn = NULL;
diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h
index 017b227303c..051c17c0c4e 100644
--- a/src/bin/pg_basebackup/streamutil.h
+++ b/src/bin/pg_basebackup/streamutil.h
@@ -23,6 +23,7 @@ extern char *dbhost;
extern char *dbuser;
extern char *dbport;
extern char *dbname;
+extern int useumask;
extern int dbgetpassword;
extern int WalSegSz;
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index eaaabc5f374..a2e323d0ddc 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -28,6 +28,7 @@
#include "common/logging.h"
#include "pgtar.h"
#include "walmethods.h"
+#include "streamutil.h"
/* Size of zlib buffer for .tar.gz */
#define ZLIB_OUT_SIZE 4096
@@ -130,6 +131,8 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
size_t lz4bufsize = 0;
void *lz4buf = NULL;
#endif
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
clear_error(wwmethod);
@@ -144,7 +147,7 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
* does not do any system calls to fsync() to make changes permanent on
* disk.
*/
- fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode);
+ fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode | mode);
if (fd < 0)
{
wwmethod->lasterrno = errno;
@@ -838,6 +841,8 @@ tar_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
const char *temp_suffix, size_t pad_to_size)
{
TarMethodData *tar_data = (TarMethodData *) wwmethod;
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
char *tmppath;
clear_error(wwmethod);
@@ -849,7 +854,7 @@ tar_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
*/
tar_data->fd = open(tar_data->tarfilename,
O_WRONLY | O_CREAT | PG_BINARY,
- pg_file_create_mode);
+ pg_file_create_mode | mode);
if (tar_data->fd < 0)
{
wwmethod->lasterrno = errno;
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index a1976fae607..e58ec9b0a39 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -87,7 +87,6 @@ static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
static void flagInhAttrs(Archive *fout, DumpOptions *dopt, TableInfo *tblinfo,
int numTables);
static int strInArray(const char *pattern, char **arr, int arr_size);
-static IndxInfo *findIndexByOid(Oid oid);
/*
@@ -877,7 +876,7 @@ findTableByOid(Oid oid)
* finds the DumpableObject for the index with the given oid
* returns NULL if not found
*/
-static IndxInfo *
+IndxInfo *
findIndexByOid(Oid oid)
{
CatalogId catId;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 35e7581d66d..becd5edb9d3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2057,11 +2057,22 @@ selectDumpableType(TypeInfo *tyinfo, Archive *fout)
if (OidIsValid(tyinfo->typrelid) &&
tyinfo->typrelkind != RELKIND_COMPOSITE_TYPE)
{
- TableInfo *tytable = findTableByOid(tyinfo->typrelid);
+ DumpableObject *parentRel;
tyinfo->dobj.objType = DO_DUMMY_TYPE;
- if (tytable != NULL)
- tyinfo->dobj.dump = tytable->dobj.dump;
+
+ /* Get associated relation */
+ if (tyinfo->typrelkind == RELKIND_INDEX)
+ parentRel = (DumpableObject *) findIndexByOid(tyinfo->typrelid);
+ else
+ parentRel = (DumpableObject *) findTableByOid(tyinfo->typrelid);
+
+ /*
+ * If associated relation found, dump based on if the
+ * contents of the associated relation are being dumped.
+ */
+ if (parentRel != NULL)
+ tyinfo->dobj.dump = parentRel->dump;
else
tyinfo->dobj.dump = DUMP_COMPONENT_NONE;
return;
@@ -5548,6 +5559,9 @@ binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
Oid pg_type_multirange_array_oid;
TypeInfo *tinfo;
+ if (pg_type_oid == InvalidOid)
+ return;
+
appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_type oid\n");
appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_pg_type_oid('%u'::pg_catalog.oid);\n\n",
@@ -5679,6 +5693,17 @@ collectBinaryUpgradeClassOids(Archive *fout)
PQclear(res);
}
+/* For binary upgrade: preserve a relation's row-type OID; a no-op when it has none. */
+static void
+binary_upgrade_set_type_oids_by_rel_oid(Archive *fout, PQExpBuffer upgrade_buffer,
+ Oid pg_type_oid)
+{
+ if (!OidIsValid(pg_type_oid))
+ return; /* nothing to preserve */
+
+ binary_upgrade_set_type_oids_by_type_oid(fout, upgrade_buffer, pg_type_oid, false, false);
+}
+
static void
binary_upgrade_set_pg_class_oids(Archive *fout,
PQExpBuffer upgrade_buffer, Oid pg_class_oid)
@@ -7687,6 +7712,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_indnkeyatts,
i_indnatts,
i_indkey,
+ i_indtype,
i_indisclustered,
i_indisreplident,
i_indnullsnotdistinct,
@@ -7745,7 +7771,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
appendPQExpBufferStr(query,
"pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, "
- "i.indkey, i.indisclustered, "
+ "i.indkey, t.reltype AS indtype, i.indisclustered, "
"c.contype, c.conname, "
"c.condeferrable, c.condeferred, "
"c.tableoid AS contableoid, "
@@ -7865,6 +7891,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_indnkeyatts = PQfnumber(res, "indnkeyatts");
i_indnatts = PQfnumber(res, "indnatts");
i_indkey = PQfnumber(res, "indkey");
+ i_indtype = PQfnumber(res, "indtype");
i_indisclustered = PQfnumber(res, "indisclustered");
i_indisreplident = PQfnumber(res, "indisreplident");
i_indnullsnotdistinct = PQfnumber(res, "indnullsnotdistinct");
@@ -7951,6 +7978,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnattrs * sizeof(Oid));
parseOidArray(PQgetvalue(res, j, i_indkey),
indxinfo[j].indkeys, indxinfo[j].indnattrs);
+ indxinfo[j].indtype = atooid(PQgetvalue(res, j, i_indtype));
indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't');
indxinfo[j].indisreplident = (PQgetvalue(res, j, i_indisreplident)[0] == 't');
indxinfo[j].indnullsnotdistinct = (PQgetvalue(res, j, i_indnullsnotdistinct)[0] == 't');
@@ -18111,8 +18139,13 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
int nstatvals = 0;
if (dopt->binary_upgrade)
+ {
binary_upgrade_set_pg_class_oids(fout, q,
indxinfo->dobj.catId.oid);
+ if (indxinfo->indnkeyattrs > 1)
+ binary_upgrade_set_type_oids_by_rel_oid(fout, q,
+ indxinfo->indtype);
+ }
/* Plain secondary index */
appendPQExpBuffer(q, "%s;\n", indxinfo->indexdef);
@@ -18379,8 +18412,14 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo)
coninfo->dobj.name);
if (dopt->binary_upgrade)
+ {
+ if (indxinfo->indnkeyattrs > 1)
+ binary_upgrade_set_type_oids_by_rel_oid(fout, q,
+ indxinfo->indtype);
+
binary_upgrade_set_pg_class_oids(fout, q,
indxinfo->dobj.catId.oid);
+ }
appendPQExpBuffer(q, "ALTER %sTABLE ONLY %s\n", foreign,
fmtQualifiedDumpable(tbinfo));
@@ -19963,6 +20002,28 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
*/
switch (dobj->objType)
{
+ case DO_DUMMY_TYPE:
+ {
+ /*
+ * In Vanilla, dummy types were only created for tables.
+ * In Postgres Pro for improving join selectivity estimation
+ * we also create two types for each composite index:
+ * 1) a type for attributes of the index
+ * 2) a type which is an array containing elements of type (1)
+ * These types depend on indexes, so adding preDataBound -> type
+ * dependency would create a loop; don't do that.
+ */
+ TypeInfo *tyinfo = (TypeInfo *) dobj;
+ if (tyinfo->isArray)
+ /* If it's an array, take its element type; findTypeByOid() may return NULL */
+ tyinfo = findTypeByOid(tyinfo->typelem);
+
+ if (tyinfo != NULL && OidIsValid(tyinfo->typrelid) &&
+ (tyinfo->typrelkind == RELKIND_INDEX ||
+ tyinfo->typrelkind == RELKIND_PARTITIONED_INDEX))
+ break;
+ }
+ /* FALLTHROUGH */
case DO_NAMESPACE:
case DO_EXTENSION:
case DO_TYPE:
@@ -19980,7 +20041,6 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
case DO_ATTRDEF:
case DO_PROCLANG:
case DO_CAST:
- case DO_DUMMY_TYPE:
case DO_TSPARSER:
case DO_TSDICT:
case DO_TSTEMPLATE:
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 30121af7bdb..9dbf6590bf3 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -428,6 +428,7 @@ typedef struct _indxInfo
int indnattrs; /* total number of index attributes */
Oid *indkeys; /* In spite of the name 'indkeys' this field
* contains both key and nonkey attributes */
+ Oid indtype; /* OID of index's composite type, if any */
bool indisclustered;
bool indisreplident;
bool indnullsnotdistinct;
@@ -768,6 +769,7 @@ extern AccessMethodInfo *findAccessMethodByOid(Oid oid);
extern CollInfo *findCollationByOid(Oid oid);
extern NamespaceInfo *findNamespaceByOid(Oid oid);
extern ExtensionInfo *findExtensionByOid(Oid oid);
+extern IndxInfo *findIndexByOid(Oid oid);
extern PublicationInfo *findPublicationByOid(Oid oid);
extern SubscriptionInfo *findSubscriptionByOid(Oid oid);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 3a9424c19c9..938e30c23bb 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -96,6 +96,8 @@ typedef struct HeapScanDescData
uint32 rs_cindex; /* current tuple's index in vistuples */
uint32 rs_ntuples; /* number of visible tuples on page */
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */
+ struct TempCatScanData *tempscan;
+ HeapTuple temptup;
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index b5e0fb386c0..3718663e6be 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -202,6 +202,7 @@ typedef struct ParallelIndexScanDescData
} ParallelIndexScanDescData;
struct TupleTableSlot;
+struct TempCatScanData;
/* Struct for storage-or-index scans of system tables */
typedef struct SysScanDescData
@@ -212,6 +213,8 @@ typedef struct SysScanDescData
struct IndexScanDescData *iscan; /* only valid in index-scan case */
struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */
struct TupleTableSlot *slot;
+ struct TempCatScanData *tempscan;
+
} SysScanDescData;
#endif /* RELSCAN_H */
diff --git a/src/include/access/tempcat.h b/src/include/access/tempcat.h
new file mode 100644
index 00000000000..d1216bb439f
--- /dev/null
+++ b/src/include/access/tempcat.h
@@ -0,0 +1,28 @@
+#ifndef TEMPCAT_H
+#define TEMPCAT_H
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "executor/tuptable.h"
+#include "utils/rel.h"
+
+extern bool enable_temp_memory_catalog; /* GUC: in-memory system catalog for temporary tables */
+
+typedef struct TempCatScanData TempCatScanData; /* opaque here; only forward-declared in this header */
+
+extern void temp_catalog_init(void); /* called at the end of InitCatalogCache() */
+extern void temp_catalog_insert(Relation relation, HeapTuple htup);
+extern void temp_catalog_delete(Relation relation, ItemPointer ptr);
+extern void temp_catalog_update(Relation relation, ItemPointer ptr, HeapTuple htup);
+extern void temp_catalog_update_inplace(Relation relation, HeapTuple htup);
+extern TempCatScanData* temp_catalog_beginscan(Relation rel, int nkeys, ScanKey key);
+extern void temp_catalog_endscan(TempCatScanData* scan);
+extern HeapTuple temp_catalog_getnext(TempCatScanData* scan, BufferHeapTupleTableSlot* bslot);
+extern bool temp_catalog_is_fetched(TempCatScanData* scan);
+
+extern ItemPointerData temp_catalog_tupmap_assign (ItemPointer ptr, void* data);
+extern bool temp_catalog_tupmap_unassign(ItemPointer ptr, void* data);
+extern void* temp_catalog_tupmap_get (ItemPointer ptr);
+#endif /* TEMPCAT_H */
\ No newline at end of file
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index b2bc10ee041..7fb94f0199c 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -133,6 +133,8 @@ typedef enum
XACT_EVENT_PRE_COMMIT,
XACT_EVENT_PARALLEL_PRE_COMMIT,
XACT_EVENT_PRE_PREPARE,
+ XACT_EVENT_PRE_ABORT,
+ XACT_EVENT_PARALLEL_PRE_ABORT,
} XactEvent;
typedef void (*XactCallback) (XactEvent event, void *arg);
@@ -143,6 +145,7 @@ typedef enum
SUBXACT_EVENT_COMMIT_SUB,
SUBXACT_EVENT_ABORT_SUB,
SUBXACT_EVENT_PRE_COMMIT_SUB,
+ SUBXACT_EVENT_PRE_ABORT_SUB,
} SubXactEvent;
typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 4daa8bef5ee..3a9f7da0911 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -65,6 +65,7 @@ extern void index_check_primary_key(Relation heapRel,
#define INDEX_CREATE_IF_NOT_EXISTS (1 << 4)
#define INDEX_CREATE_PARTITIONED (1 << 5)
#define INDEX_CREATE_INVALID (1 << 6)
+#define INDEX_CREATE_WITHOUT_TYPE (1 << 7)
extern Oid index_create(Relation heapRelation,
const char *indexRelationName,
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index 6dca77e0a22..56d6895b0e1 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -695,4 +695,12 @@
typreceive => 'brin_minmax_multi_summary_recv',
typsend => 'brin_minmax_multi_summary_send', typalign => 'i',
typstorage => 'x', typcollation => 'default' },
+{ oid => '14756', descr => 'pseudo-type representing removed abstime',
+ typname => 'abstime', typlen => '-1', typbyval => 'f', typtype => 'p',
+ typcategory => 'P', typinput => 'timestamp_in', typoutput => 'timestamp_out',
+ typreceive => 'timestamp_recv', typsend => 'timestamp_send', typalign => 'c' },
+{ oid => '14757', descr => 'pseudo-type representing removed reltime',
+ typname => 'reltime', typlen => '-1', typbyval => 'f', typtype => 'p',
+ typcategory => 'P', typinput => 'timestamp_in', typoutput => 'timestamp_out',
+ typreceive => 'timestamp_recv', typsend => 'timestamp_send', typalign => 'c' },
]
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 3b122f79ed8..f8bbfc387d1 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -51,7 +51,7 @@ extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook;
extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
- ParamListInfo params, DestReceiver *dest);
+ ParamListInfo params, DestReceiver *dest, uint64 *processed);
extern void standard_ExplainOneQuery(Query *query, int cursorOptions,
IntoClause *into, struct ExplainState *es,
const char *queryString, ParamListInfo params,
diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h
index 32728f5d1a1..c22475fb341 100644
--- a/src/include/commands/explain_state.h
+++ b/src/include/commands/explain_state.h
@@ -74,6 +74,8 @@ typedef struct ExplainState
/* extensions */
void **extension_state;
int extension_state_allocated;
+
+ uint64 es_processed; /* sum of queryDesc->estate->es_processed */
} ExplainState;
typedef void (*ExplainOptionHandler) (ExplainState *, DefElem *, ParseState *);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index bc37a80dc74..1f5512fab3c 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -170,6 +170,9 @@ typedef struct VacAttrStats
*/
int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for std fetch function */
+ int rowsAttrPitch; /* access info for rows data */
+ Datum* rowsAttrValues;
+ bool* rowsAttrNulls;
TupleDesc tupDesc;
Datum *exprvals; /* access info for index fetch function */
bool *exprnulls;
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index 75366203706..c25ab939e92 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -397,12 +397,17 @@ typedef struct ExprEvalStep
struct
{
bool *anynull; /* track if any input was NULL */
+ bool *guaranteed_empty;
+ bool is_last;
+ int *count_guaranteed_empty;
+ int nargs;
int jumpdone; /* jump here if result determined */
} boolexpr;
/* for EEOP_QUAL */
struct
{
+ bool *guaranteed_empty;
int jumpdone; /* jump here on false or null */
} qualexpr;
@@ -689,6 +694,7 @@ typedef struct ExprEvalStep
{
/* out-of-line state, created by nodeSubplan.c */
SubPlanState *sstate;
+ bool *guaranteed_empty;
} subplan;
/* for EEOP_AGG_*DESERIALIZE */
diff --git a/src/include/executor/execScan.h b/src/include/executor/execScan.h
index 2003cbc7ed5..eead18f9140 100644
--- a/src/include/executor/execScan.h
+++ b/src/include/executor/execScan.h
@@ -241,6 +241,14 @@ ExecScanExtended(ScanState *node,
return slot;
}
}
+ else if (qual && qual->guaranteed_empty)
+ {
+ /* Qual guarantees the absence of results */
+ node->ps.guaranteed_empty = true;
+ ExecClearTuple(slot);
+
+ return slot;
+ }
else
InstrCountFiltered1(node, 1);
diff --git a/src/include/lib/rbtree.h b/src/include/lib/rbtree.h
index 37d6d8ed037..39d17a71278 100644
--- a/src/include/lib/rbtree.h
+++ b/src/include/lib/rbtree.h
@@ -58,12 +58,14 @@ typedef int (*rbt_comparator) (const RBTNode *a, const RBTNode *b, void *arg);
typedef void (*rbt_combiner) (RBTNode *existing, const RBTNode *newdata, void *arg);
typedef RBTNode *(*rbt_allocfunc) (void *arg);
typedef void (*rbt_freefunc) (RBTNode *x, void *arg);
+typedef void (*rbt_fixfunc) (RBTNode *x, void *arg);
extern RBTree *rbt_create(Size node_size,
rbt_comparator comparator,
rbt_combiner combiner,
rbt_allocfunc allocfunc,
rbt_freefunc freefunc,
+ rbt_fixfunc fixfunc,
void *arg);
extern RBTNode *rbt_find(RBTree *rbt, const RBTNode *data);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 9a7d733ddef..9bf90d53459 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -105,8 +105,13 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount;
extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount;
extern PGDLLIMPORT volatile uint32 CritSectionCount;
+/* to allow extensions to handle custom interrupts */
+typedef void (*ProcessInterrupts_hook_type) (void);
+extern PGDLLIMPORT ProcessInterrupts_hook_type ProcessInterrupts_hook;
+
/* in tcop/postgres.c */
extern void ProcessInterrupts(void);
+extern void standard_ProcessInterrupts(void);
/* Test whether an interrupt is pending */
#ifndef WIN32
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 409e172bfb6..56561695095 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -147,6 +147,8 @@ typedef struct ExprState
* ExecInitExprRec().
*/
ErrorSaveContext *escontext;
+
+ bool guaranteed_empty;
} ExprState;
@@ -1025,6 +1027,7 @@ typedef struct SubPlanState
ExprState *lhs_hash_expr; /* hash expr for lefthand datatype(s) */
FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */
ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */
+ bool guaranteed_empty;
} SubPlanState;
/*
@@ -1203,6 +1206,8 @@ typedef struct PlanState
*/
TupleDesc scandesc;
+ bool guaranteed_empty;
+
/*
* Define the slot types for inner, outer and scanslots for expression
* contexts with this state as a parent. If *opsset is set, then
diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h
index 5653fec8cbe..2f3415c9ae1 100644
--- a/src/include/nodes/nodeFuncs.h
+++ b/src/include/nodes/nodeFuncs.h
@@ -30,6 +30,7 @@ struct PlanState; /* avoid including execnodes.h too */
* contents */
#define QTW_DONT_COPY_QUERY 0x40 /* do not copy top Query */
#define QTW_EXAMINE_SORTGROUP 0x80 /* include SortGroupClause lists */
+#define QTW_DONT_COPY_DEFAULT 0x00 /* only custom mutator will copy; NOTE(review): value is 0, so (flags & QTW_DONT_COPY_DEFAULT) can never be true -- confirm the intended bit */
#define QTW_IGNORE_GROUPEXPRS 0x100 /* GROUP expressions list */
@@ -155,7 +156,10 @@ extern bool check_functions_in_node(Node *node, check_function_callback checker,
#define expression_tree_walker(n, w, c) \
expression_tree_walker_impl(n, (tree_walker_callback) (w), c)
#define expression_tree_mutator(n, m, c) \
- expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c)
+ expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c, 0)
+#define expression_tree_mutator_ext(n, m, c, f) \
+ expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c, f)
+
#define query_tree_walker(q, w, c, f) \
query_tree_walker_impl(q, (tree_walker_callback) (w), c, f)
@@ -186,7 +190,7 @@ extern bool expression_tree_walker_impl(Node *node,
void *context);
extern Node *expression_tree_mutator_impl(Node *node,
tree_mutator_callback mutator,
- void *context);
+ void *context, int flags);
extern bool query_tree_walker_impl(Query *query,
tree_walker_callback walker,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 6567759595d..2e6217255bb 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -17,10 +17,12 @@
#define PATHNODES_H
#include "access/sdir.h"
+#include "catalog/pg_statistic.h"
#include "lib/stringinfo.h"
#include "nodes/params.h"
#include "nodes/parsenodes.h"
#include "storage/block.h"
+#include "utils/lsyscache.h"
/*
@@ -899,6 +901,7 @@ typedef struct RelOptInfo
*/
/* estimated number of result tuples */
Cardinality rows;
+ Cardinality rowsUnclamped;
/*
* per-relation planner control flags
@@ -1235,6 +1238,10 @@ struct IndexOptInfo
/* AM's cost estimator */
/* Rather than include amapi.h here, we declare amcostestimate like this */
void (*amcostestimate) (struct PlannerInfo *, struct IndexPath *, double, Cost *, Cost *, Selectivity *, double *, double *) pg_node_attr(read_write_ignore);
+
+ /* Cache for per-tuple index statistics. These stats can be large, and it
+ * would be expensive to uncompress them every time. */
+ AttStatsSlot *sslots pg_node_attr(equal_ignore, query_jumble_ignore, read_write_ignore, read_as(0));
};
/*
@@ -1604,6 +1611,16 @@ typedef struct PathKey
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+ NodeTag type;
+ List *pathkeys;
+ List *clauses;
+} PathKeyInfo;
+
/*
* Contains an order of group-by clauses and the corresponding list of
* pathkeys.
@@ -2067,6 +2084,11 @@ typedef struct AppendPath
/* Index of first partial path in subpaths; list_length(subpaths) if none */
int first_partial_path;
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
+ bool pull_tlist; /* if = true, create_append_plan()
+ * should get targetlist from any
+ * subpath - they are the same,
+ * because the only place - append
+ * index scan for range OR */
} AppendPath;
#define IS_DUMMY_APPEND(p) \
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 9c047cc401b..a17500997e0 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -35,6 +35,22 @@
#include "nodes/plannodes.h"
+typedef enum
+{
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
+} Pattern_Type;
+
+typedef enum
+{
+ Pattern_Prefix_None,
+ Pattern_Prefix_Partial,
+ Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
+
struct PlannerInfo; /* avoid including pathnodes.h here */
struct IndexOptInfo;
struct SpecialJoinInfo;
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index d397fe27dc1..56981d97e51 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -118,7 +118,15 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append_ext(AppendPath *path, PlannerInfo *root);
+
+static inline void cost_append(AppendPath *apath)
+{
+ cost_append_ext(apath, NULL); /* same as cost_append_ext() called with no PlannerInfo (root = NULL) */
+}
+
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+ int nPresortedKeys, double tuples);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
int input_disabled_nodes,
@@ -185,6 +193,7 @@ extern void cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
+extern void cost_qual_eval_node_index(QualCost *cost, Node *qual, PlannerInfo *root);
extern void compute_semi_anti_join_factors(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 60dcdb77e41..1da33158702 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -68,11 +68,24 @@ extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
RelOptInfo *rel,
List *tidrangequals,
Relids required_outer);
-extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
+
+extern AppendPath *create_append_path_ext(PlannerInfo *root, RelOptInfo *rel,
+ List *subpaths, List *partial_subpaths,
+ List *pathkeys, Relids required_outer,
+ int parallel_workers, bool parallel_aware,
+ double rows, bool pull_tlist);
+
+static inline AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
- double rows);
+ double rows)
+{
+ return create_append_path_ext(root, rel, subpaths, partial_subpaths, pathkeys,
+ required_outer, parallel_workers, parallel_aware,
+ rows, false);
+}
+
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 8410531f2d6..76470c75845 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -69,6 +69,8 @@ extern void generate_partitionwise_join_paths(PlannerInfo *root,
* routines to generate index paths
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
+extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist);
@@ -217,6 +219,10 @@ typedef enum
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+ List **group_pathkeys,
+ List **group_clauses,
+ int num_groupby_pathkeys);
extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
@@ -264,6 +270,7 @@ extern List *select_outer_pathkeys_for_merge(PlannerInfo *root,
extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
List *mergeclauses,
List *outer_pathkeys);
+extern int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys);
extern List *trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
List *mergeclauses,
List *pathkeys);
@@ -272,6 +279,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
List *pathkeys);
extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
extern List *append_pathkeys(List *target, List *source);
+extern void keybased_rewrite_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern PathKey *make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
CompareType cmptype, bool nulls_first);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 9d3debcab28..950cc792253 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -50,6 +50,9 @@ extern Plan *materialize_finished_plan(Plan *subplan);
extern bool is_projection_capable_path(Path *path);
extern bool is_projection_capable_plan(Plan *plan);
+extern Node * fix_indexqual_operand(Node *node, IndexOptInfo *index, int
+ indexcol);
+
/* External use of these functions is deprecated: */
extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
diff --git a/src/include/port.h b/src/include/port.h
index 3964d3b1293..f811cbc6c54 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -107,6 +107,53 @@ extern void pgfnames_cleanup(char **filenames);
#define is_absolute_path(filename) is_windows_absolute_path(filename)
#endif
+
+/*
+ * Constants for handling socket error codes.
+ * Any socket-related routine must use SOCK_ERRNO instead of errno!
+ *
+ * On Windows, socket errors are checked using WSAGetLastError instead of errno:
+ * errno might be 0 while WSAGetLastError returns the actual socket error code.
+ * WSAGetLastError uses its own error codes, which differ from errno values.
+ */
+#ifdef WIN32
+#define SOCK_EWOULDBLOCK WSAEWOULDBLOCK
+#define SOCK_EINTR WSAEINTR
+#define SOCK_EINVAL WSAEINVAL
+#define SOCK_EIO WSAEFAULT /* used only for setting error code */
+#define SOCK_EINPROGRESS WSAEINPROGRESS
+#define SOCK_ECONNRESET WSAECONNRESET
+#define SOCK_ECONNABORTED WSAECONNABORTED
+#define SOCK_EHOSTDOWN WSAEHOSTDOWN
+#define SOCK_EHOSTUNREACH WSAEHOSTUNREACH
+#define SOCK_ENETDOWN WSAENETDOWN
+#define SOCK_ENETRESET WSAENETRESET
+#define SOCK_ENETUNREACH WSAENETUNREACH
+#define SOCK_ETIMEDOUT WSAETIMEDOUT
+#else
+#ifdef EAGAIN
+#define SOCK_EAGAIN EAGAIN
+#endif
+#ifdef EWOULDBLOCK
+#define SOCK_EWOULDBLOCK EWOULDBLOCK
+#endif
+#define SOCK_EINTR EINTR
+#define SOCK_EINVAL EINVAL
+#define SOCK_EIO EIO
+#define SOCK_EINPROGRESS EINPROGRESS
+#define SOCK_ECONNRESET ECONNRESET
+#ifdef EPIPE
+#define SOCK_EPIPE EPIPE
+#endif
+#define SOCK_ECONNABORTED ECONNABORTED
+#define SOCK_EHOSTDOWN EHOSTDOWN
+#define SOCK_EHOSTUNREACH EHOSTUNREACH
+#define SOCK_ENETDOWN ENETDOWN
+#define SOCK_ENETRESET ENETRESET
+#define SOCK_ENETUNREACH ENETUNREACH
+#define SOCK_ETIMEDOUT ETIMEDOUT
+#endif
+
/*
* This macro provides a centralized list of all errnos that identify
* hard failure of a previously-established network connection.
@@ -119,16 +166,22 @@ extern void pgfnames_cleanup(char **filenames);
* are actually reporting errors typically single out EPIPE and ECONNRESET,
* while allowing the network failures to be reported generically.
*/
-#define ALL_CONNECTION_FAILURE_ERRNOS \
- EPIPE: \
- case ECONNRESET: \
- case ECONNABORTED: \
- case EHOSTDOWN: \
- case EHOSTUNREACH: \
- case ENETDOWN: \
- case ENETRESET: \
- case ENETUNREACH: \
- case ETIMEDOUT
+
+#define ALL_CONNECTION_FAILURE_ERRNOS_COMMON \
+ SOCK_ECONNRESET: \
+ case SOCK_ECONNABORTED: \
+ case SOCK_EHOSTDOWN: \
+ case SOCK_EHOSTUNREACH: \
+ case SOCK_ENETDOWN: \
+ case SOCK_ENETRESET: \
+ case SOCK_ENETUNREACH: \
+ case SOCK_ETIMEDOUT
+
+#ifdef SOCK_EPIPE
+#define ALL_CONNECTION_FAILURE_ERRNOS SOCK_EPIPE: case ALL_CONNECTION_FAILURE_ERRNOS_COMMON
+#else
+#define ALL_CONNECTION_FAILURE_ERRNOS ALL_CONNECTION_FAILURE_ERRNOS_COMMON
+#endif
/* Portable locale initialization (in exec.c) */
extern void set_pglocale_pgservice(const char *argv0, const char *app);
diff --git a/src/include/storage/rd.h b/src/include/storage/rd.h
new file mode 100644
index 00000000000..f87c52068e9
--- /dev/null
+++ b/src/include/storage/rd.h
@@ -0,0 +1,34 @@
+#ifndef RD_H
+#define RD_H
+
+#include "postgres.h"
+
+#include "storage/block.h"
+#include "storage/relfilelocator.h"
+#include "storage/smgr.h"
+#include "storage/sync.h"
+
+extern void rd_init(void);
+extern void rd_shutdown(void);
+extern void rd_open(SMgrRelation reln);
+extern void rd_close(SMgrRelation reln, ForkNumber forknum);
+extern void rd_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
+extern bool rd_exists(SMgrRelation reln, ForkNumber forknum);
+extern void rd_unlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
+extern void rd_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync);
+extern void rd_zeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync);
+extern bool rd_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks);
+extern void rd_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks);
+extern void rd_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync);
+extern void rd_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks);
+extern BlockNumber rd_nblocks(SMgrRelation reln, ForkNumber forknum);
+extern void rd_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks);
+extern void rd_immedsync(SMgrRelation reln, ForkNumber forknum);
+extern void rd_registersync(SMgrRelation reln, ForkNumber forknum);
+extern void rd_reset(SMgrRelation reln);
+extern int rd_fd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off);
+
+extern int temp_rd_buffers;
+extern bool enable_temp_rd_buffers;
+
+#endif /* RD_H */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 2f73f9fcf57..5d624afcaf0 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -390,6 +390,17 @@ do \
#endif /* __sparc__ */
+/* Elbrus */
+#ifdef __e2k__
+#define HAS_TEST_AND_SET
+typedef int slock_t;
+/* There is no need to check for sync_lock availability. */
+#define TAS(lock) __sync_lock_test_and_set(lock, 1)
+#define S_UNLOCK(lock) __sync_lock_release(lock)
+#define SPIN_DELAY() do { __asm__ __volatile__ ("nop" : : ); } while(0)
+#endif
+
+
/* PowerPC */
#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
#define HAS_TEST_AND_SET
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 845a5851b57..1cd509b0b69 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -121,8 +121,10 @@ typedef struct
* RelationSyncCache */
} SharedInvalRelSyncMsg;
-typedef union
+typedef struct
{
+ union
+ {
int8 id; /* type field --- must be first */
SharedInvalCatcacheMsg cc;
SharedInvalCatalogMsg cat;
@@ -131,6 +133,9 @@ typedef union
SharedInvalRelmapMsg rm;
SharedInvalSnapshotMsg sn;
SharedInvalRelSyncMsg rs;
+ };
+
+ bool isLocal;
} SharedInvalidationMessage;
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 3964d9334b3..58d9f57a5b5 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -60,6 +60,7 @@ typedef struct SMgrRelationData
*/
int md_num_open_segs[MAX_FORKNUM + 1];
struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
+ struct _RdBuffer *rd_bufs[MAX_FORKNUM + 1];
/*
* Pinning support. If unpinned (ie. pincount == 0), 'node' is a list
diff --git a/src/include/tcop/cmdtag.h b/src/include/tcop/cmdtag.h
index 8d027fcc457..6f6b970cd92 100644
--- a/src/include/tcop/cmdtag.h
+++ b/src/include/tcop/cmdtag.h
@@ -29,6 +29,7 @@ typedef enum CommandTag
typedef struct QueryCompletion
{
CommandTag commandTag;
+ CommandTag explainCommandTag;
uint64 nprocessed;
} QueryCompletion;
diff --git a/src/include/tcop/cmdtaglist.h b/src/include/tcop/cmdtaglist.h
index d250a714d59..eec145798dc 100644
--- a/src/include/tcop/cmdtaglist.h
+++ b/src/include/tcop/cmdtaglist.h
@@ -178,6 +178,9 @@ PG_CMDTAG(CMDTAG_DROP_USER_MAPPING, "DROP USER MAPPING", true, false, false)
PG_CMDTAG(CMDTAG_DROP_VIEW, "DROP VIEW", true, false, false)
PG_CMDTAG(CMDTAG_EXECUTE, "EXECUTE", false, false, false)
PG_CMDTAG(CMDTAG_EXPLAIN, "EXPLAIN", false, false, false)
+PG_CMDTAG(CMDTAG_EXPLAIN_INSERT, "INSERT", false, false, true)
+PG_CMDTAG(CMDTAG_EXPLAIN_UPDATE, "UPDATE", false, false, true)
+PG_CMDTAG(CMDTAG_EXPLAIN_DELETE, "DELETE", false, false, true)
PG_CMDTAG(CMDTAG_FETCH, "FETCH", false, false, true)
PG_CMDTAG(CMDTAG_GRANT, "GRANT", true, false, false)
PG_CMDTAG(CMDTAG_GRANT_ROLE, "GRANT ROLE", false, false, false)
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 277ec33c00b..58ab696a0c3 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -223,7 +223,7 @@ extern void ResetCatalogCaches(void);
extern void ResetCatalogCachesExt(bool debug_discard);
extern void CatalogCacheFlushCatalog(Oid catId);
extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
-extern void PrepareToInvalidateCacheTuple(Relation relation,
+extern void PrepareToInvalidateCacheTuple(Oid relid,
HeapTuple tuple,
HeapTuple newtuple,
void (*function) (int, uint32, Oid, void *),
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 9b871caef62..61ea5168404 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -82,4 +82,74 @@ extern void InvalidateSystemCaches(void);
extern void InvalidateSystemCachesExtended(bool debug_discard);
extern void LogLogicalInvalidations(void);
+
+/*
+ * Hints that operation being performed is related to temporary tables.
+ */
+extern char temp_table_scope;
+
+#define TEMP_TABLE_SCOPE_NOTEMP 0
+#define TEMP_TABLE_SCOPE_SHARED 1
+#define TEMP_TABLE_SCOPE_LOCAL 2
+
+/*
+ * This is a modified PG_TRY/PG_FINALLY/PG_END_TRY block that conditionally sets
+ * `temp_table_scope` and restores it, also on error. The try/catch mechanism is
+ * skipped when the requested level already equals the current value (e.g.
+ * `isTemp` is false outside any scope). `BEGIN_TEMP_TABLE_SCOPE` saves the
+ * previous value of `temp_table_scope` and sets the new one according to
+ * `level`; on reaching `END_TEMP_TABLE_SCOPE`, or on an exception, the saved
+ * value is restored. Thus, scopes can be nested.
+ *
+ * When `level` is `TEMP_TABLE_SCOPE_LOCAL` (or `BEGIN_TEMP_TABLE_SCOPE_LOCAL`
+ * is used with a non-zero argument), some shared invalidation messages aren't
+ * sent to other sessions.
+ *
+ * When `level` is `TEMP_TABLE_SCOPE_LOCAL` or `TEMP_TABLE_SCOPE_SHARED`
+ * (or `BEGIN_TEMP_TABLE_SCOPE_*` is used with a non-zero argument), the WAL
+ * records created won't issue fsync on commit.
+ */
+#define BEGIN_TEMP_TABLE_SCOPE(level) \
+ do { \
+ const char _temp_scope_level = (level); /* evaluate the macro argument exactly once */ \
+ const bool _temp_scope_do = (_temp_scope_level != temp_table_scope); \
+ bool _temp_scope_throw = false; \
+ char _temp_scope_save_state; \
+ sigjmp_buf* _temp_scope_save_exception_stack = PG_exception_stack; \
+ ErrorContextCallback* _temp_scope_save_error_stack; \
+ sigjmp_buf _temp_scope_save_sigjmp_buf; \
+ if (_temp_scope_do) /* no handler is installed when the level does not change */ \
+ { \
+ _temp_scope_save_state = temp_table_scope; \
+ _temp_scope_save_error_stack = error_context_stack; \
+ if (sigsetjmp(_temp_scope_save_sigjmp_buf, 0) == 0) \
+ { \
+ PG_exception_stack = &_temp_scope_save_sigjmp_buf; \
+ temp_table_scope = _temp_scope_level; /* cached value, not 'level': avoids a second evaluation */ \
+ } \
+ else \
+ _temp_scope_throw = true; /* an error jumped back here; END_TEMP_TABLE_SCOPE re-throws it */ \
+ } \
+ if (!_temp_scope_throw) \
+ {
+
+#define BEGIN_TEMP_TABLE_SCOPE_LOCAL(isTemp) BEGIN_TEMP_TABLE_SCOPE( (isTemp) ? TEMP_TABLE_SCOPE_LOCAL : TEMP_TABLE_SCOPE_NOTEMP )
+#define BEGIN_TEMP_TABLE_SCOPE_SHARED(isTemp) BEGIN_TEMP_TABLE_SCOPE( (isTemp) ? TEMP_TABLE_SCOPE_SHARED : TEMP_TABLE_SCOPE_NOTEMP )
+
+#define END_TEMP_TABLE_SCOPE() \
+ } /* closes the guarded body opened by BEGIN_TEMP_TABLE_SCOPE */ \
+ PG_exception_stack = _temp_scope_save_exception_stack; /* restored unconditionally */ \
+ if (_temp_scope_do) \
+ { \
+ error_context_stack = _temp_scope_save_error_stack; \
+ temp_table_scope = _temp_scope_save_state; /* back to the value saved on entry */ \
+ if (_temp_scope_throw) \
+ PG_RE_THROW(); /* propagate the error caught by the sigsetjmp in BEGIN_TEMP_TABLE_SCOPE */ \
+ } \
+ } while (0)
+
+#define IsTempTableScope() (temp_table_scope != TEMP_TABLE_SCOPE_NOTEMP)
+#define IsLocalTempTableScope() (temp_table_scope == TEMP_TABLE_SCOPE_LOCAL)
+#define IsSharedTempTableScope() (temp_table_scope == TEMP_TABLE_SCOPE_SHARED)
+
#endif /* INVAL_H */
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index fa7c7e0323b..df65a5baa66 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -60,6 +60,8 @@ typedef struct AttStatsSlot
/* Remaining fields are private to get_attstatsslot/free_attstatsslot */
void *values_arr; /* palloc'd values array, if any */
void *numbers_arr; /* palloc'd numbers array, if any */
+
+ bool incache; /* do not free because struct is cached */
} AttStatsSlot;
/* Hook for plugins to get control in get_attavgwidth() */
@@ -195,6 +197,8 @@ extern int32 get_typavgwidth(Oid typid, int32 typmod);
extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
extern bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
int reqkind, Oid reqop, int flags);
+extern AttStatsSlot* fill_attstatsslot(AttStatsSlot *sslots, HeapTuple statstuple,
+ int reqkind, Oid reqop, int flags);
extern void free_attstatsslot(AttStatsSlot *sslot);
extern char *get_namespace_name(Oid nspid);
extern char *get_namespace_name_or_temp(Oid nspid);
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index fb4fa53363d..8f03c29fa09 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -98,6 +98,7 @@ typedef struct VariableStatData
* clause */
bool acl_ok; /* true if user has SELECT privilege on all
* rows from the table or column */
+ AttStatsSlot *sslots;
} VariableStatData;
#define ReleaseVariableStats(vardata) \
@@ -180,6 +181,9 @@ extern double generic_restriction_selectivity(PlannerInfo *root,
Oid oproid, Oid collation,
List *args, int varRelid,
double default_selectivity);
+extern double prefix_record_histogram_selectivity(VariableStatData *vardata,
+ Datum constvalLeft, Datum constvalRight, int record_cmp_prefix,
+ double ndistinct, int *n_bins);
extern double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid opoid, FmgrInfo *opproc,
@@ -224,6 +228,11 @@ extern List *estimate_multivariate_bucketsize(PlannerInfo *root,
RelOptInfo *inner,
List *hashclauses,
Selectivity *innerbucketsize);
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+ double input_rows, List **pgset,
+ EstimationInfo *estinfo,
+ List **cache_varinfos, int prevNExprs);
+
extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,
@@ -247,5 +256,13 @@ extern Selectivity scalararraysel_containment(PlannerInfo *root,
Node *leftop, Node *rightop,
Oid elemtype, bool isEquality, bool useOr,
int varRelid);
-
+extern Selectivity eqjoin_selectivity(PlannerInfo *root, Oid operator, Oid
+ collation,
+ VariableStatData* vardata1,
+ VariableStatData* vardata2,
+ SpecialJoinInfo *sjinfo,
+ int record_cmp_prefix);
+extern Selectivity eqconst_selectivity(Oid operator, Oid collation,
+ VariableStatData *vardata, Datum constval, bool constisnull,
+ bool varonleft, bool negate, int record_cmp_prefix);
#endif /* SELFUNCS_H */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 3039713259e..0391ca70f8b 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -83,6 +83,13 @@ extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
extern bool RelationHasSysCache(Oid relid);
extern bool RelationSupportsSysCache(Oid relid);
+
+/* Retrieve relation description data, if possible.
+ * Use only in situations where it is acceptable for this function to
+ * fail (return no valid tuple) for no apparent reason.
+ */
+extern HeapTuple TryGetSysCacheRelationClassTuple(Oid relid);
+
/*
* The use of the macros below rather than direct calls to the corresponding
* functions is encouraged, as it insulates the caller from changes in the
diff --git a/src/interfaces/libpq/fe-cancel.c b/src/interfaces/libpq/fe-cancel.c
index c872a0267f0..ca0c885bc01 100644
--- a/src/interfaces/libpq/fe-cancel.c
+++ b/src/interfaces/libpq/fe-cancel.c
@@ -655,7 +655,7 @@ retry3:
if (connect(tmpsock, (struct sockaddr *) &cancel->raddr.addr,
cancel->raddr.salen) < 0)
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry3;
strlcpy(errbuf, "PQcancel() -- connect() failed: ", errbufsize);
@@ -672,7 +672,7 @@ retry4:
*/
if (send(tmpsock, (char *) &cancel->cancel_pkt_len, cancel_pkt_len, 0) != cancel_pkt_len)
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry4;
strlcpy(errbuf, "PQcancel() -- send() failed: ", errbufsize);
@@ -689,7 +689,7 @@ retry4:
retry5:
if (recv(tmpsock, &recvbuf, 1, 0) < 0)
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry5;
/* we ignore other error conditions */
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index e7770da3a58..89c078b4d0c 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -3481,11 +3481,11 @@ keep_going: /* We will come back to here until there is
if (connect(conn->sock, (struct sockaddr *) &addr_cur->addr.addr,
addr_cur->addr.salen) < 0)
{
- if (SOCK_ERRNO == EINPROGRESS ||
+ if (SOCK_ERRNO == SOCK_EINPROGRESS ||
#ifdef WIN32
- SOCK_ERRNO == EWOULDBLOCK ||
+ SOCK_ERRNO == SOCK_EWOULDBLOCK ||
#endif
- SOCK_ERRNO == EINTR)
+ SOCK_ERRNO == SOCK_EINTR)
{
/*
* This is fine - we're in non-blocking mode, and
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index dcc8a447d66..8f7e3651a63 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -3843,7 +3843,9 @@ PQcmdTuples(PGresult *res)
if (!res)
return "";
- if (strncmp(res->cmdStatus, "INSERT ", 7) == 0)
+ if (strncmp(res->cmdStatus, "EXPLAIN ", 8) == 0)
+ p = res->cmdStatus + 8;
+ else if (strncmp(res->cmdStatus, "INSERT ", 7) == 0)
{
p = res->cmdStatus + 7;
/* INSERT: skip oid and space */
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index dca44fdc5d2..c464fa512ba 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -660,16 +660,16 @@ retry3:
{
switch (SOCK_ERRNO)
{
- case EINTR:
+ case SOCK_EINTR:
goto retry3;
/* Some systems return EAGAIN/EWOULDBLOCK for no data */
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
return someread;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
return someread;
#endif
@@ -755,16 +755,16 @@ retry4:
{
switch (SOCK_ERRNO)
{
- case EINTR:
+ case SOCK_EINTR:
goto retry4;
/* Some systems return EAGAIN/EWOULDBLOCK for no data */
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
return 0;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
return 0;
#endif
@@ -883,15 +883,15 @@ pqSendSome(PGconn *conn, int len)
/* Anything except EAGAIN/EWOULDBLOCK/EINTR is trouble */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
break;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
break;
#endif
- case EINTR:
+ case SOCK_EINTR:
continue;
default:
@@ -1112,7 +1112,7 @@ pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time)
/* We will retry as long as we get EINTR */
do
result = PQsocketPoll(sock, forRead, forWrite, end_time);
- while (result < 0 && SOCK_ERRNO == EINTR);
+ while (result < 0 && SOCK_ERRNO == SOCK_EINTR);
if (result < 0)
{
diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c
index 843b31e175f..6ff0cb05cdd 100644
--- a/src/interfaces/libpq/fe-secure-gssapi.c
+++ b/src/interfaces/libpq/fe-secure-gssapi.c
@@ -84,8 +84,8 @@
* transport negotiation is complete).
*
* On success, returns the number of data bytes consumed (possibly less than
- * len). On failure, returns -1 with errno set appropriately. If the errno
- * indicates a non-retryable error, a message is added to conn->errorMessage.
+ * len). On failure, returns -1 with SOCK_ERRNO set appropriately (SOCK_ERRNO is used because it differs from errno
+ * on Windows). If SOCK_ERRNO indicates a non-retryable error, a message is added to conn->errorMessage.
* For retryable errors, caller should call again (passing the same or more
* data) once the socket is ready.
*/
@@ -121,7 +121,7 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
{
appendPQExpBufferStr(&conn->errorMessage,
"GSSAPI caller failed to retransmit all data needing to be retried\n");
- SOCK_ERRNO_SET(EINVAL);
+ SOCK_ERRNO_SET(SOCK_EINVAL);
return -1;
}
@@ -199,14 +199,14 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
if (major != GSS_S_COMPLETE)
{
pg_GSS_error(libpq_gettext("GSSAPI wrap error"), conn, major, minor);
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
if (conf_state == 0)
{
libpq_append_conn_error(conn, "outgoing GSSAPI message would not use confidentiality");
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -215,7 +215,7 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
libpq_append_conn_error(conn, "client tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32));
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -258,8 +258,8 @@ cleanup:
* transport negotiation is complete).
*
* Returns the number of data bytes read, or on failure, returns -1
- * with errno set appropriately. If the errno indicates a non-retryable
- * error, a message is added to conn->errorMessage. For retryable errors,
+ * with SOCK_ERRNO set appropriately (SOCK_ERRNO is used because it differs from errno on Windows).
+ * If SOCK_ERRNO indicates a non-retryable error, a message is added to conn->errorMessage. For retryable errors,
* caller should call again once the socket is ready.
*/
ssize_t
@@ -341,7 +341,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
/* If we still haven't got the length, return to the caller */
if (PqGSSRecvLength < sizeof(uint32))
{
- SOCK_ERRNO_SET(EWOULDBLOCK);
+ SOCK_ERRNO_SET(SOCK_EWOULDBLOCK);
return -1;
}
}
@@ -354,7 +354,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
libpq_append_conn_error(conn, "oversize GSSAPI packet sent by the server (%zu > %zu)",
(size_t) input.length,
PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32));
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
return -1;
}
@@ -373,7 +373,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
/* If we don't yet have the whole packet, return to the caller */
if (PqGSSRecvLength - sizeof(uint32) < input.length)
{
- SOCK_ERRNO_SET(EWOULDBLOCK);
+ SOCK_ERRNO_SET(SOCK_EWOULDBLOCK);
return -1;
}
@@ -393,7 +393,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
pg_GSS_error(libpq_gettext("GSSAPI unwrap error"), conn,
major, minor);
ret = -1;
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -401,7 +401,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
{
libpq_append_conn_error(conn, "incoming GSSAPI message did not use confidentiality");
ret = -1;
- SOCK_ERRNO_SET(EIO); /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -437,8 +437,15 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
*ret = pqsecure_raw_read(conn, recv_buffer, length);
if (*ret < 0)
{
- if (SOCK_ERRNO == EAGAIN || SOCK_ERRNO == EWOULDBLOCK ||
- SOCK_ERRNO == EINTR)
+ int err = SOCK_ERRNO;
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_READING;
else
return PGRES_POLLING_FAILED;
@@ -458,8 +465,16 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
*ret = pqsecure_raw_read(conn, recv_buffer, length);
if (*ret < 0)
{
- if (SOCK_ERRNO == EAGAIN || SOCK_ERRNO == EWOULDBLOCK ||
- SOCK_ERRNO == EINTR)
+ int err = SOCK_ERRNO;
+
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_READING;
else
return PGRES_POLLING_FAILED;
@@ -522,8 +537,15 @@ pqsecure_open_gss(PGconn *conn)
ret = pqsecure_raw_write(conn, PqGSSSendBuffer + PqGSSSendNext, amount);
if (ret < 0)
{
- if (SOCK_ERRNO == EAGAIN || SOCK_ERRNO == EWOULDBLOCK ||
- SOCK_ERRNO == EINTR)
+ int err = SOCK_ERRNO;
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_WRITING;
else
return PGRES_POLLING_FAILED;
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index 51dd7b9fec0..7c2446f95fb 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -157,7 +157,7 @@ rloop:
appendPQExpBufferStr(&conn->errorMessage,
"SSL_read failed but did not provide error information\n");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
}
break;
case SSL_ERROR_WANT_READ:
@@ -176,8 +176,11 @@ rloop:
if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
- if (result_errno == EPIPE ||
- result_errno == ECONNRESET)
+ if (
+#ifdef SOCK_EPIPE
+ result_errno == SOCK_EPIPE ||
+#endif
+ result_errno == SOCK_ECONNRESET)
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -190,7 +193,7 @@ rloop:
{
libpq_append_conn_error(conn, "SSL SYSCALL error: EOF detected");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
}
break;
@@ -201,7 +204,7 @@ rloop:
libpq_append_conn_error(conn, "SSL error: %s", errm);
SSLerrfree(errm);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -213,13 +216,13 @@ rloop:
* server crash.
*/
libpq_append_conn_error(conn, "SSL connection has been closed unexpectedly");
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
default:
libpq_append_conn_error(conn, "unrecognized SSL error code: %d", err);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -259,7 +262,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
appendPQExpBufferStr(&conn->errorMessage,
"SSL_write failed but did not provide error information\n");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
}
break;
case SSL_ERROR_WANT_READ:
@@ -283,7 +286,11 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
- if (result_errno == EPIPE || result_errno == ECONNRESET)
+ if (
+#ifdef SOCK_EPIPE
+ result_errno == SOCK_EPIPE ||
+#endif
+ result_errno == SOCK_ECONNRESET)
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -296,7 +303,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
{
libpq_append_conn_error(conn, "SSL SYSCALL error: EOF detected");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
}
break;
@@ -307,7 +314,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
libpq_append_conn_error(conn, "SSL error: %s", errm);
SSLerrfree(errm);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -319,13 +326,13 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
* server crash.
*/
libpq_append_conn_error(conn, "SSL connection has been closed unexpectedly");
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
default:
libpq_append_conn_error(conn, "unrecognized SSL error code: %d", err);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -1748,13 +1755,13 @@ pgconn_bio_read(BIO *h, char *buf, int size)
/* If we were interrupted, tell caller to retry */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
BIO_set_retry_read(h);
break;
@@ -1781,13 +1788,13 @@ pgconn_bio_write(BIO *h, const char *buf, int size)
/* If we were interrupted, tell caller to retry */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
BIO_set_retry_write(h);
break;
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index e686681ba15..ffb24034120 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -207,18 +207,19 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
/* Set error message if appropriate */
switch (result_errno)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
/* no error message, caller is expected to retry */
break;
-
- case EPIPE:
- case ECONNRESET:
+#ifdef SOCK_EPIPE
+ case SOCK_EPIPE:
+#endif
+ case SOCK_ECONNRESET:
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -365,23 +366,23 @@ retry_masked:
/* Set error message if appropriate */
switch (result_errno)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
/* no error message, caller is expected to retry */
break;
-
- case EPIPE:
+#ifdef SOCK_EPIPE
+ case SOCK_EPIPE:
/* Set flag for EPIPE */
REMEMBER_EPIPE(spinfo, true);
-
+#endif
/* FALL THRU */
- case ECONNRESET:
+ case SOCK_ECONNRESET:
conn->write_failed = true;
/* Store error message in conn->write_err_msg, if possible */
/* (strdup failure is OK, we'll cope later) */
diff --git a/src/pl/plperl/expected/plperl.out b/src/pl/plperl/expected/plperl.out
index e3d7c8896a2..311aca3e2cc 100644
--- a/src/pl/plperl/expected/plperl.out
+++ b/src/pl/plperl/expected/plperl.out
@@ -620,7 +620,7 @@ CREATE OR REPLACE FUNCTION perl_spi_prepared_set(INTEGER, INTEGER) RETURNS SETOF
spi_freeplan($x);
return;
$$ LANGUAGE plperl;
-SELECT * from perl_spi_prepared_set(1,2);
+SELECT * from perl_spi_prepared_set(1,2) ORDER BY 1;
perl_spi_prepared_set
-----------------------
2
diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql
index bb0b8ce4cb6..b01a3c919ca 100644
--- a/src/pl/plperl/sql/plperl.sql
+++ b/src/pl/plperl/sql/plperl.sql
@@ -392,7 +392,7 @@ CREATE OR REPLACE FUNCTION perl_spi_prepared_set(INTEGER, INTEGER) RETURNS SETOF
spi_freeplan($x);
return;
$$ LANGUAGE plperl;
-SELECT * from perl_spi_prepared_set(1,2);
+SELECT * from perl_spi_prepared_set(1,2) ORDER BY 1;
--
-- Test prepare with a type with spaces
diff --git a/src/port/win32fdatasync.c b/src/port/win32fdatasync.c
index 66d75977013..16b9b64b154 100644
--- a/src/port/win32fdatasync.c
+++ b/src/port/win32fdatasync.c
@@ -22,6 +22,15 @@
int
fdatasync(int fd)
{
+#if WINVER < _WIN32_WINNT_WIN8	/* built for pre-Windows 8: report and fail */
+#ifndef FRONTEND
+	ereport(LOG, (errmsg_internal("fdatasync is not supported on this Windows version")));
+#else
+	fprintf(stderr, "fdatasync is not supported on this Windows version\n");
+#endif
+	errno = ENOSYS;				/* don't leave a stale errno for callers that report it */
+	return -1;
+#else
IO_STATUS_BLOCK iosb;
NTSTATUS status;
HANDLE handle;
@@ -48,4 +57,5 @@ fdatasync(int fd)
_dosmaperr(pg_RtlNtStatusToDosError(status));
return -1;
+#endif
}
diff --git a/src/port/win32gettimeofday.c b/src/port/win32gettimeofday.c
index 1e00f7ee149..cc380bf3c14 100644
--- a/src/port/win32gettimeofday.c
+++ b/src/port/win32gettimeofday.c
@@ -63,7 +63,11 @@ gettimeofday(struct timeval *tp, void *tzp)
*/
Assert(tzp == NULL);
+#if WINVER >= _WIN32_WINNT_WIN8
GetSystemTimePreciseAsFileTime(&file_time);
+#else
+ GetSystemTimeAsFileTime(&file_time);
+#endif
ularge.LowPart = file_time.dwLowDateTime;
ularge.HighPart = file_time.dwHighDateTime;
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
index ab6820fb8e5..370e125731a 100644
--- a/src/port/win32ntdll.c
+++ b/src/port/win32ntdll.c
@@ -19,7 +19,10 @@
RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+
+#if WINVER >= _WIN32_WINNT_WIN8
NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
+#endif
typedef struct NtDllRoutine
{
@@ -30,7 +33,9 @@ typedef struct NtDllRoutine
static const NtDllRoutine routines[] = {
{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus},
{"RtlNtStatusToDosError", (pg_funcptr_t *) &pg_RtlNtStatusToDosError},
+#if WINVER >= _WIN32_WINNT_WIN8
{"NtFlushBuffersFileEx", (pg_funcptr_t *) &pg_NtFlushBuffersFileEx}
+#endif
};
static bool initialized;
diff --git a/src/test/modules/test_rbtree/test_rbtree.c b/src/test/modules/test_rbtree/test_rbtree.c
index 9113f1c8d52..5be16d9cd3a 100644
--- a/src/test/modules/test_rbtree/test_rbtree.c
+++ b/src/test/modules/test_rbtree/test_rbtree.c
@@ -84,6 +84,7 @@ create_int_rbtree(void)
irbt_combine,
irbt_alloc,
irbt_free,
+ NULL,
NULL);
}
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 4cfbe424603..dc536d862a4 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1259,7 +1259,8 @@ explain (costs off)
select distinct min(f1), max(f1) from minmaxtest;
QUERY PLAN
---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+ Group Key: (InitPlan 1).col1, (InitPlan 2).col1
InitPlan 1
-> Limit
-> Merge Append
@@ -1282,10 +1283,8 @@ explain (costs off)
-> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
Index Cond: (f1 IS NOT NULL)
-> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
- -> Sort
- Sort Key: ((InitPlan 1).col1), ((InitPlan 2).col1)
- -> Result
-(26 rows)
+ -> Result
+(25 rows)
select distinct min(f1), max(f1) from minmaxtest;
min | max
@@ -2987,19 +2986,19 @@ EXPLAIN (COSTS OFF)
SELECT count(*)
FROM btg t1 JOIN btg t2 ON t1.w = t2.w AND t1.x = t2.x AND t1.z = t2.z
GROUP BY t1.w, t1.z, t1.x;
- QUERY PLAN
--------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------
GroupAggregate
Group Key: t1.x, t1.w, t1.z
- -> Merge Join
- Merge Cond: ((t1.x = t2.x) AND (t1.w = t2.w) AND (t1.z = t2.z))
- -> Incremental Sort
- Sort Key: t1.x, t1.w, t1.z
- Presorted Key: t1.x
+ -> Incremental Sort
+ Sort Key: t1.x, t1.w, t1.z
+ Presorted Key: t1.x
+ -> Merge Join
+ Merge Cond: (t1.x = t2.x)
+ Join Filter: ((t1.w = t2.w) AND (t1.z = t2.z))
-> Index Scan using btg_x_y_idx on btg t1
- -> Sort
- Sort Key: t2.x, t2.w, t2.z
- -> Index Scan using btg_x_y_idx on btg t2
+ -> Materialize
+ -> Index Scan using btg_x_y_idx on btg t2
(11 rows)
RESET enable_nestloop;
@@ -3069,17 +3068,15 @@ GROUP BY c1.w, c1.z;
Group Key: c1.w, c1.z
-> Sort
Sort Key: c1.w, c1.z, c1.x, c1.y
- -> Merge Join
- Merge Cond: (c1.x = c2.x)
- -> Sort
- Sort Key: c1.x
- -> Seq Scan on group_agg_pk c1
- Disabled: true
- -> Sort
- Sort Key: c2.x
+ -> Nested Loop
+ Disabled: true
+ Join Filter: (c1.x = c2.x)
+ -> Seq Scan on group_agg_pk c1
+ Disabled: true
+ -> Materialize
-> Seq Scan on group_agg_pk c2
Disabled: true
-(14 rows)
+(12 rows)
SELECT avg(c1.f ORDER BY c1.x, c1.y)
FROM group_agg_pk c1 JOIN group_agg_pk c2 ON c1.x = c2.x
@@ -3101,21 +3098,18 @@ GROUP BY c1.y,c1.x,c2.x;
QUERY PLAN
-----------------------------------------------------
Group
- Group Key: c1.x, c1.y
- -> Incremental Sort
- Sort Key: c1.x, c1.y
- Presorted Key: c1.x
- -> Merge Join
- Merge Cond: (c1.x = c2.x)
- -> Sort
- Sort Key: c1.x
- -> Seq Scan on group_agg_pk c1
- Disabled: true
- -> Sort
- Sort Key: c2.x
+ Group Key: c1.y, c1.x
+ -> Sort
+ Sort Key: c1.y, c1.x
+ -> Nested Loop
+ Disabled: true
+ Join Filter: (c1.x = c2.x)
+ -> Seq Scan on group_agg_pk c1
+ Disabled: true
+ -> Materialize
-> Seq Scan on group_agg_pk c2
Disabled: true
-(15 rows)
+(12 rows)
EXPLAIN (COSTS OFF)
SELECT c1.y,c1.x FROM group_agg_pk c1
@@ -3125,21 +3119,18 @@ GROUP BY c1.y,c2.x,c1.x;
QUERY PLAN
-----------------------------------------------------
Group
- Group Key: c2.x, c1.y
- -> Incremental Sort
- Sort Key: c2.x, c1.y
- Presorted Key: c2.x
- -> Merge Join
- Merge Cond: (c1.x = c2.x)
- -> Sort
- Sort Key: c1.x
- -> Seq Scan on group_agg_pk c1
- Disabled: true
- -> Sort
- Sort Key: c2.x
+ Group Key: c1.y, c2.x
+ -> Sort
+ Sort Key: c1.y, c2.x
+ -> Nested Loop
+ Disabled: true
+ Join Filter: (c1.x = c2.x)
+ -> Seq Scan on group_agg_pk c1
+ Disabled: true
+ -> Materialize
-> Seq Scan on group_agg_pk c2
Disabled: true
-(15 rows)
+(12 rows)
RESET enable_nestloop;
RESET enable_hashjoin;
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 98e68e972be..c8e1533cd2f 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1892,10 +1892,10 @@ DROP TABLE onek_with_null;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous = 0);
- QUERY PLAN
---------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------
Index Scan using tenk1_thous_tenthous on tenk1
- Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42,0}'::integer[])))
+ Index Cond: ((thousand = 42) AND (thousand = 42) AND (tenthous = ANY ('{1,3,42,0}'::integer[])))
(2 rows)
SELECT * FROM tenk1
@@ -1908,10 +1908,10 @@ SELECT * FROM tenk1
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = (SELECT 1 + 2) OR tenthous = 42);
- QUERY PLAN
-----------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------------------------------------
Index Scan using tenk1_thous_tenthous on tenk1
- Index Cond: ((thousand = 42) AND (tenthous = ANY (ARRAY[1, (InitPlan 1).col1, 42])))
+ Index Cond: ((thousand = 42) AND (thousand = 42) AND (tenthous = ANY (ARRAY[1, (InitPlan 1).col1, 42])))
InitPlan 1
-> Result
(4 rows)
@@ -1926,17 +1926,12 @@ SELECT * FROM tenk1
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42 OR tenthous IS NULL);
- QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
- Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))) OR ((thousand = 42) AND (tenthous IS NULL)))
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (thousand = 42))
Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42) OR (tenthous IS NULL))
- -> BitmapOr
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous IS NULL))
-(8 rows)
+(3 rows)
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
@@ -1965,17 +1960,12 @@ SELECT * FROM tenk1
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1::int2 OR tenthous = 3::int8 OR tenthous = 42::int8);
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
- Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND (tenthous = '1'::smallint)) OR ((thousand = 42) AND ((tenthous = '3'::bigint) OR (tenthous = '42'::bigint))))
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (thousand = 42))
Filter: ((tenthous = '1'::smallint) OR (tenthous = '3'::bigint) OR (tenthous = '42'::bigint))
- -> BitmapOr
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = '1'::smallint))
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = ANY ('{3,42}'::bigint[])))
-(8 rows)
+(3 rows)
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1
@@ -2002,10 +1992,10 @@ SELECT count(*) FROM tenk1
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
- QUERY PLAN
-------------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------------------------------------
Index Scan using tenk1_thous_tenthous on tenk1
- Index Cond: ((thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
+ Index Cond: ((thousand = 42) AND (thousand = 42) AND (tenthous = ANY ('{1,3,42}'::integer[])))
(2 rows)
SELECT * FROM tenk1
@@ -2042,7 +2032,7 @@ SELECT count(*) FROM tenk1 t1
QUERY PLAN
----------------------------------------------------------------------------
Aggregate
- -> Index Only Scan using tenk1_thous_tenthous on tenk1 t1
+ -> Seq Scan on tenk1 t1
Filter: ((thousand = 42) OR (thousand = (SubPlan 1)))
SubPlan 1
-> Limit
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index ad8ab294ff6..b2809fc7eab 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -314,15 +314,15 @@ explain (costs off)
-> Index Scan using ec1_expr3 on ec1 ec1_5
-> Index Scan using ec1_expr4 on ec1 ec1_6
-> Materialize
- -> Merge Join
- Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
+ -> Nested Loop
+ Disabled: true
+ Join Filter: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
-> Merge Append
Sort Key: (((ec1_1.ff + 2) + 1))
-> Index Scan using ec1_expr2 on ec1 ec1_1
-> Index Scan using ec1_expr3 on ec1 ec1_2
-> Index Scan using ec1_expr4 on ec1 ec1_3
- -> Sort
- Sort Key: ec1.f1 USING <
+ -> Materialize
-> Index Scan using ec1_pkey on ec1
Index Cond: (ff = '42'::bigint)
(19 rows)
@@ -368,20 +368,18 @@ explain (costs off)
where ss1.x = ec1.f1 and ec1.ff = 42::int8;
QUERY PLAN
-----------------------------------------------------
- Merge Join
- Merge Cond: ((((ec1_1.ff + 2) + 1)) = ec1.f1)
- -> Merge Append
- Sort Key: (((ec1_1.ff + 2) + 1))
+ Nested Loop
+ Disabled: true
+ -> Index Scan using ec1_pkey on ec1
+ Index Cond: (ff = '42'::bigint)
+ -> Append
-> Index Scan using ec1_expr2 on ec1 ec1_1
- -> Sort
- Sort Key: (((ec1_2.ff + 3) + 1))
- -> Seq Scan on ec1 ec1_2
+ Index Cond: (((ff + 2) + 1) = ec1.f1)
+ -> Seq Scan on ec1 ec1_2
+ Filter: (((ff + 3) + 1) = ec1.f1)
-> Index Scan using ec1_expr4 on ec1 ec1_3
- -> Sort
- Sort Key: ec1.f1 USING <
- -> Index Scan using ec1_pkey on ec1
- Index Cond: (ff = '42'::bigint)
-(13 rows)
+ Index Cond: ((ff + 4) = ec1.f1)
+(11 rows)
-- check effects of row-level security
set enable_nestloop = on;
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 7f9e29c765c..d974b80351f 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -827,7 +827,7 @@ reset check_function_bodies;
set default_with_oids to f;
-- Should not allow to set it to true.
set default_with_oids to t;
-ERROR: tables declared WITH OIDS are not supported
+WARNING: tables declared WITH OIDS are not supported, ignored
-- Test that disabling track_activities disables query ID reporting in
-- pg_stat_activity.
SET compute_query_id = on;
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index b00219643b9..4de56fcddb0 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1429,7 +1429,7 @@ set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
create index on t (a);
analyze t;
set enable_incremental_sort = off;
@@ -1706,19 +1706,18 @@ explain (costs off)
select * from
(select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
order by t1.four, t1.two limit 1;
- QUERY PLAN
------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------------------
Limit
- -> Merge Join
- Merge Cond: ((tenk1.four = t2.four) AND (tenk1.two = t2.two))
- -> Incremental Sort
- Sort Key: tenk1.four, tenk1.two
- Presorted Key: tenk1.four
+ -> Incremental Sort
+ Sort Key: tenk1.four, tenk1.two
+ Presorted Key: tenk1.four
+ -> Nested Loop
+ Join Filter: ((tenk1.four = t2.four) AND (tenk1.two = t2.two))
-> Sort
Sort Key: tenk1.four
-> Seq Scan on tenk1
- -> Sort
- Sort Key: t2.four, t2.two
- -> Seq Scan on tenk1 t2
-(12 rows)
+ -> Materialize
+ -> Seq Scan on tenk1 t2
+(11 rows)
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out
index 4e8fe49c8cf..7159f78c3b9 100644
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -129,13 +129,11 @@ DETAIL: Failing row contains (1, null, 3, (4,4),(4,4)).
INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,300) AS x;
explain (costs off)
select * from tbl where (c1,c2,c3) < (2,5,1);
- QUERY PLAN
-------------------------------------------------
- Bitmap Heap Scan on tbl
+ QUERY PLAN
+--------------------------------------------
+ Seq Scan on tbl
Filter: (ROW(c1, c2, c3) < ROW(2, 5, 1))
- -> Bitmap Index Scan on covering
- Index Cond: (ROW(c1, c2) <= ROW(2, 5))
-(4 rows)
+(2 rows)
select * from tbl where (c1,c2,c3) < (2,5,1);
c1 | c2 | c3 | c4
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 6738ba316e6..127487002fe 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -218,13 +218,13 @@ SELECT t1.a, t2.e
WHERE t1.a = t2.d;
a | e
---+----
- 0 |
1 | -1
2 | 2
- 2 | 4
3 | -3
+ 2 | 4
5 | -5
5 | -5
+ 0 |
(7 rows)
--
@@ -1573,13 +1573,13 @@ SELECT *
FROM J1_TBL INNER JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
(7 rows)
-- Same as above, slightly different syntax
@@ -1587,13 +1587,13 @@ SELECT *
FROM J1_TBL JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
(7 rows)
SELECT *
@@ -1681,35 +1681,35 @@ SELECT *
FROM J1_TBL NATURAL JOIN J2_TBL;
i | j | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (a, d);
a | b | c | d
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
(7 rows)
SELECT *
FROM J1_TBL t1 (a, b, c) NATURAL JOIN J2_TBL t2 (d, a);
a | b | c | d
---+---+------+---
- 0 | | zero |
2 | 3 | two | 2
4 | 1 | four | 2
+ 0 | | zero |
(3 rows)
-- mismatch number of columns
@@ -1718,13 +1718,13 @@ SELECT *
FROM J1_TBL t1 (a, b) NATURAL JOIN J2_TBL t2 (a);
a | b | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
(7 rows)
--
@@ -1734,22 +1734,22 @@ SELECT *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.i);
i | j | t | i | k
---+---+-------+---+----
- 0 | | zero | 0 |
1 | 4 | one | 1 | -1
2 | 3 | two | 2 | 2
- 2 | 3 | two | 2 | 4
3 | 2 | three | 3 | -3
+ 2 | 3 | two | 2 | 4
5 | 0 | five | 5 | -5
5 | 0 | five | 5 | -5
+ 0 | | zero | 0 |
(7 rows)
SELECT *
FROM J1_TBL JOIN J2_TBL ON (J1_TBL.i = J2_TBL.k);
i | j | t | i | k
---+---+------+---+---
- 0 | | zero | | 0
2 | 3 | two | 2 | 2
4 | 1 | four | 2 | 4
+ 0 | | zero | | 0
(3 rows)
--
@@ -1818,13 +1818,13 @@ SELECT *
FROM J1_TBL RIGHT OUTER JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
| | |
| | | 0
(9 rows)
@@ -1833,13 +1833,13 @@ SELECT *
FROM J1_TBL RIGHT JOIN J2_TBL USING (i);
i | j | t | k
---+---+-------+----
- 0 | | zero |
1 | 4 | one | -1
2 | 3 | two | 2
- 2 | 3 | two | 4
3 | 2 | three | -3
+ 2 | 3 | two | 4
5 | 0 | five | -5
5 | 0 | five | -5
+ 0 | | zero |
| | |
| | | 0
(9 rows)
@@ -1985,8 +1985,8 @@ USING (name);
------+----+----
bb | 12 | 13
cc | 22 | 23
- dd | | 33
ee | 42 |
+ dd | | 33
(4 rows)
-- Cases with non-nullable expressions in subquery results;
@@ -2020,8 +2020,8 @@ NATURAL FULL JOIN
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
- dd | | | 33 | 3
ee | 42 | 2 | |
+ dd | | | 33 | 3
(4 rows)
SELECT * FROM
@@ -2142,8 +2142,8 @@ select * from x left join y on (x1 = y1 and y2 is not null);
1 | 11 | 1 | 111
2 | 22 | 2 | 222
3 | | 3 | 333
- 4 | 44 | |
5 | | |
+ 4 | 44 | |
(5 rows)
select * from (x left join y on (x1 = y1)) left join x xx(xx1,xx2)
@@ -2185,9 +2185,9 @@ on (x1 = xx1 and xx2 is not null);
----+----+----+-----+-----+-----
1 | 11 | 1 | 111 | 1 | 11
2 | 22 | 2 | 222 | 2 | 22
- 3 | | 3 | 333 | |
4 | 44 | 4 | | 4 | 44
5 | | | | |
+ 3 | | 3 | 333 | |
(5 rows)
-- these should NOT give the same answers as above
@@ -2573,11 +2573,10 @@ where t1.f1 = coalesce(t2.f1, 1);
-> Materialize
-> Seq Scan on int4_tbl t2
Filter: (f1 > 1)
- -> Materialize
- -> Seq Scan on int4_tbl t3
+ -> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
-(14 rows)
+(13 rows)
explain (costs off)
select * from int4_tbl t1
@@ -2837,20 +2836,19 @@ select x.thousand, x.twothousand, count(*)
from tenk1 x inner join tenk1 y on x.thousand = y.thousand
group by x.thousand, x.twothousand
order by x.thousand desc, x.twothousand;
- QUERY PLAN
-----------------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------------------
GroupAggregate
Group Key: x.thousand, x.twothousand
-> Incremental Sort
Sort Key: x.thousand DESC, x.twothousand
Presorted Key: x.thousand
-> Merge Join
- Merge Cond: (y.thousand = x.thousand)
- -> Index Only Scan Backward using tenk1_thous_tenthous on tenk1 y
- -> Sort
- Sort Key: x.thousand DESC
- -> Seq Scan on tenk1 x
-(11 rows)
+ Merge Cond: (x.thousand = y.thousand)
+ -> Index Scan Backward using tenk1_thous_tenthous on tenk1 x
+ -> Materialize
+ -> Index Only Scan Backward using tenk1_thous_tenthous on tenk1 y
+(10 rows)
reset enable_hashagg;
reset enable_nestloop;
@@ -2906,11 +2904,11 @@ insert into t2a values (200, 2001);
select * from t1 left join t2 on (t1.a = t2.a);
a | b | a | b
-----+------+-----+------
- 5 | 10 | |
- 15 | 20 | |
- 100 | 100 | |
200 | 1000 | 200 | 2000
200 | 1000 | 200 | 2001
+ 15 | 20 | |
+ 5 | 10 | |
+ 100 | 100 | |
(5 rows)
-- Test matching of column name with wrong alias
@@ -2997,16 +2995,12 @@ select * from tbl_ra t1
where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
QUERY PLAN
-------------------------------------------------------
- Merge Right Anti Join
- Merge Cond: (t2.b = t1.a)
+ Merge Anti Join
+ Merge Cond: (t1.a = t2.b)
+ -> Index Scan using tbl_ra_a_key on tbl_ra t1
+ Filter: (b < 2)
-> Index Only Scan using tbl_ra_b_idx on tbl_ra t2
- -> Sort
- Sort Key: t1.a
- -> Bitmap Heap Scan on tbl_ra t1
- Recheck Cond: (b < 2)
- -> Bitmap Index Scan on tbl_ra_b_idx
- Index Cond: (b < 2)
-(9 rows)
+(5 rows)
-- and check we get the expected results
select * from tbl_ra t1
@@ -3302,27 +3296,20 @@ where not exists (
Hash Cond: (t1.c1 = t2.c2)
-> Seq Scan on tt4x t1
-> Hash
- -> Merge Right Join
- Merge Cond: (t5.c1 = t3.c2)
- -> Merge Join
- Merge Cond: (t4.c2 = t5.c1)
- -> Sort
- Sort Key: t4.c2
+ -> Hash Left Join
+ Hash Cond: (t3.c2 = t5.c1)
+ -> Hash Left Join
+ Hash Cond: (t2.c3 = t3.c1)
+ -> Seq Scan on tt4x t2
+ -> Hash
+ -> Seq Scan on tt4x t3
+ -> Hash
+ -> Hash Join
+ Hash Cond: (t4.c2 = t5.c1)
-> Seq Scan on tt4x t4
- -> Sort
- Sort Key: t5.c1
- -> Seq Scan on tt4x t5
- -> Sort
- Sort Key: t3.c2
- -> Merge Left Join
- Merge Cond: (t2.c3 = t3.c1)
- -> Sort
- Sort Key: t2.c3
- -> Seq Scan on tt4x t2
- -> Sort
- Sort Key: t3.c1
- -> Seq Scan on tt4x t3
-(24 rows)
+ -> Hash
+ -> Seq Scan on tt4x t5
+(17 rows)
--
-- regression test for problems of the sort depicted in bug #3494
@@ -3907,8 +3894,8 @@ select * from
where thousand = (q1 + q2);
QUERY PLAN
--------------------------------------------------------------
- Hash Join
- Hash Cond: (tenk1.twothousand = int4_tbl.f1)
+ Nested Loop
+ Join Filter: (tenk1.twothousand = int4_tbl.f1)
-> Nested Loop
-> Nested Loop
-> Seq Scan on q1
@@ -3917,9 +3904,8 @@ where thousand = (q1 + q2);
Recheck Cond: (thousand = (q1.q1 + q2.q2))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = (q1.q1 + q2.q2))
- -> Hash
- -> Seq Scan on int4_tbl
-(12 rows)
+ -> Seq Scan on int4_tbl
+(11 rows)
--
-- test ability to generate a suitable plan for a star-schema query
@@ -4911,7 +4897,7 @@ select q1, unique2, thousand, hundred
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Nested Loop Left Join
- Filter: ((COALESCE(b.thousand, 123) = COALESCE(b.hundred, 123)) AND (a.q1 = COALESCE(b.hundred, 123)))
+ Filter: ((a.q1 = COALESCE(b.hundred, 123)) AND (COALESCE(b.thousand, 123) = COALESCE(b.hundred, 123)))
-> Seq Scan on int8_tbl a
-> Index Scan using tenk1_unique2 on tenk1 b
Index Cond: (unique2 = a.q1)
@@ -5401,9 +5387,8 @@ select 1 from
-> Seq Scan on int8_tbl i8
-> Result
One-Time Filter: false
- -> Materialize
- -> Seq Scan on int4_tbl i42
-(16 rows)
+ -> Seq Scan on int4_tbl i42
+(15 rows)
--
-- test for appropriate join order in the presence of lateral references
@@ -5730,39 +5715,36 @@ explain (verbose, costs off)
select a.q2, b.q1
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
where coalesce(b.q1, 1) > 0;
- QUERY PLAN
----------------------------------------------------------
- Merge Left Join
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop Left Join
+ Disabled: true
Output: a.q2, b.q1
- Merge Cond: (a.q2 = (COALESCE(b.q1, '1'::bigint)))
+ Join Filter: (a.q2 = COALESCE(b.q1, '1'::bigint))
Filter: (COALESCE(b.q1, '1'::bigint) > 0)
- -> Sort
- Output: a.q2
- Sort Key: a.q2
- -> Seq Scan on public.int8_tbl a
- Output: a.q2
- -> Sort
- Output: b.q1, (COALESCE(b.q1, '1'::bigint))
- Sort Key: (COALESCE(b.q1, '1'::bigint))
+ -> Seq Scan on public.int8_tbl a
+ Output: a.q1, a.q2
+ -> Materialize
+ Output: b.q1
-> Seq Scan on public.int8_tbl b
- Output: b.q1, COALESCE(b.q1, '1'::bigint)
-(14 rows)
+ Output: b.q1
+(11 rows)
select a.q2, b.q1
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
where coalesce(b.q1, 1) > 0;
q2 | q1
-------------------+------------------
- -4567890123456789 |
- 123 | 123
- 123 | 123
456 |
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
+ 123 | 123
+ 123 | 123
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
+ -4567890123456789 |
(10 rows)
reset enable_hashjoin;
@@ -6161,34 +6143,31 @@ select d.* from d left join (select distinct * from b) s
explain (costs off)
select d.* from d left join (select * from b group by b.id, b.c_id) s
on d.a = s.id;
- QUERY PLAN
-------------------------------------------
- Merge Right Join
- Merge Cond: (b.id = d.a)
- -> Group
+ QUERY PLAN
+---------------------------
+ Hash Right Join
+ Hash Cond: (b.id = d.a)
+ -> HashAggregate
Group Key: b.id
- -> Index Scan using b_pkey on b
- -> Sort
- Sort Key: d.a
+ -> Seq Scan on b
+ -> Hash
-> Seq Scan on d
-(8 rows)
+(7 rows)
-- similarly, but keying off a DISTINCT clause
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id;
- QUERY PLAN
---------------------------------------
- Merge Right Join
- Merge Cond: (b.id = d.a)
- -> Unique
- -> Sort
- Sort Key: b.id, b.c_id
- -> Seq Scan on b
- -> Sort
- Sort Key: d.a
+ QUERY PLAN
+---------------------------------
+ Hash Right Join
+ Hash Cond: (b.id = d.a)
+ -> HashAggregate
+ Group Key: b.id, b.c_id
+ -> Seq Scan on b
+ -> Hash
-> Seq Scan on d
-(9 rows)
+(7 rows)
-- join removal is not possible here
explain (costs off)
@@ -6444,18 +6423,17 @@ FROM int4_tbl
JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
RIGHT JOIN tenk1 ON NULL)
ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
- QUERY PLAN
---------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------
Nested Loop
+ -> Nested Loop Left Join
+ Join Filter: NULL::boolean
+ Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
+ -> Seq Scan on tenk1
+ -> Result
+ One-Time Filter: false
-> Seq Scan on int4_tbl
- -> Materialize
- -> Nested Loop Left Join
- Join Filter: NULL::boolean
- Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
- -> Seq Scan on tenk1
- -> Result
- One-Time Filter: false
-(9 rows)
+(8 rows)
rollback;
-- another join removal bug: we must clean up correctly when removing a PHV
@@ -8353,15 +8331,15 @@ select * from
lateral (values(x.q1,y.q1,y.q2)) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
- 123 | 456 | | | 123 | |
- 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
- 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
- 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
- 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
+ 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
+ 123 | 456 | | | 123 | |
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
@@ -8370,15 +8348,15 @@ select * from
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
- 123 | 456 | | | 123 | |
- 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
- 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
- 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
- 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
+ 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
+ 123 | 456 | | | 123 | |
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
@@ -8405,24 +8383,24 @@ select v.* from
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
vx | vy
-------------------+-------------------
- 123 |
- 456 |
- 123 | 4567890123456789
- 4567890123456789 | -4567890123456789
+ 4567890123456789 | 123
+ 123 | 456
+ 4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 4567890123456789
- 123 | 4567890123456789
- 4567890123456789 | 123
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
- 123 | 456
4567890123456789 | 4567890123456789
- 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789
+ 123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
- 4567890123456789 | 123
+ 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789
+ 4567890123456789 | -4567890123456789
+ 123 |
+ 456 |
4567890123456789 |
-4567890123456789 |
(20 rows)
@@ -8749,15 +8727,15 @@ select * from
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
- -> Hash Left Join
+ -> Hash Right Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
- Hash Cond: (a.q2 = b.q1)
- -> Seq Scan on public.int8_tbl a
- Output: a.q1, a.q2
+ Hash Cond: (b.q1 = a.q2)
+ -> Seq Scan on public.int8_tbl b
+ Output: b.q1, COALESCE(b.q2, '42'::bigint)
-> Hash
- Output: b.q1, (COALESCE(b.q2, '42'::bigint))
- -> Seq Scan on public.int8_tbl b
- Output: b.q1, COALESCE(b.q2, '42'::bigint)
+ Output: a.q1, a.q2
+ -> Seq Scan on public.int8_tbl a
+ Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl d
Output: d.q1, COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)
-> Hash
@@ -9405,44 +9383,39 @@ select * from j1 natural join j2;
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------------
+ QUERY PLAN
+-----------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> Unique
+ -> HashAggregate
Output: j3.id
- -> Sort
+ Group Key: j3.id
+ -> Seq Scan on public.j3
Output: j3.id
- Sort Key: j3.id
- -> Seq Scan on public.j3
- Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(13 rows)
+(11 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------------
+ QUERY PLAN
+-----------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> Group
+ -> HashAggregate
Output: j3.id
Group Key: j3.id
- -> Sort
+ -> Seq Scan on public.j3
Output: j3.id
- Sort Key: j3.id
- -> Seq Scan on public.j3
- Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(14 rows)
+(11 rows)
drop table j1;
drop table j2;
@@ -9762,16 +9735,14 @@ EXPLAIN (COSTS OFF)
SELECT 1 FROM group_tbl t1
LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
GROUP BY s.c1, s.c2;
- QUERY PLAN
---------------------------------------------
- Group
+ QUERY PLAN
+--------------------------------------
+ HashAggregate
Group Key: t2.a, (COALESCE(t2.a))
- -> Sort
- Sort Key: t2.a, (COALESCE(t2.a))
- -> Nested Loop Left Join
- -> Seq Scan on group_tbl t1
- -> Seq Scan on group_tbl t2
-(7 rows)
+ -> Nested Loop Left Join
+ -> Seq Scan on group_tbl t1
+ -> Seq Scan on group_tbl t2
+(5 rows)
DROP TABLE group_tbl;
--
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index cf2219df754..9c2246cd24b 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -42,15 +42,12 @@ WHEN MATCHED THEN
QUERY PLAN
----------------------------------------
Merge on target t
- -> Merge Join
- Merge Cond: (t.tid = s.sid)
- -> Sort
- Sort Key: t.tid
- -> Seq Scan on target t
- -> Sort
- Sort Key: s.sid
+ -> Hash Join
+ Hash Cond: (t.tid = s.sid)
+ -> Seq Scan on target t
+ -> Hash
-> Seq Scan on source s
-(9 rows)
+(6 rows)
--
-- Errors
@@ -1640,17 +1637,13 @@ WHEN MATCHED THEN
-------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: updated=50
- -> Merge Join (actual rows=50.00 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=50.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Join (actual rows=50.00 loops=1)
+ Hash Cond: (t.a = s.a)
+ -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
+ -> Hash (actual rows=100.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
-(12 rows)
+(8 rows)
-- only updates to selected tuples
SELECT explain_merge('
@@ -1661,17 +1654,13 @@ WHEN MATCHED AND t.a < 10 THEN
-------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: updated=5 skipped=45
- -> Merge Join (actual rows=50.00 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=50.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Join (actual rows=50.00 loops=1)
+ Hash Cond: (t.a = s.a)
+ -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
+ -> Hash (actual rows=100.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
-(12 rows)
+(8 rows)
-- updates + deletes
SELECT explain_merge('
@@ -1684,38 +1673,30 @@ WHEN MATCHED AND t.a >= 10 AND t.a <= 20 THEN
-------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: updated=5 deleted=5 skipped=40
- -> Merge Join (actual rows=50.00 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=50.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Join (actual rows=50.00 loops=1)
+ Hash Cond: (t.a = s.a)
+ -> Seq Scan on ex_mtarget t (actual rows=50.00 loops=1)
+ -> Hash (actual rows=100.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
-(12 rows)
+(8 rows)
-- only inserts
SELECT explain_merge('
MERGE INTO ex_mtarget t USING ex_msource s ON t.a = s.a
WHEN NOT MATCHED AND s.a < 10 THEN
INSERT VALUES (a, b)');
- explain_merge
--------------------------------------------------------------------------
+ explain_merge
+------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: inserted=4 skipped=96
- -> Merge Left Join (actual rows=100.00 loops=1)
- Merge Cond: (s.a = t.a)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
- -> Sort (actual rows=45.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Left Join (actual rows=100.00 loops=1)
+ Hash Cond: (s.a = t.a)
+ -> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
+ -> Hash (actual rows=45.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_mtarget t (actual rows=45.00 loops=1)
-(12 rows)
+(8 rows)
-- all three
SELECT explain_merge('
@@ -1726,21 +1707,17 @@ WHEN MATCHED AND t.a >= 30 AND t.a <= 40 THEN
DELETE
WHEN NOT MATCHED AND s.a < 20 THEN
INSERT VALUES (a, b)');
- explain_merge
--------------------------------------------------------------------------
+ explain_merge
+------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: inserted=10 updated=9 deleted=5 skipped=76
- -> Merge Left Join (actual rows=100.00 loops=1)
- Merge Cond: (s.a = t.a)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
- -> Sort (actual rows=49.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Left Join (actual rows=100.00 loops=1)
+ Hash Cond: (s.a = t.a)
+ -> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
+ -> Hash (actual rows=49.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_mtarget t (actual rows=49.00 loops=1)
-(12 rows)
+(8 rows)
-- not matched by source
SELECT explain_merge('
@@ -1751,17 +1728,13 @@ WHEN NOT MATCHED BY SOURCE and t.a < 10 THEN
-------------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
Tuples: skipped=54
- -> Merge Left Join (actual rows=54.00 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=54.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
- -> Seq Scan on ex_mtarget t (actual rows=54.00 loops=1)
- -> Sort (actual rows=100.00 loops=1)
- Sort Key: s.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Left Join (actual rows=54.00 loops=1)
+ Hash Cond: (t.a = s.a)
+ -> Seq Scan on ex_mtarget t (actual rows=54.00 loops=1)
+ -> Hash (actual rows=100.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_msource s (actual rows=100.00 loops=1)
-(12 rows)
+(8 rows)
-- not matched by source and target
SELECT explain_merge('
@@ -1794,18 +1767,15 @@ WHEN MATCHED AND t.a < 10 THEN
explain_merge
-----------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0.00 loops=1)
- -> Merge Join (actual rows=0.00 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=0.00 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Join (actual rows=0.00 loops=1)
+ Hash Cond: (s.a = t.a)
+ -> Seq Scan on ex_msource s (actual rows=1.00 loops=1)
+ -> Hash (actual rows=0.00 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_mtarget t (actual rows=0.00 loops=1)
Filter: (a < '-1000'::integer)
Rows Removed by Filter: 54
- -> Sort (never executed)
- Sort Key: s.a
- -> Seq Scan on ex_msource s (never executed)
-(12 rows)
+(9 rows)
DROP TABLE ex_msource, ex_mtarget;
DROP FUNCTION explain_merge(text);
@@ -2347,13 +2317,13 @@ MERGE INTO pa_target t
merge_action | old | new | logts | tid | balance | val
--------------+--------------------------------------------+---------------------------------------------------------------+--------------------------+-----+---------+--------------------------
UPDATE | ("Tue Jan 31 00:00:00 2017",1,100,initial) | ("Tue Jan 31 00:00:00 2017",1,110,"initial updated by merge") | Tue Jan 31 00:00:00 2017 | 1 | 110 | initial updated by merge
- UPDATE | ("Tue Feb 28 00:00:00 2017",2,200,initial) | ("Tue Feb 28 00:00:00 2017",2,220,"initial updated by merge") | Tue Feb 28 00:00:00 2017 | 2 | 220 | initial updated by merge
- INSERT | | ("Sun Jan 15 00:00:00 2017",3,30,"inserted by merge") | Sun Jan 15 00:00:00 2017 | 3 | 30 | inserted by merge
+ UPDATE | ("Tue Jan 31 00:00:00 2017",7,700,initial) | ("Tue Jan 31 00:00:00 2017",7,770,"initial updated by merge") | Tue Jan 31 00:00:00 2017 | 7 | 770 | initial updated by merge
UPDATE | ("Tue Jan 31 00:00:00 2017",4,400,initial) | ("Tue Jan 31 00:00:00 2017",4,440,"initial updated by merge") | Tue Jan 31 00:00:00 2017 | 4 | 440 | initial updated by merge
UPDATE | ("Tue Feb 28 00:00:00 2017",5,500,initial) | ("Tue Feb 28 00:00:00 2017",5,550,"initial updated by merge") | Tue Feb 28 00:00:00 2017 | 5 | 550 | initial updated by merge
- INSERT | | ("Sun Jan 15 00:00:00 2017",6,60,"inserted by merge") | Sun Jan 15 00:00:00 2017 | 6 | 60 | inserted by merge
- UPDATE | ("Tue Jan 31 00:00:00 2017",7,700,initial) | ("Tue Jan 31 00:00:00 2017",7,770,"initial updated by merge") | Tue Jan 31 00:00:00 2017 | 7 | 770 | initial updated by merge
+ UPDATE | ("Tue Feb 28 00:00:00 2017",2,200,initial) | ("Tue Feb 28 00:00:00 2017",2,220,"initial updated by merge") | Tue Feb 28 00:00:00 2017 | 2 | 220 | initial updated by merge
UPDATE | ("Tue Feb 28 00:00:00 2017",8,800,initial) | ("Tue Feb 28 00:00:00 2017",8,880,"initial updated by merge") | Tue Feb 28 00:00:00 2017 | 8 | 880 | initial updated by merge
+ INSERT | | ("Sun Jan 15 00:00:00 2017",6,60,"inserted by merge") | Sun Jan 15 00:00:00 2017 | 6 | 60 | inserted by merge
+ INSERT | | ("Sun Jan 15 00:00:00 2017",3,30,"inserted by merge") | Sun Jan 15 00:00:00 2017 | 3 | 30 | inserted by merge
INSERT | | ("Sun Jan 15 00:00:00 2017",9,90,"inserted by merge") | Sun Jan 15 00:00:00 2017 | 9 | 90 | inserted by merge
(9 rows)
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 673ce7658b4..b04f00bab45 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -759,10 +759,10 @@ EXPLAIN (COSTS OFF)
SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab1.x, pagg_tab2.y
- -> HashAggregate
- Group Key: pagg_tab1.x, pagg_tab2.y
+ GroupAggregate
+ Group Key: pagg_tab1.x, pagg_tab2.y
+ -> Sort
+ Sort Key: pagg_tab1.x, pagg_tab2.y
-> Hash Left Join
Hash Cond: (pagg_tab1.x = pagg_tab2.y)
Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20))
@@ -979,12 +979,12 @@ SET parallel_setup_cost = 0;
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
- QUERY PLAN
---------------------------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
- -> Gather
- Workers Planned: 2
+ QUERY PLAN
+--------------------------------------------------------------------------------------------
+ Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-> Parallel Append
-> GroupAggregate
Group Key: pagg_tab_ml.a
@@ -1411,28 +1411,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
-- When GROUP BY clause does not match; partial aggregation is performed for each partition.
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
- QUERY PLAN
--------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
- -> Finalize GroupAggregate
+ -> Finalize HashAggregate
Group Key: pagg_tab_para.y
Filter: (avg(pagg_tab_para.x) < '12'::numeric)
- -> Gather Merge
+ -> Gather
Workers Planned: 2
- -> Sort
- Sort Key: pagg_tab_para.y
- -> Parallel Append
- -> Partial HashAggregate
- Group Key: pagg_tab_para.y
- -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
- -> Partial HashAggregate
- Group Key: pagg_tab_para_1.y
- -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
- -> Partial HashAggregate
- Group Key: pagg_tab_para_2.y
- -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+ -> Parallel Append
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para.y
+ -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_1.y
+ -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_2.y
+ -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
y | sum | avg | count
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index d5368186caa..b460355c173 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -65,16 +65,15 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b =
-- inner join with partially-redundant join clauses
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b;
- QUERY PLAN
----------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------
Sort
Sort Key: t1.a
-> Append
- -> Merge Join
- Merge Cond: (t1_1.a = t2_1.a)
- -> Index Scan using iprt1_p1_a on prt1_p1 t1_1
- -> Sort
- Sort Key: t2_1.b
+ -> Hash Join
+ Hash Cond: (t1_1.a = t2_1.a)
+ -> Seq Scan on prt1_p1 t1_1
+ -> Hash
-> Seq Scan on prt2_p1 t2_1
Filter: (a = b)
-> Hash Join
@@ -89,7 +88,7 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a =
-> Hash
-> Seq Scan on prt2_p3 t2_3
Filter: (a = b)
-(22 rows)
+(21 rows)
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.a AND t1.a = t2.b ORDER BY t1.a, t2.b;
a | c | b | c
@@ -495,34 +494,34 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
- QUERY PLAN
---------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------
Sort
Sort Key: t1.a
- -> Hash Left Join
- Hash Cond: ((t1.c)::text = (t2.c)::text)
+ -> Hash Right Join
+ Hash Cond: ((t2.c)::text = (t1.c)::text)
Filter: ((t1.b + COALESCE(t2.b, 0)) = 0)
-> Append
- -> Seq Scan on prt1_p1 t1_1
- -> Seq Scan on prt1_p2 t1_2
- -> Seq Scan on prt1_p3 t1_3
+ -> Hash Join
+ Hash Cond: (t2_1.a = t3_1.b)
+ -> Seq Scan on prt1_p1 t2_1
+ -> Hash
+ -> Seq Scan on prt2_p1 t3_1
+ -> Hash Join
+ Hash Cond: (t2_2.a = t3_2.b)
+ -> Seq Scan on prt1_p2 t2_2
+ -> Hash
+ -> Seq Scan on prt2_p2 t3_2
+ -> Hash Join
+ Hash Cond: (t2_3.a = t3_3.b)
+ -> Seq Scan on prt1_p3 t2_3
+ -> Hash
+ -> Seq Scan on prt2_p3 t3_3
-> Hash
-> Append
- -> Hash Join
- Hash Cond: (t2_1.a = t3_1.b)
- -> Seq Scan on prt1_p1 t2_1
- -> Hash
- -> Seq Scan on prt2_p1 t3_1
- -> Hash Join
- Hash Cond: (t2_2.a = t3_2.b)
- -> Seq Scan on prt1_p2 t2_2
- -> Hash
- -> Seq Scan on prt2_p2 t3_2
- -> Hash Join
- Hash Cond: (t2_3.a = t3_3.b)
- -> Seq Scan on prt1_p3 t2_3
- -> Hash
- -> Seq Scan on prt2_p3 t3_3
+ -> Seq Scan on prt1_p1 t1_1
+ -> Seq Scan on prt1_p2 t1_2
+ -> Seq Scan on prt1_p3 t1_3
(26 rows)
SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
@@ -643,52 +642,41 @@ EXPLAIN (COSTS OFF)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------
Group
Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Merge Append
+ -> Sort
Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Group
- Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Sort
- Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Merge Full Join
- Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
- Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
- -> Sort
- Sort Key: prt1.a, prt1.b
- -> Seq Scan on prt1_p1 prt1
- -> Sort
- Sort Key: p2.a, p2.b
- -> Seq Scan on prt2_p1 p2
- -> Group
- Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
- -> Sort
- Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
- -> Merge Full Join
- Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
- Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
- -> Sort
- Sort Key: prt1_1.a, prt1_1.b
- -> Seq Scan on prt1_p2 prt1_1
- -> Sort
- Sort Key: p2_1.a, p2_1.b
- -> Seq Scan on prt2_p2 p2_1
- -> Group
- Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
- -> Sort
- Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
- -> Merge Full Join
- Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
- Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
- -> Sort
- Sort Key: prt1_2.a, prt1_2.b
- -> Seq Scan on prt1_p3 prt1_2
- -> Sort
- Sort Key: p2_2.a, p2_2.b
- -> Seq Scan on prt2_p3 p2_2
-(43 rows)
+ -> Append
+ -> Merge Full Join
+ Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+ Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+ -> Sort
+ Sort Key: prt1_1.a, prt1_1.b
+ -> Seq Scan on prt1_p1 prt1_1
+ -> Sort
+ Sort Key: p2_1.a, p2_1.b
+ -> Seq Scan on prt2_p1 p2_1
+ -> Merge Full Join
+ Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+ Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+ -> Sort
+ Sort Key: prt1_2.a, prt1_2.b
+ -> Seq Scan on prt1_p2 prt1_2
+ -> Sort
+ Sort Key: p2_2.a, p2_2.b
+ -> Seq Scan on prt2_p2 p2_2
+ -> Merge Full Join
+ Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+ Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+ -> Sort
+ Sort Key: prt1_3.a, prt1_3.b
+ -> Seq Scan on prt1_p3 prt1_3
+ -> Sort
+ Sort Key: p2_3.a, p2_3.b
+ -> Seq Scan on prt2_p3 p2_3
+(32 rows)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
@@ -1188,8 +1176,8 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHER
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
- QUERY PLAN
----------------------------------------------------------------------------
+ QUERY PLAN
+---------------------------------------------------------------------------------
Sort
Sort Key: t1.a
-> Append
@@ -1218,18 +1206,19 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (
Index Cond: (a = t1_7.b)
Filter: (b = 0)
-> Nested Loop
- -> HashAggregate
- Group Key: t1_8.b
- -> Hash Semi Join
- Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
- -> Seq Scan on prt2_p3 t1_8
- -> Hash
- -> Seq Scan on prt1_e_p3 t1_11
- Filter: (c = 0)
+ -> Unique
+ -> Sort
+ Sort Key: t1_8.b
+ -> Hash Semi Join
+ Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
+ -> Seq Scan on prt2_p3 t1_8
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t1_11
+ Filter: (c = 0)
-> Index Scan using iprt1_p3_a on prt1_p3 t1_5
Index Cond: (a = t1_8.b)
Filter: (b = 0)
-(39 rows)
+(40 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
a | b | c
@@ -1245,56 +1234,38 @@ SET enable_hashjoin TO off;
SET enable_nestloop TO off;
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
- QUERY PLAN
-------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------
Merge Append
Sort Key: t1.a
-> Merge Semi Join
Merge Cond: (t1_3.a = t1_6.b)
- -> Sort
- Sort Key: t1_3.a
- -> Seq Scan on prt1_p1 t1_3
- Filter: (b = 0)
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_3
+ Filter: (b = 0)
-> Merge Semi Join
- Merge Cond: (t1_6.b = (((t1_9.a + t1_9.b) / 2)))
- -> Sort
- Sort Key: t1_6.b
- -> Seq Scan on prt2_p1 t1_6
- -> Sort
- Sort Key: (((t1_9.a + t1_9.b) / 2))
- -> Seq Scan on prt1_e_p1 t1_9
- Filter: (c = 0)
+ Merge Cond: (t1_6.b = ((t1_9.a + t1_9.b) / 2))
+ -> Index Only Scan using iprt2_p1_b on prt2_p1 t1_6
+ -> Index Scan using iprt1_e_p1_ab2 on prt1_e_p1 t1_9
+ Filter: (c = 0)
-> Merge Semi Join
Merge Cond: (t1_4.a = t1_7.b)
- -> Sort
- Sort Key: t1_4.a
- -> Seq Scan on prt1_p2 t1_4
- Filter: (b = 0)
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_4
+ Filter: (b = 0)
-> Merge Semi Join
- Merge Cond: (t1_7.b = (((t1_10.a + t1_10.b) / 2)))
- -> Sort
- Sort Key: t1_7.b
- -> Seq Scan on prt2_p2 t1_7
- -> Sort
- Sort Key: (((t1_10.a + t1_10.b) / 2))
- -> Seq Scan on prt1_e_p2 t1_10
- Filter: (c = 0)
+ Merge Cond: (t1_7.b = ((t1_10.a + t1_10.b) / 2))
+ -> Index Only Scan using iprt2_p2_b on prt2_p2 t1_7
+ -> Index Scan using iprt1_e_p2_ab2 on prt1_e_p2 t1_10
+ Filter: (c = 0)
-> Merge Semi Join
Merge Cond: (t1_5.a = t1_8.b)
- -> Sort
- Sort Key: t1_5.a
- -> Seq Scan on prt1_p3 t1_5
- Filter: (b = 0)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_5
+ Filter: (b = 0)
-> Merge Semi Join
- Merge Cond: (t1_8.b = (((t1_11.a + t1_11.b) / 2)))
- -> Sort
- Sort Key: t1_8.b
- -> Seq Scan on prt2_p3 t1_8
- -> Sort
- Sort Key: (((t1_11.a + t1_11.b) / 2))
- -> Seq Scan on prt1_e_p3 t1_11
- Filter: (c = 0)
-(47 rows)
+ Merge Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
+ -> Index Only Scan using iprt2_p3_b on prt2_p3 t1_8
+ -> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t1_11
+ Filter: (c = 0)
+(29 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
a | b | c
@@ -1307,60 +1278,39 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
- QUERY PLAN
-----------------------------------------------------------------------------
+ QUERY PLAN
+---------------------------------------------------------------------------
Sort
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
-> Append
- -> Merge Left Join
- Merge Cond: (t1_1.a = t2_1.b)
- -> Sort
- Sort Key: t1_1.a
- -> Merge Left Join
- Merge Cond: ((((t3_1.a + t3_1.b) / 2)) = t1_1.a)
- -> Sort
- Sort Key: (((t3_1.a + t3_1.b) / 2))
- -> Seq Scan on prt1_e_p1 t3_1
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_1.a
- -> Seq Scan on prt1_p1 t1_1
- -> Sort
- Sort Key: t2_1.b
- -> Seq Scan on prt2_p1 t2_1
- -> Merge Left Join
- Merge Cond: (t1_2.a = t2_2.b)
- -> Sort
- Sort Key: t1_2.a
- -> Merge Left Join
- Merge Cond: ((((t3_2.a + t3_2.b) / 2)) = t1_2.a)
- -> Sort
- Sort Key: (((t3_2.a + t3_2.b) / 2))
- -> Seq Scan on prt1_e_p2 t3_2
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_2.a
- -> Seq Scan on prt1_p2 t1_2
- -> Sort
- Sort Key: t2_2.b
- -> Seq Scan on prt2_p2 t2_2
- -> Merge Left Join
- Merge Cond: (t1_3.a = t2_3.b)
- -> Sort
- Sort Key: t1_3.a
- -> Merge Left Join
- Merge Cond: ((((t3_3.a + t3_3.b) / 2)) = t1_3.a)
- -> Sort
- Sort Key: (((t3_3.a + t3_3.b) / 2))
- -> Seq Scan on prt1_e_p3 t3_3
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_3.a
- -> Seq Scan on prt1_p3 t1_3
- -> Sort
- Sort Key: t2_3.b
- -> Seq Scan on prt2_p3 t2_3
-(51 rows)
+ -> Merge Right Join
+ Merge Cond: (t1_1.a = ((t3_1.a + t3_1.b) / 2))
+ -> Merge Left Join
+ Merge Cond: (t1_1.a = t2_1.b)
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_1
+ -> Materialize
+ -> Index Scan using iprt2_p1_b on prt2_p1 t2_1
+ -> Index Scan using iprt1_e_p1_ab2 on prt1_e_p1 t3_1
+ Filter: (c = 0)
+ -> Merge Right Join
+ Merge Cond: (t1_2.a = ((t3_2.a + t3_2.b) / 2))
+ -> Merge Left Join
+ Merge Cond: (t1_2.a = t2_2.b)
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_2
+ -> Materialize
+ -> Index Scan using iprt2_p2_b on prt2_p2 t2_2
+ -> Index Scan using iprt1_e_p2_ab2 on prt1_e_p2 t3_2
+ Filter: (c = 0)
+ -> Merge Right Join
+ Merge Cond: (t1_3.a = ((t3_3.a + t3_3.b) / 2))
+ -> Merge Left Join
+ Merge Cond: (t1_3.a = t2_3.b)
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_3
+ -> Materialize
+ -> Index Scan using iprt2_p3_b on prt2_p3 t2_3
+ -> Index Scan using iprt1_e_p3_ab2 on prt1_e_p3 t3_3
+ Filter: (c = 0)
+(30 rows)
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
a | c | b | c | ?column? | c
@@ -1383,28 +1333,27 @@ SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2
-- This should generate a partitionwise join, but currently fails to
EXPLAIN (COSTS OFF)
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
- QUERY PLAN
------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------------------
Incremental Sort
Sort Key: prt1.a, prt2.b
Presorted Key: prt1.a
-> Merge Left Join
Merge Cond: (prt1.a = prt2.b)
- -> Sort
- Sort Key: prt1.a
- -> Append
- -> Seq Scan on prt1_p1 prt1_1
- Filter: ((a < 450) AND (b = 0))
- -> Seq Scan on prt1_p2 prt1_2
- Filter: ((a < 450) AND (b = 0))
- -> Sort
- Sort Key: prt2.b
+ -> Append
+ -> Index Scan using iprt1_p1_a on prt1_p1 prt1_1
+ Index Cond: (a < 450)
+ Filter: (b = 0)
+ -> Index Scan using iprt1_p2_a on prt1_p2 prt1_2
+ Index Cond: (a < 450)
+ Filter: (b = 0)
+ -> Materialize
-> Append
- -> Seq Scan on prt2_p2 prt2_1
- Filter: (b > 250)
- -> Seq Scan on prt2_p3 prt2_2
- Filter: (b > 250)
-(19 rows)
+ -> Index Only Scan using iprt2_p2_b on prt2_p2 prt2_1
+ Index Cond: (b > 250)
+ -> Index Only Scan using iprt2_p3_b on prt2_p3 prt2_2
+ Index Cond: (b > 250)
+(18 rows)
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
a | b
@@ -1424,25 +1373,21 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT *
-- partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
- QUERY PLAN
------------------------------------------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------------------
Merge Join
- Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
- -> Sort
- Sort Key: t1.a, ((((t1.*)::prt1))::text)
- -> Result
- -> Append
- -> Seq Scan on prt1_p1 t1_1
- -> Seq Scan on prt1_p2 t1_2
- -> Seq Scan on prt1_p3 t1_3
- -> Sort
- Sort Key: t2.b, ((((t2.*)::prt2))::text)
- -> Result
- -> Append
- -> Seq Scan on prt2_p1 t2_1
- -> Seq Scan on prt2_p2 t2_2
- -> Seq Scan on prt2_p3 t2_3
-(16 rows)
+ Merge Cond: (t1.a = t2.b)
+ Join Filter: ((((t2.*)::prt2))::text = (((t1.*)::prt1))::text)
+ -> Append
+ -> Index Scan using iprt1_p1_a on prt1_p1 t1_1
+ -> Index Scan using iprt1_p2_a on prt1_p2 t1_2
+ -> Index Scan using iprt1_p3_a on prt1_p3 t1_3
+ -> Materialize
+ -> Append
+ -> Index Scan using iprt2_p1_b on prt2_p1 t2_1
+ -> Index Scan using iprt2_p2_b on prt2_p2 t2_2
+ -> Index Scan using iprt2_p3_b on prt2_p3 t2_3
+(12 rows)
SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
a | b
@@ -2334,29 +2279,24 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a
QUERY PLAN
--------------------------------------------------------
Hash Join
- Hash Cond: (t2.a = t1.a)
+ Hash Cond: (t1.a = t2.a)
-> Append
- -> Seq Scan on prt4_n_p1 t2_1
- -> Seq Scan on prt4_n_p2 t2_2
- -> Seq Scan on prt4_n_p3 t2_3
+ -> Seq Scan on prt1_p1 t1_1
+ -> Seq Scan on prt1_p2 t1_2
+ -> Seq Scan on prt1_p3 t1_3
-> Hash
- -> Append
- -> Hash Join
- Hash Cond: (t1_1.a = t3_1.b)
- -> Seq Scan on prt1_p1 t1_1
- -> Hash
+ -> Hash Join
+ Hash Cond: (t2.a = t3.b)
+ -> Append
+ -> Seq Scan on prt4_n_p1 t2_1
+ -> Seq Scan on prt4_n_p2 t2_2
+ -> Seq Scan on prt4_n_p3 t2_3
+ -> Hash
+ -> Append
-> Seq Scan on prt2_p1 t3_1
- -> Hash Join
- Hash Cond: (t1_2.a = t3_2.b)
- -> Seq Scan on prt1_p2 t1_2
- -> Hash
-> Seq Scan on prt2_p2 t3_2
- -> Hash Join
- Hash Cond: (t1_3.a = t3_3.b)
- -> Seq Scan on prt1_p3 t1_3
- -> Hash
-> Seq Scan on prt2_p3 t3_3
-(23 rows)
+(18 rows)
-- partitionwise join can not be applied if there are no equi-join conditions
-- between partition keys
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index bab0cc93ff5..738abb5e9b4 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -521,6 +521,124 @@ SELECT * FROM nocols n, LATERAL (VALUES(n.*)) v;
--
(1 row)
+--
+-- test order by NULLS (FIRST|LAST)
+--
+select unique1, unique2 into onek_with_null from onek;
+insert into onek_with_null (unique1,unique2) values (NULL, -1), (NULL, NULL);
+select * from onek_with_null order by unique1 nulls first , unique2 limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls first , unique2 nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls first , unique2 nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 nulls first , u2 nulls first limit 3;
+ u1 | u2
+----+-----
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 asc nulls first , u2 desc nulls first limit 3;
+ u1 | u2
+----+-----
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+drop table onek_with_null;
--
-- Test ORDER BY options
--
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 379ba0bc9fa..a466ee43b57 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -355,15 +355,14 @@ SET max_parallel_workers_per_gather=2;
-- distinct
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
- QUERY PLAN
-----------------------------------------------
- Limit
+ QUERY PLAN
+----------------------------------------
+ HashAggregate
-> Gather
Workers Planned: 2
- -> Limit
- -> Parallel Seq Scan on tenk1
- Filter: (four = 10)
-(6 rows)
+ -> Parallel Seq Scan on tenk1
+ Filter: (four = 10)
+(5 rows)
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index c2f2604a52b..563a69bdfb4 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -4,6 +4,7 @@
-- with autovacuum_enabled = off, so that we don't have unstable results
-- from auto-analyze happening when we didn't expect it.
--
+set default_statistics_target=10000; --prevent random subset for joinsel
-- check the number of estimated/actual rows in the top node
create function check_estimated_rows(text) returns table (estimated int, actual int)
language plpgsql as
@@ -1169,43 +1170,43 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
estimated | actual
-----------+--------
- 2 | 100
+ 99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
estimated | actual
-----------+--------
- 8 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1''');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
estimated | actual
-----------+--------
- 3 | 400
+ 386 | 400
(1 row)
-- OR clauses referencing the same attribute
@@ -1238,37 +1239,37 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
estimated | actual
-----------+--------
- 2 | 100
+ 99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
- 8 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
estimated | actual
-----------+--------
- 3 | 400
+ 386 | 400
(1 row)
-- ANY with inequalities should not benefit from functional dependencies
@@ -1934,7 +1935,7 @@ ANALYZE mcv_lists;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
estimated | actual
-----------+--------
- 3 | 4
+ 4 | 4
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
@@ -2871,7 +2872,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
estimated | actual
-----------+--------
- 96 | 102
+ 102 | 102
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
@@ -2895,7 +2896,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
estimated | actual
-----------+--------
- 102 | 104
+ 104 | 104
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
@@ -2907,7 +2908,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
estimated | actual
-----------+--------
- 108 | 102
+ 102 | 102
(1 row)
DROP TABLE mcv_lists_partial;
@@ -3533,17 +3534,14 @@ ANALYZE sb_1, sb_2;
-- bucket size is quite big because there are possibly many correlations.
EXPLAIN (COSTS OFF) -- Choose merge join
SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
- QUERY PLAN
--------------------------------------------------------------
- Merge Join
- Merge Cond: ((a.z = b.z) AND (a.x = b.x) AND (a.y = b.y))
- -> Sort
- Sort Key: a.z, a.x, a.y
- -> Seq Scan on sb_1 a
- -> Sort
- Sort Key: b.z, b.x, b.y
+ QUERY PLAN
+------------------------------------------------------------
+ Hash Join
+ Hash Cond: ((a.x = b.x) AND (a.y = b.y) AND (a.z = b.z))
+ -> Seq Scan on sb_1 a
+ -> Hash
-> Seq Scan on sb_2 b
-(8 rows)
+(5 rows)
-- The ndistinct extended statistics on (x, y, z) provides more reliable value
-- of bucket size.
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ff667bec8ba..e86e2b5b155 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2178,8 +2178,8 @@ select ss2.* from
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
- QUERY PLAN
-----------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------
Sort
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2
@@ -2189,15 +2189,15 @@ order by 1, 2, 3;
-> Hash Join
Output: (COALESCE(t3.q1)), t4.q1, t4.q2
Hash Cond: (t2.q2 = t4.q1)
- -> Hash Left Join
+ -> Hash Right Join
Output: t2.q2, (COALESCE(t3.q1))
- Hash Cond: (t2.q1 = t3.q2)
- -> Seq Scan on public.int8_tbl t2
- Output: t2.q1, t2.q2
+ Hash Cond: (t3.q2 = t2.q1)
+ -> Seq Scan on public.int8_tbl t3
+ Output: t3.q2, COALESCE(t3.q1)
-> Hash
- Output: t3.q2, (COALESCE(t3.q1))
- -> Seq Scan on public.int8_tbl t3
- Output: t3.q2, COALESCE(t3.q1)
+ Output: t2.q2, t2.q1
+ -> Seq Scan on public.int8_tbl t2
+ Output: t2.q2, t2.q1
-> Hash
Output: t4.q1, t4.q2
-> Seq Scan on public.int8_tbl t4
@@ -2252,8 +2252,8 @@ select ss2.* from
lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1)
on t1.q2 = ss2.q1
order by 1, 2, 3;
- QUERY PLAN
-----------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------
Sort
Output: ((COALESCE(t3.q1))), t4.q1, t4.q2
Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2
@@ -2263,15 +2263,15 @@ order by 1, 2, 3;
-> Nested Loop
Output: t4.q1, t4.q2, ((COALESCE(t3.q1)))
Join Filter: (t2.q2 = t4.q1)
- -> Hash Left Join
+ -> Hash Right Join
Output: t2.q2, (COALESCE(t3.q1))
- Hash Cond: (t2.q1 = t3.q2)
- -> Seq Scan on public.int8_tbl t2
- Output: t2.q1, t2.q2
+ Hash Cond: (t3.q2 = t2.q1)
+ -> Seq Scan on public.int8_tbl t3
+ Output: t3.q2, COALESCE(t3.q1)
-> Hash
- Output: t3.q2, (COALESCE(t3.q1))
- -> Seq Scan on public.int8_tbl t3
- Output: t3.q2, COALESCE(t3.q1)
+ Output: t2.q2, t2.q1
+ -> Seq Scan on public.int8_tbl t2
+ Output: t2.q2, t2.q1
-> Seq Scan on public.int8_tbl t4
Output: t4.q1, t4.q2, (COALESCE(t3.q1))
-> Hash
@@ -2392,23 +2392,19 @@ with x as (select * from (select f1, now() as n from subselect_tbl) ss)
select * from x, x x2 where x.n = x2.n;
QUERY PLAN
-------------------------------------------
- Merge Join
+ Hash Join
Output: x.f1, x.n, x2.f1, x2.n
- Merge Cond: (x.n = x2.n)
+ Hash Cond: (x.n = x2.n)
CTE x
-> Seq Scan on public.subselect_tbl
Output: subselect_tbl.f1, now()
- -> Sort
+ -> CTE Scan on x
Output: x.f1, x.n
- Sort Key: x.n
- -> CTE Scan on x
- Output: x.f1, x.n
- -> Sort
+ -> Hash
Output: x2.f1, x2.n
- Sort Key: x2.n
-> CTE Scan on x x2
Output: x2.f1, x2.n
-(16 rows)
+(12 rows)
explain (verbose, costs off)
with x as not materialized (select * from (select f1, now() as n from subselect_tbl) ss)
@@ -2706,13 +2702,15 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
EXPLAIN (COSTS OFF)
SELECT * FROM onek, (VALUES('RFAAAA'), ('VJAAAA')) AS v (i)
WHERE onek.stringu1 = v.i;
- QUERY PLAN
--------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------
Nested Loop
-> Values Scan on "*VALUES*"
- -> Index Scan using onek_stringu1 on onek
- Index Cond: (stringu1 = ("*VALUES*".column1)::text)
-(4 rows)
+ -> Bitmap Heap Scan on onek
+ Recheck Cond: (stringu1 = ("*VALUES*".column1)::text)
+ -> Bitmap Index Scan on onek_stringu1
+ Index Cond: (stringu1 = ("*VALUES*".column1)::text)
+(6 rows)
-- VtA transformation for a composite argument is not supported
EXPLAIN (COSTS OFF)
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 83228cfca29..f238f60c948 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -171,8 +171,10 @@ select name, setting from pg_settings where name like 'enable%';
enable_self_join_elimination | on
enable_seqscan | on
enable_sort | on
+ enable_temp_memory_catalog | off
+ enable_temp_rd_buffers | off
enable_tidscan | on
-(24 rows)
+(26 rows)
-- There are always wait event descriptions for various types. InjectionPoint
-- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/expected/tidscan.out b/src/test/regress/expected/tidscan.out
index e823bc91c57..99e91a94129 100644
--- a/src/test/regress/expected/tidscan.out
+++ b/src/test/regress/expected/tidscan.out
@@ -257,18 +257,14 @@ SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
SET enable_hashjoin TO off;
EXPLAIN (COSTS OFF)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
- QUERY PLAN
------------------------------------------
+ QUERY PLAN
+------------------------------------------
Aggregate
- -> Merge Join
- Merge Cond: (t1.ctid = t2.ctid)
- -> Sort
- Sort Key: t1.ctid
- -> Seq Scan on tenk1 t1
- -> Sort
- Sort Key: t2.ctid
- -> Seq Scan on tenk1 t2
-(9 rows)
+ -> Nested Loop
+ -> Seq Scan on tenk1 t1
+ -> Tid Scan on tenk1 t2
+ TID Cond: (t1.ctid = ctid)
+(5 rows)
SELECT count(*) FROM tenk1 t1 JOIN tenk1 t2 ON t1.ctid = t2.ctid;
count
diff --git a/src/test/regress/expected/tuplesort.out b/src/test/regress/expected/tuplesort.out
index 6dd97e7427a..958f4b4780e 100644
--- a/src/test/regress/expected/tuplesort.out
+++ b/src/test/regress/expected/tuplesort.out
@@ -635,18 +635,14 @@ EXPLAIN (COSTS OFF) :qry;
-> GroupAggregate
Group Key: a.col12
Filter: (count(*) > 1)
- -> Incremental Sort
+ -> Sort
Sort Key: a.col12 DESC, a.col1
- Presorted Key: a.col12
- -> Merge Join
- Merge Cond: (a.col12 = b.col12)
- -> Sort
- Sort Key: a.col12 DESC
- -> Seq Scan on test_mark_restore a
- -> Sort
- Sort Key: b.col12 DESC
- -> Seq Scan on test_mark_restore b
-(17 rows)
+ -> Nested Loop
+ Disabled: true
+ Join Filter: (a.col12 = b.col12)
+ -> Seq Scan on test_mark_restore a
+ -> Seq Scan on test_mark_restore b
+(13 rows)
:qry;
col12 | count | count | count | count | count
@@ -674,18 +670,14 @@ EXPLAIN (COSTS OFF) :qry;
-> GroupAggregate
Group Key: a.col12
Filter: (count(*) > 1)
- -> Incremental Sort
+ -> Sort
Sort Key: a.col12 DESC, a.col1
- Presorted Key: a.col12
- -> Merge Join
- Merge Cond: (a.col12 = b.col12)
- -> Sort
- Sort Key: a.col12 DESC
- -> Seq Scan on test_mark_restore a
- -> Sort
- Sort Key: b.col12 DESC
- -> Seq Scan on test_mark_restore b
-(17 rows)
+ -> Nested Loop
+ Disabled: true
+ Join Filter: (a.col12 = b.col12)
+ -> Seq Scan on test_mark_restore a
+ -> Seq Scan on test_mark_restore b
+(13 rows)
:qry;
col12 | count | count | count | count | count
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index dd0c52ab08b..79943a66045 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -144,10 +144,12 @@ WHERE t1.typinput = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(t1.typelem != 0 AND t1.typlen < 0) AND NOT
(p1.prorettype = t1.oid AND NOT p1.proretset)
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+-----+---------
- 1790 | refcursor | 46 | textin
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+--------------
+ 1790 | refcursor | 46 | textin
+ 14756 | abstime | 1312 | timestamp_in
+ 14757 | reltime | 1312 | timestamp_in
+(3 rows)
-- Varlena array types will point to array_in
-- Exception as of 8.1: int2vector and oidvector have their own I/O routines
@@ -196,10 +198,12 @@ WHERE t1.typoutput = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(p1.oid = 'array_out'::regproc AND
t1.typelem != 0 AND t1.typlen = -1)))
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+-----+---------
- 1790 | refcursor | 47 | textout
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+---------------
+ 1790 | refcursor | 47 | textout
+ 14756 | abstime | 1313 | timestamp_out
+ 14757 | reltime | 1313 | timestamp_out
+(3 rows)
SELECT t1.oid, t1.typname, p1.oid, p1.proname
FROM pg_type AS t1, pg_proc AS p1
@@ -260,10 +264,12 @@ WHERE t1.typreceive = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(t1.typelem != 0 AND t1.typlen < 0) AND NOT
(p1.prorettype = t1.oid AND NOT p1.proretset)
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+------+----------
- 1790 | refcursor | 2414 | textrecv
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+----------------
+ 1790 | refcursor | 2414 | textrecv
+ 14756 | abstime | 2474 | timestamp_recv
+ 14757 | reltime | 2474 | timestamp_recv
+(3 rows)
-- Varlena array types will point to array_recv
-- Exception as of 8.1: int2vector and oidvector have their own I/O routines
@@ -321,10 +327,12 @@ WHERE t1.typsend = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(p1.oid = 'array_send'::regproc AND
t1.typelem != 0 AND t1.typlen = -1)))
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+------+----------
- 1790 | refcursor | 2415 | textsend
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+----------------
+ 1790 | refcursor | 2415 | textsend
+ 14756 | abstime | 2475 | timestamp_send
+ 14757 | reltime | 2475 | timestamp_send
+(3 rows)
SELECT t1.oid, t1.typname, p1.oid, p1.proname
FROM pg_type AS t1, pg_proc AS p1
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 96962817ed4..68d21063096 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1236,18 +1236,17 @@ SELECT * FROM
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
- QUERY PLAN
---------------------------------------------------
+ QUERY PLAN
+--------------------------------------------
Sort
Sort Key: (2)
- -> Unique
- -> Sort
- Sort Key: (1), (2)
- -> Append
- -> Result
- -> Result
- One-Time Filter: false
-(9 rows)
+ -> HashAggregate
+ Group Key: (1), (2)
+ -> Append
+ -> Result
+ -> Result
+ One-Time Filter: false
+(8 rows)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
@@ -1301,19 +1300,18 @@ SELECT * FROM
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3
ORDER BY x;
- QUERY PLAN
-------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Sort
Sort Key: ss.x
-> Subquery Scan on ss
Filter: (ss.x > 3)
- -> Unique
- -> Sort
- Sort Key: (1), (((random() * '3'::double precision))::integer)
- -> Append
- -> Result
- -> Result
-(10 rows)
+ -> HashAggregate
+ Group Key: (1), (((random() * '3'::double precision))::integer)
+ -> Append
+ -> Result
+ -> Result
+(9 rows)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
@@ -1334,24 +1332,22 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
- QUERY PLAN
-----------------------------------------------------------
- Unique
- -> Merge Append
- Sort Key: "*SELECT* 1".q1
+ QUERY PLAN
+----------------------------------------------------
+ HashAggregate
+ Group Key: "*SELECT* 1".q1
+ -> Append
-> Subquery Scan on "*SELECT* 1"
- -> Unique
- -> Sort
- Sort Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: (q2 IS NOT NULL)
+ -> HashAggregate
+ Group Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: (q2 IS NOT NULL)
-> Subquery Scan on "*SELECT* 2"
- -> Unique
- -> Sort
- Sort Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: (q2 IS NOT NULL)
-(15 rows)
+ -> HashAggregate
+ Group Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: (q2 IS NOT NULL)
+(13 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
@@ -1370,24 +1366,22 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
- QUERY PLAN
---------------------------------------------------------
- Unique
- -> Merge Append
- Sort Key: "*SELECT* 1".q1
+ QUERY PLAN
+--------------------------------------------------
+ HashAggregate
+ Group Key: "*SELECT* 1".q1
+ -> Append
-> Subquery Scan on "*SELECT* 1"
- -> Unique
- -> Sort
- Sort Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: ((- q1) = q2)
+ -> HashAggregate
+ Group Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: ((- q1) = q2)
-> Subquery Scan on "*SELECT* 2"
- -> Unique
- -> Sort
- Sort Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: ((- q1) = q2)
-(15 rows)
+ -> HashAggregate
+ Group Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: ((- q1) = q2)
+(13 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 095df0a670c..124228f9efa 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -493,8 +493,8 @@ MERGE INTO rw_view1 t
merge_action | a | b | old | new | a | b | c | d | a | b | c | d | a | b | c | d
--------------+---+-------+------------------------------+----------------------------------------+---+-------+-------+----------+---+-------------+-------+----------------+---+-------------+-------+----------------
UPDATE | 1 | ROW 1 | (1,"Row 1",Const,"b: Row 1") | (1,"ROW 1",Const,"b: ROW 1") | 1 | Row 1 | Const | b: Row 1 | 1 | ROW 1 | Const | b: ROW 1 | 1 | ROW 1 | Const | b: ROW 1
- DELETE | 3 | ROW 3 | (3,"Row 3",Const,"b: Row 3") | | 3 | Row 3 | Const | b: Row 3 | | | | | 3 | Row 3 | Const | b: Row 3
INSERT | 2 | ROW 2 | | (2,Unspecified,Const,"b: Unspecified") | | | | | 2 | Unspecified | Const | b: Unspecified | 2 | Unspecified | Const | b: Unspecified
+ DELETE | 3 | ROW 3 | (3,"Row 3",Const,"b: Row 3") | | 3 | Row 3 | Const | b: Row 3 | | | | | 3 | Row 3 | Const | b: Row 3
(3 rows)
SET jit_above_cost TO DEFAULT;
@@ -565,18 +565,14 @@ EXPLAIN (costs off)
MERGE INTO rw_view1 t
USING (SELECT * FROM generate_series(1,5)) AS s(a) ON t.a = s.a
WHEN MATCHED THEN UPDATE SET b = 'Updated';
- QUERY PLAN
--------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Merge on base_tbl
- -> Hash Join
- Hash Cond: (base_tbl.a = generate_series.generate_series)
- -> Bitmap Heap Scan on base_tbl
- Recheck Cond: (a > 0)
- -> Bitmap Index Scan on base_tbl_pkey
- Index Cond: (a > 0)
- -> Hash
- -> Function Scan on generate_series
-(9 rows)
+ -> Nested Loop
+ -> Function Scan on generate_series
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: ((a = generate_series.generate_series) AND (a > 0))
+(5 rows)
EXPLAIN (costs off)
MERGE INTO rw_view1 t
@@ -599,18 +595,14 @@ EXPLAIN (costs off)
MERGE INTO rw_view1 t
USING (SELECT * FROM generate_series(1,5)) AS s(a) ON t.a = s.a
WHEN NOT MATCHED THEN INSERT (a) VALUES (s.a);
- QUERY PLAN
--------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Merge on base_tbl
- -> Hash Right Join
- Hash Cond: (base_tbl.a = generate_series.generate_series)
- -> Bitmap Heap Scan on base_tbl
- Recheck Cond: (a > 0)
- -> Bitmap Index Scan on base_tbl_pkey
- Index Cond: (a > 0)
- -> Hash
- -> Function Scan on generate_series
-(9 rows)
+ -> Nested Loop Left Join
+ -> Function Scan on generate_series
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: ((a = generate_series.generate_series) AND (a > 0))
+(5 rows)
-- it's still updatable if we add a DO ALSO rule
CREATE TABLE base_tbl_hist(ts timestamptz default now(), a int, b text);
@@ -2440,19 +2432,16 @@ UPDATE rw_view1 SET a = a + 1000 FROM other_tbl_parent WHERE a = id;
Update on base_tbl_parent
Update on base_tbl_parent base_tbl_parent_1
Update on base_tbl_child base_tbl_parent_2
- -> Merge Join
- Merge Cond: (base_tbl_parent.a = other_tbl_parent.id)
- -> Sort
- Sort Key: base_tbl_parent.a
- -> Append
- -> Seq Scan on base_tbl_parent base_tbl_parent_1
- -> Seq Scan on base_tbl_child base_tbl_parent_2
- -> Sort
- Sort Key: other_tbl_parent.id
+ -> Hash Join
+ Hash Cond: (base_tbl_parent.a = other_tbl_parent.id)
+ -> Append
+ -> Seq Scan on base_tbl_parent base_tbl_parent_1
+ -> Seq Scan on base_tbl_child base_tbl_parent_2
+ -> Hash
-> Append
-> Seq Scan on other_tbl_parent other_tbl_parent_1
-> Seq Scan on other_tbl_child other_tbl_parent_2
-(15 rows)
+(12 rows)
UPDATE rw_view1 SET a = a + 1000 FROM other_tbl_parent WHERE a = id;
SELECT * FROM ONLY base_tbl_parent ORDER BY a;
@@ -3502,23 +3491,20 @@ CREATE RULE v1_upd_rule AS ON UPDATE TO v1 DO INSTEAD
CREATE VIEW v2 WITH (security_barrier = true) AS
SELECT * FROM v1 WHERE EXISTS (SELECT 1);
EXPLAIN (COSTS OFF) UPDATE v2 SET a = 1;
- QUERY PLAN
---------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------
Update on t1
InitPlan 1
-> Result
- -> Merge Join
- Merge Cond: (t1.a = v1.a)
- -> Sort
- Sort Key: t1.a
+ -> Hash Join
+ Hash Cond: (v1.a = t1.a)
+ -> Subquery Scan on v1
+ -> Result
+ One-Time Filter: (InitPlan 1).col1
+ -> Seq Scan on t1 t1_1
+ -> Hash
-> Seq Scan on t1
- -> Sort
- Sort Key: v1.a
- -> Subquery Scan on v1
- -> Result
- One-Time Filter: (InitPlan 1).col1
- -> Seq Scan on t1 t1_1
-(14 rows)
+(11 rows)
DROP VIEW v2;
DROP VIEW v1;
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index b86b668f433..0f2ab156d32 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -4233,15 +4233,12 @@ WHERE s.c = 1;
Run Condition: (ntile(e2.salary) OVER w1 <= 1)
-> Sort
Sort Key: e1.depname
- -> Merge Join
- Merge Cond: (e1.empno = e2.empno)
- -> Sort
- Sort Key: e1.empno
- -> Seq Scan on empsalary e1
- -> Sort
- Sort Key: e2.empno
+ -> Hash Join
+ Hash Cond: (e1.empno = e2.empno)
+ -> Seq Scan on empsalary e1
+ -> Hash
-> Seq Scan on empsalary e2
-(15 rows)
+(12 rows)
-- Ensure the run condition optimization is used in cases where the WindowFunc
-- has a Var from another query level
@@ -5383,13 +5380,14 @@ LIMIT 1;
Limit
-> WindowAgg
Window: w1 AS (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
- -> Merge Join
- Merge Cond: (t1.unique1 = t2.tenthous)
- -> Index Only Scan using tenk1_unique1 on tenk1 t1
- -> Sort
- Sort Key: t2.tenthous
- -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
-(9 rows)
+ -> Sort
+ Sort Key: t1.unique1
+ -> Hash Join
+ Hash Cond: (t1.unique1 = t2.tenthous)
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Hash
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+(10 rows)
-- Ensure we get a cheap total plan. This time use 10000 FOLLOWING so we need
-- to read all join rows.
@@ -5402,13 +5400,14 @@ LIMIT 1;
Limit
-> WindowAgg
Window: w1 AS (ORDER BY t1.unique1 ROWS BETWEEN UNBOUNDED PRECEDING AND '10000'::bigint FOLLOWING)
- -> Merge Join
- Merge Cond: (t1.unique1 = t2.tenthous)
- -> Index Only Scan using tenk1_unique1 on tenk1 t1
- -> Sort
- Sort Key: t2.tenthous
- -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
-(9 rows)
+ -> Sort
+ Sort Key: t1.unique1
+ -> Hash Join
+ Hash Cond: (t1.unique1 = t2.tenthous)
+ -> Index Only Scan using tenk1_unique1 on tenk1 t1
+ -> Hash
+ -> Index Only Scan using tenk1_thous_tenthous on tenk1 t2
+(10 rows)
-- Tests for problems with failure to walk or mutate expressions
-- within window frame clauses.
diff --git a/src/test/regress/expected/with.out b/src/test/regress/expected/with.out
index 61409197dbf..9e2637dff26 100644
--- a/src/test/regress/expected/with.out
+++ b/src/test/regress/expected/with.out
@@ -750,22 +750,18 @@ select * from search_graph order by seq;
-> Recursive Union
-> Seq Scan on pg_temp.graph0 g
Output: g.f, g.t, g.label, ARRAY[ROW(g.f, g.t)]
- -> Merge Join
+ -> Hash Join
Output: g_1.f, g_1.t, g_1.label, array_cat(sg.seq, ARRAY[ROW(g_1.f, g_1.t)])
- Merge Cond: (g_1.f = sg.t)
- -> Sort
+ Hash Cond: (sg.t = g_1.f)
+ -> WorkTable Scan on search_graph sg
+ Output: sg.f, sg.t, sg.label, sg.seq
+ -> Hash
Output: g_1.f, g_1.t, g_1.label
- Sort Key: g_1.f
-> Seq Scan on pg_temp.graph0 g_1
Output: g_1.f, g_1.t, g_1.label
- -> Sort
- Output: sg.seq, sg.t
- Sort Key: sg.t
- -> WorkTable Scan on search_graph sg
- Output: sg.seq, sg.t
-> CTE Scan on search_graph
Output: search_graph.f, search_graph.t, search_graph.label, search_graph.seq
-(22 rows)
+(18 rows)
with recursive search_graph(f, t, label) as (
select * from graph0 g
@@ -823,22 +819,18 @@ select * from search_graph order by seq;
-> Recursive Union
-> Seq Scan on pg_temp.graph0 g
Output: g.f, g.t, g.label, ROW('0'::bigint, g.f, g.t)
- -> Merge Join
+ -> Hash Join
Output: g_1.f, g_1.t, g_1.label, ROW(int8inc((sg.seq)."*DEPTH*"), g_1.f, g_1.t)
- Merge Cond: (g_1.f = sg.t)
- -> Sort
+ Hash Cond: (sg.t = g_1.f)
+ -> WorkTable Scan on search_graph sg
+ Output: sg.f, sg.t, sg.label, sg.seq
+ -> Hash
Output: g_1.f, g_1.t, g_1.label
- Sort Key: g_1.f
-> Seq Scan on pg_temp.graph0 g_1
Output: g_1.f, g_1.t, g_1.label
- -> Sort
- Output: sg.seq, sg.t
- Sort Key: sg.t
- -> WorkTable Scan on search_graph sg
- Output: sg.seq, sg.t
-> CTE Scan on search_graph
Output: search_graph.f, search_graph.t, search_graph.label, search_graph.seq
-(22 rows)
+(18 rows)
with recursive search_graph(f, t, label) as (
select * from graph0 g
@@ -1092,23 +1084,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | f | {"(1,4)"}
4 | 5 | arc 4 -> 5 | f | {"(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | f | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | f | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | f | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | f | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | t | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | t | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | t | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1130,23 +1122,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | f | {"(1,4)"}
4 | 5 | arc 4 -> 5 | f | {"(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | f | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | f | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | f | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | f | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | t | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | t | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | t | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1207,21 +1199,17 @@ select * from search_graph;
-> Recursive Union
-> Seq Scan on pg_temp.graph g
Output: g.f, g.t, g.label, false, ARRAY[ROW(g.f, g.t)]
- -> Merge Join
+ -> Hash Join
Output: g_1.f, g_1.t, g_1.label, CASE WHEN (ROW(g_1.f, g_1.t) = ANY (sg.path)) THEN true ELSE false END, array_cat(sg.path, ARRAY[ROW(g_1.f, g_1.t)])
- Merge Cond: (g_1.f = sg.t)
- -> Sort
+ Hash Cond: (sg.t = g_1.f)
+ -> WorkTable Scan on search_graph sg
+ Output: sg.f, sg.t, sg.label, sg.is_cycle, sg.path
+ Filter: (NOT sg.is_cycle)
+ -> Hash
Output: g_1.f, g_1.t, g_1.label
- Sort Key: g_1.f
-> Seq Scan on pg_temp.graph g_1
Output: g_1.f, g_1.t, g_1.label
- -> Sort
- Output: sg.path, sg.t
- Sort Key: sg.t
- -> WorkTable Scan on search_graph sg
- Output: sg.path, sg.t
- Filter: (NOT sg.is_cycle)
-(20 rows)
+(16 rows)
with recursive search_graph(f, t, label) as (
select * from graph g
@@ -1239,23 +1227,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | f | {"(1,4)"}
4 | 5 | arc 4 -> 5 | f | {"(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | f | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | f | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | f | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | f | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | f | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | f | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | f | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | t | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | t | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | t | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | f | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1276,23 +1264,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | N | {"(1,4)"}
4 | 5 | arc 4 -> 5 | N | {"(4,5)"}
5 | 1 | arc 5 -> 1 | N | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | N | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | N | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | N | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | N | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | N | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | N | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | N | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | N | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | N | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | N | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | N | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | N | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | N | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | N | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | N | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | N | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | N | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | N | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | N | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | N | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | N | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | Y | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | N | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | N | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | N | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | Y | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | N | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | Y | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | N | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1441,23 +1429,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | {"(1,4)"} | f | {"(1,4)"}
4 | 5 | arc 4 -> 5 | {"(4,5)"} | f | {"(4,5)"}
5 | 1 | arc 5 -> 1 | {"(5,1)"} | f | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | {"(5,1)","(1,2)"} | f | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | {"(5,1)","(1,3)"} | f | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | {"(5,1)","(1,4)"} | f | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | {"(1,2)","(2,3)"} | f | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | {"(1,4)","(4,5)"} | f | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | {"(4,5)","(5,1)"} | f | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | {"(4,5)","(5,1)","(1,2)"} | f | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | {"(4,5)","(5,1)","(1,3)"} | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | {"(5,1)","(1,4)"} | f | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | {"(5,1)","(1,3)"} | f | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | {"(5,1)","(1,2)"} | f | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | {"(1,4)","(4,5)","(5,1)"} | f | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | {"(4,5)","(5,1)","(1,4)"} | f | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | {"(5,1)","(1,2)","(2,3)"} | f | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | {"(4,5)","(5,1)","(1,3)"} | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | {"(4,5)","(5,1)","(1,2)"} | f | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | {"(5,1)","(1,4)","(4,5)"} | f | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | {"(1,4)","(4,5)","(5,1)"} | f | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | {"(1,4)","(4,5)","(5,1)","(1,2)"} | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | {"(1,4)","(4,5)","(5,1)","(1,3)"} | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | {"(5,1)","(1,2)","(2,3)"} | f | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | {"(1,4)","(4,5)","(5,1)","(1,4)"} | t | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | {"(4,5)","(5,1)","(1,2)","(2,3)"} | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | {"(1,4)","(4,5)","(5,1)","(1,3)"} | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | {"(1,4)","(4,5)","(5,1)","(1,2)"} | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | {"(4,5)","(5,1)","(1,4)","(4,5)"} | t | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | {"(4,5)","(5,1)","(1,2)","(2,3)"} | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | {"(5,1)","(1,4)","(4,5)","(5,1)"} | t | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"} | f | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1479,23 +1467,23 @@ select * from search_graph;
1 | 4 | arc 1 -> 4 | (0,1,4) | f | {"(1,4)"}
4 | 5 | arc 4 -> 5 | (0,4,5) | f | {"(4,5)"}
5 | 1 | arc 5 -> 1 | (0,5,1) | f | {"(5,1)"}
- 1 | 2 | arc 1 -> 2 | (1,1,2) | f | {"(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | (1,1,3) | f | {"(5,1)","(1,3)"}
- 1 | 4 | arc 1 -> 4 | (1,1,4) | f | {"(5,1)","(1,4)"}
2 | 3 | arc 2 -> 3 | (1,2,3) | f | {"(1,2)","(2,3)"}
4 | 5 | arc 4 -> 5 | (1,4,5) | f | {"(1,4)","(4,5)"}
5 | 1 | arc 5 -> 1 | (1,5,1) | f | {"(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | (2,1,2) | f | {"(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | (2,1,3) | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 4 | arc 1 -> 4 | (1,1,4) | f | {"(5,1)","(1,4)"}
+ 1 | 3 | arc 1 -> 3 | (1,1,3) | f | {"(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | (1,1,2) | f | {"(5,1)","(1,2)"}
+ 5 | 1 | arc 5 -> 1 | (2,5,1) | f | {"(1,4)","(4,5)","(5,1)"}
1 | 4 | arc 1 -> 4 | (2,1,4) | f | {"(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | (2,2,3) | f | {"(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | (2,1,3) | f | {"(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | (2,1,2) | f | {"(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | (2,4,5) | f | {"(5,1)","(1,4)","(4,5)"}
- 5 | 1 | arc 5 -> 1 | (2,5,1) | f | {"(1,4)","(4,5)","(5,1)"}
- 1 | 2 | arc 1 -> 2 | (3,1,2) | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
- 1 | 3 | arc 1 -> 3 | (3,1,3) | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 2 | 3 | arc 2 -> 3 | (2,2,3) | f | {"(5,1)","(1,2)","(2,3)"}
1 | 4 | arc 1 -> 4 | (3,1,4) | t | {"(1,4)","(4,5)","(5,1)","(1,4)"}
- 2 | 3 | arc 2 -> 3 | (3,2,3) | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
+ 1 | 3 | arc 1 -> 3 | (3,1,3) | f | {"(1,4)","(4,5)","(5,1)","(1,3)"}
+ 1 | 2 | arc 1 -> 2 | (3,1,2) | f | {"(1,4)","(4,5)","(5,1)","(1,2)"}
4 | 5 | arc 4 -> 5 | (3,4,5) | t | {"(4,5)","(5,1)","(1,4)","(4,5)"}
+ 2 | 3 | arc 2 -> 3 | (3,2,3) | f | {"(4,5)","(5,1)","(1,2)","(2,3)"}
5 | 1 | arc 5 -> 1 | (3,5,1) | t | {"(5,1)","(1,4)","(4,5)","(5,1)"}
2 | 3 | arc 2 -> 3 | (4,2,3) | f | {"(1,4)","(4,5)","(5,1)","(1,2)","(2,3)"}
(25 rows)
@@ -1672,23 +1660,23 @@ select * from v_cycle1;
1 | 4 | arc 1 -> 4
4 | 5 | arc 4 -> 5
5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
- 1 | 3 | arc 1 -> 3
- 1 | 4 | arc 1 -> 4
2 | 3 | arc 2 -> 3
4 | 5 | arc 4 -> 5
5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
- 1 | 3 | arc 1 -> 3
1 | 4 | arc 1 -> 4
- 2 | 3 | arc 2 -> 3
- 4 | 5 | arc 4 -> 5
- 5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
+ 5 | 1 | arc 5 -> 1
1 | 4 | arc 1 -> 4
+ 1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
+ 4 | 5 | arc 4 -> 5
2 | 3 | arc 2 -> 3
+ 1 | 4 | arc 1 -> 4
+ 1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
4 | 5 | arc 4 -> 5
+ 2 | 3 | arc 2 -> 3
5 | 1 | arc 5 -> 1
2 | 3 | arc 2 -> 3
(25 rows)
@@ -1702,23 +1690,23 @@ select * from v_cycle2;
1 | 4 | arc 1 -> 4
4 | 5 | arc 4 -> 5
5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
- 1 | 3 | arc 1 -> 3
- 1 | 4 | arc 1 -> 4
2 | 3 | arc 2 -> 3
4 | 5 | arc 4 -> 5
5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
- 1 | 3 | arc 1 -> 3
1 | 4 | arc 1 -> 4
- 2 | 3 | arc 2 -> 3
- 4 | 5 | arc 4 -> 5
- 5 | 1 | arc 5 -> 1
- 1 | 2 | arc 1 -> 2
1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
+ 5 | 1 | arc 5 -> 1
1 | 4 | arc 1 -> 4
+ 1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
+ 4 | 5 | arc 4 -> 5
2 | 3 | arc 2 -> 3
+ 1 | 4 | arc 1 -> 4
+ 1 | 3 | arc 1 -> 3
+ 1 | 2 | arc 1 -> 2
4 | 5 | arc 4 -> 5
+ 2 | 3 | arc 2 -> 3
5 | 1 | arc 5 -> 1
2 | 3 | arc 2 -> 3
(25 rows)
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index f1f8fae5654..f28bdc4cb0d 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
create index on t (a);
analyze t;
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index 1d1bf2b9310..6617d363760 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -148,6 +148,33 @@ CREATE TEMP TABLE nocols();
INSERT INTO nocols DEFAULT VALUES;
SELECT * FROM nocols n, LATERAL (VALUES(n.*)) v;
+--
+-- test order by NULLS (FIRST|LAST)
+--
+
+select unique1, unique2 into onek_with_null from onek;
+insert into onek_with_null (unique1,unique2) values (NULL, -1), (NULL, NULL);
+
+
+select * from onek_with_null order by unique1 nulls first , unique2 limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 limit 3;
+select * from onek_with_null order by unique1 nulls first , unique2 nulls first limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 nulls first limit 3;
+select * from onek_with_null order by unique1 nulls first , unique2 nulls last limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 nulls last limit 3;
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc limit 3;
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls first limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls first limit 3;
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls last limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls last limit 3;
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 nulls first , u2 nulls first limit 3;
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 asc nulls first , u2 desc nulls first limit 3;
+
+drop table onek_with_null;
+
--
-- Test ORDER BY options
--
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 7272f9d8764..f4b9fe9b248 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -6,6 +6,8 @@
-- from auto-analyze happening when we didn't expect it.
--
+set default_statistics_target=10000; --prevent random subset for joinsel
+
-- check the number of estimated/actual rows in the top node
create function check_estimated_rows(text) returns table (estimated int, actual int)
language plpgsql as
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 58606898add..9663acbd401 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -410,6 +410,7 @@ CatalogIdMapEntry
CatalogIndexState
ChangeVarNodes_callback
ChangeVarNodes_context
+ReplaceVarnoContext
CheckPoint
CheckPointStmt
CheckpointStatsData
@@ -4172,6 +4173,7 @@ unicodeStyleColumnFormat
unicodeStyleFormat
unicodeStyleRowFormat
unicode_linestyle
+UniqueRelInfo
unit_conversion
unlogged_relation_entry
utf_local_conversion_func