File 0001-1C_FULL.patch of Package postgresql17
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000000..932935ec109
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "pg_wait_sampling"]
+ path = contrib/pg_wait_sampling
+ url = ../pg_wait_sampling
diff --git a/contrib/Makefile b/contrib/Makefile
index abd780f2774..fe48af247a2 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = \
amcheck \
auth_delay \
+ auto_dump \
auto_explain \
basic_archive \
basebackup_to_shell \
@@ -19,17 +20,22 @@ SUBDIRS = \
dict_int \
dict_xsyn \
earthdistance \
+ fasttrun \
file_fdw \
fuzzystrmatch \
+ fulleq \
hstore \
intagg \
intarray \
isn \
lo \
ltree \
+ mchar \
oid2name \
+ online_analyze \
pageinspect \
passwordcheck \
+ plantuner \
pg_buffercache \
pg_freespacemap \
pg_prewarm \
@@ -39,6 +45,7 @@ SUBDIRS = \
pgrowlocks \
pgstattuple \
pg_visibility \
+ pg_wait_sampling \
pg_walinspect \
postgres_fdw \
seg \
@@ -49,7 +56,8 @@ SUBDIRS = \
tsm_system_rows \
tsm_system_time \
unaccent \
- vacuumlo
+ vacuumlo \
+ dbcopies_decoding
ifeq ($(with_ssl),openssl)
SUBDIRS += pgcrypto sslinfo
diff --git a/contrib/auto_dump/Makefile b/contrib/auto_dump/Makefile
new file mode 100644
index 00000000000..7af96424df7
--- /dev/null
+++ b/contrib/auto_dump/Makefile
@@ -0,0 +1,16 @@
+MODULES = auto_dump
+
+EXTENSION = auto_dump
+PGFILEDESC = "auto_dump"
+DATA = auto_dump--1.0.sql
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/auto_dump
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/auto_dump/auto_dump--1.0.sql b/contrib/auto_dump/auto_dump--1.0.sql
new file mode 100644
index 00000000000..36da0901e90
--- /dev/null
+++ b/contrib/auto_dump/auto_dump--1.0.sql
@@ -0,0 +1 @@
+\echo Use "CREATE EXTENSION auto_dump" to load this file. \quit
diff --git a/contrib/auto_dump/auto_dump.c b/contrib/auto_dump/auto_dump.c
new file mode 100644
index 00000000000..b2423800cc4
--- /dev/null
+++ b/contrib/auto_dump/auto_dump.c
@@ -0,0 +1,1012 @@
+#include "postgres.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+
+#include "commands/explain.h"
+#include "commands/copy.h"
+#include "executor/instrument.h"
+#include "utils/regproc.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/syscache.h"
+#include "utils/builtins.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_class_d.h"
+#include "catalog/pg_proc_d.h"
+#include "catalog/pg_type.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/skey.h"
+#include "access/heapam.h"
+#include "miscadmin.h"
+#include "dirent.h"
+#include "common/file_perm.h"
+#include "nodes/pg_list.h"
+#include "storage/latch.h"
+#include "storage/procarray.h"
+#include "utils/resowner.h"
+#include "tcop/pquery.h"
+#include "utils/lsyscache.h"
+#include "utils/snapmgr.h"
+#include "mb/pg_wchar.h"
+#include "nodes/makefuncs.h"
+#include "common/keywords.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(auto_dump);
+Datum auto_dump(PG_FUNCTION_ARGS);
+
+static bool dump_enable = false; /* GUC auto_dump.enable */
+static bool dump_all_temp_tables = false; /* GUC auto_dump.dump_all_temp_tables */
+static bool dump_temporary_tables = true; /* GUC auto_dump.dump_temporary_tables */
+static bool dump_persistent_tables = false; /* GUC auto_dump.dump_persistent_tables */
+static bool dump_data = true; /* GUC auto_dump.dump_data */
+static bool dump_indexes = true; /* GUC auto_dump.dump_indexes */
+static bool dump_query = true; /* GUC auto_dump.dump_query */
+static bool dump_create = true; /* GUC auto_dump.dump_create */
+static bool dump_plan = true; /* GUC auto_dump.dump_plan */
+static bool dump_on_cancel = false; /* GUC auto_dump.dump_on_cancel */
+static bool dump_on_bad_plan = false; /* GUC auto_dump.dump_on_bad_plan */
+static char* dump_on_query_string = ""; /* GUC auto_dump.dump_on_query_string; empty means no query-text trigger */
+static int dump_plan_count_threshold = 0; /* GUC auto_dump.bad_plan_count_threshold; negative disables the check */
+static int dump_plan_percent_threshold = 0; /* GUC auto_dump.bad_plan_percent_threshold; negative disables the check */
+static char* output_directory = ""; /* GUC auto_dump.output_directory; empty means nothing is dumped */
+static char* query_output_directory; /* directory of the current dump, set by PrepareDump() */
+static int nesting_level; /* raised while an ExecutorRun/ExecutorFinish hook call is in progress */
+static bool query_dumped; /* a dump has been started for the current top-level query */
+static bool plan_dumped; /* the plain plan has already been written */
+static bool plan_analysis_dumped; /* the analyzed plan has already been written */
+
+static ExecutorStart_hook_type prev_ExecutorStart;
+static ProcessInterrupts_hook_type prev_ProcessInterrupts;
+static ExecutorEnd_hook_type prev_ExecutorEnd;
+static ExecutorRun_hook_type prev_ExecutorRun;
+static ExecutorFinish_hook_type prev_ExecutorFinish;
+
+
+typedef struct FieldInfo{
+ /* State kept by SaveTables_ for one non-dropped column of the table being dumped. */
+ HeapTuple tuple; /* ATTNUM syscache entry; released once the table has been written */
+ Form_pg_attribute form; /* GETSTRUCT view of "tuple" */
+ AttrNumber attnum; /* attribute number, passed to heap_getattr */
+ FmgrInfo outfunc; /* output function of the column's type */
+ bool first; /* true for the first kept column (no separating comma before it) */
+ char *attname; /* column name as returned by QuotedIdentifier */
+} FieldInfo;
+
+
+/*
+ * IsBadPlan
+ *		Tell whether any executed node of the plan tree was badly mis-estimated.
+ *
+ * A node is "bad" when the gap between the planner's estimate (plan_rows) and
+ * the measured rows per loop exceeds both the absolute threshold
+ * (auto_dump.bad_plan_count_threshold) and the relative one
+ * (auto_dump.bad_plan_percent_threshold).  A negative threshold disables that
+ * check; a non-positive estimate always satisfies the relative check.
+ *
+ * Nodes without instrumentation or never executed (nloops == 0) are skipped:
+ * there is no measurement to compare against, so judging them only produced
+ * false positives.  Children are visited via the outer/inner links, initPlan,
+ * subPlan and the member lists of Append, MergeAppend, BitmapAnd, BitmapOr,
+ * SubqueryScan and CustomScan.  Returns false for a NULL planstate.
+ */
+static bool
+IsBadPlan(PlanState *planstate)
+{
+	ListCell   *lc;
+	int			c;
+
+	if (!planstate)
+		return false;
+
+	/* Plan trees can be deep: fail cleanly instead of overrunning the stack. */
+	check_stack_depth();
+
+	if (planstate->instrument)
+	{
+		Instrumentation *instr = planstate->instrument;
+
+		/* Fold the counters of a still-open loop into ntuples/nloops. */
+		InstrEndLoop(instr);
+
+		if (instr->nloops > 0)
+		{
+			double		expect_rows = planstate->plan->plan_rows;
+			double		actual_rows = instr->ntuples / instr->nloops;
+			double		diff = fabs(actual_rows - expect_rows);
+
+			if ((dump_plan_count_threshold < 0 || diff > dump_plan_count_threshold) &&
+				(dump_plan_percent_threshold < 0 || expect_rows <= 0 ||
+				 diff / expect_rows * 100 > dump_plan_percent_threshold))
+			{
+				ereport(DEBUG5,
+						(errmsg("auto_dump hit bad plan threshold: expected=%.0f actual=%.0f", expect_rows, actual_rows),
+						 errhidestmt(true),
+						 errhidecontext(true)));
+				return true;
+			}
+		}
+	}
+
+	if (IsBadPlan(outerPlanState(planstate)) ||
+		IsBadPlan(innerPlanState(planstate)))
+		return true;
+
+	foreach(lc, planstate->initPlan)
+		if (IsBadPlan(((SubPlanState *) lfirst(lc))->planstate))
+			return true;
+
+	foreach(lc, planstate->subPlan)
+		if (IsBadPlan(((SubPlanState *) lfirst(lc))->planstate))
+			return true;
+
+	switch (nodeTag(planstate->plan))
+	{
+		case T_Append:
+			for (c = 0; c < ((AppendState *) planstate)->as_nplans; c++)
+				if (IsBadPlan(((AppendState *) planstate)->appendplans[c]))
+					return true;
+			break;
+
+		case T_MergeAppend:
+			for (c = 0; c < ((MergeAppendState *) planstate)->ms_nplans; c++)
+				if (IsBadPlan(((MergeAppendState *) planstate)->mergeplans[c]))
+					return true;
+			break;
+
+		case T_BitmapAnd:
+			for (c = 0; c < ((BitmapAndState *) planstate)->nplans; c++)
+				if (IsBadPlan(((BitmapAndState *) planstate)->bitmapplans[c]))
+					return true;
+			break;
+
+		case T_BitmapOr:
+			for (c = 0; c < ((BitmapOrState *) planstate)->nplans; c++)
+				if (IsBadPlan(((BitmapOrState *) planstate)->bitmapplans[c]))
+					return true;
+			break;
+
+		case T_SubqueryScan:
+			if (IsBadPlan(((SubqueryScanState *) planstate)->subplan))
+				return true;
+			break;
+
+		case T_CustomScan:
+			foreach(lc, ((CustomScanState *) planstate)->custom_ps)
+				if (IsBadPlan((PlanState *) lfirst(lc)))
+					return true;
+			break;
+
+		default:
+			break;
+	}
+
+	return false;
+}
+
+/* Returns true when "pattern" occurs in "query" as a plain, case-sensitive substring. */
+static bool
+IsQueryMatch(const char* query, const char* pattern)
+{
+	// TODO revert asap to simple pattern search after publication
+	return strstr(query, pattern) != NULL;	/* explicit test: never narrow a pointer into bool */
+}
+
+/* Creates a fresh per-query directory under auto_dump.output_directory; returns false when that setting is empty. */
+static bool
+PrepareDump()
+{
+	time_t		t = time(NULL);
+	struct tm	tm;
+	static int	counter = 0;
+	char	   *base;
+
+	/* Do nothing if not output directory specified */
+	if (!*output_directory)
+		return false;
+
+	tm = *localtime(&t);
+	base = make_absolute_path(output_directory);	/* malloc'ed copy, not palloc'ed: freed below */
+	query_output_directory = psprintf("%s/%d-%04d_%02d_%02d_%02d_%02d_%02d_%02d/", base,
+			MyProcPid, tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour,
+			tm.tm_min, tm.tm_sec, (counter++) % 100 );
+	free(base);
+	if (pg_mkdir_p(query_output_directory, PG_DIR_MODE_OWNER) != 0)
+		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+			errmsg("could not create directory \"%s\": %m", query_output_directory)));
+	return true;
+}
+
+/* Appends the EXPLAIN output of queryDesc, wrapped in an SQL comment, to the dump file (WARNING if it cannot be opened). */
+static void
+SavePlan_(QueryDesc* queryDesc, char const* name, bool analyze, bool withdata)
+{
+	FILE	   *f;
+	mode_t		oumask;
+	MemoryContext oldcxt;
+	ExplainState *es;
+	char	   *path;
+
+	oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+
+	es = NewExplainState();
+	es->analyze = es->verbose = es->buffers = es->wal = analyze;
+	es->timing = es->summary = es->settings = analyze;
+	es->format = EXPLAIN_FORMAT_TEXT;
+
+	ExplainBeginOutput(es);
+	ExplainQueryText(es, queryDesc);
+	ExplainQueryParameters(es, queryDesc->params, -1);
+	ExplainPrintPlan(es, queryDesc);
+	if (es->analyze)
+		ExplainPrintTriggers(es, queryDesc);
+	if (es->costs)
+		ExplainPrintJITSummary(es, queryDesc);
+	ExplainEndOutput(es);
+
+	/* Remove last line break */
+	if (es->str->len > 0 && es->str->data[es->str->len - 1] == '\n')
+		es->str->data[--es->str->len] = '\0';
+
+	/* "name" is unused while all sections share one file. TODO revert to original per-file save behaviour */
+	path = psprintf("%s/%s.sql", query_output_directory, withdata?"createwithdata":"create");
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+	f = fopen(path, "ab");
+	umask(oumask);
+	if (f == NULL)
+		ereport(WARNING, (errcode_for_file_access(), errmsg("could not open file \"%s\" for writing: %m", path)));
+	else
+	{
+		fprintf(f, "/*\n%s\n*/\n", es->str->data);
+		fclose(f);
+	}
+
+	MemoryContextSwitchTo(oldcxt);
+}
+
+// Writes the plan with withdata = false and, when dump_data is set, once more with withdata = true. TODO revert asap to original per-file save design after publication
+static void
+SavePlan(QueryDesc* queryDesc, char const* name, bool analyze)
+{
+ SavePlan_(queryDesc, name, analyze, false);
+ if (dump_data)
+ SavePlan_(queryDesc, name, analyze, true);
+}
+
+/* Writes the bytes accumulated in buf to f (if there are any) and then empties buf. */
+static void
+FlushStringInfoToFile(StringInfo buf, FILE* f)
+{
+ if (buf->len)
+ fwrite(buf->data, buf->len, 1, f); /* the result of fwrite is not checked */
+ resetStringInfo(buf);
+}
+
+/*
+ * QuotedIdentifier
+ *		Return a palloc'd copy of "s" usable as an SQL identifier.
+ *
+ * Names made only of [a-z0-9_] that do not start with a digit and are not a
+ * keyword (other than an unreserved one) are copied as is; anything else is
+ * double-quoted with embedded quotes doubled.  Multibyte characters are
+ * checked in the database encoding, in which catalog names are stored; each
+ * byte of an invalid sequence becomes the encoding's invalid-char marker.
+ */
+static char*
+QuotedIdentifier(char const* s)
+{
+	bool		need_quotes = false;
+	StringInfoData buf;
+	size_t		remaining;
+	const char *cp;
+	int			encoding = GetDatabaseEncoding();
+
+	for (cp = s; *cp && !need_quotes; cp++)
+		need_quotes = !((*cp >= 'a' && *cp <= 'z') || (*cp == '_') || (*cp >= '0' && *cp <= '9' && cp != s));
+
+	if (!need_quotes)
+	{
+		int			kwnum = ScanKeywordLookup(s, &ScanKeywords);
+		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
+			need_quotes = true;
+	}
+
+	if (!need_quotes)
+		return pstrdup(s);
+
+	initStringInfo(&buf);
+	appendStringInfoChar(&buf, '"');
+
+	remaining = strlen(s);
+	cp = s;
+	while (remaining > 0)
+	{
+		int			charlen;
+
+		if (!IS_HIGHBIT_SET(*cp))
+		{
+			if (*cp == '"')
+				appendStringInfoChar(&buf, *cp);
+			appendStringInfoChar(&buf, *cp);
+			cp++;
+			remaining--;
+			continue;
+		}
+
+		charlen = pg_encoding_mblen(encoding, cp);
+		if (remaining >= charlen &&
+			pg_encoding_verifymbchar(encoding, cp, charlen) != -1)
+		{
+			appendBinaryStringInfo(&buf, cp, charlen);
+			cp += charlen;
+			remaining -= charlen;
+		}
+		else
+		{
+			/* Invalid or truncated sequence: emit the marker, skip one byte. */
+			enlargeStringInfo(&buf, 2);
+			pg_encoding_set_invalid(encoding, buf.data + buf.len);
+			buf.len += 2;
+			buf.data[buf.len] = '\0';
+			remaining--;
+			cp++;
+		}
+	}
+
+	appendStringInfoChar(&buf, '"');
+	return buf.data;
+}
+
+/* Appends CREATE TABLE, index definitions, optional INSERT data and ANALYZE for each dumped table to the dump file. */
+static void
+SaveTables_(QueryDesc* queryDesc, bool withdata)
+{
+	mode_t		oumask;
+	FILE	   *f;
+	ListCell   *lc;
+	List	   *tableOids = NULL;
+	StringInfoData buf;
+	char	   *path;
+
+	/* Populate list of all temporary tables OIDs */
+	if (dump_all_temp_tables) {
+		ScanKeyData key[1];
+		Relation	pgclass;
+		TableScanDesc scan;
+		HeapTuple	tuple;
+		/* NOTE(review): LookupCreationNamespace may create the temp schema as a side effect - confirm this is intended. */
+		ScanKeyInit(&key[0],
+					Anum_pg_class_relnamespace,	/* the scan below runs over pg_class */
+					BTEqualStrategyNumber,
+					F_OIDEQ,
+					ObjectIdGetDatum(LookupCreationNamespace("pg_temp")));
+
+		pgclass = table_open(RelationRelationId, AccessShareLock);
+		scan = table_beginscan_catalog(pgclass, 1, key);
+
+		while ((tuple = heap_getnext(scan, ForwardScanDirection)))
+		{
+			Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
+			if (classForm->relkind == RELKIND_RELATION && classForm->relpersistence == RELPERSISTENCE_TEMP)
+				tableOids = list_append_unique_oid(tableOids, classForm->oid);
+		}
+
+		table_endscan(scan);
+		table_close(pgclass, AccessShareLock);
+	}
+
+	/* Populate list of query's tables OIDs */
+	foreach(lc, queryDesc->plannedstmt->relationOids) {
+		tableOids = list_append_unique_oid(tableOids, lfirst_oid(lc));
+	}
+
+	/* The file is created owner-only. TODO revert to original per-file save behaviour */
+	path = psprintf("%s/%s.sql", query_output_directory, withdata?"createwithdata":"create");
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+	f = fopen(path, "ab");
+	umask(oumask);
+	if (f == NULL)
+	{
+		ereport(WARNING, (errcode_for_file_access(), errmsg("could not open file \"%s\" for writing: %m", path)));
+		return;
+	}
+
+	initStringInfo(&buf);
+
+	foreach(lc, tableOids) {
+		HeapTuple	classTuple;
+		Form_pg_class classForm;
+
+		classTuple = SearchSysCache1(RELOID, ObjectIdGetDatum(lfirst_oid(lc)));
+		if (!HeapTupleIsValid(classTuple))
+			continue;			/* no such relation any more: nothing to dump */
+		classForm = (Form_pg_class) GETSTRUCT(classTuple);
+
+		if (classForm->relkind == RELKIND_RELATION &&
+			((classForm->relpersistence == RELPERSISTENCE_PERMANENT && dump_persistent_tables) ||
+			 (classForm->relpersistence == RELPERSISTENCE_TEMP && (dump_temporary_tables || dump_all_temp_tables)))
+			){
+			FieldInfo  *fields = (FieldInfo*)palloc(sizeof(FieldInfo) * classForm->relnatts);
+			int			numFields = 0;
+			FieldInfo  *field;
+			AttrNumber	attno;
+			char	   *relname;
+
+			relname = QuotedIdentifier(classForm->relname.data);
+
+			for (attno = 1; attno <= classForm->relnatts; attno++)
+			{
+				bool		isvarlena;
+				Oid			outfuncid;
+
+				field = &fields[numFields];
+
+				field->tuple = SearchSysCache2(ATTNUM,
+											   ObjectIdGetDatum(classForm->oid),
+											   Int16GetDatum(attno));
+
+				field->form = (Form_pg_attribute) GETSTRUCT(field->tuple);
+
+				if (field->form->attisdropped)
+				{
+					ReleaseSysCache(field->tuple);
+					continue;
+				}
+
+				field->attnum = attno;
+				field->first = (numFields==0);
+				field->attname = QuotedIdentifier(field->form->attname.data);
+
+				getTypeOutputInfo(field->form->atttypid, &outfuncid, &isvarlena);
+				fmgr_info(outfuncid, &field->outfunc);
+
+				numFields++;
+			}
+
+			/* Write table create statement */
+			appendStringInfoString(&buf, "CREATE ");
+			if (classForm->relpersistence == RELPERSISTENCE_TEMP)
+				appendStringInfoString(&buf, "TEMPORARY ");
+			appendStringInfoString(&buf, "TABLE ");
+			appendStringInfoString(&buf, relname);
+			appendStringInfoString(&buf, " (");
+			for (field=&fields[0]; field < &fields[numFields]; field++)
+			{
+				if (!field->first)
+					appendStringInfoString(&buf, ",");
+				appendStringInfoString(&buf, "\n ");
+				appendStringInfoString(&buf, field->attname);
+				appendStringInfoString(&buf, " ");
+				appendStringInfoString(&buf, format_type_extended(field->form->atttypid, field->form->atttypmod, FORMAT_TYPE_TYPEMOD_GIVEN | ((field->form->atttypid >= FirstGenbkiObjectId) ? FORMAT_TYPE_FORCE_QUALIFY : 0)));
+			}
+			appendStringInfoString(&buf, "\n);\n\n");
+
+			/* Write indexes create statement */
+			if (dump_indexes)
+			{
+				ListCell   *lcIndex;
+				Relation	rel = table_open(classForm->oid, AccessShareLock);	/* the query need not hold a lock on it */
+
+				foreach(lcIndex, RelationGetIndexList(rel))
+				{
+					Oid			indexOid = lfirst_oid(lcIndex);
+					appendStringInfoString(&buf, TextDatumGetCString(DirectFunctionCall1(pg_get_indexdef, ObjectIdGetDatum(indexOid))));
+					appendStringInfoString(&buf, ";\n\n");
+				}
+
+				table_close(rel, AccessShareLock);
+			}
+
+			FlushStringInfoToFile(&buf, f);
+
+			/* Write tables data */
+			if (dump_data && withdata)
+			{
+				Relation	table;
+				TableScanDesc scan;
+				HeapTuple	tuple;
+				bool		first;
+
+				table = table_open(classForm->oid, AccessShareLock);
+				scan = table_beginscan(table, GetActiveSnapshot(), 0, NULL);
+				first = true;
+
+				while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
+				{
+					if(first)
+					{
+						first = false;
+
+						appendStringInfoString(&buf, "INSERT INTO ");
+						appendStringInfoString(&buf, relname);
+						appendStringInfoString(&buf, " (");
+
+						for (field=&fields[0]; field < &fields[numFields]; field++)
+						{
+							if(!field->first)
+								appendStringInfoString(&buf, ", ");
+							appendStringInfoString(&buf, field->attname);
+						}
+
+						appendStringInfoString(&buf, ") VALUES\n (");
+						FlushStringInfoToFile(&buf, f);
+					}
+					else
+						appendStringInfoString(&buf, ",\n (");
+
+					for (field=&fields[0]; field < &fields[numFields]; field++)
+					{
+						char	   *s;
+						char	   *quoted;
+						bool		isnull;
+						Datum		datum;
+
+						if (!field->first)
+							appendStringInfoString(&buf, ",");
+
+						datum = heap_getattr(tuple, field->attnum, table->rd_att, &isnull);
+
+						if (isnull)
+						{
+							appendStringInfoString(&buf, "NULL");
+							continue;
+						}
+
+						s = OutputFunctionCall(&field->outfunc, datum);
+
+						switch (field->form->atttypid)
+						{
+							case INT2OID:
+							case INT4OID:
+							case INT8OID:
+							case OIDOID:
+							case FLOAT4OID:
+							case FLOAT8OID:
+							case NUMERICOID:
+								if (strspn(s, "0123456789 +-eE.") == strlen(s))
+									appendStringInfoString(&buf, s);
+								else
+								{
+									appendStringInfoString(&buf, "'");
+									appendStringInfoString(&buf, s);
+									appendStringInfoString(&buf, "'");
+								}
+								break;
+
+							case BITOID:
+							case VARBITOID:
+								appendStringInfoString(&buf, "B'");
+								appendStringInfoString(&buf, s);
+								appendStringInfoString(&buf, "'");
+								break;
+
+							case BOOLOID:
+								if (strcmp(s, "t") == 0)
+									appendStringInfoString(&buf, "true");
+								else
+									appendStringInfoString(&buf, "false");
+								break;
+
+							default:
+								quoted = quote_literal_cstr(s);
+								appendStringInfoString(&buf, quoted);
+								pfree(quoted);
+								break;
+						}
+						pfree(s);		/* do not accumulate one string per value over the whole scan */
+					}
+					appendStringInfoString(&buf, ")");
+					FlushStringInfoToFile(&buf, f);
+				}
+
+				table_endscan(scan);
+				table_close(table, AccessShareLock);
+
+				appendStringInfoString(&buf, ";\n\n");
+				FlushStringInfoToFile(&buf, f);
+			}
+
+			/* Write analyze statement */
+			appendStringInfoString(&buf, "ANALYZE ");
+			appendStringInfoString(&buf, relname);
+			appendStringInfoString(&buf, ";\n\n");
+			FlushStringInfoToFile(&buf, f);
+
+			for (field=&fields[0]; field < &fields[numFields]; field++)
+			{
+				pfree(field->attname);
+				ReleaseSysCache(field->tuple);
+			}
+			pfree(fields);
+			pfree(relname);
+		}
+
+		ReleaseSysCache(classTuple);
+	}
+
+	pfree(buf.data);
+	fclose(f);
+}
+
+
+// Writes the table section with withdata = false and, when dump_data is set, once more with withdata = true. TODO revert asap to original per-file save design after publication
+static void
+SaveTables(QueryDesc* queryDesc){
+ SaveTables_(queryDesc, false);
+ if (dump_data)
+ SaveTables_(queryDesc, true);
+}
+
+/* Appends the query's source text, wrapped in an SQL comment, to the dump file (WARNING if it cannot be opened). */
+static void
+SaveQuery_(QueryDesc* queryDesc, bool withdata)
+{
+	FILE	   *f;
+	mode_t		oumask;
+	char	   *unescaped;
+	char	   *dst;
+	char const *src;
+	char	   *path;
+
+	/* The file is created owner-only. TODO revert to original per-file save behaviour */
+	path = psprintf("%s/%s.sql", query_output_directory, withdata?"createwithdata":"create");
+	oumask = umask((mode_t) ((~(S_IRUSR | S_IWUSR)) & (S_IRWXU | S_IRWXG | S_IRWXO)));
+	f = fopen(path, "ab");
+	umask(oumask);
+	if (f == NULL)
+	{
+		ereport(WARNING, (errcode_for_file_access(), errmsg("could not open file \"%s\" for writing: %m", path)));
+		return;
+	}
+	unescaped = palloc( strlen(queryDesc->sourceText) + 3 );	/* room for ";\n" and the terminator */
+	dst = unescaped;
+	src = queryDesc->sourceText;
+	while(*src){
+		if( src[0] == '\\' && src[1] =='\\' )
+			src++;				/* keep only one of two consecutive backslashes */
+		*(dst++) = *(src++);
+	}
+	strcpy(dst, ";\n");
+	fprintf(f, "/*\n%s\n*/\n", unescaped);
+	pfree(unescaped);
+	fclose(f);
+}
+
+
+// Writes the query text with withdata = false and, when dump_data is set, once more with withdata = true. TODO revert asap to original per-file save design after publication
+static void
+SaveQuery(QueryDesc* queryDesc){
+ SaveQuery_(queryDesc, false);
+ if (dump_data)
+ SaveQuery_(queryDesc, true);
+}
+
+/* Writes whichever parts of the dump (tables, query, plan, analyzed plan) are enabled and not yet written. */
+static void
+Dump(QueryDesc* queryDesc, bool havePlan, bool haveAnalysis)
+{
+	if (!query_dumped)
+	{
+		if (!PrepareDump())
+			return;
+
+		query_dumped = true;
+		if (dump_create)
+			SaveTables(queryDesc);
+		if (dump_query)
+			SaveQuery(queryDesc);
+	}
+
+	/* query_dumped is necessarily true here, so only dump_plan gates the plans. */
+	if (!dump_plan)
+		return;
+	if (havePlan && !plan_dumped)
+	{
+		plan_dumped = true;
+		SavePlan(queryDesc, "plan_explain", false);
+	}
+	if (haveAnalysis && !plan_analysis_dumped)
+	{
+		plan_analysis_dumped = true;
+		SavePlan(queryDesc, "plan_analyze", true);
+	}
+}
+
+/* ExecutorStart hook: resets the per-query dump flags, requests instrumentation and dumps on a query-text match. */
+static void
+ExecutorStart_hook_auto_dump(QueryDesc *queryDesc, int eflags)
+{
+ if (dump_enable && nesting_level == 0)
+ {
+ query_dumped = false;
+ plan_dumped = false;
+ plan_analysis_dumped = false;
+
+ if (dump_plan)
+ queryDesc->instrument_options |= INSTRUMENT_ALL;
+
+ if (dump_plan || dump_on_bad_plan)
+ queryDesc->instrument_options |= INSTRUMENT_ROWS;
+
+ if(queryDesc->operation == CMD_SELECT
+ || queryDesc->operation == CMD_DELETE
+ || queryDesc->operation == CMD_INSERT
+ || queryDesc->operation == CMD_UPDATE
+ ){
+ if (*dump_on_query_string && IsQueryMatch(queryDesc->sourceText, dump_on_query_string)) {
+ Dump( queryDesc, false, false );
+ }
+ }
+ }
+
+ if (prev_ExecutorStart)
+ prev_ExecutorStart(queryDesc, eflags);
+ else
+ standard_ExecutorStart(queryDesc, eflags);
+ /* After the executor has been started, add the plan (without analysis) to a dump that is already in progress. */
+ if (dump_enable && query_dumped)
+ Dump(queryDesc, true, false);
+}
+
+/* ExecutorRun hook: runs the next ExecutorRun with nesting_level raised, then dumps the analyzed plan when called for. */
+static void
+ExecutorRun_hook_auto_dump(QueryDesc *queryDesc, ScanDirection direction,
+ uint64 count, bool execute_once)
+{
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorRun)
+ prev_ExecutorRun(queryDesc, direction, count, execute_once);
+ else
+ standard_ExecutorRun(queryDesc, direction, count, execute_once);
+ }
+ PG_FINALLY();
+ {
+ nesting_level--;
+ }
+ PG_END_TRY();
+ /* Only at the top level: continue a dump already in progress, or start one because IsBadPlan() reports a bad plan. */
+ if(dump_enable && nesting_level==0 && ((dump_plan && query_dumped) || (dump_on_bad_plan && IsBadPlan(queryDesc->planstate))))
+ Dump(queryDesc, true, true);
+}
+
+static void
+ExecutorFinish_hook_auto_dump(QueryDesc *queryDesc)
+{
+	/* ExecutorFinish hook: run the next implementation with nesting_level raised; it is lowered again even on error. */
+	ExecutorFinish_hook_type finish = prev_ExecutorFinish ? prev_ExecutorFinish : standard_ExecutorFinish;
+
+	nesting_level++;
+	PG_TRY();
+	{
+		finish(queryDesc);
+	}
+	PG_FINALLY();
+	{
+		nesting_level--;
+	}
+	PG_END_TRY();
+}
+
+/* ExecutorEnd hook: adds nothing of its own, it only chains to the next ExecutorEnd implementation. */
+static void
+ExecutorEnd_hook_auto_dump(QueryDesc *queryDesc)
+{
+	ExecutorEnd_hook_type next;
+
+	next = prev_ExecutorEnd ? prev_ExecutorEnd : standard_ExecutorEnd;
+	next(queryDesc);
+}
+
+
+/* ProcessInterrupts hook: with dump_on_cancel set, an error raised while processing interrupts triggers a dump before it is re-thrown. */
+static void
+ProcessInterrupts_hook_auto_dump(void)
+{
+ if (dump_on_cancel)
+ {
+ PG_TRY();
+ if (likely(!prev_ProcessInterrupts))
+ standard_ProcessInterrupts();
+ else
+ prev_ProcessInterrupts();
+ PG_CATCH();
+ InterruptHoldoffCount++; /* raised for the duration of Dump(), lowered again right after */
+ if (dump_enable && ActivePortal && ActivePortal->queryDesc)
+ Dump(ActivePortal->queryDesc, true, true);
+ InterruptHoldoffCount--;
+ PG_RE_THROW(); /* re-raise the error caught above */
+ PG_END_TRY();
+ }
+ else
+ {
+ if (likely(!prev_ProcessInterrupts))
+ standard_ProcessInterrupts();
+ else
+ prev_ProcessInterrupts();
+ }
+}
+
+
+
+
+/* auto_dump(): placeholder V1 function; it ignores its arguments and returns void. */
+Datum
+auto_dump(PG_FUNCTION_ARGS) {
+ // TODO: the signal-based trigger sketched below is not implemented yet
+ // int notify_pid = PG_GETARG_INT32(0);
+ // if (notify_pid != 0) {
+ // kill(notify_pid, SIGUSR1);
+ // elog(WARNING,"Command 'auto_dump' has send to process '%d'.\nLook for result in %s/%d", notify_pid, output_directory, notify_pid);
+ // }
+ PG_RETURN_VOID();
+}
+/* Module load callback: defines the auto_dump.* settings (all PGC_SUSET) and installs the executor and interrupt hooks. */
+void
+_PG_init(void)
+{
+ DefineCustomBoolVariable("auto_dump.enable",
+ "Enable auto-dump.",
+ NULL,
+ &dump_enable,
+ dump_enable,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomStringVariable("auto_dump.output_directory",
+ "Output directory for dumped tables",
+ NULL,
+ &output_directory,
+ output_directory,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomStringVariable("auto_dump.dump_on_query_string",
+ "Activation phrase for start dump query tables (more 10 characters).",
+ NULL,
+ &dump_on_query_string,
+ dump_on_query_string,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_on_cancel",
+ "Dump tables when query is cancelled.",
+ NULL,
+ &dump_on_cancel,
+ dump_on_cancel,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_on_bad_plan",
+ "Dump tables when query plan is considered bad.",
+ NULL,
+ &dump_on_bad_plan,
+ dump_on_bad_plan,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_all_temp_tables",
+ "Dump all temporary tables of backend.",
+ NULL,
+ &dump_all_temp_tables,
+ dump_all_temp_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_persistent_tables",
+ "Dump persistent tables.",
+ NULL,
+ &dump_persistent_tables,
+ dump_persistent_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_temporary_tables",
+ "Dump temporary tables.",
+ NULL,
+ &dump_temporary_tables,
+ dump_temporary_tables,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_data",
+ "Dump tables data.",
+ NULL,
+ &dump_data,
+ dump_data,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_indexes",
+ "Dump indexes for tables.",
+ NULL,
+ &dump_indexes,
+ dump_indexes,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_query",
+ "Dump query itself.",
+ NULL,
+ &dump_query,
+ dump_query,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_create",
+ "Dump creation of tables.",
+ NULL,
+ &dump_create,
+ dump_create,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomBoolVariable("auto_dump.dump_plan",
+ "Dump execution plan of query.",
+ NULL,
+ &dump_plan,
+ dump_plan,
+ PGC_SUSET,
+ 0,
+ NULL, NULL, NULL);
+
+ DefineCustomIntVariable("auto_dump.bad_plan_percent_threshold",
+ "Sets the percent difference between estimated and actual row count to trigger plan dump.",
+ NULL,
+ &dump_plan_percent_threshold,
+ dump_plan_percent_threshold,
+ -1, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("auto_dump.bad_plan_count_threshold",
+ "Sets the row count difference between estimated and actual row count to trigger plan dump.",
+ NULL,
+ &dump_plan_count_threshold,
+ dump_plan_count_threshold,
+ -1, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ MarkGUCPrefixReserved("auto_dump");
+ /* Install our hooks, remembering the previous values so that each hook can chain to them. */
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = ExecutorStart_hook_auto_dump;
+
+ prev_ExecutorRun = ExecutorRun_hook;
+ ExecutorRun_hook = ExecutorRun_hook_auto_dump;
+
+ prev_ExecutorFinish = ExecutorFinish_hook;
+ ExecutorFinish_hook = ExecutorFinish_hook_auto_dump;
+
+ prev_ExecutorEnd = ExecutorEnd_hook;
+ ExecutorEnd_hook = ExecutorEnd_hook_auto_dump;
+
+ prev_ProcessInterrupts = ProcessInterrupts_hook;
+ ProcessInterrupts_hook = ProcessInterrupts_hook_auto_dump;
+}
diff --git a/contrib/auto_dump/auto_dump.control b/contrib/auto_dump/auto_dump.control
new file mode 100644
index 00000000000..f288ba5dab6
--- /dev/null
+++ b/contrib/auto_dump/auto_dump.control
@@ -0,0 +1,4 @@
+comment = 'auto_dump'
+default_version = '1.0'
+module_pathname = '$libdir/auto_dump'
+relocatable = true
diff --git a/contrib/auto_dump/meson.build b/contrib/auto_dump/meson.build
new file mode 100644
index 00000000000..614c34d698e
--- /dev/null
+++ b/contrib/auto_dump/meson.build
@@ -0,0 +1,27 @@
+auto_dump_sources = files(
+ 'auto_dump.c',
+)
+
+auto_dump = shared_module('auto_dump',
+ auto_dump_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += auto_dump
+
+install_data(
+ 'auto_dump.control',
+ 'auto_dump--1.0.sql',
+ kwargs: contrib_data_args,
+)
+
+# tests += {
+# 'name': 'auto_dump',
+# 'sd': meson.current_source_dir(),
+# 'bd': meson.current_build_dir(),
+# 'regress': {
+# 'sql': [
+# 'dump',
+# # 'bad_plan',
+# ],
+# },
+# }
diff --git a/contrib/dbcopies_decoding/Makefile b/contrib/dbcopies_decoding/Makefile
new file mode 100644
index 00000000000..aa6fbc538c6
--- /dev/null
+++ b/contrib/dbcopies_decoding/Makefile
@@ -0,0 +1,36 @@
+MODULE_big = dbcopies_decoding
+OBJS=dbcopies_decoding.o mchar_recode.o
+REGRESS = simple
+
+REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/dbcopies_decoding/logical.conf
+
+# Disabled because these tests require "wal_level=logical", which
+# typical installcheck users do not have (e.g. buildfarm clients).
+NO_INSTALLCHECK = 1
+
+PG_CPPFLAGS += -I/usr/local/include -I$(top_srcdir)/contrib/mchar
+# PGXS build: locate pgxs.mk through PG_CONFIG so that a caller-supplied pg_config is honoured.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dbcopies_decoding
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+ifeq ($(PORTNAME),win32)
+ICUNAME=icuin
+else
+ICUNAME=icui18n
+endif
+
+SHLIB_LINK += -L/usr/local/lib -licuuc -l$(ICUNAME) -Wl,-rpath,'$$ORIGIN'
+
+installcheck-force:
+ $(pg_regress_installcheck) $(REGRESS)
+
+mchar_recode.c: $(top_srcdir)/contrib/mchar/mchar_recode.c
+ cp -f $(top_srcdir)/contrib/mchar/mchar_recode.c ./
diff --git a/contrib/dbcopies_decoding/dbcopies_decoding.c b/contrib/dbcopies_decoding/dbcopies_decoding.c
new file mode 100644
index 00000000000..608439a3009
--- /dev/null
+++ b/contrib/dbcopies_decoding/dbcopies_decoding.c
@@ -0,0 +1,898 @@
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+
+#include "replication/logical.h"
+#include "replication/origin.h"
+
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/catcache.h"
+#include "utils/timestamp.h"
+#include "utils/cash.h"
+#include "utils/pg_locale.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+
+#include "mchar.h"
+
+PG_MODULE_MAGIC;
+
+
+Oid MCHAROID = InvalidOid; // cached OID of type "mchar"; resolved lazily through readTypeOID()
+Oid MVARCHAROID = InvalidOid; // cached OID of type "mvarchar"; resolved lazily through readTypeOID()
+const char cQuoteChar = '\''; // quote that delimits textual values in the output
+const char cContinueChar = '!'; // appended by checkFlushCtx() to a record it writes out early
+
+extern PGDLLEXPORT void _PG_init(void);
+extern PGDLLEXPORT void _PG_output_plugin_init(OutputPluginCallbacks* cb);
+
+typedef struct // private plugin state, stored in ctx->output_plugin_private
+{
+ MemoryContext context; // scratch context, reset after every decoded change
+ int record_buf_size;
+ //Requested record size
+ int prepare_header_size;
+ //Size of the header that OutputPluginPrepareWrite writes into ctx->out
+ bool include_xids;
+ //flag: write the transaction identifier
+ bool skip_change;
+ //flag: skip everything, output nothing
+ bool xact_wrote_changes;
+ //Set once the start of the transaction has been written out.
+} DecodingData;
+
+static void decode_startup(LogicalDecodingContext* ctx,
+ OutputPluginOptions* opt, bool is_init);
+static void decode_shutdown(LogicalDecodingContext* ctx
+ );
+static void decode_begin_txn(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn);
+static void decode_commit_txn(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, XLogRecPtr commit_lsn);
+static void decode_change(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, Relation rel, ReorderBufferChange* change);
+static void decode_truncate(LogicalDecodingContext* ctx,
+ ReorderBufferTXN* txn, int nrelations, Relation relations[],
+ ReorderBufferChange* change);
+static bool filter_by_origin(LogicalDecodingContext *ctx,
+ RepOriginId origin_id);
+
+#ifndef U8_TRUNCATE_IF_INCOMPLETE
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+#define U8_LEAD4_T1_BITS \
+"\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) \
+(U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+#define U8_LEAD3_T1_BITS \
+"\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) \
+(U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \
+ if((length)>(start)) { \
+ uint8_t __b1=s[(length)-1]; \
+ if(U8_IS_SINGLE(__b1)) { \
+ /* common ASCII character */ \
+ } else if(U8_IS_LEAD(__b1)) { \
+ --(length); \
+ } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+ uint8_t __b2=s[(length)-2]; \
+ if(0xe0<=__b2 && __b2<=0xf4) { \
+ if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+ U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+ (length)-=2; \
+ } \
+ } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+ uint8_t __b3=s[(length)-3]; \
+ if(0xf0<=__b3 && __b3<=0xf4 && \
+ U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+ (length)-=3; \
+ } \
+ } \
+ } \
+ } \
+}
+
+#endif
+
+void _PG_init(void)
+{ // intentionally empty: the module has nothing to set up here
+}
+
+void _PG_output_plugin_init(OutputPluginCallbacks* cb)
+{
+    AssertVariableIsOfType(&_PG_output_plugin_init, LogicalOutputPluginInit);
+    /* Fill in the callback table: lifecycle hooks first, then the stream hooks. */
+    cb->startup_cb = decode_startup;
+    cb->shutdown_cb = decode_shutdown;
+    cb->filter_by_origin_cb = filter_by_origin;
+    cb->begin_cb = decode_begin_txn;
+    cb->change_cb = decode_change;
+    cb->truncate_cb = decode_truncate;
+    cb->commit_cb = decode_commit_txn;
+}
+
+static bool tryExtractBoolOption(DefElem* elem, const char* name, bool* dest)
+{
+    /*
+     * Returns true when elem is the option called "name" (storing its parsed
+     * boolean into *dest if a value was supplied), false when it is some
+     * other option.  An unparsable value raises an ERROR.
+     */
+    if (strcmp(elem->defname, name) != 0)
+        return false;
+
+    if (elem->arg != NULL && !parse_bool(strVal(elem->arg), dest))
+        ereport(ERROR,
+            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+             errmsg("could not parse value \"%s\" for parameter \"%s\"",
+                    strVal(elem->arg), elem->defname)));
+    return true;
+}
+
+static bool tryExtractIntOption(DefElem* elem, const char* name, int32* dest)
+{
+    /* True iff elem is the option "name"; a supplied value is converted */
+    /* with pg_strtoint32 and stored in *dest. */
+    if (strcmp(elem->defname, name) != 0)
+        return false;
+
+    if (elem->arg != NULL)
+        *dest = pg_strtoint32(strVal(elem->arg));
+    return true;
+}
+
+static void readTypeOID(char* typeName, Oid* typeOid)
+{
+    CatCList* matches;
+
+    if (*typeOid != InvalidOid)     /* resolved by an earlier call */
+        return;
+    /* Search the TYPENAMENSP cache by type name only (one key supplied). */
+    matches = SearchSysCacheList(TYPENAMENSP,
+        1, CStringGetDatum(typeName), 0, 0);
+    if (matches->n_members == 1)    /* only an unambiguous match is used */
+        *typeOid = ((Form_pg_type)GETSTRUCT(&matches->members[0]->tuple))->oid;
+    ReleaseSysCacheList(matches);
+    if (*typeOid == InvalidOid)
+        elog(WARNING, "OID of type %s not defined!", typeName);
+}
+
+static void decode_startup(LogicalDecodingContext* ctx,
+    OutputPluginOptions* opt, bool is_init)
+{
+    ListCell* cell;
+    DecodingData* state = palloc0(sizeof(DecodingData));
+
+    /* Defaults, possibly overridden by the output plugin options below. */
+    state->include_xids = true;
+    state->skip_change = false;
+    state->record_buf_size = ALLOCSET_DEFAULT_MAXSIZE / 4;
+    /* NOTE(review): "slice_size" is accepted without any range check. */
+    foreach(cell, ctx->output_plugin_options)
+    {
+        DefElem* elem = lfirst(cell);
+        Assert(elem->arg == NULL || IsA(elem->arg, String));
+
+        /* The first helper that recognises the option name consumes it. */
+        if (tryExtractBoolOption(elem, "include-xids", &state->include_xids) ||
+            tryExtractBoolOption(elem, "skip-change", &state->skip_change) ||
+            tryExtractIntOption(elem, "slice_size", &state->record_buf_size))
+            continue;
+
+        ereport(ERROR,
+            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+             errmsg("option \"%s\" = \"%s\" is unknown",
+                 elem->defname,
+                 elem->arg ? strVal(elem->arg) : "(null)")));
+    }
+    /* Scratch context that decode_change/decode_truncate reset after each call. */
+    state->context = AllocSetContextCreate(ctx->context,
+        "text conversion context",
+        ALLOCSET_DEFAULT_SIZES);
+    ctx->output_plugin_private = state;
+
+    /* Textual output; receive_rewrites is switched off. */
+    opt->output_type = OUTPUT_PLUGIN_TEXTUAL_OUTPUT;
+    opt->receive_rewrites = false;
+}
+
+static void decode_shutdown(LogicalDecodingContext* ctx)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    /* Release the scratch context created in decode_startup. */
+    MemoryContextDelete(state->context);
+}
+
+static bool filter_by_origin(LogicalDecodingContext *ctx,
+    RepOriginId origin_id)
+{
+    DecodingData* state = ctx->output_plugin_private;   /* origin_id is not consulted */
+    return state != NULL && state->skip_change;
+}
+
+static void decode_begin_txn(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    /* Nothing is written here; printTransaction emits "B" on the first change. */
+    state->xact_wrote_changes = false;
+}
+
+static void decode_commit_txn(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, XLogRecPtr commit_lsn)
+{
+    DecodingData* state = ctx->output_plugin_private;
+
+    /* A "C" record is written only if this transaction already produced output. */
+    if (state->skip_change || !state->xact_wrote_changes)
+        return;
+    OutputPluginPrepareWrite(ctx, true);
+    if (!state->include_xids)
+        appendStringInfo(ctx->out, "C");
+    else
+        appendStringInfo(ctx->out, "C %u", txn->xid);
+    OutputPluginWrite(ctx, true);
+}
+
+static int record_buf_size(LogicalDecodingContext* ctx) { /* per-record size limit held in DecodingData */
+    return ((const DecodingData*)ctx->output_plugin_private)->record_buf_size;
+}
+
+static void prepareFlushedCtx(LogicalDecodingContext* ctx)
+{
+    DecodingData* state = ctx->output_plugin_private;
+    ctx->out->len = 0;                          /* begin an empty record */
+    OutputPluginPrepareWrite(ctx, true);
+    state->prepare_header_size = ctx->out->len; /* bytes the prepare step put in front */
+}
+
+static int checkFlushCtx(LogicalDecodingContext* ctx, int toWriteSize)
+{ //returns the maximum number of bytes
+ //that can be written before the length limit is exceeded
+ int overflowRemain = record_buf_size(ctx) - ctx->out->len - 1; // presumably the 1 keeps room for cContinueChar
+ if (overflowRemain <= toWriteSize)
+ { // not enough room: close this record as "continued" and open a new one
+ appendStringInfoChar(ctx->out, cContinueChar);
+ switch (((DecodingData*)(ctx->output_plugin_private)
+ )->prepare_header_size)
+ {
+ case 0: // the prepare step wrote no header
+ break;
+ case 1 + sizeof(int64) * 3: // NOTE(review): looks like a 1-byte tag plus three int64 fields - confirm against the prepare_write callback
+ memset(&ctx->out->data[1], 0, sizeof(int64) * 2); // zero the first two int64 fields after the tag byte
+ break;
+ default: // any other header layout is rejected
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("Unsupported ctx->prepare_write function!")));
+
+ }
+ OutputPluginWrite(ctx, false);
+ prepareFlushedCtx(ctx);
+ return record_buf_size(ctx) - ctx->out->len - 1; // room available in the fresh record
+ }
+ else
+ return overflowRemain;
+}
+
+static void printByts(LogicalDecodingContext* ctx, Datum val)
+{ // writes a bytea value as '\x<hex digits>', continuing in new records when the size limit is reached
+ const char n[] = { "0123456789abcdef" };
+ const int cDig = 16;
+ char* bytsData = VARDATA(val);
+ int32 bytsLen = VARSIZE_ANY_EXHDR(val);
+ int resultSize = 3 + bytsLen * 2 + 1; //remainder that still has to be written
+ int overflowRemain = checkFlushCtx(ctx, 3);
+ if (resultSize > overflowRemain) // reserve buffer space for whichever is smaller
+ enlargeStringInfo(ctx->out, overflowRemain);
+ else
+ enlargeStringInfo(ctx->out, resultSize);
+
+ appendStringInfoString(ctx->out, "\'\\x"); // the 3-byte prefix: quote, backslash, x
+ overflowRemain -= 3;
+ resultSize -= 3;
+
+ {
+ int32 i;
+ for (i = 0; i < bytsLen; ++i)
+ {
+ int x;
+ if (overflowRemain < 2) // no room for another pair of hex digits
+ {
+ overflowRemain = checkFlushCtx(ctx, 2);
+ if (resultSize > overflowRemain)
+ enlargeStringInfo(ctx->out, overflowRemain);
+ else
+ enlargeStringInfo(ctx->out, resultSize);
+ }
+ x = bytsData[i] & 255;
+ ctx->out->data[ctx->out->len] = n[x / cDig]; // digits are stored straight into the reserved buffer
+ ctx->out->data[ctx->out->len + 1] = n[x % cDig];
+ ctx->out->len += 2;
+ overflowRemain -= 2;
+ resultSize -= 2;
+ }
+ }
+ if (overflowRemain < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar); // closing quote
+}
+
+static bool truncateIfIncmoplete(const int maxCharSize, const char* str, int* len)
+{
+    /*
+     * Shorten *len so that str[0 .. *len) does not end in the middle of a
+     * multibyte character.  Returns false if no valid cut point is found.
+     */
+    int dbEnc;
+    int truncCount;
+
+    if (maxCharSize == 1)       /* single-byte encoding: any cut is valid */
+        return true;
+
+    dbEnc = GetDatabaseEncoding();
+    if (dbEnc == PG_UTF8)
+    {
+        U8_TRUNCATE_IF_INCOMPLETE(str, 0, *len);
+        /* Test the remaining length, not the pointer that holds it. */
+        return (*len > 0);
+    }
+
+    /* Slow path for other multibyte encodings: drop trailing bytes one at */
+    /* a time until some suffix verifies as complete character(s). */
+    for (truncCount = 1; truncCount < maxCharSize; ++truncCount)
+    {
+        int charLen;
+        for (charLen = 1; charLen <= maxCharSize && *len >= charLen; ++charLen)
+            if (pg_verify_mbstr(dbEnc, &str[*len - charLen], charLen, true))
+                return true;
+        --(*len);
+    }
+    return false;
+}
+
+static void printCharVarchar(LogicalDecodingContext* ctx, Datum val)
+{   /* Writes a text/varchar/bpchar value as a quoted literal, doubling embedded */
+    /* quotes and continuing in a new record whenever the size limit is reached. */
+    const int maxCharSize = pg_database_encoding_max_length();
+    char* bytsData = VARDATA(val);
+    int32 bytsLen = VARSIZE_ANY_EXHDR(val);
+    int overflowRemain = checkFlushCtx(ctx, 1 + maxCharSize);
+    char* pBegin = bytsData;    /* first byte not yet copied to ctx->out */
+    int L = 0;                  /* pending bytes: pBegin .. bytsData + i */
+    int i;
+
+    appendStringInfoChar(ctx->out, cQuoteChar);
+    --overflowRemain;
+    for (i = 0; i < bytsLen; ++i)
+    {
+        bool isQuote = (bytsData[i] == cQuoteChar);
+        bool isLast = (i + 1 == bytsLen);
+        bool overflow;
+
+        ++L;
+        overflow = (L >= overflowRemain);
+        if (!(isQuote || overflow || isLast))
+            continue;
+
+        /* A cut forced purely by the size limit may fall inside a multibyte */
+        /* character; back off to the previous character boundary. */
+        if (overflow && !(isQuote || isLast) &&
+            !truncateIfIncmoplete(maxCharSize, pBegin, &L))
+            ereport(ERROR,
+                (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+                 errmsg("invalid string value")));
+        appendBinaryStringInfo(ctx->out, pBegin, L);
+        pBegin += L;
+        if (isQuote)
+        {
+            /* the quote was just copied with the segment; write it a second time */
+            overflowRemain = checkFlushCtx(ctx, maxCharSize + 1);
+            appendStringInfoChar(ctx->out, bytsData[i]);
+            --overflowRemain;
+        }
+        else if (overflow)  /* guaranteed buffer flush */
+            overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+        else
+            overflowRemain = checkFlushCtx(ctx, maxCharSize);
+        /* Bytes cut off by the truncation stay pending for the next segment */
+        /* (zero when nothing was truncated), so they are not lost. */
+        L = (int)(&bytsData[i + 1] - pBegin);
+    }
+    if (overflowRemain < 1)
+        checkFlushCtx(ctx, 1);
+    appendStringInfoChar(ctx->out, cQuoteChar);
+}
+
+static int printM(const UChar* wordsData,
+    int wordsLen, LogicalDecodingContext* ctx)
+{   /* Writes the opening quote and the UTF-16 text converted via UChar2Char, doubling */
+    /* quotes; the closing quote is left to the caller.  Returns the room left. */
+    const int maxCharSize = pg_database_encoding_max_length();
+    const UChar cQuoteUChar = L'\'';
+    int overflowRemain = checkFlushCtx(ctx, 1 + maxCharSize);
+    const UChar* pBegin = wordsData;    /* first code unit not yet converted */
+    int L = 0;                          /* pending code units starting at pBegin */
+    int i;
+
+    appendStringInfoChar(ctx->out, cQuoteChar);
+    --overflowRemain;
+    for (i = 0; i < wordsLen; ++i)
+    {
+        bool isQuote = (wordsData[i] == cQuoteUChar);
+        bool isLast = (i + 1 == wordsLen);
+        bool overflow;
+
+        ++L;
+        /* budget maxCharSize output bytes for every pending code unit */
+        overflow = (L * maxCharSize >= overflowRemain);
+        if (!(isQuote || overflow || isLast))
+            continue;
+
+        if (overflow && !(isQuote || isLast))
+        {
+            /* never end a record on the first half of a surrogate pair */
+            if (U16_IS_LEAD(wordsData[i]))
+                --L;
+            /* A lead directly followed by a trail is a complete pair, not an error. */
+            if (L == 0 || (i > 0 && U16_IS_LEAD(wordsData[i - 1]) &&
+                           !U16_IS_TRAIL(wordsData[i])))
+                ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+                     errmsg("invalid utf16 string value")));
+        }
+        enlargeStringInfo(ctx->out, L * maxCharSize);
+        ctx->out->len += UChar2Char(pBegin, L,
+            &ctx->out->data[ctx->out->len]);
+        pBegin += L;
+        if (isQuote)
+        {
+            overflowRemain = checkFlushCtx(ctx, maxCharSize + 1);
+            appendStringInfoChar(ctx->out, cQuoteChar);
+            --overflowRemain;
+        }
+        else if (overflow)
+            overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+        else
+            overflowRemain = checkFlushCtx(ctx, maxCharSize);
+        /* a lead surrogate held back above stays pending (otherwise zero) */
+        L = (int)(&wordsData[i + 1] - pBegin);
+    }
+    return overflowRemain;
+}
+
+static void printMVarchar(LogicalDecodingContext* ctx, Datum val)
+{   /* the UTF-16 payload is read from MVARCHARHDRSZ bytes past the datum start */
+    const UChar* text = (UChar*)(DatumGetPointer(val) + MVARCHARHDRSZ);
+    if (printM(text, UVARCHARLENGTH(val), ctx) < 1)
+        checkFlushCtx(ctx, 1);
+    appendStringInfoChar(ctx->out, cQuoteChar);
+}
+
+static void printMChar(LogicalDecodingContext* ctx, Datum val)
+{
+    const UChar* text = (UChar*)(DatumGetPointer(val) + MCHARHDRSZ);
+    /* blanks needed to pad the stored text up to the typmod length */
+    int32 padding =
+        DatumGetMChar(val)->typmod - u_countChar32(text, UCHARLENGTH(val));
+    int room = printM(text, UCHARLENGTH(val), ctx);
+    while (padding > 0)
+    {
+        if (padding <= room)
+        {
+            appendStringInfoSpaces(ctx->out, padding);
+            room -= padding;
+            padding = 0;
+        }
+        else
+        {
+            appendStringInfoSpaces(ctx->out, room);
+            padding -= room;
+            room = checkFlushCtx(ctx, 1);
+        }
+    }
+    if (room < 1)
+        checkFlushCtx(ctx, 1);
+    appendStringInfoChar(ctx->out, cQuoteChar);
+}
+
+static void appendCtxString(LogicalDecodingContext* ctx, char* str)
+{
+    int nbytes = strlen(str);   /* the string is written as is, in one piece */
+    checkFlushCtx(ctx, nbytes);
+    appendBinaryStringInfo(ctx->out, str, nbytes);
+}
+
+static void printTimestamp(LogicalDecodingContext* ctx, Datum val)
+{
+    /* Finite timestamps are written as 14 unquoted digits YYYYMMDDhhmmss. */
+    Timestamp ts = DatumGetTimestamp(val);
+    struct pg_tm tm;
+    fsec_t fsec;
+    char* cursor;
+
+    /* No tz pointer is passed to timestamp2tm, so the hours of a timestamp with */
+    /* time zone get converted: e.g. 10:23:54.123+02 comes out as 08:23:54. */
+    if (TIMESTAMP_NOT_FINITE(ts) ||
+        timestamp2tm(ts, NULL, &tm, &fsec, NULL, NULL) != 0)
+    {
+        appendCtxString(ctx, "'invalid timestamp'");
+        return;
+    }
+    checkFlushCtx(ctx, 14);
+    enlargeStringInfo(ctx->out, 14);
+    cursor = ctx->out->data + ctx->out->len;
+    ctx->out->len += 14;
+    cursor = pg_ultostr_zeropad(cursor,
+        (tm.tm_year > 0) ? tm.tm_year : -(tm.tm_year - 1), 4);
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_mon, 2);
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_mday, 2);
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_hour, 2);
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_min, 2);
+    cursor = pg_ultostr_zeropad(cursor, abs(tm.tm_sec), 2);
+}
+
+static void printDate(LogicalDecodingContext* ctx, Datum val)
+{   /* Finite dates are written as 8 unquoted digits YYYYMMDD. */
+    DateADT d = DatumGetDateADT(val);
+    char* cursor;
+    int year, mon, day;
+    if (DATE_NOT_FINITE(d))
+    {
+        appendCtxString(ctx, "'invalid date'");
+        return;
+    }
+    j2date(d + POSTGRES_EPOCH_JDATE, &year, &mon, &day);
+    checkFlushCtx(ctx, 8);
+    enlargeStringInfo(ctx->out, 8);
+    cursor = ctx->out->data + ctx->out->len;
+    ctx->out->len += 8;
+    cursor = pg_ultostr_zeropad(cursor, (year > 0) ? year : -(year - 1), 4);
+    cursor = pg_ultostr_zeropad(cursor, mon, 2);
+    cursor = pg_ultostr_zeropad(cursor, day, 2);
+}
+
+static void printTime(LogicalDecodingContext* ctx, Datum val)
+{   /* Uses the 14-digit timestamp layout with an all-zero date part. */
+    TimeADT t = DatumGetTimeADT(val);
+    struct pg_tm tm;
+    fsec_t fsec;
+    char* cursor;
+    time2tm(t, &tm, &fsec);
+    checkFlushCtx(ctx, 14);
+    enlargeStringInfo(ctx->out, 14);
+    cursor = ctx->out->data + ctx->out->len;
+    ctx->out->len += 14;
+    cursor = pg_ultostr_zeropad(cursor, 0, 4);     /* year  */
+    cursor = pg_ultostr_zeropad(cursor, 0, 2);     /* month */
+    cursor = pg_ultostr_zeropad(cursor, 0, 2);     /* day   */
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_hour, 2);
+    cursor = pg_ultostr_zeropad(cursor, tm.tm_min, 2);
+    cursor = pg_ultostr_zeropad(cursor, abs(tm.tm_sec), 2);
+}
+
+static void printMoney(LogicalDecodingContext* ctx, Datum val)
+{   /* Writes a money value as plain decimal text: optional '-', digits, '.' before the fraction. */
+    Cash v = DatumGetCash(val);
+    char buf[128];
+    char* pBuf = &buf[127];
+    bool minus = (v < 0);
+    /* Negate in unsigned arithmetic: -v overflows for the smallest Cash value. */
+    uint64 uv = minus ? (uint64)0 - (uint64)v : (uint64)v;
+    struct lconv *lconvert = PGLC_localeconv();
+    int points = lconvert->frac_digits;
+
+    if (points < 0 || points > 10)  /* out-of-range locale value: fall back to 2 */
+        points = 2;
+
+    buf[127] = 0;
+    /* digits are produced right to left, least significant first */
+    do {
+        *(--pBuf) = (uv % 10) + '0';
+        --points;
+
+        if (points == 0)
+            *(--pBuf) = '.';
+
+        if (uv)
+            uv /= 10;
+    } while (uv || points >= 0);
+    if (minus)
+        *(--pBuf) = '-';
+
+    appendCtxString(ctx, pBuf);
+}
+
+static void printBool(LogicalDecodingContext* ctx, Datum val)
+{ // booleans are written as the bare words true / false
+ appendCtxString(ctx, DatumGetBool(val) ? "true" : "false");
+}
+
+static void printDefault(LogicalDecodingContext* ctx,
+ Datum val, Oid typid, Oid typoutput)
+{ // Output produced with the type's standard output function ..._out
+ char* dataAsChar = OidOutputFunctionCall(typoutput, val);
+ switch (typid)
+ {
+ case INT2OID: // numeric types: written as is, without quotes
+ case INT4OID:
+ case INT8OID:
+ case OIDOID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ appendCtxString(ctx, dataAsChar);
+ break;
+
+ case BITOID: // bit strings: written as B'...'
+ case VARBITOID:
+ checkFlushCtx(ctx, (int)strlen(dataAsChar) + 3);
+ appendStringInfo(ctx->out, "B'%s'", dataAsChar);
+ break;
+
+ default: // everything else: quoted literal, split into records at the size limit
+ {
+ const int maxCharSize = pg_database_encoding_max_length();
+ const char* pBegin;
+ const char* pEnd = dataAsChar;
+ int overflowRemain = checkFlushCtx(ctx, maxCharSize + 1);
+
+ appendStringInfoChar(ctx->out, cQuoteChar);
+ --overflowRemain;
+ //unlike printCharVarchar,
+ //the length is not known here, but the string is certainly zero-terminated
+ for (pBegin = dataAsChar; *pBegin; pBegin = pEnd)
+ {
+ bool overflow;
+ while (*pEnd && // advance to the next quote, the end of the string or the size limit
+ *pEnd != cQuoteChar &&
+ (int)(pEnd - pBegin) < overflowRemain)
+ ++pEnd;
+ overflow = (int)(pEnd - pBegin) >= overflowRemain;
+ if (pEnd != pBegin)
+ {
+ if (overflow && *pEnd && *pEnd != cQuoteChar)
+ { // cut forced by the size limit: do not split a multibyte character
+ int32 L = (int32)(pEnd - pBegin);
+ if (!truncateIfIncmoplete(maxCharSize, pBegin, &L))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST),
+ errmsg("invalid string value")
+ ));
+ pEnd = pBegin + L;
+ }
+ appendBinaryStringInfo(ctx->out,
+ pBegin, (int)(pEnd - pBegin));
+ }
+
+ if (*pEnd == cQuoteChar)
+ { // an embedded quote is written twice
+ overflowRemain = checkFlushCtx(ctx, maxCharSize + 2);
+ appendStringInfoChar(ctx->out, *pEnd);
+ appendStringInfoChar(ctx->out, *pEnd);
+ ++pEnd;
+ overflowRemain -= 2;
+ }
+ else if (overflow) // asking for a whole record's worth forces a flush
+ overflowRemain = checkFlushCtx(ctx, record_buf_size(ctx));
+ else
+ overflowRemain = checkFlushCtx(ctx, maxCharSize);
+ }
+ if (overflowRemain < 1)
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, cQuoteChar);
+ }
+ break;
+ }
+ pfree(dataAsChar);
+}
+
+static void printTuple(LogicalDecodingContext* ctx,
+ TupleDesc tupdesc, HeapTuple tuple,
+ bool skip_nulls, char* tableName)
+{ // writes every live column of the tuple, each preceded by a space; tableName is not used in this body
+ // skip_nulls: when true, NULL columns are omitted instead of being written as " null"
+ if (tuple == NULL)
+ appendCtxString(ctx, " (no-tuple-data)");
+ else
+ {
+ int natt;
+ for (natt = 0; natt < tupdesc->natts; natt++)
+ {
+ bool typisvarlena;
+ Oid typoutput;
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, natt);
+ Oid typid = attr->atttypid;
+ bool isnull;
+ Datum origval;
+
+ if (attr->attisdropped)
+ continue;
+ if (attr->attnum < 0) // Don't print system columns,
+ continue;//oid will already have been printed if present.
+
+ origval = heap_getattr(tuple, natt + 1, tupdesc, &isnull);
+ if (isnull)
+ {
+ if (!skip_nulls)
+ appendCtxString(ctx, " null");
+ continue;
+ }
+
+ checkFlushCtx(ctx, 1);
+ appendStringInfoChar(ctx->out, ' '); // column separator
+
+ getTypeOutputInfo(typid, &typoutput, &typisvarlena);
+
+ if (typisvarlena)
+ {
+ if (VARATT_IS_EXTERNAL_ONDISK(origval)) // value still lives in TOAST storage: write a placeholder
+ appendCtxString(ctx, "unchanged-toast-datum");
+ else
+ {
+ Datum val = PointerGetDatum(PG_DETOAST_DATUM(origval));
+
+ if (typid == BPCHAROID ||
+ typid == VARCHAROID ||
+ typid == TEXTOID)
+ printCharVarchar(ctx, val);
+ else if (typid == BYTEAOID)
+ printByts(ctx, val);
+ else if (typid > FirstNormalObjectId) { // possibly mchar/mvarchar: resolve their OIDs on first use
+ readTypeOID("mchar", &MCHAROID);
+ readTypeOID("mvarchar", &MVARCHAROID);
+
+ if (typid == MCHAROID)
+ printMChar(ctx, val);
+ else if (typid == MVARCHAROID)
+ printMVarchar(ctx, val);
+ else
+ printDefault(ctx, val, typid, typoutput);
+ } else
+ printDefault(ctx, val, typid, typoutput);
+
+ if (DatumGetPointer(val) != DatumGetPointer(origval)) // free the copy made by detoasting
+ pfree(DatumGetPointer(val));
+ }
+
+ }
+ else
+ { // fixed-length types: dedicated formatters for a few, the type's output function for the rest
+ switch (typid)
+ {
+ case MONEYOID:
+ printMoney(ctx, origval);
+ break;
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ printTimestamp(ctx, origval);
+ break;
+ case DATEOID:
+ printDate(ctx, origval);
+ break;
+ case TIMEOID:
+ printTime(ctx, origval);
+ break;
+ case BOOLOID:
+ printBool(ctx, origval);
+ break;
+ default:
+ printDefault(ctx, origval, typid, typoutput);
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void printTransaction(DecodingData* data,
+    LogicalDecodingContext* ctx, ReorderBufferTXN* txn)
+{
+    /* Emit the "B" record once per transaction, the first time it is needed. */
+    if (data->xact_wrote_changes)
+        return;
+    OutputPluginPrepareWrite(ctx, false);
+    if (!data->include_xids)
+        appendStringInfoString(ctx->out, "B");
+    else
+        appendStringInfo(ctx->out, "B %u", txn->xid);
+    OutputPluginWrite(ctx, false);
+    data->xact_wrote_changes = true;
+}
+
+static void decode_change(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, Relation relation, ReorderBufferChange* change)
+{   /* Writes one row change as "I|U|D <table> <columns...>". */
+    DecodingData* state = ctx->output_plugin_private;
+    MemoryContext caller;
+    TupleDesc tupdesc;
+    char* tableName;
+    const char* tag = NULL;     /* stays NULL for an unexpected action */
+    HeapTuple tuple = NULL;
+    bool skipNulls = false;
+
+    if (state->skip_change)
+        return;
+
+    caller = MemoryContextSwitchTo(state->context);
+    tupdesc = RelationGetDescr(relation);
+    tableName = RelationGetRelationName(relation);
+    printTransaction(state, ctx, txn);
+    prepareFlushedCtx(ctx);
+    switch (change->action)
+    {
+        case REORDER_BUFFER_CHANGE_INSERT:
+        case REORDER_BUFFER_CHANGE_UPDATE:
+            tag = (change->action == REORDER_BUFFER_CHANGE_INSERT) ? "I " : "U ";
+            tuple = change->data.tp.newtuple;
+            break;
+        case REORDER_BUFFER_CHANGE_DELETE:
+            tag = "D ";
+            tuple = change->data.tp.oldtuple;   /* old row image */
+            skipNulls = true;
+            break;
+        default:
+            Assert(false);
+    }
+    if (tag != NULL)
+    {
+        appendStringInfoString(ctx->out, tag);
+        appendStringInfoString(ctx->out, tableName);
+        printTuple(ctx, tupdesc, tuple, skipNulls, tableName);
+    }
+    MemoryContextSwitchTo(caller);
+    OutputPluginWrite(ctx, true);
+    MemoryContextReset(state->context);
+}
+
+static void decode_truncate(LogicalDecodingContext* ctx,
+    ReorderBufferTXN* txn, int nrelations,
+    Relation relations[], ReorderBufferChange* change)
+{
+    /* Writes a single "T name1, name2, ..." record for a TRUNCATE. */
+    DecodingData* state = ctx->output_plugin_private;
+    MemoryContext caller;
+    int relno;
+
+    if (state->skip_change)
+        return;
+
+    printTransaction(state, ctx, txn);
+    caller = MemoryContextSwitchTo(state->context);
+
+    OutputPluginPrepareWrite(ctx, true);
+    appendStringInfoString(ctx->out, "T ");
+
+    for (relno = 0; relno < nrelations; relno++)
+    {
+        /* comma-separate every name after the first */
+        if (relno > 0)
+            appendStringInfoString(ctx->out, ", ");
+        appendStringInfoString(ctx->out,
+            RelationGetRelationName(relations[relno]));
+    }
+
+    MemoryContextSwitchTo(caller);
+    OutputPluginWrite(ctx, true);
+    MemoryContextReset(state->context);
+}
diff --git a/contrib/dbcopies_decoding/expected/simple.out b/contrib/dbcopies_decoding/expected/simple.out
new file mode 100644
index 00000000000..4053ec1935a
--- /dev/null
+++ b/contrib/dbcopies_decoding/expected/simple.out
@@ -0,0 +1,28 @@
+-- predictability
+SET synchronous_commit = on;
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+SELECT 'init' FROM pg_create_logical_replication_slot('dbcopies_slot', 'dbcopies_decoding');
+ ?column?
+----------
+ init
+(1 row)
+
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('dbcopies_slot', NULL, NULL, 'include-xids', '0');
+ data
+-------------------------------
+ B
+ I replication_example 1 1 '1'
+ I replication_example 2 1 '2'
+ C
+(4 rows)
+
+SELECT pg_drop_replication_slot('dbcopies_slot');
+ pg_drop_replication_slot
+--------------------------
+
+(1 row)
+
diff --git a/contrib/dbcopies_decoding/logical.conf b/contrib/dbcopies_decoding/logical.conf
new file mode 100644
index 00000000000..367f7066514
--- /dev/null
+++ b/contrib/dbcopies_decoding/logical.conf
@@ -0,0 +1,2 @@
+wal_level = logical
+max_replication_slots = 4
diff --git a/contrib/dbcopies_decoding/meson.build b/contrib/dbcopies_decoding/meson.build
new file mode 100644
index 00000000000..9fea46ca4c7
--- /dev/null
+++ b/contrib/dbcopies_decoding/meson.build
@@ -0,0 +1,38 @@
+dbcopies_decoding_sources = files(
+ 'dbcopies_decoding.c',
+ '../mchar/mchar_recode.c'
+)
+
+if host_system == 'windows'
+ dbcopies_decoding_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'dbcopies_decoding',
+ '--FILEDESC', 'dbcopies_decoding',])
+endif
+
+dbcopies_decoding = shared_module('dbcopies_decoding',
+ dbcopies_decoding_sources,
+ include_directories: '../mchar',
+ kwargs: contrib_mod_args + {
+ 'dependencies': [icu_i18n, contrib_mod_args['dependencies']],
+ },
+)
+contrib_targets += dbcopies_decoding
+
+install_data(
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'dbcopies_decoding',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'simple'
+ ],
+ 'regress_args': ['--temp-config', files('logical.conf')],
+ # Disabled because these tests require "wal_level=logical", which
+ # typical installcheck users do not have (e.g. buildfarm clients).
+ 'runningcheck': false,
+ },
+}
diff --git a/contrib/dbcopies_decoding/sql/simple.sql b/contrib/dbcopies_decoding/sql/simple.sql
new file mode 100644
index 00000000000..1e9d2f72323
--- /dev/null
+++ b/contrib/dbcopies_decoding/sql/simple.sql
@@ -0,0 +1,15 @@
+-- predictability
+SET synchronous_commit = on;
+
+CREATE TABLE replication_example(id SERIAL PRIMARY KEY, somedata int, text varchar(120));
+
+SELECT 'init' FROM pg_create_logical_replication_slot('dbcopies_slot', 'dbcopies_decoding');
+
+BEGIN;
+INSERT INTO replication_example(somedata, text) VALUES (1, 1);
+INSERT INTO replication_example(somedata, text) VALUES (1, 2);
+COMMIT;
+
+SELECT data FROM pg_logical_slot_get_changes('dbcopies_slot', NULL, NULL, 'include-xids', '0');
+
+SELECT pg_drop_replication_slot('dbcopies_slot');
diff --git a/contrib/fasttrun/Makefile b/contrib/fasttrun/Makefile
new file mode 100644
index 00000000000..78e92b86cbe
--- /dev/null
+++ b/contrib/fasttrun/Makefile
@@ -0,0 +1,17 @@
+MODULE_big = fasttrun
+OBJS = fasttrun.o
+DATA = fasttrun--2.0.sql fasttrun--unpackaged--2.0.sql
+DOCS = README.fasttrun
+REGRESS = fasttrun
+EXTENSION=fasttrun
+
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/fasttrun
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/fasttrun/README.fasttrun b/contrib/fasttrun/README.fasttrun
new file mode 100644
index 00000000000..b6d1b41a6d2
--- /dev/null
+++ b/contrib/fasttrun/README.fasttrun
@@ -0,0 +1,16 @@
+select fasttruncate('TABLE_NAME');
+
+Function truncates the temporary table and doesn't grow
+pg_class size.
+
+Warning: function isn't transaction safe!
+
+For tests:
+create or replace function f() returns void as $$
+begin
+for i in 1..1000
+loop
+ PERFORM fasttruncate('tt1');
+end loop;
+end;
+$$ language plpgsql;
diff --git a/contrib/fasttrun/expected/fasttrun.out b/contrib/fasttrun/expected/fasttrun.out
new file mode 100644
index 00000000000..ef64fa6400e
--- /dev/null
+++ b/contrib/fasttrun/expected/fasttrun.out
@@ -0,0 +1,115 @@
+CREATE EXTENSION fasttrun;
+create table persist ( a int );
+insert into persist values (1);
+select fasttruncate('persist');
+ERROR: Relation isn't a temporary table
+insert into persist values (2);
+select * from persist order by a;
+ a
+---
+ 1
+ 2
+(2 rows)
+
+create temp table temp1 (a int);
+insert into temp1 values (1);
+BEGIN;
+create temp table temp2 (a int);
+insert into temp2 values (1);
+select * from temp1 order by a;
+ a
+---
+ 1
+(1 row)
+
+select * from temp2 order by a;
+ a
+---
+ 1
+(1 row)
+
+insert into temp1 (select * from generate_series(1,10000));
+insert into temp2 (select * from generate_series(1,11000));
+analyze temp2;
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | t | t
+(2 rows)
+
+select fasttruncate('temp1');
+ fasttruncate
+--------------
+
+(1 row)
+
+select fasttruncate('temp2');
+ fasttruncate
+--------------
+
+(1 row)
+
+insert into temp1 values (-2);
+insert into temp2 values (-2);
+select * from temp1 order by a;
+ a
+----
+ -2
+(1 row)
+
+select * from temp2 order by a;
+ a
+----
+ -2
+(1 row)
+
+COMMIT;
+select * from temp1 order by a;
+ a
+----
+ -2
+(1 row)
+
+select * from temp2 order by a;
+ a
+----
+ -2
+(1 row)
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | f | f
+(2 rows)
+
+select fasttruncate('temp1');
+ fasttruncate
+--------------
+
+(1 row)
+
+select fasttruncate('temp2');
+ fasttruncate
+--------------
+
+(1 row)
+
+select * from temp1 order by a;
+ a
+---
+(0 rows)
+
+select * from temp2 order by a;
+ a
+---
+(0 rows)
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+ relname | ?column? | ?column?
+---------+----------+----------
+ temp1 | f | f
+ temp2 | f | f
+(2 rows)
+
diff --git a/contrib/fasttrun/fasttrun--2.0.sql b/contrib/fasttrun/fasttrun--2.0.sql
new file mode 100644
index 00000000000..708c2753151
--- /dev/null
+++ b/contrib/fasttrun/fasttrun--2.0.sql
@@ -0,0 +1,6 @@
+\echo Use "CREATE EXTENSION fasttrun" to load this file. \quit
+
+
+CREATE OR REPLACE FUNCTION fasttruncate(text)
+RETURNS void AS 'MODULE_PATHNAME'
+LANGUAGE C RETURNS NULL ON NULL INPUT VOLATILE;
diff --git a/contrib/fasttrun/fasttrun--unpackaged--2.0.sql b/contrib/fasttrun/fasttrun--unpackaged--2.0.sql
new file mode 100644
index 00000000000..3a071f077e1
--- /dev/null
+++ b/contrib/fasttrun/fasttrun--unpackaged--2.0.sql
@@ -0,0 +1,3 @@
+\echo Use "CREATE EXTENSION fasttrun FROM unpackaged" to load this file. \quit
+
+ALTER EXTENSION fasttrun ADD function fasttruncate(text);
diff --git a/contrib/fasttrun/fasttrun.c b/contrib/fasttrun/fasttrun.c
new file mode 100644
index 00000000000..494fcf6c7b3
--- /dev/null
+++ b/contrib/fasttrun/fasttrun.c
@@ -0,0 +1,90 @@
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "storage/bufmgr.h"
+#include "catalog/namespace.h"
+#include "utils/lsyscache.h"
+#include "utils/builtins.h"
+#include <fmgr.h>
+#include <funcapi.h>
+#include <access/heapam.h>
+#include <catalog/pg_type.h>
+#include <catalog/heap.h>
+#include <commands/vacuum.h>
+#include <utils/regproc.h>
+#include <utils/varlena.h>
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+PG_FUNCTION_INFO_V1(fasttruncate);
+Datum fasttruncate(PG_FUNCTION_ARGS);
+Datum
+fasttruncate(PG_FUNCTION_ARGS) {
+    text *name=PG_GETARG_TEXT_P(0);
+    char *relname;
+    List *relname_list;
+    RangeVar *relvar;
+    Oid relOid;
+    Relation rel;
+    bool makeanalyze = false;
+
+    /*
+     * Truncate the named temporary table in place; if pg_class recorded pages
+     * or tuples for it, run ANALYZE on it afterwards.  Other relations: ERROR.
+     */
+    relname = palloc( VARSIZE(name) + 1);
+    memcpy(relname, VARDATA(name), VARSIZE(name)-VARHDRSZ);
+    relname[ VARSIZE(name)-VARHDRSZ ] = '\0';
+
+    /* Resolve the (possibly qualified) name; the lock is requested here. */
+    relname_list = stringToQualifiedNameList(relname, NULL);
+    relvar = makeRangeVarFromNameList(relname_list);
+    relOid = RangeVarGetRelid(relvar, AccessExclusiveLock, false);
+
+    if ( get_rel_relkind(relOid) != RELKIND_RELATION )
+        elog(ERROR,"Relation isn't a ordinary table");
+
+    rel = table_open(relOid, NoLock);   /* no new lock: requested above */
+    if ( !isTempNamespace(get_rel_namespace(relOid)) )
+        elog(ERROR,"Relation isn't a temporary table");
+
+    heap_truncate(list_make1_oid(relOid));
+    if ( rel->rd_rel->relpages > 0 || rel->rd_rel->reltuples > 0 )
+        makeanalyze = true;
+
+    /*
+     * heap_truncate doesn't unlock the table,
+     * so we should unlock it.
+     */
+    table_close(rel, AccessExclusiveLock);
+
+    if ( makeanalyze ) {
+        /* Zero-fill first so that fields not set below are never left indeterminate. */
+        VacuumParams params = {0};
+        VacuumRelation *vacrel;     /* own name: does not shadow the outer rel */
+        MemoryContext cntx;
+
+        params.options = VACOPT_ANALYZE;
+        params.freeze_min_age = -1;
+        params.freeze_table_age = -1;
+        params.multixact_freeze_min_age = -1;
+        params.multixact_freeze_table_age = -1;
+        params.is_wraparound = false;
+        params.log_min_duration = -1;
+        vacrel = makeNode(VacuumRelation);
+        vacrel->relation = relvar;
+        vacrel->oid = relOid;
+        vacrel->va_cols = NULL;
+        cntx = AllocSetContextCreate(CurrentMemoryContext,
+            "Vacuum", ALLOCSET_DEFAULT_SIZES);
+        vacuum(list_make1(vacrel), &params,
+            GetAccessStrategy(BAS_VACUUM), cntx, false);
+        MemoryContextDelete(cntx);
+    }
+
+    PG_RETURN_VOID();
+}
diff --git a/contrib/fasttrun/fasttrun.control b/contrib/fasttrun/fasttrun.control
new file mode 100644
index 00000000000..7862c0bf8ad
--- /dev/null
+++ b/contrib/fasttrun/fasttrun.control
@@ -0,0 +1,5 @@
+comment = 'fast transaction-unsafe truncate'
+default_version = '2.0'
+module_pathname = '$libdir/fasttrun'
+relocatable = true
+trusted = true
diff --git a/contrib/fasttrun/meson.build b/contrib/fasttrun/meson.build
new file mode 100644
index 00000000000..8d6c5aeda79
--- /dev/null
+++ b/contrib/fasttrun/meson.build
@@ -0,0 +1,37 @@
+fasttrun_sources = files(
+ 'fasttrun.c'
+)
+
+if host_system == 'windows'
+ fasttrun_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'fasttrun',
+ '--FILEDESC', 'fasttrun',])
+endif
+
+fasttrun = shared_module('fasttrun',
+ fasttrun_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += fasttrun
+
+install_data(
+ 'fasttrun.control',
+ 'fasttrun--2.0.sql',
+ 'fasttrun--unpackaged--2.0.sql',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'fasttrun',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'fasttrun'
+ ]
+ },
+}
+
+# TODO: DOCS = README.fasttrun
\ No newline at end of file
diff --git a/contrib/fasttrun/sql/fasttrun.sql b/contrib/fasttrun/sql/fasttrun.sql
new file mode 100644
index 00000000000..0e3cb6c9beb
--- /dev/null
+++ b/contrib/fasttrun/sql/fasttrun.sql
@@ -0,0 +1,48 @@
+CREATE EXTENSION fasttrun;
+
+create table persist ( a int );
+insert into persist values (1);
+select fasttruncate('persist');
+insert into persist values (2);
+select * from persist order by a;
+
+create temp table temp1 (a int);
+insert into temp1 values (1);
+
+BEGIN;
+
+create temp table temp2 (a int);
+insert into temp2 values (1);
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+insert into temp1 (select * from generate_series(1,10000));
+insert into temp2 (select * from generate_series(1,11000));
+
+analyze temp2;
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+
+select fasttruncate('temp1');
+select fasttruncate('temp2');
+
+insert into temp1 values (-2);
+insert into temp2 values (-2);
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+COMMIT;
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
+
+select fasttruncate('temp1');
+select fasttruncate('temp2');
+
+select * from temp1 order by a;
+select * from temp2 order by a;
+
+select relname, relpages>0, reltuples>0 from pg_class where relname in ('temp1', 'temp2') order by relname;
diff --git a/contrib/fulleq/Makefile b/contrib/fulleq/Makefile
new file mode 100644
index 00000000000..99cc8aca35e
--- /dev/null
+++ b/contrib/fulleq/Makefile
@@ -0,0 +1,28 @@
+MODULE_big = fulleq
+OBJS = fulleq.o
+DOCS = README.fulleq
+REGRESS = fulleq
+DATA_built = fulleq--2.0.sql fulleq--unpackaged--2.0.sql
+EXTENSION=fulleq
+
+ARGTYPE = bool bytea char name int8 int2 int4 text \
+ oid xid cid oidvector float4 float8 macaddr \
+ inet cidr varchar date time timestamp timestamptz \
+ interval timetz
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/fulleq
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+fulleq--2.0.sql: fulleq--2.0.sql.in
+ $(top_srcdir)/$(subdir)/generate.sh packaged "$<" > "$@"
+
+fulleq--unpackaged--2.0.sql: fulleq--unpackaged--2.0.sql.in
+ $(top_srcdir)/$(subdir)/generate.sh unpackaged "$<" > "$@"
+
diff --git a/contrib/fulleq/README.fulleq b/contrib/fulleq/README.fulleq
new file mode 100644
index 00000000000..93bf0cad20e
--- /dev/null
+++ b/contrib/fulleq/README.fulleq
@@ -0,0 +1,2 @@
+Introduces the == operator, which returns true when both operands are
+equal or both are NULL, and false when exactly one of them is NULL.
diff --git a/contrib/fulleq/expected/fulleq.out b/contrib/fulleq/expected/fulleq.out
new file mode 100644
index 00000000000..452f8593432
--- /dev/null
+++ b/contrib/fulleq/expected/fulleq.out
@@ -0,0 +1,61 @@
+CREATE EXTENSION fulleq;
+select 4::int == 4;
+ ?column?
+----------
+ t
+(1 row)
+
+select 4::int == 5;
+ ?column?
+----------
+ f
+(1 row)
+
+select 4::int == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::int == 5;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::int == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+select '4'::text == '4';
+ ?column?
+----------
+ t
+(1 row)
+
+select '4'::text == '5';
+ ?column?
+----------
+ f
+(1 row)
+
+select '4'::text == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::text == '5';
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::text == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
diff --git a/contrib/fulleq/fulleq--2.0.sql.in b/contrib/fulleq/fulleq--2.0.sql.in
new file mode 100644
index 00000000000..c270647c720
--- /dev/null
+++ b/contrib/fulleq/fulleq--2.0.sql.in
@@ -0,0 +1,25 @@
+-- For ARGTYPE
+
+CREATE OR REPLACE FUNCTION isfulleq_ARGTYPE(ARGTYPE, ARGTYPE)
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION fullhash_ARGTYPE(ARGTYPE)
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == (
+ LEFTARG = ARGTYPE,
+ RIGHTARG = ARGTYPE,
+ PROCEDURE = isfulleq_ARGTYPE,
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES
+);
+
+CREATE OPERATOR CLASS ARGTYPE_fill_ops
+ FOR TYPE ARGTYPE USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_ARGTYPE(ARGTYPE);
diff --git a/contrib/fulleq/fulleq--unpackaged--2.0.sql.in b/contrib/fulleq/fulleq--unpackaged--2.0.sql.in
new file mode 100644
index 00000000000..8d759d8221f
--- /dev/null
+++ b/contrib/fulleq/fulleq--unpackaged--2.0.sql.in
@@ -0,0 +1,10 @@
+-- For ARGTYPE
+
+ALTER EXTENSION fulleq ADD FUNCTION isfulleq_ARGTYPE(ARGTYPE, ARGTYPE);
+
+ALTER EXTENSION fulleq ADD FUNCTION fullhash_ARGTYPE(ARGTYPE);
+
+ALTER EXTENSION fulleq ADD OPERATOR == (ARGTYPE, ARGTYPE);
+
+ALTER EXTENSION fulleq ADD OPERATOR CLASS ARGTYPE_fill_ops USING hash;
+
diff --git a/contrib/fulleq/fulleq.c b/contrib/fulleq/fulleq.c
new file mode 100644
index 00000000000..e435be4b93a
--- /dev/null
+++ b/contrib/fulleq/fulleq.c
@@ -0,0 +1,112 @@
+#include "postgres.h"
+#include "fmgr.h"
+#include "access/hash.h"
+#include "catalog/pg_collation.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/timestamp.h"
+#include "utils/date.h"
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+#define NULLHASHVALUE (-2147483647)
+
+#define FULLEQ_FUNC(type, cmpfunc, hashfunc)	/* defines isfulleq_<type>() and fullhash_<type>() */	\
+PG_FUNCTION_INFO_V1( isfulleq_##type );							\
+Datum	isfulleq_##type(PG_FUNCTION_ARGS);							\
+Datum															\
+isfulleq_##type(PG_FUNCTION_ARGS) {								\
+	if ( PG_ARGISNULL(0) && PG_ARGISNULL(1) )					\
+		PG_RETURN_BOOL(true);									\
+	else if ( PG_ARGISNULL(0) || PG_ARGISNULL(1) )				\
+		PG_RETURN_BOOL(false);									\
+	/* both arguments are non-NULL: defer to cmpfunc */			\
+	PG_RETURN_DATUM( DirectFunctionCall2Coll( cmpfunc,			\
+							DEFAULT_COLLATION_OID,				\
+							PG_GETARG_DATUM(0),					\
+							PG_GETARG_DATUM(1)					\
+	) );														\
+}																\
+	/* hash counterpart: a NULL argument maps to NULLHASHVALUE */	\
+PG_FUNCTION_INFO_V1( fullhash_##type );							\
+Datum	fullhash_##type(PG_FUNCTION_ARGS);							\
+Datum															\
+fullhash_##type(PG_FUNCTION_ARGS) {								\
+	if ( PG_ARGISNULL(0) )										\
+		PG_RETURN_INT32(NULLHASHVALUE);							\
+	/* pass the same collation as the comparison above, so collation-aware hashfuncs get one */	\
+	PG_RETURN_DATUM( DirectFunctionCall1Coll( hashfunc,			\
+							DEFAULT_COLLATION_OID, PG_GETARG_DATUM(0)	\
+	) );														\
+}
+
+/* Hash an int2vector: hash_any() over the dim1 int16 entries stored in key->values. */
+static Datum
+hashint2vector(PG_FUNCTION_ARGS)
+{
+ int2vector *key = (int2vector *) PG_GETARG_POINTER(0);
+
+ return hash_any((unsigned char *) key->values, key->dim1 * sizeof(int16));
+}
+
+/*
+ * Equality for int2vector: true when both vectors have the same dim1 and
+ * byte-identical values.  Used by the int2vector FULLEQ_FUNC entry below.
+ */
+static Datum
+int2vectoreq(PG_FUNCTION_ARGS)
+{
+ int2vector *a = (int2vector *) PG_GETARG_POINTER(0);
+ int2vector *b = (int2vector *) PG_GETARG_POINTER(1);
+
+ if (a->dim1 != b->dim1)
+ PG_RETURN_BOOL(false);
+ PG_RETURN_BOOL(memcmp(a->values, b->values, a->dim1 * sizeof(int16)) == 0);
+}
+
+
+FULLEQ_FUNC( bool , booleq , hashchar );
+FULLEQ_FUNC( bytea , byteaeq , hashvarlena );
+FULLEQ_FUNC( char , chareq , hashchar );
+FULLEQ_FUNC( name , nameeq , hashname );
+FULLEQ_FUNC( int8 , int8eq , hashint8 );
+FULLEQ_FUNC( int2 , int2eq , hashint2 );
+FULLEQ_FUNC( int4 , int4eq , hashint4 );
+FULLEQ_FUNC( text , texteq , hashtext );
+FULLEQ_FUNC( oid , oideq , hashoid );
+FULLEQ_FUNC( xid , xideq , hashint4 );
+FULLEQ_FUNC( cid , cideq , hashint4 );
+FULLEQ_FUNC( oidvector , oidvectoreq , hashoidvector );
+FULLEQ_FUNC( float4 , float4eq , hashfloat4 );
+FULLEQ_FUNC( float8 , float8eq , hashfloat8 );
+/*FULLEQ_FUNC( abstime , abstimeeq , hashint4 );*/
+/*FULLEQ_FUNC( reltime , reltimeeq , hashint4 );*/
+FULLEQ_FUNC( macaddr , macaddr_eq , hashmacaddr );
+FULLEQ_FUNC( inet , network_eq , hashinet );
+FULLEQ_FUNC( cidr , network_eq , hashinet );
+FULLEQ_FUNC( varchar , texteq , hashtext );
+FULLEQ_FUNC( date , date_eq , hashint4 );
+FULLEQ_FUNC( time , time_eq , hashfloat8 );
+FULLEQ_FUNC( timestamp , timestamp_eq , hashfloat8 );
+FULLEQ_FUNC( timestamptz , timestamp_eq , hashfloat8 );
+FULLEQ_FUNC( interval , interval_eq , interval_hash );
+FULLEQ_FUNC( timetz , timetz_eq , timetz_hash );
+
+/*
+ * PostgreSQL 10 dropped the int2vector equality and hash support functions
+ * in commit 5c80642aa8de8393b08cd3cbf612b325cedd98dc; for compatibility
+ * we still provide them here.
+ */
+FULLEQ_FUNC( int2vector , int2vectoreq , hashint2vector );
+/* Placeholder comparator that always raises ERROR; used for the abstime and reltime entries below. */
+static Datum
+dummy_eq(PG_FUNCTION_ARGS)
+{
+ elog(ERROR, "unimplemented");
+ PG_RETURN_DATUM(0); /* keep compiler quiet */
+}
+
+FULLEQ_FUNC( abstime , dummy_eq , hashint4 );
+FULLEQ_FUNC( reltime , dummy_eq , hashint4 );
diff --git a/contrib/fulleq/fulleq.control b/contrib/fulleq/fulleq.control
new file mode 100644
index 00000000000..30a26c65fff
--- /dev/null
+++ b/contrib/fulleq/fulleq.control
@@ -0,0 +1,5 @@
+comment = 'exact equal operation'
+default_version = '2.0'
+module_pathname = '$libdir/fulleq'
+relocatable = true
+trusted = true
diff --git a/contrib/fulleq/generate.sh b/contrib/fulleq/generate.sh
new file mode 100755
index 00000000000..bc925e78b98
--- /dev/null
+++ b/contrib/fulleq/generate.sh
@@ -0,0 +1,50 @@
+#!/bin/bash -e
+# Usage: generate.sh packaged|unpackaged TEMPLATE -- writes the generated SQL to stdout.
+type="$1"
+template="$2"
+
+if [ "$type" = "packaged" ]; then
+	echo '\echo Use "CREATE EXTENSION fulleq" to load this file. \quit'
+elif [ "$type" = "unpackaged" ]; then
+	echo '\echo Use "CREATE EXTENSION fulleq FROM unpackaged" to load this file. \quit'
+	echo 'DROP OPERATOR CLASS IF EXISTS int2vector_fill_ops USING hash;'
+	echo 'DROP OPERATOR FAMILY IF EXISTS int2vector_fill_ops USING hash;'
+	echo 'DROP FUNCTION IF EXISTS fullhash_int2vector(int2vector);'
+	echo 'DROP OPERATOR IF EXISTS == (int2vector, int2vector);'
+	echo 'DROP FUNCTION IF EXISTS isfulleq_int2vector(int2vector, int2vector);'
+else
+	echo "invalid arguments" >&2	# stdout is captured into the generated .sql file
+	exit 1
+fi
+
+# The template is emitted once per type, with ARGTYPE replaced by the type name.
+ARGTYPE=(
+	bool
+	bytea
+	char
+	name
+	int8
+	int2
+	int4
+	text
+	oid
+	xid
+	cid
+	oidvector
+	float4
+	float8
+	macaddr
+	inet
+	cidr
+	varchar
+	date
+	time
+	timestamp
+	timestamptz
+	interval
+	timetz
+)
+
+for argtype in "${ARGTYPE[@]}"; do
+	sed -e "s/ARGTYPE/$argtype/g" < "$template"
+done
diff --git a/contrib/fulleq/meson.build b/contrib/fulleq/meson.build
new file mode 100644
index 00000000000..3402dd0c764
--- /dev/null
+++ b/contrib/fulleq/meson.build
@@ -0,0 +1,53 @@
+fulleq_sources = files(
+ 'fulleq.c'
+)
+
+if host_system == 'windows'
+ fulleq_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'fulleq',
+ '--FILEDESC', 'fulleq',])
+endif
+
+contrib_targets += custom_target('fulleq--2.0.sql',
+ input: 'fulleq--2.0.sql.in',
+ output: 'fulleq--2.0.sql',
+ command: [meson.current_source_dir() / 'generate.sh', 'packaged', '@INPUT@'],
+ capture: true,
+ install: true,
+ install_dir: contrib_data_args['install_dir'],
+)
+
+contrib_targets += custom_target('fulleq--unpackaged--2.0.sql',
+ input: 'fulleq--unpackaged--2.0.sql.in',
+ output: 'fulleq--unpackaged--2.0.sql',
+ command: [meson.current_source_dir() / 'generate.sh', 'unpackaged', '@INPUT@'],
+ capture: true,
+ install: true,
+ install_dir: contrib_data_args['install_dir'],
+)
+
+fulleq = shared_module('fulleq',
+ fulleq_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += fulleq
+
+install_data(
+ 'fulleq.control',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'fulleq',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'fulleq'
+ ]
+ },
+}
+
+# TODO: DOCS = README.fulleq
\ No newline at end of file
diff --git a/contrib/fulleq/sql/fulleq.sql b/contrib/fulleq/sql/fulleq.sql
new file mode 100644
index 00000000000..d43abeb34b7
--- /dev/null
+++ b/contrib/fulleq/sql/fulleq.sql
@@ -0,0 +1,13 @@
+CREATE EXTENSION fulleq;
+
+select 4::int == 4;
+select 4::int == 5;
+select 4::int == NULL;
+select NULL::int == 5;
+select NULL::int == NULL;
+
+select '4'::text == '4';
+select '4'::text == '5';
+select '4'::text == NULL;
+select NULL::text == '5';
+select NULL::text == NULL;
diff --git a/contrib/mchar/Changes b/contrib/mchar/Changes
new file mode 100644
index 00000000000..b7f6e0c5718
--- /dev/null
+++ b/contrib/mchar/Changes
@@ -0,0 +1,20 @@
+2.0 make an extension
+0.17 add == operation:
+ a == b => ( a = b or a is null and b is null )
+0.16 fix pg_dump - now mchar in pg_catalog schema, not public
+ fix bug in mvarchar_substr()
+0.15 add upper()/lower()
+0.14 Add ESCAPE for LIKE, SIMILAR TO [ESCAPE], POSIX regexp
+0.13 Outer binary format is now different from
+ inner: it's just a UTF-16 string
+0.12 Fix copy binary
+0.11 Force UTF-8 convertor if server_encoding='UTF8'
+0.10 add (mchar|mvarchar)_(send|recv) functions to
+ allow binary copying. Note: these functions
+ don't recode values.
+0.9 index support for like, improve recoding functions
+0.8 initial support for LIKE optimization with index:
+ there is still no algorithm to find the nearest greater string
+0.7 hash indexes and enable a hash joins
+0.6 implicit casting mchar-mvarchar
+ cross type comparison operations
diff --git a/contrib/mchar/Makefile b/contrib/mchar/Makefile
new file mode 100644
index 00000000000..81826afd296
--- /dev/null
+++ b/contrib/mchar/Makefile
@@ -0,0 +1,31 @@
+MODULE_big = mchar
+OBJS = mchar_io.o mchar_proc.o mchar_op.o mchar_recode.o mchar_like.o
+EXTENSION=mchar
+DATA = mchar--2.2.1.sql mchar--2.0.1--2.1.sql mchar--2.0--2.1.sql \
+ mchar--2.1.1--2.2.sql mchar--2.1--2.2.sql \
+ mchar--2.2--2.2.1.sql \
+ mchar--unpackaged--2.0.sql
+DOCS = README.mchar
+REGRESS = init mchar mvarchar mm like compat
+ENCODING = UTF8
+
+PG_CPPFLAGS=-I/usr/local/include
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/mchar
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
+ifeq ($(PORTNAME),win32)
+ICUNAME=icuin
+else
+ICUNAME=icui18n
+endif
+
+SHLIB_LINK += -L/usr/local/lib -licuuc -l$(ICUNAME) -Wl,-rpath,'$$ORIGIN'
+
diff --git a/contrib/mchar/README.mchar b/contrib/mchar/README.mchar
new file mode 100644
index 00000000000..479a7d1f40a
--- /dev/null
+++ b/contrib/mchar/README.mchar
@@ -0,0 +1,20 @@
+MCHAR & MVARCHAR
+ type modifier
+ length()
+ substr(str, pos[, length])
+ || - concatenation with any (mchar,mvarchar) arguments
+ < <= = >= > - case-insensitive comparisons (libICU)
+ &< &<= &= &>= &> - case-sensitive comparisons (libICU)
+ implicit casting mchar<->mvarchar
+ B-tree and hash index
+ LIKE [ESCAPE]
+ SIMILAR TO [ESCAPE]
+ ~ (POSIX regexp)
+ index support for LIKE
+
+
+Authors:
+ Oleg Bartunov <oleg@sai.msu.ru>
+ Teodor Sigaev <teodor@sigaev.ru>
+
+
diff --git a/contrib/mchar/expected/compat.out b/contrib/mchar/expected/compat.out
new file mode 100644
index 00000000000..480a286e8f6
--- /dev/null
+++ b/contrib/mchar/expected/compat.out
@@ -0,0 +1,66 @@
+--- table based checks
+select '<' || ch || '>', '<' || vch || '>' from chvch;
+ ?column? | ?column?
+----------------+--------------
+ <No spaces > | <No spaces>
+ <One space > | <One space >
+ <1 space > | <1 space >
+(3 rows)
+
+select * from chvch where vch = 'One space';
+ ch | vch
+--------------+------------
+ One space | One space
+(1 row)
+
+select * from chvch where vch = 'One space ';
+ ch | vch
+--------------+------------
+ One space | One space
+(1 row)
+
+select * from ch where chcol = 'abcd' order by chcol;
+ chcol
+----------------------------------
+ abcd
+ AbcD
+(2 rows)
+
+select * from ch t1 join ch t2 on t1.chcol = t2.chcol order by t1.chcol, t2.chcol;
+ chcol | chcol
+----------------------------------+----------------------------------
+ abcd | AbcD
+ abcd | abcd
+ AbcD | AbcD
+ AbcD | abcd
+ abcz | abcz
+ defg | dEfg
+ defg | defg
+ dEfg | dEfg
+ dEfg | defg
+ ee | Ee
+ ee | ee
+ Ee | Ee
+ Ee | ee
+(13 rows)
+
+select * from ch where chcol > 'abcd' and chcol<'ee';
+ chcol
+----------------------------------
+ abcz
+ defg
+ dEfg
+(3 rows)
+
+select * from ch order by chcol;
+ chcol
+----------------------------------
+ abcd
+ AbcD
+ abcz
+ defg
+ dEfg
+ ee
+ Ee
+(7 rows)
+
diff --git a/contrib/mchar/expected/init.out b/contrib/mchar/expected/init.out
new file mode 100644
index 00000000000..7bae978ec35
--- /dev/null
+++ b/contrib/mchar/expected/init.out
@@ -0,0 +1,18 @@
+CREATE EXTENSION mchar;
+create table ch (
+ chcol mchar(32)
+) without oids;
+insert into ch values('abcd');
+insert into ch values('AbcD');
+insert into ch values('abcz');
+insert into ch values('defg');
+insert into ch values('dEfg');
+insert into ch values('ee');
+insert into ch values('Ee');
+create table chvch (
+ ch mchar(12),
+ vch mvarchar(12)
+) without oids;
+insert into chvch values('No spaces', 'No spaces');
+insert into chvch values('One space ', 'One space ');
+insert into chvch values('1 space', '1 space ');
diff --git a/contrib/mchar/expected/like.out b/contrib/mchar/expected/like.out
new file mode 100644
index 00000000000..a3f47f8c710
--- /dev/null
+++ b/contrib/mchar/expected/like.out
@@ -0,0 +1,841 @@
+-- simplest examples
+-- E061-04 like predicate
+set standard_conforming_strings=off;
+SELECT 'hawkeye'::mchar LIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'H%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'H%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'indio%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'indio%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar LIKE 'h%eye' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%eye' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE '_ndio' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE '_ndio' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'in__o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'in__o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'in_o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'in_o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'H%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'H%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'indio%' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'indio%' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%eye' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%eye' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE '_ndio' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE '_ndio' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'in__o' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'in__o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'in_o' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'in_o' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- unused escape character
+SELECT 'hawkeye'::mchar LIKE 'h%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mchar NOT LIKE 'h%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE 'ind_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE 'ind_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%%'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%awkeye'::mchar LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%awkeye'::mchar NOT LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mchar LIKE '_ndio'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mchar NOT LIKE '_ndio'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_d_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar LIKE 'i$_d%o'::mchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d%o'::mchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character same as pattern character
+SELECT 'maca'::mchar LIKE 'm%aca' ESCAPE '%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'maca'::mchar NOT LIKE 'm%aca' ESCAPE '%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'ma%a'::mchar LIKE 'm%a%%a' ESCAPE '%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'ma%a'::mchar NOT LIKE 'm%a%%a' ESCAPE '%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bear'::mchar LIKE 'b_ear' ESCAPE '_'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'bear'::mchar NOT LIKE 'b_ear'::mchar ESCAPE '_' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar LIKE 'b_e__r' ESCAPE '_'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'be_r'::mchar NOT LIKE 'b_e__r' ESCAPE '_'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar LIKE '__e__r' ESCAPE '_'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mchar NOT LIKE '__e__r'::mchar ESCAPE '_' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- unused escape character
+SELECT 'hawkeye'::mvarchar LIKE 'h%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE 'ind_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE 'ind_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%%'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%awkeye'::mvarchar LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'h%awkeye'::mvarchar NOT LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'indio'::mvarchar LIKE '_ndio'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'indio'::mvarchar NOT LIKE '_ndio'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "false";
+ false
+-------
+ f
+(1 row)
+
+-- escape character same as pattern character
+SELECT 'maca'::mvarchar LIKE 'm%aca' ESCAPE '%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'maca'::mvarchar NOT LIKE 'm%aca' ESCAPE '%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'ma%a'::mvarchar LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'ma%a'::mvarchar NOT LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'bear'::mvarchar LIKE 'b_ear' ESCAPE '_'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'bear'::mvarchar NOT LIKE 'b_ear'::mvarchar ESCAPE '_' AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'be_r'::mvarchar NOT LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar LIKE '__e__r' ESCAPE '_'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'be_r'::mvarchar NOT LIKE '__e__r'::mvarchar ESCAPE '_' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- similar to
+SELECT 'abc'::mchar SIMILAR TO 'abc'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO 'a'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO '%(b|d)%'::mchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mchar SIMILAR TO '(b|c)%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO 'abc'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO 'a'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO '%(b|d)%'::mvarchar AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'abc'::mvarchar SIMILAR TO '(b|c)%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar ESCAPE '#' AS "true";
+ true
+------
+ t
+(1 row)
+
+-- index support
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+set enable_seqscan = off;
+explain (costs off)
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ QUERY PLAN
+------------------------------------------------------------------------------
+ Sort
+ Sort Key: chcol USING &<
+ -> Index Only Scan using qq on ch
+ Index Cond: ((chcol >= 'aB'::mvarchar) AND (chcol < 'aC'::mvarchar))
+ Filter: (chcol ~~ 'aB_d'::mvarchar)
+(5 rows)
+
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+(2 rows)
+
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+ chcol
+----------------------------------
+ AbcD
+ abcd
+ abcz
+(3 rows)
+
+set enable_seqscan = on;
+create table testt (f1 mchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ QUERY PLAN
+---------------------------------------------------
+ Index Only Scan using testindex on testt
+ Filter: ((f1)::mvarchar ~~ 'Abc\\-%'::mvarchar)
+(2 rows)
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+set enable_seqscan = on;
+drop table testt;
+create table testt (f1 mvarchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E' %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+ 0000000001
+ 0000000002
+(4 rows)
+
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ QUERY PLAN
+----------------------------------------------------------------------
+ Index Only Scan using testindex on testt
+ Index Cond: ((f1 >= 'Abc-'::mvarchar) AND (f1 < 'Abc.'::mvarchar))
+ Filter: ((f1)::mvarchar ~~ 'Abc\\-%'::mvarchar)
+(3 rows)
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+ f1
+------------
+ Abc-000001
+ Abc-000002
+(2 rows)
+
+select * from testt where f1::mchar like E' %'::mchar;
+ f1
+------------
+ 0000000001
+ 0000000002
+ Abc-000001
+ Abc-000002
+(4 rows)
+
+set enable_seqscan = on;
+drop table testt;
+CREATE TABLE test ( code mchar(5) NOT NULL );
+insert into test values('1111 ');
+insert into test values('111 ');
+insert into test values('11 ');
+insert into test values('1 ');
+SELECT * FROM test WHERE code LIKE ('% ');
+ code
+-------
+ 1
+(1 row)
+
+set escape_string_warning = off;
+SELECT CASE WHEN ('_'::text SIMILAR TO '[\\_]'::text ESCAPE '\\'::text) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+SELECT CASE WHEN ('_'::mchar SIMILAR TO '[\\_]'::mchar ESCAPE '\\'::mchar) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+SELECT CASE WHEN ('_'::mvarchar SIMILAR TO '[\\_]'::mvarchar ESCAPE '\\'::mvarchar) THEN TRUE ELSE FALSE END ;
+ case
+------
+ t
+(1 row)
+
+reset escape_string_warning;
+reset standard_conforming_strings;
diff --git a/contrib/mchar/expected/mchar.out b/contrib/mchar/expected/mchar.out
new file mode 100644
index 00000000000..8f2009c50d0
--- /dev/null
+++ b/contrib/mchar/expected/mchar.out
@@ -0,0 +1,382 @@
+-- I/O tests
+select '1'::mchar;
+ mchar
+-------
+ 1
+(1 row)
+
+select '2 '::mchar;
+ mchar
+-------
+ 2
+(1 row)
+
+select '10 '::mchar;
+ mchar
+-------
+ 10
+(1 row)
+
+select '1'::mchar(2);
+ mchar
+-------
+ 1
+(1 row)
+
+select '2 '::mchar(2);
+ mchar
+-------
+ 2
+(1 row)
+
+select '3 '::mchar(2);
+ mchar
+-------
+ 3
+(1 row)
+
+select '10 '::mchar(2);
+ mchar
+-------
+ 10
+(1 row)
+
+select ' '::mchar(10);
+ mchar
+------------
+
+(1 row)
+
+select ' '::mchar;
+ mchar
+-------
+
+(1 row)
+
+-- operations & functions
+select length('1'::mchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mchar);
+ length
+--------
+ 2
+(1 row)
+
+select length('1'::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('3 '::mchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mchar(2));
+ length
+--------
+ 2
+(1 row)
+
+select length(' '::mchar(10));
+ length
+--------
+ 0
+(1 row)
+
+select length(' '::mchar);
+ length
+--------
+ 0
+(1 row)
+
+select 'asd'::mchar(10) || '>'::mchar(10);
+ ?column?
+----------------------
+ asd >
+(1 row)
+
+select length('asd'::mchar(10) || '>'::mchar(10));
+ length
+--------
+ 11
+(1 row)
+
+select 'asd'::mchar(2) || '>'::mchar(10);
+ ?column?
+--------------
+ as>
+(1 row)
+
+select length('asd'::mchar(2) || '>'::mchar(10));
+ length
+--------
+ 3
+(1 row)
+
+-- Comparisons
+select 'asdf'::mchar = 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar = 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(3);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar < 'aSdf'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf '::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar < 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar <= 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf '::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mchar >= 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf '::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mchar > 'aSdf 1'::mchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select max(ch) from chvch;
+ max
+--------------
+ One space
+(1 row)
+
+select min(ch) from chvch;
+ min
+--------------
+ 1 space
+(1 row)
+
+select substr('1234567890'::mchar, 3) = '34567890' as "34567890";
+ 34567890
+----------
+ f
+(1 row)
+
+select substr('1234567890'::mchar, 4, 3) = '456' as "456";
+ 456
+-----
+ t
+(1 row)
+
+select lower('asdfASDF'::mchar);
+ lower
+----------
+ asdfasdf
+(1 row)
+
+select upper('asdfASDF'::mchar);
+ upper
+----------
+ ASDFASDF
+(1 row)
+
+select 'asd'::mchar == 'aSd'::mchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asd'::mchar == 'aCd'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asd'::mchar == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL == 'aCd'::mchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::mchar == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
+--Note: here we use different space symbols, be carefull to copy it!
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+ v | count
+-------+-------
+ 4 242 | 2
+ aSDF | 2
+(2 rows)
+
+set enable_hashagg=off;
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+ v | count
+-------+-------
+ 4 242 | 2
+ aSDF | 2
+(2 rows)
+
+reset enable_hashagg;
diff --git a/contrib/mchar/expected/mm.out b/contrib/mchar/expected/mm.out
new file mode 100644
index 00000000000..c5b36c21611
--- /dev/null
+++ b/contrib/mchar/expected/mm.out
@@ -0,0 +1,855 @@
+select 'asd'::mchar::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar;
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar;
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar;
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar(2);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mchar(2)::mvarchar(5);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd '::mchar(2)::mvarchar(5);
+ mvarchar
+----------
+ as
+(1 row)
+
+select 'asd'::mchar(5)::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd '::mchar(5)::mvarchar(5);
+ mvarchar
+----------
+ asd
+(1 row)
+
+select 'asd'::mvarchar::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar;
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar;
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar;
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar(2);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mvarchar(2)::mchar(5);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd '::mvarchar(2)::mchar(5);
+ mchar
+-------
+ as
+(1 row)
+
+select 'asd'::mvarchar(5)::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd '::mvarchar(5)::mchar(5);
+ mchar
+-------
+ asd
+(1 row)
+
+select 'asd'::mchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 ';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 '::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mchar || '123 '::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mvarchar || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 ';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 '::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mchar(2) || '123 '::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mvarchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123'::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123'::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 ';
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 '::mchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd '::mvarchar(2) || '123 '::mvarchar;
+ ?column?
+----------
+ as123
+(1 row)
+
+select 'asd'::mchar(4) || '143';
+ ?column?
+----------
+ asd 143
+(1 row)
+
+select 'asd'::mchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mchar(4) || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123';
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd'::mvarchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 ';
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mvarchar;
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123 '::mvarchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 'asd '::mvarchar(4) || '123'::mvarchar(4);
+ ?column?
+----------
+ asd 123
+(1 row)
+
+select 1 where 'f'::mchar='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f'::mchar(2)='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'f '::mchar(2)='F '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar='FOO'::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar='FOO '::mvarchar;
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar='FOO'::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar='FOO '::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar='FOO'::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar='FOO '::mvarchar(2);
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar;
+ ?column?
+----------
+(0 rows)
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar(2);
+ ?column?
+----------
+ 1
+(1 row)
+
+Select 'f'::mchar(1) Union Select 'o'::mvarchar(1);
+ mchar
+-------
+ f
+ o
+(2 rows)
+
+Select 'f'::mvarchar(1) Union Select 'o'::mchar(1);
+ mvarchar
+----------
+ f
+ o
+(2 rows)
+
+select * from chvch where ch=vch;
+ ch | vch
+--------------+------------
+ No spaces | No spaces
+ One space | One space
+ 1 space | 1 space
+(3 rows)
+
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+ chcol
+----------------------------------
+ ee
+ Ee
+(2 rows)
+
+create index qq on ch (chcol);
+set enable_seqscan=off;
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+ chcol
+----------------------------------
+ ee
+ Ee
+(2 rows)
+
+set enable_seqscan=on;
+--\copy chvch to 'results/chvch.dump' binary
+--truncate table chvch;
+--\copy chvch from 'results/chvch.dump' binary
+--test joins
+CREATE TABLE a (mchar2 MCHAR(2) NOT NULL);
+CREATE TABLE c (mvarchar255 mvarchar NOT NULL);
+SELECT * FROM a, c WHERE mchar2 = mvarchar255;
+ mchar2 | mvarchar255
+--------+-------------
+(0 rows)
+
+SELECT * FROM a, c WHERE mvarchar255 = mchar2;
+ mchar2 | mvarchar255
+--------+-------------
+(0 rows)
+
+DROP TABLE a;
+DROP TABLE c;
+select * from (values
+ ('е'::mchar),('ё'),('еа'),('еб'),('ее'),('еж'),('ёа'),('ёб'),('ёё'),('ёж'),('ёе'),('её'))
+ z order by 1;
+ column1
+---------
+ е
+ ё
+ еа
+ ёа
+ еб
+ ёб
+ ее
+ её
+ ёе
+ ёё
+ еж
+ ёж
+(12 rows)
+
+select 'ё'::mchar = 'е';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'Ё'::mchar = 'Е';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'й'::mchar = 'и';
+ ?column?
+----------
+ f
+(1 row)
+
+select 'Й'::mchar = 'И';
+ ?column?
+----------
+ f
+(1 row)
+
+select mvarchar_icase_cmp('ёа','еб'), mvarchar_icase_cmp('еб','ё'),
+ mvarchar_icase_cmp('ё', 'ёа');
+ mvarchar_icase_cmp | mvarchar_icase_cmp | mvarchar_icase_cmp
+--------------------+--------------------+--------------------
+ -1 | 1 | -1
+(1 row)
+
diff --git a/contrib/mchar/expected/mvarchar.out b/contrib/mchar/expected/mvarchar.out
new file mode 100644
index 00000000000..5c866b43e71
--- /dev/null
+++ b/contrib/mchar/expected/mvarchar.out
@@ -0,0 +1,363 @@
+-- I/O tests
+select '1'::mvarchar;
+ mvarchar
+----------
+ 1
+(1 row)
+
+select '2 '::mvarchar;
+ mvarchar
+----------
+ 2
+(1 row)
+
+select '10 '::mvarchar;
+ mvarchar
+--------------
+ 10
+(1 row)
+
+select '1'::mvarchar(2);
+ mvarchar
+----------
+ 1
+(1 row)
+
+select '2 '::mvarchar(2);
+ mvarchar
+----------
+ 2
+(1 row)
+
+select '3 '::mvarchar(2);
+ mvarchar
+----------
+ 3
+(1 row)
+
+select '10 '::mvarchar(2);
+ mvarchar
+----------
+ 10
+(1 row)
+
+select ' '::mvarchar(10);
+ mvarchar
+------------
+
+(1 row)
+
+select ' '::mvarchar;
+ mvarchar
+--------------------
+
+(1 row)
+
+-- operations & functions
+select length('1'::mvarchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mvarchar);
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mvarchar);
+ length
+--------
+ 2
+(1 row)
+
+select length('1'::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('2 '::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('3 '::mvarchar(2));
+ length
+--------
+ 1
+(1 row)
+
+select length('10 '::mvarchar(2));
+ length
+--------
+ 2
+(1 row)
+
+select length(' '::mvarchar(10));
+ length
+--------
+ 0
+(1 row)
+
+select length(' '::mvarchar);
+ length
+--------
+ 0
+(1 row)
+
+select 'asd'::mvarchar(10) || '>'::mvarchar(10);
+ ?column?
+----------
+ asd>
+(1 row)
+
+select length('asd'::mvarchar(10) || '>'::mvarchar(10));
+ length
+--------
+ 4
+(1 row)
+
+select 'asd'::mvarchar(2) || '>'::mvarchar(10);
+ ?column?
+----------
+ as>
+(1 row)
+
+select length('asd'::mvarchar(2) || '>'::mvarchar(10));
+ length
+--------
+ 3
+(1 row)
+
+-- Comparisons
+select 'asdf'::mvarchar = 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(3);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf '::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf '::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf '::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(4);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(5);
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(6);
+ ?column?
+----------
+ f
+(1 row)
+
+select max(vch) from chvch;
+ max
+------------
+ One space
+(1 row)
+
+select min(vch) from chvch;
+ min
+----------
+ 1 space
+(1 row)
+
+select substr('1234567890'::mvarchar, 3) = '34567890' as "34567890";
+ 34567890
+----------
+ f
+(1 row)
+
+select substr('1234567890'::mvarchar, 4, 3) = '456' as "456";
+ 456
+-----
+ t
+(1 row)
+
+select lower('asdfASDF'::mvarchar);
+ lower
+----------
+ asdfasdf
+(1 row)
+
+select upper('asdfASDF'::mvarchar);
+ upper
+----------
+ ASDFASDF
+(1 row)
+
+select 'asd'::mvarchar == 'aSd'::mvarchar;
+ ?column?
+----------
+ t
+(1 row)
+
+select 'asd'::mvarchar == 'aCd'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select 'asd'::mvarchar == NULL;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL == 'aCd'::mvarchar;
+ ?column?
+----------
+ f
+(1 row)
+
+select NULL::mvarchar == NULL;
+ ?column?
+----------
+ t
+(1 row)
+
diff --git a/contrib/mchar/mchar--2.0--2.1.sql b/contrib/mchar/mchar--2.0--2.1.sql
new file mode 100644
index 00000000000..a794772f376
--- /dev/null
+++ b/contrib/mchar/mchar--2.0--2.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.1'" to load this file. \quit
+
+-- Attach the core planner support function textlike_support to both LIKE
+-- implementations (they back the ~~ operators on mchar and mvarchar).
+ALTER FUNCTION mchar_like(mchar, mvarchar) SUPPORT textlike_support;
+ALTER FUNCTION mvarchar_like(mvarchar, mvarchar) SUPPORT textlike_support;
diff --git a/contrib/mchar/mchar--2.0.1--2.1.sql b/contrib/mchar/mchar--2.0.1--2.1.sql
new file mode 100644
index 00000000000..a794772f376
--- /dev/null
+++ b/contrib/mchar/mchar--2.0.1--2.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.1'" to load this file. \quit
+
+-- Attach the core planner support function textlike_support to both LIKE
+-- implementations (they back the ~~ operators on mchar and mvarchar).
+ALTER FUNCTION mchar_like(mchar, mvarchar) SUPPORT textlike_support;
+ALTER FUNCTION mvarchar_like(mvarchar, mvarchar) SUPPORT textlike_support;
diff --git a/contrib/mchar/mchar--2.1--2.2.sql b/contrib/mchar/mchar--2.1--2.2.sql
new file mode 100644
index 00000000000..98689671499
--- /dev/null
+++ b/contrib/mchar/mchar--2.1--2.2.sql
@@ -0,0 +1,27 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2'" to load this file. \quit
+
+-- similar_to_escape() overloads for mchar and mvarchar.  None of them is
+-- declared STRICT, so the C function is also called when an argument is NULL.
+-- NOTE(review): the one- and two-argument forms share one C symbol per type; confirm it checks its argument count.
+
+CREATE FUNCTION similar_to_escape(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
diff --git a/contrib/mchar/mchar--2.1.1--2.2.sql b/contrib/mchar/mchar--2.1.1--2.2.sql
new file mode 100644
index 00000000000..98689671499
--- /dev/null
+++ b/contrib/mchar/mchar--2.1.1--2.2.sql
@@ -0,0 +1,27 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2'" to load this file. \quit
+
+-- similar_to_escape() overloads for mchar and mvarchar.  None of them is
+-- declared STRICT, so the C function is also called when an argument is NULL.
+-- NOTE(review): the one- and two-argument forms share one C symbol per type; confirm it checks its argument count.
+
+CREATE FUNCTION similar_to_escape(mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mchar, mchar)
+RETURNS mchar
+AS 'MODULE_PATHNAME', 'mchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar)
+RETURNS mvarchar
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape'
+LANGUAGE C IMMUTABLE;
+
diff --git a/contrib/mchar/mchar--2.2--2.2.1.sql b/contrib/mchar/mchar--2.2--2.2.1.sql
new file mode 100644
index 00000000000..e663aa24a5d
--- /dev/null
+++ b/contrib/mchar/mchar--2.2--2.2.1.sql
@@ -0,0 +1,15 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION mchar UPDATE TO '2.2.1'" to load this file. \quit
+
+-- Register the planner support function and attach it to mvarchar(), the
+-- function behind the mvarchar-to-mvarchar length-coercion cast.
+CREATE OR REPLACE FUNCTION mvarchar_support(internal)
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE C IMMUTABLE RETURNS NULL ON NULL INPUT
+    PARALLEL SAFE;
+
+ALTER FUNCTION mvarchar(mvarchar, integer, boolean)
+    SUPPORT mvarchar_support;
+
+
diff --git a/contrib/mchar/mchar--2.2.1.sql b/contrib/mchar/mchar--2.2.1.sql
new file mode 100644
index 00000000000..2f975b64edd
--- /dev/null
+++ b/contrib/mchar/mchar--2.2.1.sql
@@ -0,0 +1,1352 @@
+\echo Use "CREATE EXTENSION mchar" to load this file. \quit
+
+-- Typmod handlers, text/binary I/O functions, and the mchar type itself
+
+CREATE FUNCTION mchartypmod_in(cstring[])  -- TYPMOD_IN handler, shared with mvarchar
+    RETURNS integer
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchartypmod_out(integer)  -- TYPMOD_OUT handler, shared with mvarchar
+    RETURNS cstring
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_in(cstring)  -- text input
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_out(mchar)  -- text output
+    RETURNS cstring
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_send(mchar)  -- binary output
+    RETURNS bytea
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_recv(internal)  -- binary input
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE TYPE mchar (
+    INPUT = mchar_in,
+    OUTPUT = mchar_out,
+    RECEIVE = mchar_recv,
+    SEND = mchar_send,
+    TYPMOD_IN = mchartypmod_in,
+    TYPMOD_OUT = mchartypmod_out,
+    INTERNALLENGTH = -1,  -- variable-length representation
+    STORAGE = extended
+);
+
+CREATE FUNCTION mchar(mchar, integer, boolean)  -- length coercion: (value, typmod, explicit?)
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE CAST (mchar AS mchar)  -- lets a declared mchar(n) length be applied to a value
+    WITH FUNCTION mchar(mchar, integer, boolean) AS IMPLICIT;
+
+CREATE FUNCTION mvarchar_in(cstring)  -- text input
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_out(mvarchar)  -- text output
+    RETURNS cstring
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_send(mvarchar)  -- binary output
+    RETURNS bytea
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_recv(internal)  -- binary input
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE TYPE mvarchar (
+    INPUT = mvarchar_in,
+    OUTPUT = mvarchar_out,
+    RECEIVE = mvarchar_recv,
+    SEND = mvarchar_send,
+    TYPMOD_IN = mchartypmod_in,  -- same typmod handlers as mchar
+    TYPMOD_OUT = mchartypmod_out,
+    INTERNALLENGTH = -1,  -- variable-length representation
+    STORAGE = extended
+);
+
+CREATE FUNCTION mvarchar_support(internal)  -- planner support function for mvarchar()
+    RETURNS internal
+    LANGUAGE C
+    IMMUTABLE STRICT PARALLEL SAFE
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar(mvarchar, integer, boolean)  -- length coercion: (value, typmod, explicit?)
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    SUPPORT mvarchar_support
+    AS 'MODULE_PATHNAME';
+
+CREATE CAST (mvarchar AS mvarchar)  -- lets a declared mvarchar(n) length be applied to a value
+    WITH FUNCTION mvarchar(mvarchar, integer, boolean) AS IMPLICIT;
+
+-- Basic string functions and the || operator for mchar
+
+CREATE FUNCTION length(mchar)
+    RETURNS integer
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mchar_length';
+
+CREATE FUNCTION upper(mchar)
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mchar_upper';
+
+CREATE FUNCTION lower(mchar)
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mchar_lower';
+
+CREATE FUNCTION mchar_hash(mchar)  -- NOTE(review): presumably for a hash operator class; none is visible here
+    RETURNS integer
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_concat(mchar, mchar)  -- implements mchar || mchar
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR || (
+    FUNCTION = mchar_concat,
+    LEFTARG = mchar,
+    RIGHTARG = mchar
+);
+
+CREATE FUNCTION mchar_like(mchar, mvarchar)  -- LIKE: mchar value against an mvarchar pattern
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    SUPPORT textlike_support
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_notlike(mchar, mvarchar)  -- NOT LIKE counterpart
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR ~~ (
+    FUNCTION = mchar_like,
+    LEFTARG = mchar,
+    RIGHTARG = mvarchar,
+    NEGATOR = '!~~',
+    RESTRICT = likesel,
+    JOIN = likejoinsel
+);
+
+CREATE OPERATOR !~~ (
+    FUNCTION = mchar_notlike,
+    LEFTARG = mchar,
+    RIGHTARG = mvarchar,
+    NEGATOR = '~~',
+    RESTRICT = nlikesel,
+    JOIN = nlikejoinsel
+);
+
+CREATE FUNCTION mchar_regexeq(mchar, mchar)  -- implements the ~ operator
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_regexne(mchar, mchar)  -- implements the !~ operator
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR ~ (
+    FUNCTION = mchar_regexeq,
+    LEFTARG = mchar,
+    RIGHTARG = mchar,
+    NEGATOR = '!~',
+    RESTRICT = regexeqsel,
+    JOIN = regexeqjoinsel
+);
+
+CREATE OPERATOR !~ (
+    FUNCTION = mchar_regexne,
+    LEFTARG = mchar,
+    RIGHTARG = mchar,
+    NEGATOR = '~',
+    RESTRICT = regexnesel,
+    JOIN = regexnejoinsel
+);
+
+CREATE FUNCTION similar_escape(mchar, mchar)  -- not STRICT: also called with NULL arguments
+    RETURNS mchar
+    LANGUAGE C IMMUTABLE CALLED ON NULL INPUT
+    AS 'MODULE_PATHNAME', 'mchar_similar_escape';
+
+CREATE FUNCTION length(mvarchar)
+    RETURNS integer
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mvarchar_length';
+
+CREATE FUNCTION upper(mvarchar)
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mvarchar_upper';
+
+CREATE FUNCTION lower(mvarchar)
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mvarchar_lower';
+
+CREATE FUNCTION mvarchar_hash(mvarchar)  -- NOTE(review): presumably for a hash operator class; none is visible here
+    RETURNS integer
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_concat(mvarchar, mvarchar)  -- implements mvarchar || mvarchar
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR || (
+    FUNCTION = mvarchar_concat,
+    LEFTARG = mvarchar,
+    RIGHTARG = mvarchar
+);
+
+CREATE FUNCTION mvarchar_like(mvarchar, mvarchar)  -- LIKE: value against pattern
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    SUPPORT textlike_support
+    AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION like_escape(mvarchar, mvarchar)  -- NOTE(review): presumably backs LIKE ... ESCAPE; confirm in the C source
+    RETURNS mvarchar
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME', 'mvarchar_like_escape';
+
+CREATE FUNCTION mvarchar_notlike(mvarchar, mvarchar)  -- NOT LIKE counterpart
+    RETURNS boolean
+    LANGUAGE C IMMUTABLE STRICT
+    AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR ~~ (
+    FUNCTION = mvarchar_like,
+    LEFTARG = mvarchar,
+    RIGHTARG = mvarchar,
+    NEGATOR = '!~~',
+    RESTRICT = likesel,
+    JOIN = likejoinsel
+);
+
+CREATE OPERATOR !~~ (
+    FUNCTION = mvarchar_notlike,
+    LEFTARG = mvarchar,
+    RIGHTARG = mvarchar,
+    NEGATOR = '~~',
+    RESTRICT = nlikesel,
+    JOIN = nlikejoinsel
+);
+
+CREATE FUNCTION mvarchar_regexeq(mvarchar, mvarchar) -- implements ~
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_regexne(mvarchar, mvarchar) -- implements !~
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR ~ ( -- regular-expression match for mvarchar; negated by !~
+ FUNCTION = mvarchar_regexeq,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ NEGATOR = '!~',
+ RESTRICT = regexeqsel,
+ JOIN = regexeqjoinsel
+);
+
+CREATE OPERATOR !~ ( -- regular-expression non-match for mvarchar; negated by ~
+ FUNCTION = mvarchar_regexne,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ NEGATOR = '~',
+ RESTRICT = regexnesel,
+ JOIN = regexnejoinsel
+);
+
+CREATE FUNCTION similar_escape(mvarchar, mvarchar) -- not declared STRICT, so it is also invoked with NULL arguments
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE
+AS 'MODULE_PATHNAME', 'mvarchar_similar_escape';
+
+CREATE FUNCTION substr (mchar, int4) -- two-argument substr() for mchar (C symbol mchar_substring_no_len)
+RETURNS mchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME', 'mchar_substring_no_len';
+
+CREATE FUNCTION substr (mchar, int4, int4) -- three-argument substr() for mchar (C symbol mchar_substring)
+RETURNS mchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME', 'mchar_substring';
+
+CREATE FUNCTION substr (mvarchar, int4) -- two-argument substr() for mvarchar (C symbol mvarchar_substring_no_len)
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME', 'mvarchar_substring_no_len';
+
+CREATE FUNCTION substr (mvarchar, int4, int4) -- three-argument substr() for mvarchar (C symbol mvarchar_substring)
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME', 'mvarchar_substring';
+
+-- Comparison functions and operators
+-- mchar vs. mchar
+
+CREATE FUNCTION mchar_icase_cmp(mchar, mchar) -- btree support function 1 of opclass mchar_icase_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_eq(mchar, mchar) -- implements = (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_ne(mchar, mchar) -- implements <> (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_lt(mchar, mchar) -- implements < (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_le(mchar, mchar) -- implements <= (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_gt(mchar, mchar) -- implements > (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_icase_ge(mchar, mchar) -- implements >= (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR < ( -- btree strategy 1 in opclass mchar_icase_ops
+ FUNCTION = mchar_icase_lt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > ( -- btree strategy 5 in opclass mchar_icase_ops
+ FUNCTION = mchar_icase_gt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- btree strategy 2 in opclass mchar_icase_ops
+ FUNCTION = mchar_icase_le,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR >= ( -- btree strategy 4 in opclass mchar_icase_ops
+ FUNCTION = mchar_icase_ge,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR = ( -- default equality for mchar: btree strategy 3 and hash strategy 1 of mchar_icase_ops
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES,
+ HASHES
+);
+
+CREATE OPERATOR <> ( -- negator of = (mchar, mchar); not a member of any operator class in this script
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mchar_case_cmp(mchar, mchar) -- btree support function 1 of opclass mchar_case_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_eq(mchar, mchar) -- implements &= (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_ne(mchar, mchar) -- implements &<> (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_lt(mchar, mchar) -- implements &< (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_le(mchar, mchar) -- implements &<= (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_gt(mchar, mchar) -- implements &> (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_case_ge(mchar, mchar) -- implements &>= (mchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR &< ( -- btree strategy 1 in opclass mchar_case_ops
+ FUNCTION = mchar_case_lt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> ( -- btree strategy 5 in opclass mchar_case_ops
+ FUNCTION = mchar_case_gt,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- btree strategy 2 in opclass mchar_case_ops
+ FUNCTION = mchar_case_le,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &>= ( -- btree strategy 4 in opclass mchar_case_ops
+ FUNCTION = mchar_case_ge,
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &= ( -- equality of btree opclass mchar_case_ops (strategy 3)
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR &<> ( -- negator of &= (mchar, mchar); not a member of any operator class in this script
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mchar_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- mvarchar vs. mvarchar
+
+CREATE FUNCTION mvarchar_icase_cmp(mvarchar, mvarchar) -- btree support function 1 of opclass mvarchar_icase_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_eq(mvarchar, mvarchar) -- implements = (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_ne(mvarchar, mvarchar) -- implements <> (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_lt(mvarchar, mvarchar) -- implements < (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_le(mvarchar, mvarchar) -- implements <= (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_gt(mvarchar, mvarchar) -- implements > (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_icase_ge(mvarchar, mvarchar) -- implements >= (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR < ( -- btree strategy 1 in opclass mvarchar_icase_ops
+ FUNCTION = mvarchar_icase_lt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > ( -- btree strategy 5 in opclass mvarchar_icase_ops
+ FUNCTION = mvarchar_icase_gt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- btree strategy 2 in opclass mvarchar_icase_ops
+ FUNCTION = mvarchar_icase_le,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR >= ( -- btree strategy 4 in opclass mvarchar_icase_ops
+ FUNCTION = mvarchar_icase_ge,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR = ( -- default equality for mvarchar: btree strategy 3 and hash strategy 1 of mvarchar_icase_ops
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES,
+ HASHES
+);
+
+CREATE OPERATOR <> ( -- negator of = (mvarchar, mvarchar); not a member of any operator class in this script
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mvarchar_case_cmp(mvarchar, mvarchar) -- btree support function 1 of opclass mvarchar_case_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_eq(mvarchar, mvarchar) -- implements &= (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_ne(mvarchar, mvarchar) -- implements &<> (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_lt(mvarchar, mvarchar) -- implements &< (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_le(mvarchar, mvarchar) -- implements &<= (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_gt(mvarchar, mvarchar) -- implements &> (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mvarchar_case_ge(mvarchar, mvarchar) -- implements &>= (mvarchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR &< ( -- btree strategy 1 in opclass mvarchar_case_ops
+ FUNCTION = mvarchar_case_lt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> ( -- btree strategy 5 in opclass mvarchar_case_ops
+ FUNCTION = mvarchar_case_gt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- btree strategy 2 in opclass mvarchar_case_ops
+ FUNCTION = mvarchar_case_le,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &>= ( -- btree strategy 4 in opclass mvarchar_case_ops
+ FUNCTION = mvarchar_case_ge,
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &= ( -- equality of btree opclass mvarchar_case_ops (strategy 3)
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR &<> ( -- negator of &= (mvarchar, mvarchar); not a member of any operator class in this script
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mvarchar_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- Cross-type comparison: mchar (left operand) vs. mvarchar (right operand)
+
+CREATE FUNCTION mc_mv_icase_cmp(mchar, mvarchar) -- cross-type btree support function 1 listed in opclass mchar_icase_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_eq(mchar, mvarchar) -- implements = (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_ne(mchar, mvarchar) -- implements <> (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_lt(mchar, mvarchar) -- implements < (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_le(mchar, mvarchar) -- implements <= (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_gt(mchar, mvarchar) -- implements > (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_icase_ge(mchar, mvarchar) -- implements >= (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR < ( -- cross-type btree strategy 1 in mchar_icase_ops; commutator is > (mvarchar, mchar)
+ FUNCTION = mc_mv_icase_lt,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > ( -- cross-type btree strategy 5 in mchar_icase_ops; commutator is < (mvarchar, mchar)
+ FUNCTION = mc_mv_icase_gt,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- cross-type btree strategy 2 in mchar_icase_ops; commutator is >= (mvarchar, mchar)
+ FUNCTION = mc_mv_icase_le,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR >= ( -- cross-type btree strategy 4 in mchar_icase_ops; commutator is <= (mvarchar, mchar)
+ FUNCTION = mc_mv_icase_ge,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR = ( -- cross-type equality, btree strategy 3 in mchar_icase_ops; commutator is = (mvarchar, mchar)
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR <> ( -- negator of = (mchar, mvarchar); commutator is <> (mvarchar, mchar)
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mc_mv_case_cmp(mchar, mvarchar) -- cross-type btree support function 1 listed in opclass mchar_case_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_eq(mchar, mvarchar) -- implements &= (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_ne(mchar, mvarchar) -- implements &<> (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_lt(mchar, mvarchar) -- implements &< (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_le(mchar, mvarchar) -- implements &<= (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_gt(mchar, mvarchar) -- implements &> (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mc_mv_case_ge(mchar, mvarchar) -- implements &>= (mchar, mvarchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR &< ( -- cross-type btree strategy 1 in mchar_case_ops; commutator is &> (mvarchar, mchar)
+ FUNCTION = mc_mv_case_lt,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> ( -- cross-type btree strategy 5 in mchar_case_ops; commutator is &< (mvarchar, mchar)
+ FUNCTION = mc_mv_case_gt,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- cross-type btree strategy 2 in mchar_case_ops; commutator is &>= (mvarchar, mchar)
+ FUNCTION = mc_mv_case_le,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &>= ( -- cross-type btree strategy 4 in mchar_case_ops; commutator is &<= (mvarchar, mchar)
+ FUNCTION = mc_mv_case_ge,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &= ( -- cross-type equality, btree strategy 3 in mchar_case_ops; commutator is &= (mvarchar, mchar)
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR &<> ( -- negator of &= (mchar, mvarchar); commutator is &<> (mvarchar, mchar)
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = mc_mv_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- Cross-type comparison: mvarchar (left operand) vs. mchar (right operand)
+
+CREATE FUNCTION mv_mc_icase_cmp(mvarchar, mchar) -- cross-type btree support function 1 listed in opclass mvarchar_icase_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_eq(mvarchar, mchar) -- implements = (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_ne(mvarchar, mchar) -- implements <> (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_lt(mvarchar, mchar) -- implements < (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_le(mvarchar, mchar) -- implements <= (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_gt(mvarchar, mchar) -- implements > (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_icase_ge(mvarchar, mchar) -- implements >= (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR < ( -- cross-type btree strategy 1 in mvarchar_icase_ops; commutator is > (mchar, mvarchar)
+ FUNCTION = mv_mc_icase_lt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '>',
+ NEGATOR = '>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR > ( -- cross-type btree strategy 5 in mvarchar_icase_ops; commutator is < (mchar, mvarchar)
+ FUNCTION = mv_mc_icase_gt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '<',
+ NEGATOR = '<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR <= ( -- cross-type btree strategy 2 in mvarchar_icase_ops; commutator is >= (mchar, mvarchar)
+ FUNCTION = mv_mc_icase_le,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '>=',
+ NEGATOR = '>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR >= ( -- cross-type btree strategy 4 in mvarchar_icase_ops; commutator is <= (mchar, mvarchar)
+ FUNCTION = mv_mc_icase_ge,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '<=',
+ NEGATOR = '<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR = ( -- cross-type equality, btree strategy 3 in mvarchar_icase_ops; commutator is = (mchar, mvarchar)
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_eq,
+ COMMUTATOR = '=',
+ NEGATOR = '<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR <> ( -- negator of = (mvarchar, mchar); commutator is <> (mchar, mvarchar)
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_icase_ne,
+ COMMUTATOR = '<>',
+ NEGATOR = '=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+CREATE FUNCTION mv_mc_case_cmp(mvarchar, mchar) -- cross-type btree support function 1 listed in opclass mvarchar_case_ops
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_eq(mvarchar, mchar) -- implements &= (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_ne(mvarchar, mchar) -- implements &<> (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_lt(mvarchar, mchar) -- implements &< (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_le(mvarchar, mchar) -- implements &<= (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_gt(mvarchar, mchar) -- implements &> (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mv_mc_case_ge(mvarchar, mchar) -- implements &>= (mvarchar, mchar)
+RETURNS bool
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+
+CREATE OPERATOR &< ( -- cross-type btree strategy 1 in mvarchar_case_ops; commutator is &> (mchar, mvarchar)
+ FUNCTION = mv_mc_case_lt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&>',
+ NEGATOR = '&>=',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &> ( -- cross-type btree strategy 5 in mvarchar_case_ops; commutator is &< (mchar, mvarchar)
+ FUNCTION = mv_mc_case_gt,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&<',
+ NEGATOR = '&<=',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &<= ( -- cross-type btree strategy 2 in mvarchar_case_ops; commutator is &>= (mchar, mvarchar)
+ FUNCTION = mv_mc_case_le,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&>=',
+ NEGATOR = '&>',
+ RESTRICT = scalarltsel,
+ JOIN = scalarltjoinsel
+);
+
+CREATE OPERATOR &>= ( -- cross-type btree strategy 4 in mvarchar_case_ops; commutator is &<= (mchar, mvarchar)
+ FUNCTION = mv_mc_case_ge,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ COMMUTATOR = '&<=',
+ NEGATOR = '&<',
+ RESTRICT = scalargtsel,
+ JOIN = scalargtjoinsel
+);
+
+CREATE OPERATOR &= ( -- cross-type equality, btree strategy 3 in mvarchar_case_ops; commutator is &= (mchar, mvarchar)
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_eq,
+ COMMUTATOR = '&=',
+ NEGATOR = '&<>',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ -- MERGES replaces the obsolete SORT1/SORT2 options, which PostgreSQL reads only as an implicit MERGES
+ MERGES
+);
+
+CREATE OPERATOR &<> ( -- negator of &= (mvarchar, mchar); commutator is &<> (mchar, mvarchar)
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar,
+ PROCEDURE = mv_mc_case_ne,
+ COMMUTATOR = '&<>',
+ NEGATOR = '&=',
+ RESTRICT = neqsel,
+ JOIN = neqjoinsel
+);
+
+-- Mixed mchar/mvarchar operations: concatenation and casts
+
+CREATE FUNCTION mchar_mvarchar_concat(mchar, mvarchar) -- implements || (mchar, mvarchar); result type is mvarchar
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR || ( -- mchar || mvarchar
+ FUNCTION = mchar_mvarchar_concat,
+ LEFTARG = mchar,
+ RIGHTARG = mvarchar
+);
+
+CREATE FUNCTION mvarchar_mchar_concat(mvarchar, mchar) -- implements || (mvarchar, mchar); result type is mvarchar
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE OPERATOR || ( -- mvarchar || mchar
+ FUNCTION = mvarchar_mchar_concat,
+ LEFTARG = mvarchar,
+ RIGHTARG = mchar
+);
+
+CREATE FUNCTION mvarchar_mchar(mvarchar, integer, boolean) -- cast function: (value, target typmod, explicit-cast flag)
+RETURNS mchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE CAST (mvarchar AS mchar) -- implicit cast mvarchar -> mchar
+WITH FUNCTION mvarchar_mchar(mvarchar, integer, boolean) AS IMPLICIT;
+
+CREATE FUNCTION mchar_mvarchar(mchar, integer, boolean) -- cast function: (value, target typmod, explicit-cast flag)
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE CAST (mchar AS mvarchar) -- implicit cast mchar -> mvarchar
+WITH FUNCTION mchar_mvarchar(mchar, integer, boolean) AS IMPLICIT;
+
+-- Aggregates
+
+CREATE FUNCTION mchar_larger(mchar, mchar) -- state transition function of max(mchar)
+RETURNS mchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+-- max(mchar): single-argument aggregate; SORTOP names the sort operator associated with it
+CREATE AGGREGATE max (mchar) (
+ SFUNC = mchar_larger,
+ STYPE = mchar,
+ SORTOP = '>'
+);
+
+CREATE FUNCTION mchar_smaller(mchar, mchar) -- state transition function of min(mchar)
+RETURNS mchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+-- min(mchar): single-argument aggregate; SORTOP names the sort operator associated with it
+CREATE AGGREGATE min (mchar) (
+ SFUNC = mchar_smaller,
+ STYPE = mchar,
+ SORTOP = '<'
+);
+
+CREATE FUNCTION mvarchar_larger(mvarchar, mvarchar) -- state transition function of max(mvarchar)
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+-- max(mvarchar): single-argument aggregate; SORTOP names the sort operator associated with it
+CREATE AGGREGATE max (mvarchar) (
+ SFUNC = mvarchar_larger,
+ STYPE = mvarchar,
+ SORTOP = '>'
+);
+
+CREATE FUNCTION mvarchar_smaller(mvarchar, mvarchar) -- state transition function of min(mvarchar)
+RETURNS mvarchar
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+-- min(mvarchar): single-argument aggregate; SORTOP names the sort operator associated with it
+CREATE AGGREGATE min (mvarchar) (
+ SFUNC = mvarchar_smaller,
+ STYPE = mvarchar,
+ SORTOP = '<'
+);
+
+-- B-tree and hash index support
+CREATE OPERATOR FAMILY icase_ops USING btree; -- shared by mchar_icase_ops and mvarchar_icase_ops
+CREATE OPERATOR FAMILY case_ops USING btree; -- shared by mchar_case_ops and mvarchar_case_ops
+
+CREATE OPERATOR CLASS mchar_icase_ops -- default btree opclass for mchar
+DEFAULT FOR TYPE mchar USING btree FAMILY icase_ops AS
+ OPERATOR 1 < (mchar, mchar),
+ OPERATOR 2 <= (mchar, mchar),
+ OPERATOR 3 = (mchar, mchar),
+ OPERATOR 4 >= (mchar, mchar),
+ OPERATOR 5 > (mchar, mchar),
+ FUNCTION 1 mchar_icase_cmp(mchar, mchar),
+ OPERATOR 1 < (mchar, mvarchar), -- cross-type members start here
+ OPERATOR 2 <= (mchar, mvarchar),
+ OPERATOR 3 = (mchar, mvarchar),
+ OPERATOR 4 >= (mchar, mvarchar),
+ OPERATOR 5 > (mchar, mvarchar),
+ FUNCTION 1 mc_mv_icase_cmp(mchar, mvarchar);
+
+CREATE OPERATOR CLASS mchar_case_ops -- non-default btree opclass for mchar built on the &-prefixed operators
+FOR TYPE mchar USING btree FAMILY case_ops AS
+ OPERATOR 1 &< (mchar, mchar),
+ OPERATOR 2 &<= (mchar, mchar),
+ OPERATOR 3 &= (mchar, mchar),
+ OPERATOR 4 &>= (mchar, mchar),
+ OPERATOR 5 &> (mchar, mchar),
+ FUNCTION 1 mchar_case_cmp(mchar, mchar),
+ OPERATOR 1 &< (mchar, mvarchar), -- cross-type members start here
+ OPERATOR 2 &<= (mchar, mvarchar),
+ OPERATOR 3 &= (mchar, mvarchar),
+ OPERATOR 4 &>= (mchar, mvarchar),
+ OPERATOR 5 &> (mchar, mvarchar),
+ FUNCTION 1 mc_mv_case_cmp(mchar, mvarchar);
+
+CREATE OPERATOR CLASS mchar_icase_ops -- default hash opclass for mchar (same name, different access method)
+DEFAULT FOR TYPE mchar USING hash AS
+ OPERATOR 1 = (mchar, mchar),
+ FUNCTION 1 mchar_hash(mchar);
+
+CREATE OPERATOR CLASS mvarchar_icase_ops -- default btree opclass for mvarchar
+DEFAULT FOR TYPE mvarchar USING btree FAMILY icase_ops AS
+ OPERATOR 1 < (mvarchar, mvarchar),
+ OPERATOR 2 <= (mvarchar, mvarchar),
+ OPERATOR 3 = (mvarchar, mvarchar),
+ OPERATOR 4 >= (mvarchar, mvarchar),
+ OPERATOR 5 > (mvarchar, mvarchar),
+ FUNCTION 1 mvarchar_icase_cmp(mvarchar, mvarchar),
+ OPERATOR 1 < (mvarchar, mchar), -- cross-type members start here
+ OPERATOR 2 <= (mvarchar, mchar),
+ OPERATOR 3 = (mvarchar, mchar),
+ OPERATOR 4 >= (mvarchar, mchar),
+ OPERATOR 5 > (mvarchar, mchar),
+ FUNCTION 1 mv_mc_icase_cmp(mvarchar, mchar);
+
+CREATE OPERATOR CLASS mvarchar_case_ops -- non-default btree opclass for mvarchar built on the &-prefixed operators
+FOR TYPE mvarchar USING btree FAMILY case_ops AS
+ OPERATOR 1 &< (mvarchar, mvarchar),
+ OPERATOR 2 &<= (mvarchar, mvarchar),
+ OPERATOR 3 &= (mvarchar, mvarchar),
+ OPERATOR 4 &>= (mvarchar, mvarchar),
+ OPERATOR 5 &> (mvarchar, mvarchar),
+ FUNCTION 1 mvarchar_case_cmp(mvarchar, mvarchar),
+ OPERATOR 1 &< (mvarchar, mchar), -- cross-type members start here
+ OPERATOR 2 &<= (mvarchar, mchar),
+ OPERATOR 3 &= (mvarchar, mchar),
+ OPERATOR 4 &>= (mvarchar, mchar),
+ OPERATOR 5 &> (mvarchar, mchar),
+ FUNCTION 1 mv_mc_case_cmp(mvarchar, mchar);
+
+CREATE OPERATOR CLASS mvarchar_icase_ops -- default hash opclass for mvarchar (same name, different access method)
+DEFAULT FOR TYPE mvarchar USING hash AS
+ OPERATOR 1 = (mvarchar, mvarchar),
+ FUNCTION 1 mvarchar_hash(mvarchar);
+
+
+-- Index support for LIKE
+
+CREATE FUNCTION mchar_pattern_fixed_prefix(internal, internal, internal) -- takes only "internal" arguments, so it is not callable from SQL
+RETURNS int4
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION mchar_greaterstring(internal) -- takes and returns "internal", so it is not callable from SQL
+RETURNS internal
+LANGUAGE C IMMUTABLE STRICT
+AS 'MODULE_PATHNAME';
+
+CREATE FUNCTION isfulleq_mchar(mchar, mchar) -- plain CREATE, like the rest of the script: never replace a pre-existing function
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE FUNCTION fullhash_mchar(mchar) -- hash support function 1 of opclass mchar_fill_ops; also called for NULL input
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == ( -- implemented by isfulleq_mchar, which is declared CALLED ON NULL INPUT
+ LEFTARG = mchar,
+ RIGHTARG = mchar,
+ PROCEDURE = isfulleq_mchar,
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES
+);
+
+CREATE OPERATOR CLASS mchar_fill_ops -- non-default hash opclass for mchar built on ==
+ FOR TYPE mchar USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_mchar(mchar);
+
+CREATE FUNCTION isfulleq_mvarchar(mvarchar, mvarchar) -- plain CREATE, like the rest of the script: never replace a pre-existing function
+RETURNS bool AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+CREATE FUNCTION fullhash_mvarchar(mvarchar) -- hash support function 1 of opclass mvarchar_fill_ops; also called for NULL input
+RETURNS int4 AS 'MODULE_PATHNAME'
+LANGUAGE C CALLED ON NULL INPUT IMMUTABLE;
+
+
+CREATE OPERATOR == ( -- implemented by isfulleq_mvarchar, which is declared CALLED ON NULL INPUT
+ LEFTARG = mvarchar,
+ RIGHTARG = mvarchar,
+ PROCEDURE = isfulleq_mvarchar,
+ COMMUTATOR = '==',
+ RESTRICT = eqsel,
+ JOIN = eqjoinsel,
+ HASHES
+);
+
+CREATE OPERATOR CLASS mvarchar_fill_ops -- non-default hash opclass for mvarchar built on ==
+ FOR TYPE mvarchar USING hash AS
+ OPERATOR 1 ==,
+ FUNCTION 1 fullhash_mvarchar(mvarchar);
+
+CREATE FUNCTION similar_to_escape(mchar) -- NOTE(review): shares the C symbol of the 2-argument form and is not STRICT; confirm the C code checks its argument count
+ RETURNS mchar
+ LANGUAGE C IMMUTABLE
+ AS 'MODULE_PATHNAME', 'mchar_similar_escape';
+
+CREATE FUNCTION similar_to_escape(mchar, mchar) -- not STRICT: also invoked with NULL arguments
+ RETURNS mchar
+ LANGUAGE C IMMUTABLE
+ AS 'MODULE_PATHNAME', 'mchar_similar_escape';
+
+CREATE FUNCTION similar_to_escape(mvarchar) -- NOTE(review): shares the C symbol of the 2-argument form and is not STRICT; confirm the C code checks its argument count
+ RETURNS mvarchar
+ LANGUAGE C IMMUTABLE
+ AS 'MODULE_PATHNAME', 'mvarchar_similar_escape';
+
+CREATE FUNCTION similar_to_escape(mvarchar, mvarchar) -- not STRICT: also invoked with NULL arguments
+ RETURNS mvarchar
+ LANGUAGE C IMMUTABLE
+ AS 'MODULE_PATHNAME', 'mvarchar_similar_escape';
+
diff --git a/contrib/mchar/mchar--unpackaged--2.0.sql b/contrib/mchar/mchar--unpackaged--2.0.sql
new file mode 100644
index 00000000000..1acc4ccec1e
--- /dev/null
+++ b/contrib/mchar/mchar--unpackaged--2.0.sql
@@ -0,0 +1,404 @@
+\echo Use "CREATE EXTENSION mchar FROM unpackaged" to load this file. \quit
+-- NOTE(review): PostgreSQL 13 removed CREATE EXTENSION ... FROM; confirm whether this script is still reachable on the targeted server version.
+-- I/O functions, the mchar and mvarchar types, and their length-coercion casts: attach pre-existing loose objects to the extension
+
+ALTER EXTENSION mchar ADD FUNCTION mchartypmod_in(cstring[]);
+
+ALTER EXTENSION mchar ADD FUNCTION mchartypmod_out(int4);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_in(cstring);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_out(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_send(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_recv(internal);
+
+ALTER EXTENSION mchar ADD TYPE mchar;
+
+ALTER EXTENSION mchar ADD FUNCTION mchar(mchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mchar as mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_in(cstring);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_out(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_send(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_recv(internal);
+
+ALTER EXTENSION mchar ADD TYPE mvarchar;
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar(mvarchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mvarchar as mvarchar);
+
+-- String operations and functions: length/upper/lower, hashing, concatenation, LIKE, regex, similar_escape, substr
+
+ALTER EXTENSION mchar ADD FUNCTION length(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION upper(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION lower(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_hash(mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_concat(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_like(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_notlike(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~~ (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~~ (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_regexeq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_regexne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~ (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~ (mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION similar_escape(mchar, mchar);
+-- mvarchar counterparts of the objects above
+ALTER EXTENSION mchar ADD FUNCTION length(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION upper(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION lower(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_hash(mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_concat(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_like(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION like_escape(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_notlike(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_regexeq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_regexne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR ~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR !~ (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION similar_escape(mvarchar, mvarchar);
+-- substr() overloads for both types
+ALTER EXTENSION mchar ADD FUNCTION substr (mchar, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mchar, int4, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mvarchar, int4);
+
+ALTER EXTENSION mchar ADD FUNCTION substr (mvarchar, int4, int4);
+
+-- Comparison functions and operators
+-- mchar vs. mchar: the mchar_icase_* functions and the plain comparison operators come first
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_cmp(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_eq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_ne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_lt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_le(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_gt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_icase_ge(mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mchar, mchar);
+-- mchar_case_* functions, followed by the &-prefixed operators
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_cmp(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_eq(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_ne(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_lt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_le(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_gt(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_case_ge(mchar, mchar);
+
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mchar, mchar);
+
+-- mvarchar vs. mvarchar: the mvarchar_icase_* functions and the plain comparison operators come first
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_cmp(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_eq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_ne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_lt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_le(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_gt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_icase_ge(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mvarchar, mvarchar);
+-- mvarchar_case_* functions, followed by the &-prefixed operators
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_cmp(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_eq(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_ne(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_lt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_le(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_gt(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_case_ge(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mvarchar, mvarchar);
+
+-- Cross-type comparison, mchar (left) vs. mvarchar (right): mc_mv_icase_* functions and plain operators first
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_cmp(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_eq(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_ne(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_lt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_le(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_gt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_icase_ge(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mchar, mvarchar);
+-- mc_mv_case_* functions, followed by the &-prefixed operators
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_cmp(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_eq(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_ne(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_lt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_le(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_gt(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mc_mv_case_ge(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mchar, mvarchar);
+
+-- MVARCHAR <> MCHAR
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_cmp(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_eq(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_ne(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_lt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_le(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_gt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_icase_ge(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR < (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR > (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR >= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR = (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR <> (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_cmp(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_eq(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_ne(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_lt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_le(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_gt(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mv_mc_case_ge(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &< (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &> (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &>= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &= (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR &<> (mvarchar, mchar);
+
+-- MCHAR - MVARCHAR operations
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_mvarchar_concat(mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_mchar_concat(mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR || (mvarchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_mchar(mvarchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mvarchar as mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_mvarchar(mchar, integer, boolean);
+
+ALTER EXTENSION mchar ADD CAST (mchar as mvarchar);
+
+-- Aggregates
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_larger(mchar, mchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE max (mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_smaller(mchar, mchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE min (mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_larger(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE max (mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION mvarchar_smaller(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD AGGREGATE min (mvarchar);
+
+-- B-tree and hash support
+ALTER EXTENSION mchar ADD OPERATOR FAMILY icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR FAMILY case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_icase_ops USING hash;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_icase_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_case_ops USING btree;
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_icase_ops USING hash;
+
+
+-- Index support for LIKE
+
+-- mchar_pattern_fixed_prefix may exist with a different number of arguments, so it is referenced by name only
+ALTER EXTENSION mchar ADD FUNCTION mchar_pattern_fixed_prefix;
+
+ALTER EXTENSION mchar ADD FUNCTION mchar_greaterstring(internal);
+
+ALTER EXTENSION mchar ADD FUNCTION isfulleq_mchar(mchar, mchar);
+
+ALTER EXTENSION mchar ADD FUNCTION fullhash_mchar(mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR == (mchar, mchar);
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mchar_fill_ops USING hash;
+
+ALTER EXTENSION mchar ADD FUNCTION isfulleq_mvarchar(mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD FUNCTION fullhash_mvarchar(mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR == (mvarchar, mvarchar);
+
+ALTER EXTENSION mchar ADD OPERATOR CLASS mvarchar_fill_ops USING hash;
+
+
diff --git a/contrib/mchar/mchar.control b/contrib/mchar/mchar.control
new file mode 100644
index 00000000000..02668a5d617
--- /dev/null
+++ b/contrib/mchar/mchar.control
@@ -0,0 +1,6 @@
+# mchar extension
+comment = 'SQL Server text type'
+default_version = '2.2.1'
+module_pathname = '$libdir/mchar'
+relocatable = true
+trusted = true
diff --git a/contrib/mchar/mchar.h b/contrib/mchar/mchar.h
new file mode 100644
index 00000000000..2bfd14004cd
--- /dev/null
+++ b/contrib/mchar/mchar.h
@@ -0,0 +1,64 @@
+#ifndef __MCHAR_H__
+#define __MCHAR_H__
+
+#include "postgres.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "varatt.h"
+
+typedef struct {
+ int32 len; /* first word of the value; sizes are read and written via VARSIZE()/SET_VARSIZE() */
+ int32 typmod; /* length modifier carried inside the value (accessed as ->typmod) */
+ UChar data[1]; /* payload in ICU UChar units; the real length comes from VARSIZE(), not from [1] */
+} MChar;
+/* Header size in bytes, payload size in bytes, and payload length in UChar units */
+#define MCHARHDRSZ offsetof(MChar, data)
+#define MCHARLENGTH(m) ( VARSIZE(m)-MCHARHDRSZ )
+#define UCHARLENGTH(m) ( MCHARLENGTH(m)/sizeof(UChar) )
+/* Datum <-> pointer conversions; these macros do not detoast */
+#define DatumGetMChar(m) ((MChar*)DatumGetPointer(m))
+#define MCharGetDatum(m) PointerGetDatum(m)
+/* Function-argument accessors: fetch argument n through PG_DETOAST_DATUM / PG_DETOAST_DATUM_COPY */
+#define PG_GETARG_MCHAR(n) ((MChar*) PG_DETOAST_DATUM(PG_GETARG_DATUM(n)))
+#define PG_GETARG_MCHAR_COPY(n) ((MChar*) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n)))
+
+#define PG_RETURN_MCHAR(m) PG_RETURN_POINTER(m)
+
+typedef struct {
+ int32 len; /* first word of the value; sizes are read and written via VARSIZE()/SET_VARSIZE() */
+ UChar data[1]; /* payload in ICU UChar units; the real length comes from VARSIZE(), not from [1] */
+} MVarChar;
+/* Header size in bytes, payload size in bytes, and payload length in UChar units */
+#define MVARCHARHDRSZ offsetof(MVarChar, data)
+#define MVARCHARLENGTH(m) ( VARSIZE(m)-MVARCHARHDRSZ )
+#define UVARCHARLENGTH(m) ( MVARCHARLENGTH(m)/sizeof(UChar) )
+/* Datum <-> pointer conversions; these macros do not detoast */
+#define DatumGetMVarChar(m) ((MVarChar*)DatumGetPointer(m))
+#define MVarCharGetDatum(m) PointerGetDatum(m)
+/* Function-argument accessors: fetch argument n through PG_DETOAST_DATUM / PG_DETOAST_DATUM_COPY */
+#define PG_GETARG_MVARCHAR(n) ((MVarChar*) PG_DETOAST_DATUM(PG_GETARG_DATUM(n)))
+#define PG_GETARG_MVARCHAR_COPY(n) ((MVarChar*) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n)))
+
+#define PG_RETURN_MVARCHAR(m) PG_RETURN_POINTER(m)
+
+
+int Char2UChar(const char * src, int srclen, UChar *dst); /* callers use the result as the number of UChar units stored in dst */
+int UChar2Char(const UChar * src, int srclen, char *dst); /* callers use the result as the number of bytes stored in dst */
+int UChar2Wchar(UChar * src, int srclen, pg_wchar *dst);
+int UCharCompare(UChar * a, int alen, UChar *b, int blen);
+int UCharCaseCompare(UChar * a, int alen, UChar *b, int blen); /* callers test the result with == 0 and < 0 */
+/* NOTE(review): presumably stores n blank UChar units at dst; callers use it to build padding - confirm against the definition */
+void FillWhiteSpace( UChar *dst, int n );
+/* NOTE(review): presumably the length of m without its trailing spaces - definitions are not in this header */
+int lengthWithoutSpaceVarChar(MVarChar *m);
+int lengthWithoutSpaceChar(MChar *m);
+/* SQL-callable (fmgr) functions exported from the module */
+extern PGDLLEXPORT Datum mchar_hash(PG_FUNCTION_ARGS);
+extern PGDLLEXPORT Datum mvarchar_hash(PG_FUNCTION_ARGS);
+/* Defined in mchar_like.c: compares c with the UChar form of ' ' only */
+int m_isspace(UChar c); /* is == ' ' */
+/* NOTE(review): presumably hashes len UChar units starting at s - confirm against the definition */
+Datum hash_uchar( UChar *s, int len );
+#endif
diff --git a/contrib/mchar/mchar_io.c b/contrib/mchar/mchar_io.c
new file mode 100644
index 00000000000..d6c2ac7d393
--- /dev/null
+++ b/contrib/mchar/mchar_io.c
@@ -0,0 +1,403 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include <utils/array.h>
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+PG_FUNCTION_INFO_V1(mchar_in);
+Datum mchar_in(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mchar_out);
+Datum mchar_out(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mchar);
+Datum mchar(PG_FUNCTION_ARGS);
+/* mvarchar counterparts of the input, output and length-coercion functions */
+PG_FUNCTION_INFO_V1(mvarchar_in);
+Datum mvarchar_in(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mvarchar_out);
+Datum mvarchar_out(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(mvarchar);
+Datum mvarchar(PG_FUNCTION_ARGS);
+/* planner support function; the prototype names the same symbol as its V1 record */
+PG_FUNCTION_INFO_V1(mvarchar_support);
+Datum mvarchar_support(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(mchartypmod_in);
+Datum mchartypmod_in(PG_FUNCTION_ARGS);
+Datum
+mchartypmod_in(PG_FUNCTION_ARGS) {
+    ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
+    int32      *tl;
+    int         n;
+    /* Typmod input for mchar/mvarchar: exactly one integer modifier, at least 1, is accepted. */
+    tl = ArrayGetIntegerTypmods(ta, &n);
+
+    if (n != 1)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                 errmsg("invalid type modifier")));
+    if (*tl < 1)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                 errmsg("length for type mchar/mvarchar must be at least 1")));
+    /* build the result Datum explicitly instead of returning a bare int32 */
+    PG_RETURN_INT32(*tl);
+}
+
+/* Typmod output: a positive modifier is rendered as "(N)", anything else as an empty string. */
+PG_FUNCTION_INFO_V1(mchartypmod_out);
+Datum mchartypmod_out(PG_FUNCTION_ARGS);
+Datum
+mchartypmod_out(PG_FUNCTION_ARGS) {
+    int32   modifier = PG_GETARG_INT32(0);
+    char   *text = (char *) palloc(64);
+
+    text[0] = '\0';
+    if (modifier > 0)
+        snprintf(text, 64, "(%d)", (int) modifier);
+
+    PG_RETURN_CSTRING( text );
+}
+
+static void
+mchar_strip( MChar * m, int atttypmod ) {
+ int maxlen;
+ /* Normalizes m in place: cuts it to at most atttypmod characters, stores the typmod, drops trailing spaces */
+ if ( atttypmod<=0 ) {
+ atttypmod =-1; /* every non-positive typmod is stored as -1 */
+ } else {
+ int charlen = u_countChar32( m->data, UCHARLENGTH(m) );
+ /* charlen counts code points, while i below is an offset in UChar units */
+ if ( charlen > atttypmod ) {
+ int i=0;
+ U16_FWD_N( m->data, i, UCHARLENGTH(m), atttypmod);
+ SET_VARSIZE( m, sizeof(UChar) * i + MCHARHDRSZ );
+ }
+ }
+
+ m->typmod = atttypmod;
+ /* shrink past trailing units for which m_isspace() is true */
+ maxlen = UCHARLENGTH(m);
+ while( maxlen>0 && m_isspace( m->data[ maxlen-1 ] ) )
+ maxlen--;
+
+ SET_VARSIZE(m, sizeof(UChar) * maxlen + MCHARHDRSZ);
+}
+
+
+Datum
+mchar_in(PG_FUNCTION_ARGS) {
+ char *s = PG_GETARG_CSTRING(0);
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ MChar *result;
+ int32 slen = strlen(s), rlen;
+ /* Text input function for mchar: check the string with pg_verifymbstr(), convert it to UChar units, apply the typmod */
+ pg_verifymbstr(s, slen, false);
+
+ result = (MChar*)palloc( MCHARHDRSZ + slen * sizeof(UChar) * 4 /* upper limit of length */ );
+ rlen = Char2UChar( s, slen, result->data );
+ SET_VARSIZE(result, sizeof(UChar) * rlen + MCHARHDRSZ);
+ /* mchar_strip() truncates to atttypmod characters, records the typmod and removes trailing spaces */
+ mchar_strip(result, atttypmod);
+
+ PG_RETURN_MCHAR(result);
+}
+
+Datum
+mchar_out(PG_FUNCTION_ARGS) {
+ MChar *in = PG_GETARG_MCHAR(0);
+ char *out;
+ size_t size, inlen = UCHARLENGTH(in);
+ size_t charlen = u_countChar32(in->data, inlen);
+ /* Text output function for mchar: converts the stored units and pads the text with spaces up to typmod characters */
+ Assert( in->typmod < 0 || charlen<=in->typmod );
+ size = ( in->typmod < 0 ) ? inlen : in->typmod;
+ size *= pg_database_encoding_max_length();
+ /* buffer bound: pg_database_encoding_max_length() bytes per counted character, plus one for the terminator */
+ out = (char*)palloc( size+1 );
+ size = UChar2Char( in->data, inlen, out );
+ /* the value is stored without trailing spaces (see mchar_strip), so the padding is re-created here */
+ if ( in->typmod>0 && charlen < in->typmod ) {
+ memset( out+size, ' ', in->typmod - charlen);
+ size += in->typmod - charlen;
+ }
+
+ out[size] = '\0';
+
+ PG_FREE_IF_COPY(in,0);
+
+ PG_RETURN_CSTRING(out);
+}
+
+/* Length coercion for mchar: returns a copy of argument 0 normalized to the typmod given in argument 1. */
+Datum
+mchar(PG_FUNCTION_ARGS) {
+    MChar  *arg = PG_GETARG_MCHAR(0);
+    int32   newtypmod = PG_GETARG_INT32(1);
+    Size    nbytes = VARSIZE(arg);
+    MChar  *copy = palloc(nbytes);
+
+    /* argument 2 (isExplicit) is not consulted by this function */
+    memcpy(copy, arg, nbytes);
+    PG_FREE_IF_COPY(arg, 0);
+
+    /* mchar_strip() truncates, stores the typmod and removes trailing spaces */
+    mchar_strip(copy, newtypmod);
+
+    PG_RETURN_MCHAR(copy);
+}
+
+Datum
+mvarchar_in(PG_FUNCTION_ARGS) {
+ char *s = PG_GETARG_CSTRING(0);
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ MVarChar *result;
+ int32 slen = strlen(s), rlen;
+ /* Text input function for mvarchar: checks and converts the string; nothing is truncated or trimmed */
+ pg_verifymbstr(s, slen, false);
+
+ result = (MVarChar*)palloc( MVARCHARHDRSZ + slen * sizeof(UChar) * 2 /* upper limit of length */ );
+ rlen = Char2UChar( s, slen, result->data );
+ SET_VARSIZE(result, sizeof(UChar) * rlen + MVARCHARHDRSZ);
+ /* the limit is checked in characters (code points); NOTE(review): user-facing error raised via elog(), so no specific SQLSTATE is set here */
+ if ( atttypmod > 0 && atttypmod < u_countChar32(result->data, UVARCHARLENGTH(result)) )
+ elog(ERROR,"value too long for type mvarchar(%d)", atttypmod);
+
+ PG_RETURN_MVARCHAR(result);
+}
+
+/* Text output function for mvarchar: converts the stored UChar units to a C string. */
+Datum
+mvarchar_out(PG_FUNCTION_ARGS) {
+    MVarChar   *value = PG_GETARG_MVARCHAR(0);
+    size_t      nunits = UVARCHARLENGTH(value);
+    size_t      capacity = nunits * pg_database_encoding_max_length();
+    char       *text = (char*) palloc(capacity + 1);    /* +1 for the terminator */
+    size_t      written;
+
+    /* UChar2Char() reports how many bytes it stored in text */
+    written = UChar2Char(value->data, nunits, text);
+    text[written] = '\0';
+
+    PG_FREE_IF_COPY(value, 0);
+
+    PG_RETURN_CSTRING(text);
+}
+
+static void
+mvarchar_strip(MVarChar *m, int atttypmod) {
+    int charlen = u_countChar32(m->data, UVARCHARLENGTH(m));   /* length in characters (code points) */
+    /* Truncates m in place to at most atttypmod characters; a negative atttypmod means no limit. */
+    if ( atttypmod>=0 && atttypmod < charlen ) {
+        int i=0;
+        U16_FWD_N( m->data, i, UVARCHARLENGTH(m), atttypmod);  /* the bound is in UChar units, not characters */
+        SET_VARSIZE(m, sizeof(UChar) * i + MVARCHARHDRSZ);
+    }
+}
+
+Datum
+mvarchar(PG_FUNCTION_ARGS) {
+ MVarChar *source = PG_GETARG_MVARCHAR(0);
+ MVarChar *result;
+ int32 typmod = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ int charlen = u_countChar32(source->data, UVARCHARLENGTH(source));
+ /* Length coercion for mvarchar: works on a copy; an over-long value is truncated when isExplicit, otherwise an error */
+ result = palloc( VARSIZE(source) );
+ memcpy( result, source, VARSIZE(source) );
+ PG_FREE_IF_COPY(source,0);
+ /* charlen and typmod are both counted in characters (code points) */
+ if ( typmod>=0 && typmod < charlen ) {
+ if ( isExplicit )
+ mvarchar_strip(result, typmod);
+ else
+ elog(ERROR,"value too long for type mvarchar(%d)", typmod);
+ }
+
+ PG_RETURN_MVARCHAR(result);
+}
+
+/* Cast mvarchar -> mchar: copies the payload under an MChar header and normalizes it to the target typmod. */
+PG_FUNCTION_INFO_V1(mvarchar_mchar);
+Datum mvarchar_mchar(PG_FUNCTION_ARGS);
+Datum
+mvarchar_mchar(PG_FUNCTION_ARGS) {
+    MVarChar   *arg = PG_GETARG_MVARCHAR(0);
+    int32       newtypmod = PG_GETARG_INT32(1);
+    Size        payload = MVARCHARLENGTH(arg);      /* payload size in bytes */
+    Size        total = payload + MCHARHDRSZ;
+    MChar      *converted = palloc(total);
+
+    /* argument 2 (isExplicit) is not consulted by this function */
+    SET_VARSIZE(converted, total);
+    memcpy(converted->data, arg->data, payload);
+
+    PG_FREE_IF_COPY(arg, 0);
+    /* mchar_strip() sets converted->typmod, truncates and removes trailing spaces */
+    mchar_strip(converted, newtypmod);
+
+    PG_RETURN_MCHAR(converted);
+}
+
+/* Cast mchar -> mvarchar: copies the characters, re-creates the padding implied by the source typmod, applies the target typmod. */
+PG_FUNCTION_INFO_V1(mchar_mvarchar);
+Datum mchar_mvarchar(PG_FUNCTION_ARGS);
+Datum
+mchar_mvarchar(PG_FUNCTION_ARGS) {
+    MChar      *source = PG_GETARG_MCHAR(0);
+    MVarChar   *result;
+    int32       typmod = PG_GETARG_INT32(1);        /* argument 2 (isExplicit) is not consulted */
+    int32       scharlen = u_countChar32(source->data, UCHARLENGTH(source));
+    int32       curlen = UCHARLENGTH( source );     /* result length so far, in UChar units */
+    int32       maxcharlen;
+
+    /* a character takes at most two UChar units; Max() keeps the buffer sufficient even if scharlen exceeds the typmod */
+    maxcharlen = Max(source->typmod, scharlen);
+    result = palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * maxcharlen );
+
+    if ( curlen > 0 )
+        memcpy( result->data, source->data, MCHARLENGTH(source) );
+    /* pad with white space up to the source typmod, counted in characters */
+    if ( source->typmod > 0 && scharlen < source->typmod ) {
+        FillWhiteSpace( result->data + curlen, source->typmod-scharlen );
+        curlen += source->typmod-scharlen;
+    }
+    SET_VARSIZE(result, MVARCHARHDRSZ + curlen *sizeof(UChar));
+
+    PG_FREE_IF_COPY(source,0);
+
+    /* truncates to the target typmod when the padded value is longer */
+    mvarchar_strip( result, typmod );
+
+    PG_RETURN_MVARCHAR(result);     /* result is an MVarChar, so use the matching return macro */
+}
+
+PG_FUNCTION_INFO_V1(mchar_send);
+Datum mchar_send(PG_FUNCTION_ARGS);
+Datum
+mchar_send(PG_FUNCTION_ARGS) {
+ MChar *in = PG_GETARG_MCHAR(0);
+ size_t inlen = UCHARLENGTH(in);
+ size_t charlen = u_countChar32(in->data, inlen);
+ StringInfoData buf;
+ /* Binary output for mchar: the stored UChar units as raw bytes, followed by padding up to typmod characters */
+ Assert( in->typmod < 0 || charlen<=in->typmod );
+
+ pq_begintypsend(&buf);
+ pq_sendbytes(&buf, (char*)in->data, inlen * sizeof(UChar) );
+ /* NOTE(review): units are sent exactly as laid out in memory; this function does no byte-order conversion */
+ if ( in->typmod>0 && charlen < in->typmod ) {
+ int nw = in->typmod - charlen;
+ UChar *white = palloc( sizeof(UChar) * nw );
+
+ FillWhiteSpace( white, nw );
+ pq_sendbytes(&buf, (char*)white, sizeof(UChar) * nw);
+ pfree(white);
+ }
+
+ PG_FREE_IF_COPY(in,0);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+PG_FUNCTION_INFO_V1(mchar_recv);
+Datum mchar_recv(PG_FUNCTION_ARGS);
+Datum
+mchar_recv(PG_FUNCTION_ARGS) {
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ MChar *res;
+ int nbytes;
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ /* Binary input for mchar: takes every remaining byte of the message as payload */
+ nbytes = buf->len - buf->cursor;
+ res = (MChar*)palloc( nbytes + MCHARHDRSZ );
+ res->len = nbytes + MCHARHDRSZ;
+ res->typmod = -1;
+ SET_VARSIZE(res, res->len);
+ pq_copymsgbytes(buf, (char*)res->data, nbytes);
+ /* NOTE(review): the bytes are not validated here (even byte count, well-formed UTF-16); mchar_strip() then normalizes */
+ mchar_strip( res, atttypmod );
+
+ PG_RETURN_MCHAR(res);
+}
+
+/* Binary output for mvarchar: sends the stored UChar units as raw bytes. */
+PG_FUNCTION_INFO_V1(mvarchar_send);
+Datum mvarchar_send(PG_FUNCTION_ARGS);
+Datum
+mvarchar_send(PG_FUNCTION_ARGS) {
+    MVarChar       *value = PG_GETARG_MVARCHAR(0);
+    size_t          nunits = UVARCHARLENGTH(value);
+    StringInfoData  msg;
+
+    pq_begintypsend(&msg);
+    pq_sendbytes(&msg, (char*)value->data, nunits * sizeof(UChar) );
+    PG_FREE_IF_COPY(value,0);
+
+    PG_RETURN_BYTEA_P(pq_endtypsend(&msg));
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_recv);
+Datum mvarchar_recv(PG_FUNCTION_ARGS);
+Datum
+mvarchar_recv(PG_FUNCTION_ARGS) {
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ MVarChar *res;
+ int nbytes;
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ /* Binary input for mvarchar: takes every remaining byte of the message as payload, without validation */
+ nbytes = buf->len - buf->cursor;
+ res = (MVarChar*)palloc( nbytes + MVARCHARHDRSZ );
+ res->len = nbytes + MVARCHARHDRSZ;
+ SET_VARSIZE(res, res->len);
+ pq_copymsgbytes(buf, (char*)res->data, nbytes);
+ /* an over-long value is truncated silently here, whereas mvarchar_in raises an error */
+ mvarchar_strip( res, atttypmod );
+
+ PG_RETURN_MVARCHAR(res);
+}
+
+Datum
+mvarchar_support(PG_FUNCTION_ARGS)
+{
+ Node *node = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+ /* Support function: answers SupportRequestSimplify only; returns NULL (no replacement) for anything else */
+ if (IsA(node, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) node;
+ FuncExpr *expr = req->fcall;
+ Node *typmodnode;
+
+ typmodnode = (Node *) lsecond(expr->args);
+ /* only a non-null constant typmod (second call argument) can be reasoned about */
+ if (IsA(typmodnode, Const) && !((Const *) typmodnode)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 source_typmod = exprTypmod(source);
+ int32 req_typemod = DatumGetInt32(((Const *) typmodnode)->constvalue);
+ /* relabel instead of calling when the requested typmod is negative or the source's known typmod already fits it */
+ if (req_typemod < 0 || (source_typmod >= 0 && source_typmod <= req_typemod))
+ ret = relabel_to_typmod(source, req_typemod);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
diff --git a/contrib/mchar/mchar_like.c b/contrib/mchar/mchar_like.c
new file mode 100644
index 00000000000..47b4d7302c7
--- /dev/null
+++ b/contrib/mchar/mchar_like.c
@@ -0,0 +1,984 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+
+#include "catalog/pg_collation.h"
+#include "utils/selfuncs.h"
+#include "utils/memutils.h"
+#include "nodes/primnodes.h"
+#include "nodes/makefuncs.h"
+#include "nodes/supportnodes.h"
+#include "regex/regex.h"
+
+/*
+** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+** Rich $alz is now <rsalz@bbn.com>.
+** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
+**
+** This code was shamelessly stolen from the "pql" code by myself and
+** slightly modified :)
+**
+** All references to the word "star" were replaced by "percent"
+** All references to the word "wild" were replaced by "like"
+**
+** All the nice shell RE matching stuff was replaced by just "_" and "%"
+**
+** As I don't have a copy of the SQL standard handy I wasn't sure whether
+** to leave in the '\' escape character handling.
+**
+** Keith Parks. <keith@mtcc.demon.co.uk>
+**
+** SQL92 lets you specify the escape character by saying
+** LIKE <pattern> ESCAPE <escape character>. We are a small operation
+** so we force you to use '\'. - ay 7/95
+**
+** Now we have the like_escape() function that converts patterns with
+** any specified escape character (or none at all) to the internal
+** default escape character, which is still '\'. - tgl 9/2000
+**
+** The code is rewritten to avoid requiring null-terminated strings,
+** which in turn allows us to leave out some memcpy() operations.
+** This code should be faster and take less memory, but no promises...
+** - thomas 2000-08-06
+**
+** Adapted for UTF-16 by teodor
+*/
+
+#define LIKE_TRUE 1
+#define LIKE_FALSE 0
+#define LIKE_ABORT (-1)
+
+
+static int
+uchareq(UChar *p1, UChar *p2) {
+ int l1=0, l2=0;
+ /*
+ * Measure the character at each pointer in UChar units (the bound passed to U16_FWD_1 is 2).
+ * The strings are assumed to be well-formed; the real buffer lengths are not known here.
+ */
+ U16_FWD_1(p1, l1, 2);
+ U16_FWD_1(p2, l2, 2);
+ /* returns 1 when UCharCaseCompare() reports the two characters as equal, else 0 */
+ return (UCharCaseCompare(p1, l1, p2, l2)==0) ? 1 : 0;
+}
+
+#define NextChar(p, plen) /* step p over one character (U16_FWD_1) and reduce plen by the units consumed */ \
+ do { \
+ int __l = 0; \
+ U16_FWD_1((p), __l, (plen));\
+ (p) +=__l; \
+ (plen) -=__l; \
+ } while(0)
+/* Copy one character from src to dst, advancing both pointers and reducing srclen by the units copied */
+#define CopyAdvChar(dst, src, srclen) \
+ do { \
+ int __l = 0; \
+ U16_FWD_1((src), __l, (srclen));\
+ (srclen) -= __l; \
+ while (__l-- > 0) \
+ *(dst)++ = *(src)++; \
+ } while (0)
+
+
+static UChar UCharPercent = 0; /* '%'; the value 0 is also what SET_UCHAR tests to detect "not initialized yet" */
+static UChar UCharBackSlesh = 0; /* '\\' */
+static UChar UCharUnderLine = 0; /* '_' */
+static UChar UCharStar = 0; /* '*' */
+static UChar UCharDotDot = 0; /* ':' */
+static UChar UCharUp = 0; /* '^' */
+static UChar UCharLBracket = 0; /* '(' */
+static UChar UCharQ = 0; /* '?' */
+static UChar UCharRBracket = 0; /* ')' */
+static UChar UCharDollar = 0; /* '$' */
+static UChar UCharDot = 0; /* '.' */
+static UChar UCharLFBracket = 0; /* '{' */
+static UChar UCharRFBracket = 0; /* '}' */
+static UChar UCharQuote = 0; /* '"' */
+static UChar UCharSpace = 0; /* ' ' */
+static UChar UCharOne = 0; /* '1' */
+static UChar UCharComma = 0; /* ',' */
+static UChar UCharLQBracket = 0; /* '[' */
+static UChar UCharRQBracket = 0; /* ']' */
+/* MkUChar: store the UChar form of the single char c into uc, using u_charsToUChars() */
+#define MkUChar(uc, c) do { \
+ char __c = (c); \
+ u_charsToUChars( &__c, &(uc), 1 ); \
+} while(0)
+/* SET_UCHAR: fills all of the constants above the first time it runs (while UCharPercent is still 0) */
+#define SET_UCHAR if ( UCharPercent == 0 ) { \
+ MkUChar( UCharPercent, '%' ); \
+ MkUChar( UCharBackSlesh, '\\' ); \
+ MkUChar( UCharUnderLine, '_' ); \
+ MkUChar( UCharStar, '*' ); \
+ MkUChar( UCharDotDot, ':' ); \
+ MkUChar( UCharUp, '^' ); \
+ MkUChar( UCharLBracket, '(' ); \
+ MkUChar( UCharQ, '?' ); \
+ MkUChar( UCharRBracket, ')' ); \
+ MkUChar( UCharDollar, '$' ); \
+ MkUChar( UCharDot, '.' ); \
+ MkUChar( UCharLFBracket, '{' ); \
+ MkUChar( UCharRFBracket, '}' ); \
+ MkUChar( UCharQuote, '"' ); \
+ MkUChar( UCharSpace, ' ' ); \
+ MkUChar( UCharOne, '1' ); \
+ MkUChar( UCharComma, ',' ); \
+ MkUChar( UCharLQBracket, '[' ); \
+ MkUChar( UCharRQBracket, ']' ); \
+ }
+
+int
+m_isspace(UChar c) {
+ SET_UCHAR;
+ /* true only for the UChar form of ' ' (UCharSpace); no other character is treated as a space here */
+ return (c == UCharSpace);
+}
+
+/*
+ * LIKE matcher over UChar strings: t/tlen is the text, p/plen the pattern, both counted in UChar units.
+ * Returns LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT when the text ran out before the pattern was satisfied.
+ */
+static int
+MatchUChar(UChar *t, int tlen, UChar *p, int plen) {
+    SET_UCHAR;
+    /* Fast path for find substring pattern */
+    if ((plen >= 2) && p[0] == UCharPercent && p[plen-1] == UCharPercent && !u_strFindFirst(p+1, plen-2, &UCharPercent, 1) && !u_strFindFirst(p+1, plen-2, &UCharBackSlesh, 1) && !u_strFindFirst(p+1, plen-2, &UCharUnderLine, 1))
+    {
+        if (plen-2 > tlen)
+            return LIKE_FALSE;
+
+        if (tlen > 100 || plen > 100)
+        {
+            int         tbufsz = tlen + 512;
+            int         pbufsz = plen + 512;
+            UChar      *tbuf = malloc(tbufsz*sizeof(UChar));
+            UChar      *pbuf = malloc(pbufsz*sizeof(UChar));
+            /* plain malloc(): a failed allocation only disables this fast path */
+            if (tbuf != NULL && pbuf != NULL)
+            {
+                UErrorCode  status1 = U_ZERO_ERROR;
+                UErrorCode  status2 = U_ZERO_ERROR;
+                int         tbuflen = u_strToLower(tbuf, tbufsz, t, tlen, NULL, &status1);
+                int         pbuflen = u_strToLower(pbuf, pbufsz, p+1, plen-2, NULL, &status2);
+                /* trust the lowered copies only if both conversions succeeded and fit their buffers */
+                if (tbuflen < tbufsz && pbuflen < pbufsz && U_SUCCESS(status1) && U_SUCCESS(status2))
+                {
+                    bool        found = (u_strFindFirst(tbuf, tbuflen, pbuf, pbuflen) != NULL);
+                    free(tbuf);
+                    free(pbuf);
+                    return found ? LIKE_TRUE : LIKE_FALSE;
+                }
+            }
+            /* otherwise fall through to the generic matcher below */
+            free(tbuf);
+            free(pbuf);
+        }
+    }
+
+    /* Fast path for match-everything pattern */
+    if ((plen == 1) && (*p == UCharPercent))
+        return LIKE_TRUE;
+
+    while ((tlen > 0) && (plen > 0)) {
+        if (*p == UCharBackSlesh) {
+            /* Next pattern char must match literally, whatever it is */
+            NextChar(p, plen);
+            if ((plen <= 0) || !uchareq(t, p))
+                return LIKE_FALSE;
+        } else if (*p == UCharPercent) {
+            /* %% is the same as % according to the SQL standard, so advance past all %'s */
+            while ((plen > 0) && (*p == UCharPercent))
+                NextChar(p, plen);
+            /* Trailing percent matches everything. */
+            if (plen <= 0)
+                return LIKE_TRUE;
+
+            /*
+             * Otherwise, scan for a text position at which we can match the
+             * rest of the pattern.
+             */
+            while (tlen > 0) {
+                /*
+                 * Optimization to prevent most recursion: don't recurse
+                 * unless first pattern char might match this text char.
+                 */
+                if (uchareq(t, p) || (*p == UCharBackSlesh) || (*p == UCharUnderLine)) {
+                    int matched = MatchUChar(t, tlen, p, plen);
+
+                    if (matched != LIKE_FALSE)
+                        return matched; /* TRUE or ABORT */
+                }
+
+                NextChar(t, tlen);
+            }
+
+            /*
+             * End of text with no match, so no point in trying later places
+             * to start matching this pattern.
+             */
+            return LIKE_ABORT;
+        } else if ((*p != UCharUnderLine) && !uchareq(t, p)) {
+            /*
+             * Not the single-character wildcard and no explicit match? Then
+             * time to quit...
+             */
+            return LIKE_FALSE;
+        }
+
+        NextChar(t, tlen);
+        NextChar(p, plen);
+    }
+
+    if (tlen > 0)
+        return LIKE_FALSE; /* end of pattern, but not of text */
+
+    /* End of input string. Do we have matching pattern remaining? */
+    while ((plen > 0) && (*p == UCharPercent)) /* allow multiple %'s at end of pattern */
+        NextChar(p, plen);
+    if (plen <= 0)
+        return LIKE_TRUE;
+
+    /*
+     * End of text with no match, so no point in trying later places to start
+     * matching this pattern.
+     */
+    return LIKE_ABORT;
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_like );
+Datum mvarchar_like( PG_FUNCTION_ARGS );
+Datum
+mvarchar_like( PG_FUNCTION_ARGS ) {
+ MVarChar *str = PG_GETARG_MVARCHAR(0);
+ MVarChar *pat = PG_GETARG_MVARCHAR(1);
+ int result;
+ /* SQL-callable LIKE for mvarchar: argument 0 is the text, argument 1 the pattern, both passed as stored */
+ result = MatchUChar( str->data, UVARCHARLENGTH(str), pat->data, UVARCHARLENGTH(pat) );
+
+ PG_FREE_IF_COPY(str,0);
+ PG_FREE_IF_COPY(pat,1);
+ /* both LIKE_FALSE and LIKE_ABORT are reported as false */
+ PG_RETURN_BOOL(result == LIKE_TRUE);
+}
+
+/* SQL-callable negation of mvarchar_like() applied to the same two arguments. */
+PG_FUNCTION_INFO_V1( mvarchar_notlike );
+Datum mvarchar_notlike( PG_FUNCTION_ARGS );
+Datum
+mvarchar_notlike( PG_FUNCTION_ARGS ) {
+    Datum   liked = DirectFunctionCall2(mvarchar_like,
+                                        PG_GETARG_DATUM(0),
+                                        PG_GETARG_DATUM(1));
+
+    PG_RETURN_BOOL( !DatumGetBool(liked) );
+}
+
+/*
+ * Drops the spaces that directly precede the pattern's trailing run of % / _ (as in '111 %'); returns src itself when nothing is removed, otherwise a palloc'd copy whose length is stored in *dstlen.
+ */
+static UChar *
+removeTrailingSpaces( UChar *src, int srclen, int *dstlen, bool *isSpecialLast) {
+ UChar* dst = src;
+ UChar *ptr, *dptr, *markptr;
+ /* default answer: the pattern is returned unchanged */
+ *dstlen = srclen;
+ ptr = src + srclen-1;
+ SET_UCHAR;
+ /* *isSpecialLast: the last unit of a non-empty pattern is a space (u_isspace), % or _ */
+ *isSpecialLast = ( srclen > 0 && (u_isspace(*ptr) || *ptr == UCharPercent || *ptr == UCharUnderLine ) ) ? true : false;
+ while( ptr>=src ) {
+ if ( *ptr == UCharPercent || *ptr == UCharUnderLine ) {
+ if ( ptr==src )
+ return dst; /* first character */
+
+ if ( *(ptr-1) == UCharBackSlesh )
+ return dst; /* use src as is */
+
+ if ( u_isspace( *(ptr-1) ) ) {
+ ptr--;
+ break; /* % or _ is after space which should be removed */
+ }
+ } else {
+ return dst;
+ }
+ ptr--;
+ }
+ /* ptr is now on the space before the wildcard tail (or before src for an empty pattern); markptr marks the tail */
+ markptr = ptr+1;
+ dst = (UChar*)palloc( sizeof(UChar) * srclen );
+
+ /* find last non-space character */
+ while( ptr>=src && u_isspace(*ptr) )
+ ptr--;
+
+ dptr = dst + (ptr-src+1);
+ /* copy the part up to the last non-space unit, then append the wildcard tail right after it */
+ if ( ptr>=src )
+ memcpy( dst, src, sizeof(UChar) * (ptr-src+1) );
+
+ while( markptr - src < srclen ) {
+ *dptr = *markptr;
+ dptr++;
+ markptr++;
+ }
+
+ *dstlen = dptr - dst;
+ return dst;
+}
+
+/* Returns src's characters padded with white space to src->typmod characters; *newlen receives the length in UChar units. */
+static UChar*
+addTrailingSpace( MChar *src, int *newlen ) {
+    int     unitlen = UCHARLENGTH(src);                     /* stored length in UChar units */
+    int     scharlen = u_countChar32(src->data, unitlen);   /* stored length in characters (code points) */
+    int     padlen = src->typmod - scharlen;                /* characters missing up to the typmod */
+
+    if ( padlen > 0 ) {
+        UChar  *res = (UChar*) palloc( sizeof(UChar) * (unitlen + padlen) );
+
+        memcpy( res, src->data, sizeof(UChar) * unitlen );
+        FillWhiteSpace( res + unitlen, padlen );
+        *newlen = unitlen + padlen;     /* in units: surrogate pairs make unitlen larger than scharlen */
+        return res;
+    }
+    *newlen = unitlen;                  /* nothing to pad: hand back the value's own buffer */
+    return src->data;
+}
+
+PG_FUNCTION_INFO_V1( mchar_like );
+Datum mchar_like( PG_FUNCTION_ARGS );
+Datum
+mchar_like( PG_FUNCTION_ARGS ) {
+ MChar *str = PG_GETARG_MCHAR(0);
+ MVarChar *pat = PG_GETARG_MVARCHAR(1);
+ int result;
+ bool isNeedAdd = false;
+ UChar *cleaned, *filled;
+ int clen=0, flen=0;
+ /* SQL-callable LIKE for an mchar text (argument 0) against an mvarchar pattern (argument 1) */
+ cleaned = removeTrailingSpaces(pat->data, UVARCHARLENGTH(pat), &clen, &isNeedAdd);
+ if ( isNeedAdd )
+ filled = addTrailingSpace(str, &flen);
+ else {
+ filled = str->data;
+ flen = UCHARLENGTH(str);
+ }
+ /* the text is padded only when the pattern ends with a space, % or _ (isNeedAdd) */
+ result = MatchUChar( filled, flen, cleaned, clen );
+ /* both helpers may return their input buffer; free only what they allocated */
+ if ( pat->data != cleaned )
+ pfree( cleaned );
+ if ( str->data != filled )
+ pfree( filled );
+
+ PG_FREE_IF_COPY(str,0);
+ PG_FREE_IF_COPY(pat,1);
+
+ PG_RETURN_BOOL(result == LIKE_TRUE);
+}
+
+/* SQL-callable negation of mchar_like() applied to the same two arguments. */
+PG_FUNCTION_INFO_V1( mchar_notlike );
+Datum mchar_notlike( PG_FUNCTION_ARGS );
+Datum
+mchar_notlike( PG_FUNCTION_ARGS ) {
+    /* mchar_like() returns a boolean Datum, so it is decoded with DatumGetBool() */
+    bool res = DatumGetBool( DirectFunctionCall2(
+                        mchar_like,
+                        PG_GETARG_DATUM(0), PG_GETARG_DATUM(1)
+                    ));
+    PG_RETURN_BOOL( !res );
+}
+
+
+
+/*
+ * Extracts the literal prefix of a LIKE pattern (argument 0, a Const holding an mvarchar) into *prefix (argument 2)
+ * and returns a Pattern_Prefix_Status; any pattern type other than Pattern_Type_Like yields Pattern_Prefix_None.
+ */
+PG_FUNCTION_INFO_V1( mchar_pattern_fixed_prefix );
+Datum mchar_pattern_fixed_prefix( PG_FUNCTION_ARGS );
+Datum
+mchar_pattern_fixed_prefix( PG_FUNCTION_ARGS ) {
+    Const           *patt = (Const*)PG_GETARG_POINTER(0);
+    Pattern_Type    ptype = (Pattern_Type)PG_GETARG_INT32(1);
+    Const           **prefix = (Const**)PG_GETARG_POINTER(2);
+    UChar           *spatt;
+    int32           slen, prefixlen=0, i=0;
+    MVarChar        *sprefix;
+    Pattern_Prefix_Status status = Pattern_Prefix_None;
+
+    *prefix = NULL;
+
+    if ( ptype != Pattern_Type_Like )
+        PG_RETURN_INT32(Pattern_Prefix_None);
+
+    SET_UCHAR;
+
+    spatt = ((MVarChar*)DatumGetPointer(patt->constvalue))->data;
+    slen = UVARCHARLENGTH( DatumGetPointer(patt->constvalue) );
+
+    sprefix = (MVarChar*)palloc( MCHARHDRSZ /*The biggest hdr!! */ + sizeof(UChar) * slen );
+
+    /* copy literal units up to the first unescaped % or _; an escaping backslash itself is not copied */
+    while( prefixlen < slen && i < slen ) {
+        if ( spatt[i] == UCharPercent || spatt[i] == UCharUnderLine )
+            break;
+        else if ( spatt[i] == UCharBackSlesh ) {
+            i++;
+            if ( i>= slen )
+                break;
+        }
+        sprefix->data[ prefixlen++ ] = spatt[i++];
+    }
+
+    /* trailing white space (u_isspace) is not kept in the prefix */
+    while( prefixlen > 0 ) {
+        if ( ! u_isspace( sprefix->data[ prefixlen-1 ] ) )
+            break;
+        prefixlen--;
+    }
+
+    if ( prefixlen == 0 )
+        PG_RETURN_INT32(Pattern_Prefix_None);
+
+    SET_VARSIZE(sprefix, sizeof(UChar) * prefixlen + MVARCHARHDRSZ);
+
+    *prefix = makeConst( patt->consttype, -1, InvalidOid, VARSIZE(sprefix), PointerGetDatum(sprefix), false, false );
+
+    if ( prefixlen == slen ) /* in LIKE, an empty pattern is an exact match! */
+        status = Pattern_Prefix_Exact;
+    else if ( prefixlen > 0 )
+        status = Pattern_Prefix_Partial;
+
+    PG_RETURN_INT32( status );
+}
+
+/* True when UCharCaseCompare() orders left strictly before right. */
+static bool
+checkCmp( UChar *left, int32 leftlen, UChar *right, int32 rightlen ) {
+    return UCharCaseCompare( left, leftlen, right, rightlen) < 0;
+}
+
+
+PG_FUNCTION_INFO_V1( mchar_greaterstring );
+Datum mchar_greaterstring( PG_FUNCTION_ARGS );
+Datum
+mchar_greaterstring( PG_FUNCTION_ARGS ) {
+ Const *patt = (Const*)PG_GETARG_POINTER(0);
+ char *src = (char*)DatumGetPointer( patt->constvalue );
+ int dstlen, srclen = VARSIZE(src);
+ char *dst = palloc( srclen );
+ UChar *ptr, *srcptr;
+ /* Builds a Const holding a string that checkCmp() orders after the one in patt; returns NULL if none is found */
+ memcpy( dst, src, srclen );
+ /* from here on srclen and dstlen are lengths in UChar units, no longer in bytes */
+ srclen = dstlen = UVARCHARLENGTH( dst );
+ ptr = ((MVarChar*)dst)->data;
+ srcptr = ((MVarChar*)src)->data;
+ /* try to bump the last unit of the copy; if that fails, drop it and retry with the shorter prefix */
+ while( dstlen > 0 ) {
+ UChar *lastchar = ptr + dstlen - 1;
+ /* a unit that is a lead surrogate is never incremented */
+ if ( !U16_IS_LEAD( *lastchar ) ) {
+ while( *lastchar<0xffff ) {
+
+ (*lastchar)++;
+ /* accept the first value whose block code is valid and for which src sorts before the candidate */
+ if ( ublock_getCode(*lastchar) == UBLOCK_INVALID_CODE || !checkCmp( srcptr, srclen, ptr, dstlen ) )
+ continue;
+ else {
+ SET_VARSIZE(dst, sizeof(UChar) * dstlen + MVARCHARHDRSZ);
+ /* NOTE(review): VARSIZE(dst) is passed as makeConst()'s constlen; confirm this is intended for a varlena value */
+ PG_RETURN_POINTER( makeConst( patt->consttype, -1,
+ InvalidOid, VARSIZE(dst), PointerGetDatum(dst), false, false ) );
+ }
+ }
+ }
+
+ dstlen--;
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+static int
+do_like_escape( UChar *pat, int plen, UChar *esc, int elen, UChar *result) {
+ UChar *p = pat,*e =esc ,*r;
+ bool afterescape;
+ /* Rewrites pat (plen units) into result so that backslash is the escape character; returns the number of units written */
+ r = result;
+ SET_UCHAR;
+ /* result is not bounds-checked here; the caller in this file allocates twice the pattern length */
+ if ( elen == 0 ) {
+ /*
+ * No escape character is wanted. Double any backslashes in the
+ * pattern to make them act like ordinary characters.
+ */
+ while (plen > 0) {
+ if (*p == UCharBackSlesh )
+ *r++ = UCharBackSlesh;
+ CopyAdvChar(r, p, plen);
+ }
+ } else {
+ /*
+ * The specified escape must be only a single character.
+ */
+ NextChar(e, elen);
+ /* after stepping over one character nothing may remain of the escape string */
+ if (elen != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+
+ e = esc;
+
+ /*
+ * If specified escape is '\', just copy the pattern as-is.
+ */
+ if ( *e == UCharBackSlesh ) {
+ memcpy(result, pat, plen * sizeof(UChar));
+ return plen;
+ }
+
+ /*
+ * Otherwise, convert occurrences of the specified escape character to
+ * '\', and double occurrences of '\' --- unless they immediately
+ * follow an escape character!
+ */
+ afterescape = false;
+ /* the escape character is recognized with uchareq(), i.e. through UCharCaseCompare() */
+ while (plen > 0) {
+ if ( uchareq(p,e) && !afterescape) {
+ *r++ = UCharBackSlesh;
+ NextChar(p, plen);
+ afterescape = true;
+ } else if ( *p == UCharBackSlesh ) {
+ *r++ = UCharBackSlesh;
+ if (!afterescape)
+ *r++ = UCharBackSlesh;
+ NextChar(p, plen);
+ afterescape = false;
+ } else {
+ CopyAdvChar(r, p, plen);
+ afterescape = false;
+ }
+ }
+ }
+
+ return ( r - result );
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_like_escape );
+Datum mvarchar_like_escape( PG_FUNCTION_ARGS );
+/* SQL-callable wrapper: run do_like_escape() on pattern (arg 0) and escape (arg 1). */
+Datum
+mvarchar_like_escape( PG_FUNCTION_ARGS ) {
+	MVarChar *pattern = PG_GETARG_MVARCHAR(0);
+	MVarChar *escape = PG_GETARG_MVARCHAR(1);
+	int patunits = UVARCHARLENGTH(pattern);
+	/* the output buffer holds two UChar units per pattern unit */
+	MVarChar *out = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar)*2*patunits );
+	int outunits = do_like_escape( pattern->data, patunits,
+			escape->data, UVARCHARLENGTH(escape),
+			out->data );
+
+	SET_VARSIZE(out, MVARCHARHDRSZ + outunits * sizeof(UChar));
+	PG_FREE_IF_COPY(pattern,0);
+	PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MVARCHAR(out);
+}
+
+static MemoryContext McharRgCntx; /* parent of the cached per-regex contexts; created on first cache miss */
+
+#define RE_CACHE_SIZE 32 /* maximum number of compiled regexes kept in re_array */
+typedef struct ReCache {
+ MemoryContext cntx; /* context the regex was compiled in; deleted on eviction */
+ UChar *pattern; /* copy of the pattern text in UChar units (no terminator) */
+ int length; /* pattern length in UChar units */
+ int flags; /* cflags the pattern was compiled with */
+ regex_t re; /* the compiled regular expression */
+} ReCache;
+
+static int num_res = 0; /* number of valid entries in re_array */
+static ReCache re_array[RE_CACHE_SIZE]; /* cached re's */
+static const int mchar_regex_flavor = REG_ADVANCED | REG_ICASE; /* flags used by the regex operators below */
+
+static regex_t *
+URE_compile_and_cache(UChar *text_re, int text_re_len, int cflags) {
+	pg_wchar *pattern;
+	size_t pattern_len;
+	int i;
+	int regcomp_result;
+	ReCache re_temp;
+	char errMsg[128];
+	MemoryContext oldcntx;
+	char *patternId;
+	/* Return the compiled regex for (text_re, cflags), compiling and caching it on a miss; valid until the next call. */
+	/* Cache lookup: a hit is moved to the front, keeping re_array in most-recently-used order. */
+	for (i = 0; i < num_res; i++) {
+		if ( re_array[i].length == text_re_len &&
+			 re_array[i].flags == cflags &&
+			 memcmp(re_array[i].pattern, text_re, sizeof(UChar)*text_re_len) == 0 ) {
+
+			/* Found, move it to front */
+			if ( i>0 ) {
+				re_temp = re_array[i];
+				memmove(&re_array[1], &re_array[0], i * sizeof(ReCache));
+				re_array[0] = re_temp;
+			}
+
+			return &re_array[0].re;
+		}
+	}
+	/* Miss: make sure the long-lived parent of all per-regex contexts exists. */
+	if (McharRgCntx == NULL)
+		McharRgCntx = AllocSetContextCreate(TopMemoryContext,
+											"McharRgCntx",
+											ALLOCSET_SMALL_SIZES);
+
+	pattern = (pg_wchar *) palloc((1 + text_re_len) * sizeof(pg_wchar));
+	pattern_len = UChar2Wchar(text_re, text_re_len, pattern);
+	/* Build inside a child of the current context, so it is cleaned up with that context if an ERROR is raised below. */
+	re_temp.cntx = AllocSetContextCreate(CurrentMemoryContext,
+										 "McharRegex",
+										 ALLOCSET_SMALL_SIZES);
+
+	oldcntx = MemoryContextSwitchTo(re_temp.cntx);
+
+	regcomp_result = pg_regcomp(&re_temp.re,
+								pattern,
+								pattern_len,
+								cflags,
+								DEFAULT_COLLATION_OID);
+	pfree( pattern );
+
+	if (regcomp_result != REG_OKAY) {
+		pg_regerror(regcomp_result, &re_temp.re, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("invalid regular expression: %s", errMsg)));
+	}
+	/* Keep a private copy of the pattern text for later cache lookups. */
+	re_temp.pattern = palloc(text_re_len * sizeof(UChar));
+	memcpy(re_temp.pattern, text_re, text_re_len*sizeof(UChar));
+	re_temp.length = text_re_len;
+	re_temp.flags = cflags;
+	/* UChar2Char() tells ICU the output holds 4 bytes per UChar unit, so allocate that much plus a NUL. */
+	patternId = palloc0(text_re_len * 4 + 1);
+	UChar2Char(re_temp.pattern, text_re_len, patternId);
+	MemoryContextSetIdentifier(re_temp.cntx, patternId);
+	/* Cache full: drop the entry at the tail, i.e. the least recently used one. */
+	if (num_res >= RE_CACHE_SIZE) {
+		--num_res;
+		Assert(num_res < RE_CACHE_SIZE);
+		MemoryContextDelete(re_array[num_res].cntx);
+	}
+	/* Finished: move the new context under the long-lived cache parent. */
+	MemoryContextSetParent(re_temp.cntx, McharRgCntx);
+
+	if (num_res > 0)
+		memmove(&re_array[1], &re_array[0], num_res * sizeof(ReCache));
+
+	re_array[0] = re_temp;
+	num_res++;
+
+	MemoryContextSwitchTo(oldcntx);
+
+	return &re_array[0].re;
+}
+
+static bool
+URE_compile_and_execute(UChar *pat, int pat_len, UChar *dat, int dat_len,
+		int cflags, int nmatch, regmatch_t *pmatch) {
+	pg_wchar *wdata;
+	size_t wlen;
+	regex_t *compiled;
+	int rc;
+
+	/*
+	 * Match dat against pat: convert the subject to pg_wchar, fetch the
+	 * (possibly cached) compiled pattern and run it over the whole subject.
+	 * Returns true on a match and false on no match; any other regex status
+	 * is reported as an ERROR.
+	 */
+	wdata = (pg_wchar *) palloc((1+dat_len) * sizeof(pg_wchar));
+	wlen = UChar2Wchar(dat, dat_len, wdata);
+	compiled = URE_compile_and_cache(pat, pat_len, cflags);
+
+	rc = pg_regexec(compiled, wdata, wlen, 0, NULL, nmatch, pmatch, 0);
+	pfree(wdata);
+
+	if (rc == REG_OKAY)
+		return true;
+
+	if (rc != REG_NOMATCH) {
+		char errMsg[128];
+
+		pg_regerror(rc, compiled, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("regular expression failed: %s", errMsg)));
+	}
+	return false;
+}
+
+PG_FUNCTION_INFO_V1( mchar_regexeq );
+Datum mchar_regexeq( PG_FUNCTION_ARGS );
+/* mchar regex match: true when arg 0 matches the pattern in arg 1 (mchar_regex_flavor flags). */
+Datum
+mchar_regexeq( PG_FUNCTION_ARGS ) {
+	MChar *subject = PG_GETARG_MCHAR(0);
+	MChar *regex = PG_GETARG_MCHAR(1);
+	bool matched = URE_compile_and_execute(
+			regex->data, UCHARLENGTH(regex),
+			subject->data, UCHARLENGTH(subject),
+			mchar_regex_flavor, 0, NULL);
+
+	/* free the argument values if they are copies */
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(matched);
+}
+
+PG_FUNCTION_INFO_V1( mchar_regexne );
+Datum mchar_regexne( PG_FUNCTION_ARGS );
+/* mchar regex non-match: true when arg 0 does not match the pattern in arg 1. */
+Datum
+mchar_regexne( PG_FUNCTION_ARGS ) {
+	MChar *subject = PG_GETARG_MCHAR(0);
+	MChar *regex = PG_GETARG_MCHAR(1);
+	bool matched = URE_compile_and_execute(
+			regex->data, UCHARLENGTH(regex),
+			subject->data, UCHARLENGTH(subject),
+			mchar_regex_flavor, 0, NULL);
+
+	/* free the argument values if they are copies */
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(!matched);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_regexeq );
+Datum mvarchar_regexeq( PG_FUNCTION_ARGS );
+/* mvarchar regex match: true when arg 0 matches the pattern in arg 1 (mchar_regex_flavor flags). */
+Datum
+mvarchar_regexeq( PG_FUNCTION_ARGS ) {
+	MVarChar *subject = PG_GETARG_MVARCHAR(0);
+	MVarChar *regex = PG_GETARG_MVARCHAR(1);
+	bool matched = URE_compile_and_execute(
+			regex->data, UVARCHARLENGTH(regex),
+			subject->data, UVARCHARLENGTH(subject),
+			mchar_regex_flavor, 0, NULL);
+
+	/* free the argument values if they are copies */
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(matched);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_regexne );
+Datum mvarchar_regexne( PG_FUNCTION_ARGS );
+/* mvarchar regex non-match: true when arg 0 does not match the pattern in arg 1. */
+Datum
+mvarchar_regexne( PG_FUNCTION_ARGS ) {
+	MVarChar *subject = PG_GETARG_MVARCHAR(0);
+	MVarChar *regex = PG_GETARG_MVARCHAR(1);
+	bool matched = URE_compile_and_execute(
+			regex->data, UVARCHARLENGTH(regex),
+			subject->data, UVARCHARLENGTH(subject),
+			mchar_regex_flavor, 0, NULL);
+
+	/* free the argument values if they are copies */
+	PG_FREE_IF_COPY(subject, 0);
+	PG_FREE_IF_COPY(regex, 1);
+	PG_RETURN_BOOL(!matched);
+}
+
+static int
+do_similar_escape(UChar *p, int plen, UChar *e, int elen, UChar *result) {
+ UChar *r;
+ bool afterescape = false;
+ bool incharclass = false;
+ int nquotes = 0;
+ /* Translate SIMILAR TO pattern p (plen units) into a regular expression written to result; returns the units written. */
+ SET_UCHAR;
+ /* A NULL e or a negative elen selects '\' as the escape; elen == 0 disables escaping; other lengths than 1 are rejected. */
+ if (e==NULL || elen <0 ) {
+ e = &UCharBackSlesh;
+ elen = 1;
+ } else {
+ if ( elen == 0 )
+ e = NULL;
+ else if ( elen != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+ }
+
+ /*
+ * See the explanation of the following in ./utils/adt/regexp.c
+ */
+ r = result;
+ /* Fixed prefix; the UChar* constants are defined outside this view - presumably they spell "^(?:"; TODO confirm */
+ *r++ = UCharUp;
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+ /* The pattern is walked one UChar unit at a time (see p++, plen-- at the bottom of the loop). */
+ while( plen>0 ) {
+ UChar pchar = *p;
+
+ if (afterescape)
+ {
+ if (pchar == UCharQuote && !incharclass) /* escape-double-quote? */
+ {
+ if (nquotes == 0)
+ {
+ *r++ = UCharRBracket;
+ *r++ = UCharLFBracket;
+ *r++ = UCharOne;
+ *r++ = UCharComma;
+ *r++ = UCharOne;
+ *r++ = UCharRFBracket;
+ *r++ = UCharQ;
+ *r++ = UCharLBracket;
+ }
+ else if (nquotes == 1)
+ {
+ *r++ = UCharRBracket;
+ *r++ = UCharLFBracket;
+ *r++ = UCharOne;
+ *r++ = UCharComma;
+ *r++ = UCharOne;
+ *r++ = UCharRFBracket;
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
+ errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
+ nquotes++;
+ }
+ else
+ {
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ }
+ afterescape = false;
+ }
+ else if (e && elen > 0 && pchar == *e)
+ {
+ afterescape = true;
+ }
+ else if (incharclass)
+ {
+ if (pchar == UCharBackSlesh)
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ if (pchar == UCharRQBracket)
+ incharclass = false;
+ }
+ else if (pchar == UCharLQBracket)
+ {
+ *r++ = pchar;
+ incharclass = true;
+ }
+ else if (pchar == UCharPercent)
+ {
+ *r++ = UCharDot;
+ *r++ = UCharStar;
+ }
+ else if (pchar == UCharUnderLine)
+ *r++ = UCharDot;
+ else if (pchar == UCharLBracket)
+ {
+ *r++ = UCharLBracket;
+ *r++ = UCharQ;
+ *r++ = UCharDotDot;
+ }
+ else if (pchar == UCharBackSlesh || pchar == UCharDot ||
+ pchar == UCharUp || pchar == UCharDollar)
+ {
+ *r++ = UCharBackSlesh;
+ *r++ = pchar;
+ }
+ else
+ *r++ = pchar;
+
+ p++, plen--;
+ }
+ /* Fixed suffix - presumably ")$", closing the group opened by the prefix; TODO confirm */
+ *r++ = UCharRBracket;
+ *r++ = UCharDollar;
+ /* r points one past the last unit written. */
+ return r-result;
+}
+
+PG_FUNCTION_INFO_V1( mchar_similar_escape );
+Datum mchar_similar_escape( PG_FUNCTION_ARGS );
+/* SQL-callable: translate a SIMILAR TO pattern (arg 0) with do_similar_escape(); a NULL
+ * pattern yields NULL, a missing or NULL escape (arg 1) is passed on as NULL / -1. */
+Datum
+mchar_similar_escape( PG_FUNCTION_ARGS ) {
+	MChar *pattern;
+	MChar *escape = NULL;
+	MChar *out;
+	int patunits;
+	int outunits;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pattern = PG_GETARG_MCHAR(0);
+	if (PG_NARGS() >= 2 && !PG_ARGISNULL(1))
+		escape = PG_GETARG_MCHAR(1);
+
+	patunits = UCHARLENGTH(pattern);
+	out = (MChar*)palloc( MCHARHDRSZ + sizeof(UChar)*(23 + 3*patunits) );
+	outunits = do_similar_escape( pattern->data, patunits,
+			escape ? escape->data : NULL,
+			escape ? UCHARLENGTH(escape) : -1,
+			out->data );
+	out->typmod = -1;
+	SET_VARSIZE(out, MCHARHDRSZ + outunits * sizeof(UChar));
+	PG_FREE_IF_COPY(pattern,0);
+	if ( escape )
+		PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MCHAR(out);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_similar_escape );
+Datum mvarchar_similar_escape( PG_FUNCTION_ARGS );
+/* SQL-callable: translate a SIMILAR TO pattern (arg 0) with do_similar_escape(); a NULL
+ * pattern yields NULL, a missing or NULL escape (arg 1) is passed on as NULL / -1. */
+Datum
+mvarchar_similar_escape( PG_FUNCTION_ARGS ) {
+	MVarChar *pattern;
+	MVarChar *escape = NULL;
+	MVarChar *out;
+	int patunits;
+	int outunits;
+
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pattern = PG_GETARG_MVARCHAR(0);
+	if (PG_NARGS() >= 2 && !PG_ARGISNULL(1))
+		escape = PG_GETARG_MVARCHAR(1);
+
+	patunits = UVARCHARLENGTH(pattern);
+	out = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar)*(23 + 3*patunits) );
+	outunits = do_similar_escape( pattern->data, patunits,
+			escape ? escape->data : NULL,
+			escape ? UVARCHARLENGTH(escape) : -1,
+			out->data );
+	SET_VARSIZE(out, MVARCHARHDRSZ + outunits * sizeof(UChar));
+	PG_FREE_IF_COPY(pattern,0);
+	if ( escape )
+		PG_FREE_IF_COPY(escape,1);
+	PG_RETURN_MVARCHAR(out);
+}
+
+#define RE_CACHE_SIZE 32 /* NOTE(review): repeats the identical definition earlier in this file; looks like a leftover */
diff --git a/contrib/mchar/mchar_op.c b/contrib/mchar/mchar_op.c
new file mode 100644
index 00000000000..4694d9cf3c3
--- /dev/null
+++ b/contrib/mchar/mchar_op.c
@@ -0,0 +1,449 @@
+#include "mchar.h"
+
+int
+lengthWithoutSpaceVarChar(MVarChar *m) {
+	/* Length of m in UChar units, not counting trailing units for which m_isspace() holds. */
+	int units = UVARCHARLENGTH(m);
+
+	for ( ; units > 0; units-- )
+		if ( !m_isspace( m->data[ units-1 ] ) ) break;
+	return units;
+}
+
+int
+lengthWithoutSpaceChar(MChar *m) {
+	/* Length of m in UChar units, not counting trailing units for which m_isspace() holds. */
+	int units = UCHARLENGTH(m);
+
+	for ( ; units > 0; units-- )
+		if ( !m_isspace( m->data[ units-1 ] ) ) break;
+	return units;
+}
+
+static inline int
+mchar_icase_compare( MChar *a, MChar *b ) {
+	/* UCharCaseCompare() over both values with trailing whitespace left out. */
+	int alen = lengthWithoutSpaceChar(a);
+	int blen = lengthWithoutSpaceChar(b);
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+static inline int
+mchar_case_compare( MChar *a, MChar *b ) {
+	/* UCharCompare() over both values with trailing whitespace left out. */
+	int alen = lengthWithoutSpaceChar(a);
+	int blen = lengthWithoutSpaceChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+
+#define MCHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mchar_##c##_##type ); \
+Datum mchar_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mchar_##c##_##type(PG_FUNCTION_ARGS) { \
+ MChar *a = PG_GETARG_MCHAR(0); \
+ MChar *b = PG_GETARG_MCHAR(1); \
+ int res = mchar_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+/* MCHARCMPFUNC(c, type, action, ret) defines SQL-callable mchar_<c>_<type>(), returning (mchar_<c>_compare(a,b) action 0). */
+/* Functions built on mchar_case_compare(); for cmp the expression is res + 0, i.e. the comparison result itself. */
+MCHARCMPFUNC( case, eq, ==, BOOL )
+MCHARCMPFUNC( case, ne, !=, BOOL )
+MCHARCMPFUNC( case, lt, <, BOOL )
+MCHARCMPFUNC( case, le, <=, BOOL )
+MCHARCMPFUNC( case, ge, >=, BOOL )
+MCHARCMPFUNC( case, gt, >, BOOL )
+MCHARCMPFUNC( case, cmp, +, INT32 )
+/* The same set built on mchar_icase_compare(). */
+MCHARCMPFUNC( icase, eq, ==, BOOL )
+MCHARCMPFUNC( icase, ne, !=, BOOL )
+MCHARCMPFUNC( icase, lt, <, BOOL )
+MCHARCMPFUNC( icase, le, <=, BOOL )
+MCHARCMPFUNC( icase, ge, >=, BOOL )
+MCHARCMPFUNC( icase, gt, >, BOOL )
+MCHARCMPFUNC( icase, cmp, +, INT32 )
+
+PG_FUNCTION_INFO_V1( mchar_larger );
+Datum mchar_larger( PG_FUNCTION_ARGS );
+/* Return the argument that mchar_icase_compare() ranks higher; on a tie the second one. */
+Datum
+mchar_larger( PG_FUNCTION_ARGS ) {
+	MChar *lhs = PG_GETARG_MCHAR(0);
+	MChar *rhs = PG_GETARG_MCHAR(1);
+	if ( mchar_icase_compare(lhs, rhs) > 0 ) {
+		PG_RETURN_MCHAR(lhs);
+	}
+	PG_RETURN_MCHAR(rhs);
+}
+
+PG_FUNCTION_INFO_V1( mchar_smaller );
+Datum mchar_smaller( PG_FUNCTION_ARGS );
+/* Return the argument that mchar_icase_compare() ranks lower; on a tie the second one. */
+Datum
+mchar_smaller( PG_FUNCTION_ARGS ) {
+	MChar *lhs = PG_GETARG_MCHAR(0);
+	MChar *rhs = PG_GETARG_MCHAR(1);
+	if ( mchar_icase_compare(lhs, rhs) < 0 ) {
+		PG_RETURN_MCHAR(lhs);
+	}
+	PG_RETURN_MCHAR(rhs);
+}
+
+
+PG_FUNCTION_INFO_V1( mchar_concat );
+Datum mchar_concat( PG_FUNCTION_ARGS );
+Datum
+mchar_concat( PG_FUNCTION_ARGS ) {
+ MChar *a = PG_GETARG_MCHAR(0);
+ MChar *b = PG_GETARG_MCHAR(1);
+ MChar *result;
+ int maxcharlen, curlen;
+ int acharlen = u_countChar32(a->data, UCHARLENGTH(a)),
+ bcharlen = u_countChar32(b->data, UCHARLENGTH(b));
+ /* Concatenate two mchar values; an operand with a positive typmod is first space-padded to that many code points. */
+ /* acharlen/bcharlen are code point counts (u_countChar32), while curlen below counts UChar units. */
+ maxcharlen = ((a->typmod<=0) ? acharlen : a->typmod) +
+ ((b->typmod<=0) ? bcharlen : b->typmod);
+ /* The buffer is sized at two UChar units per code point of the padded result. */
+ result = (MChar*)palloc( MCHARHDRSZ + sizeof(UChar) * 2 * maxcharlen );
+
+ curlen = UCHARLENGTH( a );
+ if ( curlen > 0 )
+ memcpy( result->data, a->data, MCHARLENGTH(a) );
+ if ( a->typmod > 0 && acharlen < a->typmod ) {
+ FillWhiteSpace( result->data + curlen, a->typmod-acharlen );
+ curlen += a->typmod-acharlen;
+ }
+ /* Append b and pad it in the same way. */
+ if ( UCHARLENGTH(b) > 0 ) {
+ memcpy( result->data + curlen, b->data, MCHARLENGTH( b ) );
+ curlen += UCHARLENGTH( b );
+ }
+ if ( b->typmod > 0 && bcharlen < b->typmod ) {
+ FillWhiteSpace( result->data + curlen, b->typmod-bcharlen );
+ curlen += b->typmod-bcharlen;
+ }
+ /* The result carries no length modifier of its own. */
+
+ result->typmod = -1;
+ SET_VARSIZE(result, sizeof(UChar) * curlen + MCHARHDRSZ);
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+
+ PG_RETURN_MCHAR(result);
+}
+
+static inline int
+mvarchar_icase_compare( MVarChar *a, MVarChar *b ) {
+	/* UCharCaseCompare() over both values with trailing whitespace left out. */
+	int alen = lengthWithoutSpaceVarChar(a);
+	int blen = lengthWithoutSpaceVarChar(b);
+
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+static inline int
+mvarchar_case_compare( MVarChar *a, MVarChar *b ) {
+	/* UCharCompare() over both values with trailing whitespace left out. */
+	int alen = lengthWithoutSpaceVarChar(a);
+	int blen = lengthWithoutSpaceVarChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+
+#define MVARCHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mvarchar_##c##_##type ); \
+Datum mvarchar_##c##_##type(PG_FUNCTION_ARGS); \
+Datum \
+mvarchar_##c##_##type(PG_FUNCTION_ARGS) { \
+ MVarChar *a = PG_GETARG_MVARCHAR(0); \
+ MVarChar *b = PG_GETARG_MVARCHAR(1); \
+ int res = mvarchar_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+/* MVARCHARCMPFUNC(c, type, action, ret) defines SQL-callable mvarchar_<c>_<type>(), returning (mvarchar_<c>_compare(a,b) action 0). */
+/* Functions built on mvarchar_case_compare(); for cmp the expression is res + 0, i.e. the comparison result itself. */
+MVARCHARCMPFUNC( case, eq, ==, BOOL )
+MVARCHARCMPFUNC( case, ne, !=, BOOL )
+MVARCHARCMPFUNC( case, lt, <, BOOL )
+MVARCHARCMPFUNC( case, le, <=, BOOL )
+MVARCHARCMPFUNC( case, ge, >=, BOOL )
+MVARCHARCMPFUNC( case, gt, >, BOOL )
+MVARCHARCMPFUNC( case, cmp, +, INT32 )
+/* The same set built on mvarchar_icase_compare(). */
+MVARCHARCMPFUNC( icase, eq, ==, BOOL )
+MVARCHARCMPFUNC( icase, ne, !=, BOOL )
+MVARCHARCMPFUNC( icase, lt, <, BOOL )
+MVARCHARCMPFUNC( icase, le, <=, BOOL )
+MVARCHARCMPFUNC( icase, ge, >=, BOOL )
+MVARCHARCMPFUNC( icase, gt, >, BOOL )
+MVARCHARCMPFUNC( icase, cmp, +, INT32 )
+
+PG_FUNCTION_INFO_V1( mvarchar_larger );
+Datum mvarchar_larger( PG_FUNCTION_ARGS );
+/* Return the argument that mvarchar_icase_compare() ranks higher; on a tie the second one. */
+Datum
+mvarchar_larger( PG_FUNCTION_ARGS ) {
+	MVarChar *lhs = PG_GETARG_MVARCHAR(0);
+	MVarChar *rhs = PG_GETARG_MVARCHAR(1);
+	if ( mvarchar_icase_compare(lhs, rhs) > 0 ) {
+		PG_RETURN_MVARCHAR(lhs);
+	}
+	PG_RETURN_MVARCHAR(rhs);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_smaller );
+Datum mvarchar_smaller( PG_FUNCTION_ARGS );
+/* Return the argument that mvarchar_icase_compare() ranks lower; on a tie the second one. */
+Datum
+mvarchar_smaller( PG_FUNCTION_ARGS ) {
+	MVarChar *lhs = PG_GETARG_MVARCHAR(0);
+	MVarChar *rhs = PG_GETARG_MVARCHAR(1);
+	if ( mvarchar_icase_compare(lhs, rhs) < 0 ) {
+		PG_RETURN_MVARCHAR(lhs);
+	}
+	PG_RETURN_MVARCHAR(rhs);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_concat );
+Datum mvarchar_concat( PG_FUNCTION_ARGS );
+/* Concatenate two mvarchar values as they are; no padding is added. */
+Datum
+mvarchar_concat( PG_FUNCTION_ARGS ) {
+	MVarChar *left = PG_GETARG_MVARCHAR(0);
+	MVarChar *right = PG_GETARG_MVARCHAR(1);
+	int lunits = UVARCHARLENGTH(left);
+	int runits = UVARCHARLENGTH(right);
+	int lpoints = u_countChar32(left->data, lunits);
+	int rpoints = u_countChar32(right->data, runits);
+	MVarChar *out;
+
+	/* sized at two UChar units per code point of either input */
+	out = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * (lpoints + rpoints) );
+
+	if ( lunits > 0 )
+		memcpy( out->data, left->data, MVARCHARLENGTH(left) );
+
+	if ( runits > 0 )
+		memcpy( out->data + lunits, right->data, MVARCHARLENGTH(right) );
+
+	SET_VARSIZE(out, sizeof(UChar) * (lunits + runits) + MVARCHARHDRSZ);
+
+	PG_FREE_IF_COPY(left,0);
+	PG_FREE_IF_COPY(right,1);
+
+	PG_RETURN_MVARCHAR(out);
+}
+
+PG_FUNCTION_INFO_V1( mchar_mvarchar_concat );
+Datum mchar_mvarchar_concat( PG_FUNCTION_ARGS );
+Datum
+mchar_mvarchar_concat( PG_FUNCTION_ARGS ) {
+ MChar *a = PG_GETARG_MCHAR(0);
+ MVarChar *b = PG_GETARG_MVARCHAR(1);
+ MVarChar *result;
+ int curlen, maxcharlen;
+ int acharlen = u_countChar32(a->data, UCHARLENGTH(a)),
+ bcharlen = u_countChar32(b->data, UVARCHARLENGTH(b));
+ /* mchar || mvarchar: a is space-padded to its typmod (when positive), b is appended as is; the result is mvarchar. */
+ maxcharlen = ((a->typmod<=0) ? acharlen : a->typmod) + bcharlen;
+ /* The buffer is sized at two UChar units per code point of the padded result. */
+ result = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * maxcharlen );
+
+ curlen = UCHARLENGTH( a );
+ if ( curlen > 0 )
+ memcpy( result->data, a->data, MCHARLENGTH(a) );
+ if ( a->typmod > 0 && acharlen < a->typmod ) {
+ FillWhiteSpace( result->data + curlen, a->typmod-acharlen );
+ curlen += a->typmod-acharlen;
+ }
+ /* curlen counts UChar units written so far. */
+ if ( UVARCHARLENGTH(b) > 0 ) {
+ memcpy( result->data + curlen, b->data, MVARCHARLENGTH( b ) );
+ curlen += UVARCHARLENGTH( b );
+ }
+
+ SET_VARSIZE(result, sizeof(UChar) * curlen + MVARCHARHDRSZ);
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+
+ PG_RETURN_MVARCHAR(result);
+}
+
+PG_FUNCTION_INFO_V1( mvarchar_mchar_concat );
+Datum mvarchar_mchar_concat( PG_FUNCTION_ARGS );
+Datum
+mvarchar_mchar_concat( PG_FUNCTION_ARGS ) {
+ MVarChar *a = PG_GETARG_MVARCHAR(0);
+ MChar *b = PG_GETARG_MCHAR(1);
+ MVarChar *result;
+ int curlen, maxcharlen;
+ int acharlen = u_countChar32(a->data, UVARCHARLENGTH(a)),
+ bcharlen = u_countChar32(b->data, UCHARLENGTH(b));
+ /* mvarchar || mchar: a is copied as is, b is space-padded to its typmod (when positive); the result is mvarchar. */
+ maxcharlen = acharlen + ((b->typmod<=0) ? bcharlen : b->typmod);
+ /* The buffer is sized at two UChar units per code point of the padded result. */
+ result = (MVarChar*)palloc( MVARCHARHDRSZ + sizeof(UChar) * 2 * maxcharlen );
+
+ curlen = UVARCHARLENGTH( a );
+ if ( curlen > 0 )
+ memcpy( result->data, a->data, MVARCHARLENGTH(a) );
+ /* curlen counts UChar units written so far. */
+ if ( UCHARLENGTH(b) > 0 ) {
+ memcpy( result->data + curlen, b->data, MCHARLENGTH( b ) );
+ curlen += UCHARLENGTH( b );
+ }
+ if ( b->typmod > 0 && bcharlen < b->typmod ) {
+ FillWhiteSpace( result->data + curlen, b->typmod-bcharlen );
+ curlen += b->typmod-bcharlen;
+ }
+
+ SET_VARSIZE(result, sizeof(UChar) * curlen + MVARCHARHDRSZ);
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+
+ PG_RETURN_MVARCHAR(result);
+}
+
+/*
+ * mchar <> mvarchar
+ */
+static inline int
+mc_mv_icase_compare( MChar *a, MVarChar *b ) {
+	/* UCharCaseCompare() of an mchar against an mvarchar, trailing whitespace left out. */
+	int alen = lengthWithoutSpaceChar(a);
+	int blen = lengthWithoutSpaceVarChar(b);
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+static inline int
+mc_mv_case_compare( MChar *a, MVarChar *b ) {
+	/* UCharCompare() of an mchar against an mvarchar, trailing whitespace left out. */
+	int alen = lengthWithoutSpaceChar(a);
+	int blen = lengthWithoutSpaceVarChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+
+#define MC_MV_CHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mc_mv_##c##_##type ); \
+Datum mc_mv_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mc_mv_##c##_##type(PG_FUNCTION_ARGS) { \
+ MChar *a = PG_GETARG_MCHAR(0); \
+ MVarChar *b = PG_GETARG_MVARCHAR(1); \
+ int res = mc_mv_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+/* MC_MV_CHARCMPFUNC(c, type, action, ret) defines SQL-callable mc_mv_<c>_<type>(mchar, mvarchar), returning (mc_mv_<c>_compare(a,b) action 0). */
+/* Functions built on mc_mv_case_compare(); for cmp the expression is res + 0, i.e. the comparison result itself. */
+MC_MV_CHARCMPFUNC( case, eq, ==, BOOL )
+MC_MV_CHARCMPFUNC( case, ne, !=, BOOL )
+MC_MV_CHARCMPFUNC( case, lt, <, BOOL )
+MC_MV_CHARCMPFUNC( case, le, <=, BOOL )
+MC_MV_CHARCMPFUNC( case, ge, >=, BOOL )
+MC_MV_CHARCMPFUNC( case, gt, >, BOOL )
+MC_MV_CHARCMPFUNC( case, cmp, +, INT32 )
+/* The same set built on mc_mv_icase_compare(). */
+MC_MV_CHARCMPFUNC( icase, eq, ==, BOOL )
+MC_MV_CHARCMPFUNC( icase, ne, !=, BOOL )
+MC_MV_CHARCMPFUNC( icase, lt, <, BOOL )
+MC_MV_CHARCMPFUNC( icase, le, <=, BOOL )
+MC_MV_CHARCMPFUNC( icase, ge, >=, BOOL )
+MC_MV_CHARCMPFUNC( icase, gt, >, BOOL )
+MC_MV_CHARCMPFUNC( icase, cmp, +, INT32 )
+
+/*
+ * mvarchar <> mchar
+ */
+static inline int
+mv_mc_icase_compare( MVarChar *a, MChar *b ) {
+	/* UCharCaseCompare() of an mvarchar against an mchar, trailing whitespace left out. */
+	int alen = lengthWithoutSpaceVarChar(a);
+	int blen = lengthWithoutSpaceChar(b);
+	return UCharCaseCompare( a->data, alen, b->data, blen );
+}
+
+static inline int
+mv_mc_case_compare( MVarChar *a, MChar *b ) {
+	/* UCharCompare() of an mvarchar against an mchar, trailing whitespace left out. */
+	int alen = lengthWithoutSpaceVarChar(a);
+	int blen = lengthWithoutSpaceChar(b);
+	return UCharCompare( a->data, alen, b->data, blen );
+}
+
+#define MV_MC_CHARCMPFUNC( c, type, action, ret ) \
+PG_FUNCTION_INFO_V1( mv_mc_##c##_##type ); \
+Datum mv_mc_##c##_##type(PG_FUNCTION_ARGS);\
+Datum \
+mv_mc_##c##_##type(PG_FUNCTION_ARGS) { \
+ MVarChar *a = PG_GETARG_MVARCHAR(0); \
+ MChar *b = PG_GETARG_MCHAR(1); \
+ int res = mv_mc_##c##_compare(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+}
+/* MV_MC_CHARCMPFUNC(c, type, action, ret) defines SQL-callable mv_mc_<c>_<type>(mvarchar, mchar), returning (mv_mc_<c>_compare(a,b) action 0). */
+/* Functions built on mv_mc_case_compare(); for cmp the expression is res + 0, i.e. the comparison result itself. */
+MV_MC_CHARCMPFUNC( case, eq, ==, BOOL )
+MV_MC_CHARCMPFUNC( case, ne, !=, BOOL )
+MV_MC_CHARCMPFUNC( case, lt, <, BOOL )
+MV_MC_CHARCMPFUNC( case, le, <=, BOOL )
+MV_MC_CHARCMPFUNC( case, ge, >=, BOOL )
+MV_MC_CHARCMPFUNC( case, gt, >, BOOL )
+MV_MC_CHARCMPFUNC( case, cmp, +, INT32 )
+/* The same set built on mv_mc_icase_compare(). */
+MV_MC_CHARCMPFUNC( icase, eq, ==, BOOL )
+MV_MC_CHARCMPFUNC( icase, ne, !=, BOOL )
+MV_MC_CHARCMPFUNC( icase, lt, <, BOOL )
+MV_MC_CHARCMPFUNC( icase, le, <=, BOOL )
+MV_MC_CHARCMPFUNC( icase, ge, >=, BOOL )
+MV_MC_CHARCMPFUNC( icase, gt, >, BOOL )
+MV_MC_CHARCMPFUNC( icase, cmp, +, INT32 )
+
+#define NULLHASHVALUE (-2147483647) /* value fullhash_<type>() returns for a NULL argument */
+/* FULLEQ_FUNC(type, cmpfunc, hashfunc) defines isfulleq_<type>() (NULL equals NULL, NULL differs from non-NULL) and fullhash_<type>(). */
+#define FULLEQ_FUNC(type, cmpfunc, hashfunc) \
+PG_FUNCTION_INFO_V1( isfulleq_##type ); \
+Datum isfulleq_##type(PG_FUNCTION_ARGS); \
+Datum \
+isfulleq_##type(PG_FUNCTION_ARGS) { \
+ if ( PG_ARGISNULL(0) && PG_ARGISNULL(1) ) \
+ PG_RETURN_BOOL(true); \
+ else if ( PG_ARGISNULL(0) || PG_ARGISNULL(1) ) \
+ PG_RETURN_BOOL(false); \
+ \
+ PG_RETURN_DATUM( DirectFunctionCall2( cmpfunc, \
+ PG_GETARG_DATUM(0), \
+ PG_GETARG_DATUM(1) \
+ ) ); \
+} \
+ \
+PG_FUNCTION_INFO_V1( fullhash_##type ); \
+Datum fullhash_##type(PG_FUNCTION_ARGS); \
+Datum \
+fullhash_##type(PG_FUNCTION_ARGS) { \
+ if ( PG_ARGISNULL(0) ) \
+ PG_RETURN_INT32(NULLHASHVALUE); \
+ \
+ PG_RETURN_DATUM( DirectFunctionCall1( hashfunc, \
+ PG_GETARG_DATUM(0) \
+ ) ); \
+}
+/* Both types use their case-insensitive equality function and their own hash function. */
+FULLEQ_FUNC( mchar, mchar_icase_eq, mchar_hash );
+FULLEQ_FUNC( mvarchar, mvarchar_icase_eq, mvarchar_hash );
+
diff --git a/contrib/mchar/mchar_proc.c b/contrib/mchar/mchar_proc.c
new file mode 100644
index 00000000000..edabfb5eb66
--- /dev/null
+++ b/contrib/mchar/mchar_proc.c
@@ -0,0 +1,315 @@
+#include "mchar.h"
+#include "mb/pg_wchar.h"
+
+PG_FUNCTION_INFO_V1(mchar_length);
+Datum mchar_length(PG_FUNCTION_ARGS);
+
+/*
+ * Length of an mchar value in code points.  Trailing whitespace is not
+ * counted; the trimming is the one done by lengthWithoutSpaceChar().
+ */
+Datum
+mchar_length(PG_FUNCTION_ARGS) {
+	MChar *value = PG_GETARG_MCHAR(0);
+	int32 units = lengthWithoutSpaceChar(value);
+	int32 points = u_countChar32(value->data, units);
+
+	PG_FREE_IF_COPY(value,0);
+
+	PG_RETURN_INT32(points);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_length);
+Datum mvarchar_length(PG_FUNCTION_ARGS);
+
+/*
+ * Length of an mvarchar value in code points.  Trailing whitespace is not
+ * counted; the trimming is the one done by lengthWithoutSpaceVarChar().
+ */
+Datum
+mvarchar_length(PG_FUNCTION_ARGS) {
+	MVarChar *value = PG_GETARG_MVARCHAR(0);
+	int32 units = lengthWithoutSpaceVarChar(value);
+	int32 points = u_countChar32(value->data, units);
+
+	PG_FREE_IF_COPY(value,0);
+
+	PG_RETURN_INT32(points);
+}
+
+static int32
+uchar_substring(
+	UChar *str, int32 strl,
+	int32 start, int32 length, bool length_not_specified,
+	UChar *dst) {
+	int32 S = start-1;	/* start position */
+	int32 S1;	/* adjusted start position */
+	int32 L1;	/* adjusted substring length */
+	int32 subbegin=0, subend=0;
+	/* Copy the part of str (strl units) starting at 1-based code point start into dst; returns the units copied. */
+	S1 = Max(S, 0);
+	if (length_not_specified)
+		L1 = strl;	/* run to the end: U16_FWD_N ignores non-positive counts and stops at strl anyway */
+	else {
+		/* end position */
+		int32 E = S + length;
+
+		/*
+		 * A negative value for L is the only way for the end position to
+		 * be before the start. SQL99 says to throw an error.
+		 */
+		/* NOTE(review): S + length is not guarded against int32 overflow for extreme arguments. */
+		if (E < S)
+			ereport(ERROR,
+					(errcode(ERRCODE_SUBSTRING_ERROR),
+					 errmsg("negative substring length not allowed")));
+
+		/*
+		 * A zero or negative value for the end position can happen if the
+		 * start was negative or one. SQL99 says to return a zero-length
+		 * string.
+		 */
+		if (E < 0)
+			return 0;
+
+		L1 = E - S1;
+	}
+	/* Both skips count code points, so surrogate pairs are never split. */
+	U16_FWD_N( str, subbegin, strl, S1 );
+	if ( subbegin >= strl )
+		return 0;
+	subend = subbegin;
+	U16_FWD_N( str, subend, strl, L1 );
+
+	memcpy( dst, str+subbegin, sizeof(UChar)*(subend-subbegin) );
+
+	return subend-subbegin;
+}
+
+PG_FUNCTION_INFO_V1(mchar_substring);
+Datum mchar_substring(PG_FUNCTION_ARGS);
+/* substring(mchar, start, count): the slicing itself is done by
+ * uchar_substring(); the result keeps the typmod of the input value. */
+Datum
+mchar_substring(PG_FUNCTION_ARGS) {
+	MChar *input = PG_GETARG_MCHAR(0);
+	int32 from = PG_GETARG_INT32(1);
+	int32 count = PG_GETARG_INT32(2);
+	/* the output buffer is as large as the whole input value */
+	MChar *piece = (MChar*)palloc( VARSIZE(input) );
+	int32 units = uchar_substring( input->data, UCHARLENGTH(input),
+			from, count, false, piece->data );
+
+	piece->typmod = input->typmod;
+	SET_VARSIZE(piece, MCHARHDRSZ + units *sizeof(UChar));
+
+	PG_FREE_IF_COPY(input, 0);
+	PG_RETURN_MCHAR(piece);
+}
+
+PG_FUNCTION_INFO_V1(mchar_substring_no_len);
+Datum mchar_substring_no_len(PG_FUNCTION_ARGS);
+/* substring(mchar, start) without a count: uchar_substring() is called
+ * with length_not_specified set and a dummy length of -1.  The result
+ * keeps the typmod of the input value. */
+Datum
+mchar_substring_no_len(PG_FUNCTION_ARGS) {
+	MChar *input = PG_GETARG_MCHAR(0);
+	int32 from = PG_GETARG_INT32(1);
+	/* the output buffer is as large as the whole input value */
+	MChar *piece = (MChar*)palloc( VARSIZE(input) );
+	int32 units = uchar_substring( input->data, UCHARLENGTH(input),
+			from, -1, true, piece->data );
+
+	piece->typmod = input->typmod;
+	SET_VARSIZE(piece, MCHARHDRSZ + units *sizeof(UChar));
+
+	PG_FREE_IF_COPY(input, 0);
+	PG_RETURN_MCHAR(piece);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_substring);
+Datum mvarchar_substring(PG_FUNCTION_ARGS);
+/* substring(mvarchar, start, count): the slicing itself is done by
+ * uchar_substring(). */
+Datum
+mvarchar_substring(PG_FUNCTION_ARGS) {
+	MVarChar *input = PG_GETARG_MVARCHAR(0);
+	int32 from = PG_GETARG_INT32(1);
+	int32 count = PG_GETARG_INT32(2);
+	/* the output buffer is as large as the whole input value */
+	MVarChar *piece = (MVarChar*)palloc( VARSIZE(input) );
+	int32 units = uchar_substring( input->data, UVARCHARLENGTH(input),
+			from, count, false, piece->data );
+
+	SET_VARSIZE(piece, MVARCHARHDRSZ + units *sizeof(UChar));
+
+	PG_FREE_IF_COPY(input, 0);
+	PG_RETURN_MVARCHAR(piece);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_substring_no_len);
+Datum mvarchar_substring_no_len(PG_FUNCTION_ARGS);
+/* substring(mvarchar, start) without a count: uchar_substring() is
+ * called with length_not_specified set and a dummy length of -1.
+ */
+Datum
+mvarchar_substring_no_len(PG_FUNCTION_ARGS) {
+	MVarChar *input = PG_GETARG_MVARCHAR(0);
+	int32 from = PG_GETARG_INT32(1);
+	/* the output buffer is as large as the whole input value */
+	MVarChar *piece = (MVarChar*)palloc( VARSIZE(input) );
+	int32 units = uchar_substring( input->data, UVARCHARLENGTH(input),
+			from, -1, true, piece->data );
+
+	SET_VARSIZE(piece, MVARCHARHDRSZ + units *sizeof(UChar));
+
+	PG_FREE_IF_COPY(input, 0);
+	PG_RETURN_MVARCHAR(piece);
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_hash);
+/* Hash an mvarchar value with hash_uchar(), leaving trailing whitespace out. */
+Datum
+mvarchar_hash(PG_FUNCTION_ARGS) {
+	MVarChar *value = PG_GETARG_MVARCHAR(0);
+	int units = lengthWithoutSpaceVarChar(value);
+	Datum hashed = hash_uchar( value->data, units );
+
+	PG_FREE_IF_COPY(value, 0);
+	PG_RETURN_DATUM( hashed );
+}
+
+PG_FUNCTION_INFO_V1(mchar_hash);
+/* Hash an mchar value with hash_uchar(), leaving trailing whitespace out. */
+Datum
+mchar_hash(PG_FUNCTION_ARGS) {
+	MChar *value = PG_GETARG_MCHAR(0);
+	int units = lengthWithoutSpaceChar(value);
+	Datum hashed = hash_uchar( value->data, units );
+
+	PG_FREE_IF_COPY(value, 0);
+	PG_RETURN_DATUM( hashed );
+}
+
+PG_FUNCTION_INFO_V1(mchar_upper);
+Datum mchar_upper(PG_FUNCTION_ARGS);
+Datum
+mchar_upper(PG_FUNCTION_ARGS) {
+	MChar *src = PG_GETARG_MCHAR(0);
+	MChar *dst = (MChar*)palloc( VARSIZE(src) * 2 );
+	/* ICU takes the destination capacity in UChar units, not in bytes */
+	int32 capacity = (VARSIZE(src) * 2 - MCHARHDRSZ) / sizeof(UChar);
+
+	/* Upper-case src with ICU (NULL locale argument); the typmod is carried over. */
+	dst->len = MCHARHDRSZ;
+	dst->typmod = src->typmod;
+	if ( UCHARLENGTH(src) != 0 ) {
+		int length;
+		UErrorCode err = U_ZERO_ERROR;
+		length = u_strToUpper( dst->data, capacity,
+				src->data, UCHARLENGTH(src),
+				NULL, &err );
+		/* a result too long for dst is reported by ICU instead of being written past the end */
+		if ( U_FAILURE(err) )
+			elog(ERROR,"ICU u_strToUpper fails and returns %d (%s)", err, u_errorName(err));
+		Assert( length <= capacity );
+		dst->len += sizeof(UChar) * length;
+	}
+
+	SET_VARSIZE( dst, dst->len );
+	PG_FREE_IF_COPY(src, 0);
+	PG_RETURN_MCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mchar_lower);
+Datum mchar_lower(PG_FUNCTION_ARGS);
+Datum
+mchar_lower(PG_FUNCTION_ARGS) {
+	MChar *src = PG_GETARG_MCHAR(0);
+	MChar *dst = (MChar*)palloc( VARSIZE(src) * 2 );
+	/* ICU takes the destination capacity in UChar units, not in bytes */
+	int32 capacity = (VARSIZE(src) * 2 - MCHARHDRSZ) / sizeof(UChar);
+
+	/* Lower-case src with ICU (NULL locale argument); the typmod is carried over. */
+	dst->len = MCHARHDRSZ;
+	dst->typmod = src->typmod;
+	if ( UCHARLENGTH(src) != 0 ) {
+		int length;
+		UErrorCode err = U_ZERO_ERROR;
+		length = u_strToLower( dst->data, capacity,
+				src->data, UCHARLENGTH(src),
+				NULL, &err );
+		/* a result too long for dst is reported by ICU instead of being written past the end */
+		if ( U_FAILURE(err) )
+			elog(ERROR,"ICU u_strToLower fails and returns %d (%s)", err, u_errorName(err));
+		Assert( length <= capacity );
+		dst->len += sizeof(UChar) * length;
+	}
+
+	SET_VARSIZE( dst, dst->len );
+	PG_FREE_IF_COPY(src, 0);
+	PG_RETURN_MCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_upper);
+Datum mvarchar_upper(PG_FUNCTION_ARGS);
+Datum
+mvarchar_upper(PG_FUNCTION_ARGS) {
+	MVarChar *src = PG_GETARG_MVARCHAR(0);
+	MVarChar *dst = (MVarChar*)palloc( VARSIZE(src) * 2 );
+	/* ICU takes the destination capacity in UChar units, not in bytes */
+	int32 capacity = (VARSIZE(src) * 2 - MVARCHARHDRSZ) / sizeof(UChar);
+
+	/* Upper-case src with ICU (NULL locale argument). */
+	dst->len = MVARCHARHDRSZ;
+	if ( UVARCHARLENGTH(src) != 0 ) {
+		int length;
+		UErrorCode err = U_ZERO_ERROR;
+
+		length = u_strToUpper( dst->data, capacity,
+				src->data, UVARCHARLENGTH(src),
+				NULL, &err );
+		/* a result too long for dst is reported by ICU instead of being written past the end */
+		if ( U_FAILURE(err) )
+			elog(ERROR,"ICU u_strToUpper fails and returns %d (%s)", err, u_errorName(err));
+		Assert( length <= capacity );
+		dst->len += sizeof(UChar) * length;
+	}
+
+	SET_VARSIZE( dst, dst->len );
+	PG_FREE_IF_COPY(src, 0);
+	PG_RETURN_MVARCHAR( dst );
+}
+
+PG_FUNCTION_INFO_V1(mvarchar_lower);
+Datum mvarchar_lower(PG_FUNCTION_ARGS);
+Datum
+mvarchar_lower(PG_FUNCTION_ARGS) {
+	MVarChar *src = PG_GETARG_MVARCHAR(0);
+	MVarChar *dst = (MVarChar*)palloc( VARSIZE(src) * 2 );
+	/* ICU takes the destination capacity in UChar units, not in bytes */
+	int32 capacity = (VARSIZE(src) * 2 - MVARCHARHDRSZ) / sizeof(UChar);
+
+	/* Lower-case src with ICU (NULL locale argument). */
+	dst->len = MVARCHARHDRSZ;
+	if ( UVARCHARLENGTH(src) != 0 ) {
+		int length;
+		UErrorCode err = U_ZERO_ERROR;
+
+		length = u_strToLower( dst->data, capacity,
+				src->data, UVARCHARLENGTH(src),
+				NULL, &err );
+		/* a result too long for dst is reported by ICU instead of being written past the end */
+		if ( U_FAILURE(err) )
+			elog(ERROR,"ICU u_strToLower fails and returns %d (%s)", err, u_errorName(err));
+		Assert( length <= capacity );
+		dst->len += sizeof(UChar) * length;
+	}
+
+	SET_VARSIZE( dst, dst->len );
+	PG_FREE_IF_COPY(src, 0);
+	PG_RETURN_MVARCHAR( dst );
+}
+
+
diff --git a/contrib/mchar/mchar_recode.c b/contrib/mchar/mchar_recode.c
new file mode 100644
index 00000000000..12bc6d4f3aa
--- /dev/null
+++ b/contrib/mchar/mchar_recode.c
@@ -0,0 +1,166 @@
+#include "mchar.h"
+#include "access/hash.h"
+
+#include "unicode/ucol.h"
+#include "unicode/ucnv.h"
+
+static UConverter *cnvDB = NULL; /* converter used by Char2UChar/UChar2Char; opened by createUObjs() */
+static UCollator *colCaseInsensitive = NULL; /* collator behind UCharCaseCompare() and hash_uchar() */
+static UCollator *colCaseSensitive = NULL; /* collator behind UCharCompare() */
+
+static void
+createUObjs(void) {
+	/* Lazily create the converter and the two collators; objects that already exist are reused. */
+	if ( !cnvDB ) {
+		UErrorCode err = U_ZERO_ERROR;
+		if ( GetDatabaseEncoding() == PG_UTF8 )
+			cnvDB = ucnv_open("UTF8", &err);
+		else
+			cnvDB = ucnv_open(NULL, &err);
+		if ( U_FAILURE(err) || cnvDB == NULL )
+			elog(ERROR,"ICU ucnv_open returns %d (%s)", err, u_errorName(err));
+	}
+
+	if ( !colCaseInsensitive ) {
+		UErrorCode err = U_ZERO_ERROR;
+
+		colCaseInsensitive = ucol_open("", &err);
+		/* check and release the collator just opened, not a different object */
+		if ( U_FAILURE(err) || colCaseInsensitive == NULL ) {
+			if ( colCaseInsensitive )
+				ucol_close( colCaseInsensitive );
+			colCaseInsensitive = NULL;
+			elog(ERROR,"ICU ucol_open returns %d (%s)", err, u_errorName(err));
+		}
+
+		ucol_setStrength( colCaseInsensitive, UCOL_SECONDARY );
+	}
+
+	if ( !colCaseSensitive ) {
+		UErrorCode err = U_ZERO_ERROR;
+
+		colCaseSensitive = ucol_open("", &err);
+		if ( U_FAILURE(err) || colCaseSensitive == NULL ) {
+			if ( colCaseSensitive )
+				ucol_close( colCaseSensitive );
+			colCaseSensitive = NULL;
+			elog(ERROR,"ICU ucol_open returns %d (%s)", err, u_errorName(err));
+		}
+
+		ucol_setAttribute(colCaseSensitive, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &err);
+		if (U_FAILURE(err)) {
+			ucol_close( colCaseSensitive );	/* known to be open at this point */
+			colCaseSensitive = NULL;
+			elog(ERROR,"ICU ucol_setAttribute returns %d (%s)", err, u_errorName(err));
+		}
+	}
+}
+
+int
+Char2UChar(const char * src, int srclen, UChar *dst) {
+	UErrorCode status = 0;
+	int nunits;
+
+	/* Convert srclen bytes through cnvDB; dst is presented to ICU as holding srclen*4 units. */
+	createUObjs();
+	nunits = ucnv_toUChars( cnvDB, dst, srclen*4, src, srclen, &status );
+	if ( U_FAILURE(status) )
+		elog(ERROR,"ICU ucnv_toUChars returns %d (%s)", status, u_errorName(status));
+	return nunits;
+}
+
+int
+UChar2Char(const UChar * src, int srclen, char *dst) {
+	UErrorCode status = 0;
+	int nbytes;
+
+	/* Convert srclen units through cnvDB; dst is presented to ICU as holding srclen*4 bytes. */
+	createUObjs();
+	nbytes = ucnv_fromUChars( cnvDB, dst, srclen*4, src, srclen, &status );
+	if ( U_FAILURE(status) )
+		elog(ERROR,"ICU ucnv_fromUChars returns %d (%s)", status, u_errorName(status));
+	return nbytes;
+}
+
+int
+UChar2Wchar(UChar * src, int srclen, pg_wchar *dst) {
+	/* Two hops: UChar units -> bytes via UChar2Char(), then bytes -> pg_wchar. */
+	char *bytes = palloc(sizeof(char)*srclen*4);
+	int nbytes = UChar2Char(src, srclen, bytes);
+	int nwide = pg_mb2wchar_with_len( bytes, dst, nbytes );
+
+	pfree(bytes);
+
+	return nwide;
+}
+
+static UChar UCharWhiteSpace = 0;	/* the space character as a UChar; 0 until first use */
+
+/* Store n copies of the space character starting at dst. */
+void
+FillWhiteSpace( UChar *dst, int n ) {
+	int i;
+
+	if ( UCharWhiteSpace == 0 ) {
+		UErrorCode err = 0;
+		int len;
+		u_strFromUTF8( &UCharWhiteSpace, 1, &len, " ", 1, &err);
+		Assert( len==1 );
+		Assert( !U_FAILURE(err) );
+	}
+	for ( i = 0; i < n; i++ )
+		dst[i] = UCharWhiteSpace;
+}
+
+int
+UCharCaseCompare(UChar * a, int alen, UChar *b, int blen) {
+	UCollationResult order;
+
+	/* result of ucol_strcoll() under the colCaseInsensitive collator, as an int */
+	createUObjs();
+	order = ucol_strcoll( colCaseInsensitive, a, alen, b, blen );
+	return (int) order;
+}
+
+int
+UCharCompare(UChar * a, int alen, UChar *b, int blen) {
+	UCollationResult order;
+
+	/* result of ucol_strcoll() under the colCaseSensitive collator, as an int */
+	createUObjs();
+	order = ucol_strcoll( colCaseSensitive, a, alen, b, blen );
+	return (int) order;
+}
+
+Datum
+hash_uchar( UChar *s, int len ) {
+ int32 length = INT_MAX, i;
+ Datum res;
+ uint8 *d;
+ /* Hash s (len UChar units) through its sort key under the colCaseInsensitive collator. */
+ if ( len == 0 )
+ return hash_any( NULL, 0 );
+
+ createUObjs();
+ /* Retry with a buffer of 2*len, 4*len, ... bytes until the reported key length is strictly smaller than the buffer. */
+ for(i=2;; i*=2)
+ {
+ d = palloc(len * i);
+ length = ucol_getSortKey(colCaseInsensitive, s, len, d, len*i);
+
+ if (length == 0)
+ elog(ERROR,"ICU ucol_getSortKey fails");
+
+ if (length < len*i)
+ break;
+
+ pfree(d);
+ }
+ /* d now holds the key that fit; length is its size in bytes. */
+ res = hash_any( (unsigned char*) d, length);
+
+ pfree(d);
+
+ return res;
+}
+
diff --git a/contrib/mchar/meson.build b/contrib/mchar/meson.build
new file mode 100644
index 00000000000..2e22ae8a15b
--- /dev/null
+++ b/contrib/mchar/meson.build
@@ -0,0 +1,51 @@
+mchar_sources = files( # C sources compiled into the mchar module
+ 'mchar_io.c',
+ 'mchar_proc.c',
+ 'mchar_op.c',
+ 'mchar_recode.c',
+ 'mchar_like.c'
+)
+
+if host_system == 'windows' # Windows builds also compile in the rc_lib_gen output for win32ver_rc
+ mchar_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'mchar',
+ '--FILEDESC', 'mchar',])
+endif
+
+mchar = shared_module('mchar',
+ mchar_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': [icu_i18n, contrib_mod_args['dependencies']], # shared contrib dependencies plus icu_i18n
+ },
+)
+contrib_targets += mchar
+
+install_data( # control file plus the install and upgrade SQL scripts
+ 'mchar.control',
+ 'mchar--2.2.1.sql',
+ 'mchar--2.0.1--2.1.sql',
+ 'mchar--2.0--2.1.sql',
+ 'mchar--2.1.1--2.2.sql',
+ 'mchar--2.1--2.2.sql',
+ 'mchar--2.2--2.2.1.sql',
+ 'mchar--unpackaged--2.0.sql',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'mchar',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [ # init is listed first: it creates the extension and the ch/chvch tables the later scripts query
+ 'init',
+ 'mchar',
+ 'mvarchar',
+ 'mm',
+ 'like',
+ 'compat',
+ ]
+ },
+}
+
+# TODO: DOCS = README.mchar
\ No newline at end of file
diff --git a/contrib/mchar/sql/compat.sql b/contrib/mchar/sql/compat.sql
new file mode 100644
index 00000000000..d5b6a986960
--- /dev/null
+++ b/contrib/mchar/sql/compat.sql
@@ -0,0 +1,11 @@
+--- table based checks
+
+select '<' || ch || '>', '<' || vch || '>' from chvch;
+select * from chvch where vch = 'One space';
+select * from chvch where vch = 'One space ';
+
+select * from ch where chcol = 'abcd' order by chcol;
+select * from ch t1 join ch t2 on t1.chcol = t2.chcol order by t1.chcol, t2.chcol;
+select * from ch where chcol > 'abcd' and chcol<'ee';
+select * from ch order by chcol;
+
diff --git a/contrib/mchar/sql/init.sql b/contrib/mchar/sql/init.sql
new file mode 100644
index 00000000000..04310044458
--- /dev/null
+++ b/contrib/mchar/sql/init.sql
@@ -0,0 +1,23 @@
+CREATE EXTENSION mchar;
+
+create table ch (
+ chcol mchar(32)
+) without oids;
+
+insert into ch values('abcd');
+insert into ch values('AbcD');
+insert into ch values('abcz');
+insert into ch values('defg');
+insert into ch values('dEfg');
+insert into ch values('ee');
+insert into ch values('Ee');
+
+create table chvch (
+ ch mchar(12),
+ vch mvarchar(12)
+) without oids;
+
+insert into chvch values('No spaces', 'No spaces');
+insert into chvch values('One space ', 'One space ');
+insert into chvch values('1 space', '1 space ');
+
diff --git a/contrib/mchar/sql/like.sql b/contrib/mchar/sql/like.sql
new file mode 100644
index 00000000000..c29cf4eb6f9
--- /dev/null
+++ b/contrib/mchar/sql/like.sql
@@ -0,0 +1,231 @@
+-- simplest examples
+-- E061-04 like predicate
+set standard_conforming_strings=off;
+
+SELECT 'hawkeye'::mchar LIKE 'h%' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%' AS "false";
+
+SELECT 'hawkeye'::mchar LIKE 'H%' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'H%' AS "false";
+
+SELECT 'hawkeye'::mchar LIKE 'indio%' AS "false";
+SELECT 'hawkeye'::mchar NOT LIKE 'indio%' AS "true";
+
+SELECT 'hawkeye'::mchar LIKE 'h%eye' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%eye' AS "false";
+
+SELECT 'indio'::mchar LIKE '_ndio' AS "true";
+SELECT 'indio'::mchar NOT LIKE '_ndio' AS "false";
+
+SELECT 'indio'::mchar LIKE 'in__o' AS "true";
+SELECT 'indio'::mchar NOT LIKE 'in__o' AS "false";
+
+SELECT 'indio'::mchar LIKE 'in_o' AS "false";
+SELECT 'indio'::mchar NOT LIKE 'in_o' AS "true";
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%' AS "false";
+
+SELECT 'hawkeye'::mvarchar LIKE 'H%' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'H%' AS "false";
+
+SELECT 'hawkeye'::mvarchar LIKE 'indio%' AS "false";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'indio%' AS "true";
+
+SELECT 'hawkeye'::mvarchar LIKE 'h%eye' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%eye' AS "false";
+
+SELECT 'indio'::mvarchar LIKE '_ndio' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE '_ndio' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'in__o' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE 'in__o' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'in_o' AS "false";
+SELECT 'indio'::mvarchar NOT LIKE 'in_o' AS "true";
+
+-- unused escape character
+SELECT 'hawkeye'::mchar LIKE 'h%'::mchar ESCAPE '#' AS "true";
+SELECT 'hawkeye'::mchar NOT LIKE 'h%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mchar LIKE 'ind_o'::mchar ESCAPE '$' AS "true";
+SELECT 'indio'::mchar NOT LIKE 'ind_o'::mchar ESCAPE '$' AS "false";
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+SELECT 'h%'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%'::mchar ESCAPE '#' AS "false";
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%'::mchar ESCAPE '#' AS "true";
+
+SELECT 'h%wkeye'::mchar LIKE 'h#%%'::mchar ESCAPE '#' AS "true";
+SELECT 'h%wkeye'::mchar NOT LIKE 'h#%%'::mchar ESCAPE '#' AS "false";
+
+SELECT 'h%awkeye'::mchar LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "true";
+SELECT 'h%awkeye'::mchar NOT LIKE 'h#%a%k%e'::mchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mchar LIKE '_ndio'::mchar ESCAPE '$' AS "true";
+SELECT 'indio'::mchar NOT LIKE '_ndio'::mchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mchar LIKE 'i$_d_o'::mchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d_o'::mchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mchar LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "false";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_nd_o'::mchar ESCAPE '$' AS "true";
+
+SELECT 'i_dio'::mchar LIKE 'i$_d%o'::mchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mchar NOT LIKE 'i$_d%o'::mchar ESCAPE '$' AS "false";
+
+-- escape character same as pattern character
+SELECT 'maca'::mchar LIKE 'm%aca' ESCAPE '%'::mchar AS "true";
+SELECT 'maca'::mchar NOT LIKE 'm%aca' ESCAPE '%'::mchar AS "false";
+
+SELECT 'ma%a'::mchar LIKE 'm%a%%a' ESCAPE '%'::mchar AS "true";
+SELECT 'ma%a'::mchar NOT LIKE 'm%a%%a' ESCAPE '%'::mchar AS "false";
+
+SELECT 'bear'::mchar LIKE 'b_ear' ESCAPE '_'::mchar AS "true";
+SELECT 'bear'::mchar NOT LIKE 'b_ear'::mchar ESCAPE '_' AS "false";
+
+SELECT 'be_r'::mchar LIKE 'b_e__r' ESCAPE '_'::mchar AS "true";
+SELECT 'be_r'::mchar NOT LIKE 'b_e__r' ESCAPE '_'::mchar AS "false";
+
+SELECT 'be_r'::mchar LIKE '__e__r' ESCAPE '_'::mchar AS "false";
+SELECT 'be_r'::mchar NOT LIKE '__e__r'::mchar ESCAPE '_' AS "true";
+
+-- unused escape character
+SELECT 'hawkeye'::mvarchar LIKE 'h%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'hawkeye'::mvarchar NOT LIKE 'h%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mvarchar LIKE 'ind_o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE 'ind_o'::mvarchar ESCAPE '$' AS "false";
+
+-- escape character
+-- E061-05 like predicate with escape clause
+SELECT 'h%'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%'::mvarchar ESCAPE '#' AS "false";
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%'::mvarchar ESCAPE '#' AS "true";
+
+SELECT 'h%wkeye'::mvarchar LIKE 'h#%%'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%wkeye'::mvarchar NOT LIKE 'h#%%'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'h%awkeye'::mvarchar LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "true";
+SELECT 'h%awkeye'::mvarchar NOT LIKE 'h#%a%k%e'::mvarchar ESCAPE '#' AS "false";
+
+SELECT 'indio'::mvarchar LIKE '_ndio'::mvarchar ESCAPE '$' AS "true";
+SELECT 'indio'::mvarchar NOT LIKE '_ndio'::mvarchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d_o'::mvarchar ESCAPE '$' AS "false";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "false";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_nd_o'::mvarchar ESCAPE '$' AS "true";
+
+SELECT 'i_dio'::mvarchar LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "true";
+SELECT 'i_dio'::mvarchar NOT LIKE 'i$_d%o'::mvarchar ESCAPE '$' AS "false";
+
+-- escape character same as pattern character
+SELECT 'maca'::mvarchar LIKE 'm%aca' ESCAPE '%'::mvarchar AS "true";
+SELECT 'maca'::mvarchar NOT LIKE 'm%aca' ESCAPE '%'::mvarchar AS "false";
+
+SELECT 'ma%a'::mvarchar LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "true";
+SELECT 'ma%a'::mvarchar NOT LIKE 'm%a%%a' ESCAPE '%'::mvarchar AS "false";
+
+SELECT 'bear'::mvarchar LIKE 'b_ear' ESCAPE '_'::mvarchar AS "true";
+SELECT 'bear'::mvarchar NOT LIKE 'b_ear'::mvarchar ESCAPE '_' AS "false";
+
+SELECT 'be_r'::mvarchar LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "true";
+SELECT 'be_r'::mvarchar NOT LIKE 'b_e__r' ESCAPE '_'::mvarchar AS "false";
+
+SELECT 'be_r'::mvarchar LIKE '__e__r' ESCAPE '_'::mvarchar AS "false";
+SELECT 'be_r'::mvarchar NOT LIKE '__e__r'::mvarchar ESCAPE '_' AS "true";
+
+-- similar to
+
+SELECT 'abc'::mchar SIMILAR TO 'abc'::mchar AS "true";
+SELECT 'abc'::mchar SIMILAR TO 'a'::mchar AS "false";
+SELECT 'abc'::mchar SIMILAR TO '%(b|d)%'::mchar AS "true";
+SELECT 'abc'::mchar SIMILAR TO '(b|c)%'::mchar AS "false";
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar AS "false";
+SELECT 'h%'::mchar SIMILAR TO 'h#%'::mchar ESCAPE '#' AS "true";
+
+SELECT 'abc'::mvarchar SIMILAR TO 'abc'::mvarchar AS "true";
+SELECT 'abc'::mvarchar SIMILAR TO 'a'::mvarchar AS "false";
+SELECT 'abc'::mvarchar SIMILAR TO '%(b|d)%'::mvarchar AS "true";
+SELECT 'abc'::mvarchar SIMILAR TO '(b|c)%'::mvarchar AS "false";
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar AS "false";
+SELECT 'h%'::mvarchar SIMILAR TO 'h#%'::mvarchar ESCAPE '#' AS "true";
+
+-- index support
+
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+set enable_seqscan = off;
+explain (costs off)
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB_d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%d' order by chcol using &<;
+SELECT * from ch where chcol like 'aB%' order by chcol using &<;
+SELECT * from ch where chcol like '%BC%' order by chcol using &<;
+set enable_seqscan = on;
+
+
+create table testt (f1 mchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+set enable_seqscan = on;
+drop table testt;
+
+create table testt (f1 mvarchar(10));
+insert into testt values ('Abc-000001');
+insert into testt values ('Abc-000002');
+insert into testt values ('0000000001');
+insert into testt values ('0000000002');
+
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+select * from testt where f1::mchar like E' %'::mchar;
+create index testindex on testt(f1);
+set enable_seqscan=off;
+explain (costs off)
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select f1 from testt where f1::mvarchar like E'Abc\\-%'::mvarchar;
+select * from testt where f1::mchar like E'Abc\\-%'::mchar;
+select * from testt where f1::mchar like E'Abc\\- %'::mchar;
+select * from testt where f1::mchar like E' %'::mchar;
+set enable_seqscan = on;
+drop table testt;
+
+
+CREATE TABLE test ( code mchar(5) NOT NULL );
+insert into test values('1111 ');
+insert into test values('111 ');
+insert into test values('11 ');
+insert into test values('1 ');
+
+SELECT * FROM test WHERE code LIKE ('% ');
+
+set escape_string_warning = off;
+SELECT CASE WHEN ('_'::text SIMILAR TO '[\\_]'::text ESCAPE '\\'::text) THEN TRUE ELSE FALSE END ;
+SELECT CASE WHEN ('_'::mchar SIMILAR TO '[\\_]'::mchar ESCAPE '\\'::mchar) THEN TRUE ELSE FALSE END ;
+SELECT CASE WHEN ('_'::mvarchar SIMILAR TO '[\\_]'::mvarchar ESCAPE '\\'::mvarchar) THEN TRUE ELSE FALSE END ;
+reset escape_string_warning;
+reset standard_conforming_strings;
+
+
diff --git a/contrib/mchar/sql/mchar.sql b/contrib/mchar/sql/mchar.sql
new file mode 100644
index 00000000000..8850aa3e9d7
--- /dev/null
+++ b/contrib/mchar/sql/mchar.sql
@@ -0,0 +1,90 @@
+-- I/O tests
+
+select '1'::mchar;
+select '2 '::mchar;
+select '10 '::mchar;
+
+select '1'::mchar(2);
+select '2 '::mchar(2);
+select '3 '::mchar(2);
+select '10 '::mchar(2);
+
+select ' '::mchar(10);
+select ' '::mchar;
+
+-- operations & functions
+
+select length('1'::mchar);
+select length('2 '::mchar);
+select length('10 '::mchar);
+
+select length('1'::mchar(2));
+select length('2 '::mchar(2));
+select length('3 '::mchar(2));
+select length('10 '::mchar(2));
+
+select length(' '::mchar(10));
+select length(' '::mchar);
+
+select 'asd'::mchar(10) || '>'::mchar(10);
+select length('asd'::mchar(10) || '>'::mchar(10));
+select 'asd'::mchar(2) || '>'::mchar(10);
+select length('asd'::mchar(2) || '>'::mchar(10));
+
+-- Comparisons
+
+select 'asdf'::mchar = 'aSdf'::mchar;
+select 'asdf'::mchar = 'aSdf '::mchar;
+select 'asdf'::mchar = 'aSdf 1'::mchar(4);
+select 'asdf'::mchar = 'aSdf 1'::mchar(5);
+select 'asdf'::mchar = 'aSdf 1'::mchar(6);
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(5);
+select 'asdf'::mchar(3) = 'aSdf 1'::mchar(3);
+
+select 'asdf'::mchar < 'aSdf'::mchar;
+select 'asdf'::mchar < 'aSdf '::mchar;
+select 'asdf'::mchar < 'aSdf 1'::mchar(4);
+select 'asdf'::mchar < 'aSdf 1'::mchar(5);
+select 'asdf'::mchar < 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar <= 'aSdf'::mchar;
+select 'asdf'::mchar <= 'aSdf '::mchar;
+select 'asdf'::mchar <= 'aSdf 1'::mchar(4);
+select 'asdf'::mchar <= 'aSdf 1'::mchar(5);
+select 'asdf'::mchar <= 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar >= 'aSdf'::mchar;
+select 'asdf'::mchar >= 'aSdf '::mchar;
+select 'asdf'::mchar >= 'aSdf 1'::mchar(4);
+select 'asdf'::mchar >= 'aSdf 1'::mchar(5);
+select 'asdf'::mchar >= 'aSdf 1'::mchar(6);
+
+select 'asdf'::mchar > 'aSdf'::mchar;
+select 'asdf'::mchar > 'aSdf '::mchar;
+select 'asdf'::mchar > 'aSdf 1'::mchar(4);
+select 'asdf'::mchar > 'aSdf 1'::mchar(5);
+select 'asdf'::mchar > 'aSdf 1'::mchar(6);
+
+select max(ch) from chvch;
+select min(ch) from chvch;
+
+select substr('1234567890'::mchar, 3) = '34567890' as "34567890";
+select substr('1234567890'::mchar, 4, 3) = '456' as "456";
+
+select lower('asdfASDF'::mchar);
+select upper('asdfASDF'::mchar);
+
+select 'asd'::mchar == 'aSd'::mchar;
+select 'asd'::mchar == 'aCd'::mchar;
+select 'asd'::mchar == NULL;
+select NULL == 'aCd'::mchar;
+select NULL::mchar == NULL;
+
+
+--Note: here we use different space symbols, be carefull to copy it!
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+set enable_hashagg=off;
+select v, count(*) from
+(values (1, '4 242'::mchar), (2, '4 242'), (3, 'aSDF'), (4, 'asdf')) as t(i,v) group by v order by v;
+reset enable_hashagg;
diff --git a/contrib/mchar/sql/mm.sql b/contrib/mchar/sql/mm.sql
new file mode 100644
index 00000000000..2e11b937040
--- /dev/null
+++ b/contrib/mchar/sql/mm.sql
@@ -0,0 +1,196 @@
+select 'asd'::mchar::mvarchar;
+select 'asd '::mchar::mvarchar;
+select 'asd'::mchar(2)::mvarchar;
+select 'asd '::mchar(2)::mvarchar;
+select 'asd'::mchar(5)::mvarchar;
+select 'asd '::mchar(5)::mvarchar;
+select 'asd'::mchar::mvarchar(2);
+select 'asd '::mchar::mvarchar(2);
+select 'asd'::mchar(2)::mvarchar(2);
+select 'asd '::mchar(2)::mvarchar(2);
+select 'asd'::mchar(5)::mvarchar(2);
+select 'asd '::mchar(5)::mvarchar(2);
+select 'asd'::mchar::mvarchar(5);
+select 'asd '::mchar::mvarchar(5);
+select 'asd'::mchar(2)::mvarchar(5);
+select 'asd '::mchar(2)::mvarchar(5);
+select 'asd'::mchar(5)::mvarchar(5);
+select 'asd '::mchar(5)::mvarchar(5);
+
+select 'asd'::mvarchar::mchar;
+select 'asd '::mvarchar::mchar;
+select 'asd'::mvarchar(2)::mchar;
+select 'asd '::mvarchar(2)::mchar;
+select 'asd'::mvarchar(5)::mchar;
+select 'asd '::mvarchar(5)::mchar;
+select 'asd'::mvarchar::mchar(2);
+select 'asd '::mvarchar::mchar(2);
+select 'asd'::mvarchar(2)::mchar(2);
+select 'asd '::mvarchar(2)::mchar(2);
+select 'asd'::mvarchar(5)::mchar(2);
+select 'asd '::mvarchar(5)::mchar(2);
+select 'asd'::mvarchar::mchar(5);
+select 'asd '::mvarchar::mchar(5);
+select 'asd'::mvarchar(2)::mchar(5);
+select 'asd '::mvarchar(2)::mchar(5);
+select 'asd'::mvarchar(5)::mchar(5);
+select 'asd '::mvarchar(5)::mchar(5);
+
+select 'asd'::mchar || '123';
+select 'asd'::mchar || '123'::mchar;
+select 'asd'::mchar || '123'::mvarchar;
+
+select 'asd '::mchar || '123';
+select 'asd '::mchar || '123'::mchar;
+select 'asd '::mchar || '123'::mvarchar;
+
+select 'asd '::mchar || '123 ';
+select 'asd '::mchar || '123 '::mchar;
+select 'asd '::mchar || '123 '::mvarchar;
+
+
+select 'asd'::mvarchar || '123';
+select 'asd'::mvarchar || '123'::mchar;
+select 'asd'::mvarchar || '123'::mvarchar;
+
+select 'asd '::mvarchar || '123';
+select 'asd '::mvarchar || '123'::mchar;
+select 'asd '::mvarchar || '123'::mvarchar;
+
+select 'asd '::mvarchar || '123 ';
+select 'asd '::mvarchar || '123 '::mchar;
+select 'asd '::mvarchar || '123 '::mvarchar;
+
+
+select 'asd'::mchar(2) || '123';
+select 'asd'::mchar(2) || '123'::mchar;
+select 'asd'::mchar(2) || '123'::mvarchar;
+
+
+select 'asd '::mchar(2) || '123';
+select 'asd '::mchar(2) || '123'::mchar;
+select 'asd '::mchar(2) || '123'::mvarchar;
+
+
+select 'asd '::mchar(2) || '123 ';
+select 'asd '::mchar(2) || '123 '::mchar;
+select 'asd '::mchar(2) || '123 '::mvarchar;
+
+select 'asd'::mvarchar(2) || '123';
+select 'asd'::mvarchar(2) || '123'::mchar;
+select 'asd'::mvarchar(2) || '123'::mvarchar;
+
+select 'asd '::mvarchar(2) || '123';
+select 'asd '::mvarchar(2) || '123'::mchar;
+select 'asd '::mvarchar(2) || '123'::mvarchar;
+
+select 'asd '::mvarchar(2) || '123 ';
+select 'asd '::mvarchar(2) || '123 '::mchar;
+select 'asd '::mvarchar(2) || '123 '::mvarchar;
+
+select 'asd'::mchar(4) || '143';
+select 'asd'::mchar(4) || '123'::mchar;
+select 'asd'::mchar(4) || '123'::mvarchar;
+
+select 'asd '::mchar(4) || '123';
+select 'asd '::mchar(4) || '123'::mchar;
+select 'asd '::mchar(4) || '123'::mvarchar;
+
+select 'asd '::mchar(4) || '123 ';
+select 'asd '::mchar(4) || '123 '::mchar;
+select 'asd '::mchar(4) || '123 '::mvarchar;
+
+select 'asd'::mvarchar(4) || '123';
+select 'asd'::mvarchar(4) || '123'::mchar;
+select 'asd'::mvarchar(4) || '123'::mvarchar;
+
+select 'asd '::mvarchar(4) || '123';
+select 'asd '::mvarchar(4) || '123'::mchar;
+select 'asd '::mvarchar(4) || '123'::mvarchar;
+
+select 'asd '::mvarchar(4) || '123 ';
+select 'asd '::mvarchar(4) || '123 '::mchar;
+select 'asd '::mvarchar(4) || '123 '::mvarchar;
+
+
+select 'asd '::mvarchar(4) || '123 '::mchar(4);
+select 'asd '::mvarchar(4) || '123 '::mvarchar(4);
+select 'asd '::mvarchar(4) || '123'::mchar(4);
+select 'asd '::mvarchar(4) || '123'::mvarchar(4);
+
+
+select 1 where 'f'::mchar='F'::mvarchar;
+select 1 where 'f'::mchar='F '::mvarchar;
+select 1 where 'f '::mchar='F'::mvarchar;
+select 1 where 'f '::mchar='F '::mvarchar;
+
+select 1 where 'f'::mchar='F'::mvarchar(2);
+select 1 where 'f'::mchar='F '::mvarchar(2);
+select 1 where 'f '::mchar='F'::mvarchar(2);
+select 1 where 'f '::mchar='F '::mvarchar(2);
+
+select 1 where 'f'::mchar(2)='F'::mvarchar;
+select 1 where 'f'::mchar(2)='F '::mvarchar;
+select 1 where 'f '::mchar(2)='F'::mvarchar;
+select 1 where 'f '::mchar(2)='F '::mvarchar;
+
+select 1 where 'f'::mchar(2)='F'::mvarchar(2);
+select 1 where 'f'::mchar(2)='F '::mvarchar(2);
+select 1 where 'f '::mchar(2)='F'::mvarchar(2);
+select 1 where 'f '::mchar(2)='F '::mvarchar(2);
+
+select 1 where 'foo'::mchar='FOO'::mvarchar;
+select 1 where 'foo'::mchar='FOO '::mvarchar;
+select 1 where 'foo '::mchar='FOO'::mvarchar;
+select 1 where 'foo '::mchar='FOO '::mvarchar;
+
+select 1 where 'foo'::mchar='FOO'::mvarchar(2);
+select 1 where 'foo'::mchar='FOO '::mvarchar(2);
+select 1 where 'foo '::mchar='FOO'::mvarchar(2);
+select 1 where 'foo '::mchar='FOO '::mvarchar(2);
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar;
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar;
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar;
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar;
+
+select 1 where 'foo'::mchar(2)='FOO'::mvarchar(2);
+select 1 where 'foo'::mchar(2)='FOO '::mvarchar(2);
+select 1 where 'foo '::mchar(2)='FOO'::mvarchar(2);
+select 1 where 'foo '::mchar(2)='FOO '::mvarchar(2);
+
+Select 'f'::mchar(1) Union Select 'o'::mvarchar(1);
+Select 'f'::mvarchar(1) Union Select 'o'::mchar(1);
+
+select * from chvch where ch=vch;
+
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+create index qq on ch (chcol);
+set enable_seqscan=off;
+select ch.* from ch, (select 'dEfg'::mvarchar as q) as p where chcol > p.q;
+set enable_seqscan=on;
+
+
+--\copy chvch to 'results/chvch.dump' binary
+--truncate table chvch;
+--\copy chvch from 'results/chvch.dump' binary
+
+--test joins
+CREATE TABLE a (mchar2 MCHAR(2) NOT NULL);
+CREATE TABLE c (mvarchar255 mvarchar NOT NULL);
+SELECT * FROM a, c WHERE mchar2 = mvarchar255;
+SELECT * FROM a, c WHERE mvarchar255 = mchar2;
+DROP TABLE a;
+DROP TABLE c;
+
+select * from (values
+ ('е'::mchar),('ё'),('еа'),('еб'),('ее'),('еж'),('ёа'),('ёб'),('ёё'),('ёж'),('ёе'),('её'))
+ z order by 1;
+
+select 'ё'::mchar = 'е';
+select 'Ё'::mchar = 'Е';
+select 'й'::mchar = 'и';
+select 'Й'::mchar = 'И';
+
+select mvarchar_icase_cmp('ёа','еб'), mvarchar_icase_cmp('еб','ё'),
+ mvarchar_icase_cmp('ё', 'ёа');
diff --git a/contrib/mchar/sql/mvarchar.sql b/contrib/mchar/sql/mvarchar.sql
new file mode 100644
index 00000000000..91b0981075d
--- /dev/null
+++ b/contrib/mchar/sql/mvarchar.sql
@@ -0,0 +1,82 @@
+-- I/O tests
+
+select '1'::mvarchar;
+select '2 '::mvarchar;
+select '10 '::mvarchar;
+
+select '1'::mvarchar(2);
+select '2 '::mvarchar(2);
+select '3 '::mvarchar(2);
+select '10 '::mvarchar(2);
+
+select ' '::mvarchar(10);
+select ' '::mvarchar;
+
+-- operations & functions
+
+select length('1'::mvarchar);
+select length('2 '::mvarchar);
+select length('10 '::mvarchar);
+
+select length('1'::mvarchar(2));
+select length('2 '::mvarchar(2));
+select length('3 '::mvarchar(2));
+select length('10 '::mvarchar(2));
+
+select length(' '::mvarchar(10));
+select length(' '::mvarchar);
+
+select 'asd'::mvarchar(10) || '>'::mvarchar(10);
+select length('asd'::mvarchar(10) || '>'::mvarchar(10));
+select 'asd'::mvarchar(2) || '>'::mvarchar(10);
+select length('asd'::mvarchar(2) || '>'::mvarchar(10));
+
+-- Comparisons
+
+select 'asdf'::mvarchar = 'aSdf'::mvarchar;
+select 'asdf'::mvarchar = 'aSdf '::mvarchar;
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar = 'aSdf 1'::mvarchar(6);
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar(3) = 'aSdf 1'::mvarchar(3);
+
+select 'asdf'::mvarchar < 'aSdf'::mvarchar;
+select 'asdf'::mvarchar < 'aSdf '::mvarchar;
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar < 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar <= 'aSdf'::mvarchar;
+select 'asdf'::mvarchar <= 'aSdf '::mvarchar;
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar <= 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar >= 'aSdf'::mvarchar;
+select 'asdf'::mvarchar >= 'aSdf '::mvarchar;
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar >= 'aSdf 1'::mvarchar(6);
+
+select 'asdf'::mvarchar > 'aSdf'::mvarchar;
+select 'asdf'::mvarchar > 'aSdf '::mvarchar;
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(4);
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(5);
+select 'asdf'::mvarchar > 'aSdf 1'::mvarchar(6);
+
+select max(vch) from chvch;
+select min(vch) from chvch;
+
+select substr('1234567890'::mvarchar, 3) = '34567890' as "34567890";
+select substr('1234567890'::mvarchar, 4, 3) = '456' as "456";
+
+select lower('asdfASDF'::mvarchar);
+select upper('asdfASDF'::mvarchar);
+
+select 'asd'::mvarchar == 'aSd'::mvarchar;
+select 'asd'::mvarchar == 'aCd'::mvarchar;
+select 'asd'::mvarchar == NULL;
+select NULL == 'aCd'::mvarchar;
+select NULL::mvarchar == NULL;
+
diff --git a/contrib/meson.build b/contrib/meson.build
index 14a89068650..71f550a9589 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -14,6 +14,7 @@ contrib_doc_args = {
subdir('amcheck')
subdir('auth_delay')
+subdir('auto_dump')
subdir('auto_explain')
subdir('basic_archive')
subdir('bloom')
@@ -23,11 +24,14 @@ subdir('btree_gin')
subdir('btree_gist')
subdir('citext')
subdir('cube')
+subdir('dbcopies_decoding')
subdir('dblink')
subdir('dict_int')
subdir('dict_xsyn')
subdir('earthdistance')
+subdir('fasttrun')
subdir('file_fdw')
+subdir('fulleq')
subdir('fuzzystrmatch')
subdir('hstore')
subdir('hstore_plperl')
@@ -40,7 +44,9 @@ subdir('jsonb_plpython')
subdir('lo')
subdir('ltree')
subdir('ltree_plpython')
+subdir('mchar')
subdir('oid2name')
+subdir('online_analyze')
subdir('pageinspect')
subdir('passwordcheck')
subdir('pg_buffercache')
@@ -53,7 +59,9 @@ subdir('pgstattuple')
subdir('pg_surgery')
subdir('pg_trgm')
subdir('pg_visibility')
+subdir('pg_wait_sampling')
subdir('pg_walinspect')
+subdir('plantuner')
subdir('postgres_fdw')
subdir('seg')
subdir('sepgsql')
diff --git a/contrib/online_analyze/COPYRIGHT b/contrib/online_analyze/COPYRIGHT
new file mode 100644
index 00000000000..75fea1f35d6
--- /dev/null
+++ b/contrib/online_analyze/COPYRIGHT
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
diff --git a/contrib/online_analyze/Makefile b/contrib/online_analyze/Makefile
new file mode 100644
index 00000000000..333add2b09b
--- /dev/null
+++ b/contrib/online_analyze/Makefile
@@ -0,0 +1,16 @@
+MODULE_big = online_analyze
+OBJS = online_analyze.o
+#DATA_built = online_analyze.sql
+DOCS = README.online_analyze
+#REGRESS = online_analyze
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/online_analyze
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
diff --git a/contrib/online_analyze/README.online_analyze b/contrib/online_analyze/README.online_analyze
new file mode 100644
index 00000000000..d72f17db424
--- /dev/null
+++ b/contrib/online_analyze/README.online_analyze
@@ -0,0 +1,46 @@
+This module runs ANALYZE immediately after INSERT/UPDATE/DELETE/SELECT INTO
+on the affected table(s).
+
+Supported versions of PostgreSQL: 8.4.*, 9.0.*, 9.1.*, 9.2.*, 9.3.*, 9.4*, 9.5*,
+ 9.6*
+
+Usage: LOAD 'online_analyze';
+
+Custom variables (default values are shown):
+online_analyze.enable = on
+ Enables on-line analyze
+
+online_analyze.local_tracking = off
+ Per-backend tracking for temp tables (does not use system statistics)
+
+online_analyze.verbose = on
+ Execute ANALYZE VERBOSE
+
+online_analyze.scale_factor = 0.1
+ Fraction of table size to start on-line analyze (similar to
+ autovacuum_analyze_scale_factor)
+
+online_analyze.threshold = 50
+ Min number of row updates before on-line analyze (similar to
+ autovacuum_analyze_threshold)
+
+online_analyze.min_interval = 10000
+ Minimum time interval between analyze calls per table (in milliseconds)
+
+online_analyze.lower_limit = 0
+ Min number of rows in table to analyze
+
+online_analyze.table_type = "all"
+ Type(s) of table for online analyze: all, persistent, temporary, none
+
+online_analyze.exclude_tables = ""
+ List of tables which will not be analyzed online
+
+online_analyze.include_tables = ""
+ List of tables which will be analyzed online
+ online_analyze.include_tables overrides online_analyze.exclude_tables.
+
+online_analyze.capacity_threshold = 100000
+ Maximum number of temporary tables to store in local cache
+
+Author: Teodor Sigaev <teodor@sigaev.ru>
diff --git a/contrib/online_analyze/meson.build b/contrib/online_analyze/meson.build
new file mode 100644
index 00000000000..e427099e8e4
--- /dev/null
+++ b/contrib/online_analyze/meson.build
@@ -0,0 +1,24 @@
+online_analyze_sources = files( # the module is built from a single C file
+ 'online_analyze.c'
+)
+
+if host_system == 'windows' # Windows builds also compile in the rc_lib_gen output for win32ver_rc
+ online_analyze_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'online_analyze',
+ '--FILEDESC', 'online_analyze',])
+endif
+
+online_analyze = shared_module('online_analyze',
+ online_analyze_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'], # same dependency list as contrib_mod_args; nothing extra is added
+ },
+)
+contrib_targets += online_analyze
+
+install_data( # NOTE(review): no files are listed, so this installs nothing as written -- confirm the call is needed
+ kwargs: contrib_data_args,
+)
+
+
+# TODO: DOCS = README.online_analyze
\ No newline at end of file
diff --git a/contrib/online_analyze/online_analyze.c b/contrib/online_analyze/online_analyze.c
new file mode 100644
index 00000000000..412030864f2
--- /dev/null
+++ b/contrib/online_analyze/online_analyze.c
@@ -0,0 +1,1408 @@
+/*
+ * Copyright (c) 2011 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "postgres.h"
+
+#include "pgstat.h"
+#include "miscadmin.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "commands/vacuum.h"
+#include "executor/executor.h"
+#include "nodes/nodes.h"
+#include "nodes/parsenodes.h"
+#include "storage/bufmgr.h"
+#include "utils/builtins.h"
+#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/lsyscache.h"
+#include "utils/guc.h"
+#if PG_VERSION_NUM >= 90200
+#include "catalog/pg_class.h"
+#include "nodes/primnodes.h"
+#include "tcop/utility.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+#include "utils/timestamp.h"
+#if PG_VERSION_NUM >= 90500
+#include "nodes/makefuncs.h"
+#if PG_VERSION_NUM >= 100000
+#include "utils/varlena.h"
+#include "utils/regproc.h"
+#if PG_VERSION_NUM >= 130000
+#include "common/hashfn.h"
+#endif
+#endif
+#endif
+#endif
+
+#ifdef PG_MODULE_MAGIC
+PG_MODULE_MAGIC;
+#endif
+
+static bool online_analyze_enable = true; /* online_analyze.enable */
+static bool online_analyze_local_tracking = false; /* online_analyze.local_tracking */
+static bool online_analyze_verbose = true; /* online_analyze.verbose */
+static double online_analyze_scale_factor = 0.1; /* online_analyze.scale_factor */
+static int online_analyze_threshold = 50; /* online_analyze.threshold */
+static int online_analyze_capacity_threshold = 100000; /* online_analyze.capacity_threshold */
+static double online_analyze_min_interval = 10000; /* online_analyze.min_interval, in milliseconds */
+static int online_analyze_lower_limit = 0; /* online_analyze.lower_limit */
+
+static ExecutorEnd_hook_type oldExecutorEndHook = NULL; /* hook installed before ours, chained to */
+#if PG_VERSION_NUM >= 90200
+static ProcessUtility_hook_type oldProcessUtilityHook = NULL; /* hook installed before ours, chained to */
+#endif
+
+#if PG_VERSION_NUM >= 120000
+#define VACOPT_NOWAIT VACOPT_SKIP_LOCKED /* from version 12 on, NOWAIT is expressed as SKIP_LOCKED */
+#endif
+
+typedef enum CmdKind /* kind of operation reported to makeAnalyze() */
+{
+ CK_SELECT = CMD_SELECT, /* the first four reuse the executor's CmdType values */
+ CK_UPDATE = CMD_UPDATE,
+ CK_INSERT = CMD_INSERT,
+ CK_DELETE = CMD_DELETE,
+ CK_TRUNCATE, /* TRUNCATE; NOTE(review): numbering continues at CMD_DELETE + 1 and may equal another CmdType value - confirm only INSERT/UPDATE/DELETE are ever cast to CmdKind */
+ CK_FASTTRUNCATE, /* call of a function named fasttruncate */
+ CK_CREATE, /* CREATE TABLE AS */
+ CK_ANALYZE, /* ANALYZE statement */
+ CK_VACUUM /* VACUUM statement, optionally with ANALYZE */
+} CmdKind;
+
+
+typedef enum /* bit mask of table classes; value set of online_analyze.table_type */
+{
+ OATT_ALL = 0x03, /* OATT_PERSISTENT | OATT_TEMPORARY */
+ OATT_PERSISTENT = 0x01,
+ OATT_TEMPORARY = 0x02,
+ OATT_NONE = 0x00
+} OnlineAnalyzeTableType;
+
+static const struct config_enum_entry online_analyze_table_type_options[] =
+{
+ {"all", OATT_ALL, false},
+ {"persistent", OATT_PERSISTENT, false},
+ {"temporary", OATT_TEMPORARY, false},
+ {"none", OATT_NONE, false},
+ {NULL, 0, false}, /* terminator */
+};
+
+static int online_analyze_table_type = (int)OATT_ALL; /* online_analyze.table_type */
+
+typedef struct TableList { /* resolved form of an include/exclude table list */
+ int nTables; /* number of valid entries in tables */
+ Oid *tables; /* malloc'ed array of relation OIDs, sorted for matchOid() */
+ char *tableStr; /* variable bound to the GUC by DefineCustomStringVariable() */
+ bool inited; /* false while tableStr still has to be resolved by lateInit() */
+} TableList;
+
+static TableList excludeTables = {0, NULL, NULL, false}; /* online_analyze.exclude_tables */
+static TableList includeTables = {0, NULL, NULL, false}; /* online_analyze.include_tables */
+
+typedef struct OnlineAnalyzeTableStat { /* per-table counters kept by this backend */
+ Oid tableid; /* hash key: relation OID */
+ bool rereadStat; /* true: refresh the counters from pgstat on next use */
+ PgStat_Counter n_tuples; /* live + dead tuples as last read, adjusted locally */
+ PgStat_Counter mod_since_analyze; /* rows changed since the last analyze */
+ TimestampTz last_autoanalyze_time; /* also set when makeAnalyze() runs an analyze itself */
+ TimestampTz last_analyze_time; /* set when an explicit ANALYZE is observed */
+} OnlineAnalyzeTableStat;
+
+static MemoryContext onlineAnalyzeMemoryContext = NULL; /* context handed to the relstats hash */
+static HTAB *relstats = NULL; /* OnlineAnalyzeTableStat entries keyed by table OID */
+
+static void relstatsInit(void);
+
+#if PG_VERSION_NUM < 100000
+static int
+oid_cmp(const void *a, const void *b)
+{
+	/* qsort comparator: -1, 0 or 1 for ascending Oid order */
+	Oid			lhs = *(const Oid *) a, rhs = *(const Oid *) b;
+	return (lhs > rhs) - (lhs < rhs);
+}
+#endif
+
+/*
+ * Parse a comma-separated table list; when doit is true, store the sorted
+ * OIDs of the resolvable tables in tbl.  Returns newval, or NULL on a
+ * syntax error.  Outside a transaction the lookup is left to lateInit().
+ */
+static const char *
+tableListAssign(const char * newval, bool doit, TableList *tbl)
+{
+	char	   *rawname;
+	List	   *namelist;
+	ListCell   *l;
+	Oid		   *newOids = NULL;
+	int			nOids = 0, i = 0;
+
+	rawname = pstrdup(newval);
+
+	if (!SplitIdentifierString(rawname, ',', &namelist))
+		goto cleanup;
+
+	/*
+	 * The following work needs access to the system catalog, so it can
+	 * only be done during normal processing inside a transaction.
+	 */
+	if (MyProcNumber == INVALID_PROC_NUMBER || !IsUnderPostmaster ||
+		!IsTransactionState())
+	{
+		includeTables.inited = false;
+		excludeTables.inited = false;
+		pfree(rawname);			/* do not leak the parse buffers */
+		list_free(namelist);
+		return newval;
+	}
+
+	if (doit)
+	{
+		nOids = list_length(namelist);
+		newOids = malloc(sizeof(Oid) * (nOids+1));
+		if (!newOids)
+			elog(ERROR,"could not allocate %d bytes", (int)(sizeof(Oid) * (nOids+1)));
+	}
+
+	foreach(l, namelist)
+	{
+		char	   *curname = (char *) lfirst(l);
+#if PG_VERSION_NUM >= 160000
+		Oid			relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname, NULL)), NoLock, true);
+#elif PG_VERSION_NUM >= 90200
+		Oid			relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname)), NoLock, true);
+#else
+		Oid			relOid = RangeVarGetRelid(makeRangeVarFromNameList(
+							stringToQualifiedNameList(curname)), true);
+#endif
+
+		if (relOid == InvalidOid)
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+				elog(WARNING,"'%s' does not exist", curname);
+			continue;
+		}
+		else if ( get_rel_relkind(relOid) != RELKIND_RELATION )
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+				elog(WARNING,"'%s' is not an table", curname);
+			continue;
+		}
+		else if (doit)
+			newOids[i++] = relOid;
+	}
+
+	if (doit)
+	{
+		tbl->nTables = i;
+		free(tbl->tables);		/* free(NULL) is a no-op */
+		tbl->tables = newOids;
+		if (tbl->nTables > 1)
+			qsort(tbl->tables, tbl->nTables, sizeof(tbl->tables[0]), oid_cmp);
+	}
+
+	pfree(rawname);
+	list_free(namelist);
+	return newval;
+
+cleanup:
+	free(newOids);				/* still NULL here, kept for safety */
+	pfree(rawname);
+	list_free(namelist);
+	return NULL;
+}
+
+#if PG_VERSION_NUM >= 90100
+/*
+ * GUC check hook for online_analyze.exclude_tables: the value is accepted
+ * when tableListAssign() can parse it in dry-run mode (doit = false).
+ */
+static bool
+excludeTablesCheck(char **newval, void **extra, GucSource source)
+{
+	const char *checked = tableListAssign(*newval, false, &excludeTables);
+
+	if (checked == NULL)
+		return false;
+
+	*newval = (char *) checked;
+	return true;
+}
+
+static void
+excludeTablesAssign(const char *newval, void *extra)
+{
+ tableListAssign(newval, true, &excludeTables); /* GUC assign hook for online_analyze.exclude_tables: apply newval (doit = true) */
+}
+
+/*
+ * GUC check hook for online_analyze.include_tables: the value is accepted
+ * when tableListAssign() can parse it in dry-run mode (doit = false).
+ */
+static bool
+includeTablesCheck(char **newval, void **extra, GucSource source)
+{
+	const char *checked = tableListAssign(*newval, false, &includeTables);
+
+	if (checked == NULL)
+		return false;
+
+	*newval = (char *) checked;
+	return true;
+}
+
+static void
+includeTablesAssign(const char *newval, void *extra)
+{
+ tableListAssign(newval, true, &includeTables); /* GUC assign hook for online_analyze.include_tables: apply newval (doit = true) */
+}
+
+#else /* PG_VERSION_NUM < 90100 */
+
+static const char *
+excludeTablesAssign(const char * newval, bool doit, GucSource source)
+{
+ return tableListAssign(newval, doit, &excludeTables); /* pre-9.1 variant: one hook serves both check (doit = false) and assign */
+}
+
+static const char *
+includeTablesAssign(const char * newval, bool doit, GucSource source)
+{
+ return tableListAssign(newval, doit, &includeTables); /* pre-9.1 variant: one hook serves both check (doit = false) and assign */
+}
+
+#endif
+
+/* Resolve table lists whose OID lookup was postponed by tableListAssign(). */
+static void
+lateInit()
+{
+	TableList  *pending[] = {&includeTables, &excludeTables};
+	TableList **cur = pending;
+
+	/* catalog access needs a connected backend inside a transaction */
+	if (!(MyProcNumber != INVALID_PROC_NUMBER && IsUnderPostmaster &&
+		  IsTransactionState()))
+		return;
+
+	for (; cur < pending + lengthof(pending); cur++)
+	{
+		if (!(*cur)->inited)
+			tableListAssign((*cur)->tableStr, true, *cur);
+		(*cur)->inited = true;
+	}
+}
+
+/*
+ * GUC show hook helper: render tbl as "schema.table, schema.table, ...".
+ * Entries whose relation or schema name cannot be looked up are skipped.
+ */
+static const char*
+tableListShow(TableList *tbl)
+{
+	char	   *val, *ptr;
+	int			i, len;
+
+	lateInit();
+	len = 1 /* \0 */ + tbl->nTables * (2 * NAMEDATALEN + 2 /* ', ' */ + 1 /* . */);
+	ptr = val = palloc(len);
+	*ptr ='\0';
+	for(i=0; i<tbl->nTables; i++)
+	{
+		char	   *relname = get_rel_name(tbl->tables[i]);
+		Oid			nspOid = get_rel_namespace(tbl->tables[i]);
+		char	   *nspname = get_namespace_name(nspOid);
+
+		if ( relname == NULL || nspOid == InvalidOid || nspname == NULL )
+			continue;
+		/* separate by output position, not by index: entry 0 may be skipped */
+		ptr += snprintf(ptr, len - (ptr - val), "%s%s.%s",
+						(ptr == val) ? "" : ", ", nspname, relname);
+	}
+	return val;
+}
+
+static const char*
+excludeTablesShow(void)
+{
+ return tableListShow(&excludeTables); /* GUC show hook for online_analyze.exclude_tables */
+}
+
+static const char*
+includeTablesShow(void)
+{
+ return tableListShow(&includeTables); /* GUC show hook for online_analyze.include_tables */
+}
+
+/* Binary search for oid in tbl->tables, which tableListAssign() keeps sorted. */
+static bool
+matchOid(TableList *tbl, Oid oid)
+{
+	int			lo = 0;
+	int			hi = tbl->nTables;
+
+	while (lo < hi)
+	{
+		int			mid = lo + (hi - lo) / 2;
+		Oid			probe = tbl->tables[mid];
+
+		if (probe == oid)
+			return true;
+		if (probe < oid)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	return false;
+}
+
+#if PG_VERSION_NUM >= 90500
+/* Build a schema-qualified RangeVar (location -1) for the relation relOid. */
+static RangeVar*
+makeRangeVarFromOid(Oid relOid)
+{
+	Oid			nspOid = get_rel_namespace(relOid);
+	char	   *nspname = get_namespace_name(nspOid);
+	char	   *relname = get_rel_name(relOid);
+
+	return makeRangeVar(nspname, relname, -1);
+}
+#endif
+
+/*
+ * Account for a change of relOid caused by 'operation' and run ANALYZE on
+ * it when the thresholds configured via online_analyze.* are exceeded.
+ * naffected is the number of changed rows; 0 means nothing happened and a
+ * negative value means the count is unknown.
+ */
+static void
+makeAnalyze(Oid relOid, CmdKind operation, int64 naffected)
+{
+	TimestampTz				now = GetCurrentTimestamp();
+	Relation				rel;
+	OnlineAnalyzeTableType	reltype;
+	bool					found = false, newTable = false;
+	OnlineAnalyzeTableStat	*rstat, dummyrstat;
+	PgStat_StatTabEntry		*tabentry = NULL;
+
+	if (relOid == InvalidOid)
+		return;
+
+	if (naffected == 0)
+		/* return if there are no changes */
+		return;
+	else if (naffected < 0)
+		/* number of affected rows is unknown */
+		naffected = 0;
+
+	rel = RelationIdGetRelation(relOid);
+	if (rel == NULL)
+		return;		/* relation is gone, e.g. dropped concurrently */
+	if (rel->rd_rel->relkind != RELKIND_RELATION)
+	{
+		RelationClose(rel);
+		return;
+	}
+
+	reltype =
+#if PG_VERSION_NUM >= 90100
+		(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+#else
+		(rel->rd_istemp || rel->rd_islocaltemp)
+#endif
+			? OATT_TEMPORARY : OATT_PERSISTENT;
+
+	RelationClose(rel);
+
+	/*
+	 * includeTables overrides excludeTables
+	 */
+	switch(online_analyze_table_type)
+	{
+		case OATT_ALL:
+			if (get_rel_relkind(relOid) != RELKIND_RELATION ||
+				(matchOid(&excludeTables, relOid) == true &&
+				matchOid(&includeTables, relOid) == false))
+				return;
+			break;
+		case OATT_NONE:
+			if (get_rel_relkind(relOid) != RELKIND_RELATION ||
+				matchOid(&includeTables, relOid) == false)
+				return;
+			break;
+		case OATT_TEMPORARY:
+		case OATT_PERSISTENT:
+		default:
+			/*
+			 * skip analyze if relation's type does not match
+			 * online_analyze_table_type
+			 */
+			if ((online_analyze_table_type & reltype) == 0 ||
+				matchOid(&excludeTables, relOid) == true)
+			{
+				if (matchOid(&includeTables, relOid) == false)
+					return;
+			}
+			break;
+	}
+
+	/*
+	 * Do not store data about persistent tables in local memory because we
+	 * cannot track changes to them: they could be changed by other
+	 * backends. So always get a pgstat table entry.
+	 */
+	if (reltype == OATT_TEMPORARY)
+		rstat = hash_search(relstats, &relOid, HASH_ENTER, &found);
+	else
+		rstat = &dummyrstat; /* found == false for following if */
+
+	if (!found)
+	{
+		MemSet(rstat, 0, sizeof(*rstat));
+		rstat->tableid = relOid;
+		newTable = true;
+	}
+
+	if (operation == CK_VACUUM)
+	{
+		/* force reread because vacuum could change n_tuples */
+		rstat->rereadStat = true;
+		return;
+	}
+	else if (operation == CK_ANALYZE)
+	{
+		/* only analyze */
+		rstat->mod_since_analyze = 0;
+		rstat->last_analyze_time = now;
+		if (newTable)
+			rstat->rereadStat = true;
+		return;
+	}
+
+	Assert(rstat->tableid == relOid);
+
+	if (
+		/* do not reread data if it was a truncation */
+		operation != CK_TRUNCATE && operation != CK_FASTTRUNCATE &&
+		/* read for persistent table and for temp table if it is allowed */
+		(reltype == OATT_PERSISTENT || online_analyze_local_tracking == false) &&
+		/* read only for new table or we know that it's needed */
+		(newTable == true || rstat->rereadStat == true)
+	   )
+	{
+		rstat->rereadStat = false;
+
+		tabentry = pgstat_fetch_stat_tabentry(relOid);
+
+		if (tabentry)
+		{
+			rstat->n_tuples =
+#if PG_VERSION_NUM >= 160000
+				tabentry->dead_tuples + tabentry->live_tuples;
+#else
+				tabentry->n_dead_tuples + tabentry->n_live_tuples;
+#endif
+
+			rstat->mod_since_analyze =
+#if PG_VERSION_NUM >= 160000
+				tabentry->mod_since_analyze;
+#elif PG_VERSION_NUM >= 90000
+				tabentry->changes_since_analyze;
+#else
+				tabentry->n_live_tuples + tabentry->n_dead_tuples -
+					tabentry->last_anl_tuples;
+#endif
+
+			rstat->last_autoanalyze_time =
+#if PG_VERSION_NUM >= 160000
+				tabentry->last_autoanalyze_time;
+#else
+				tabentry->autovac_analyze_timestamp;
+#endif
+
+			rstat->last_analyze_time =
+#if PG_VERSION_NUM >= 160000
+				tabentry->last_analyze_time;
+#else
+				tabentry->analyze_timestamp;
+#endif
+		}
+	}
+
+	if (newTable ||
+		/* force analyze after truncate, fasttruncate already did analyze */
+		operation == CK_TRUNCATE || (
+		/* do not analyze too often: go ahead only if both stamps are exceeded */
+		TimestampDifferenceExceeds(rstat->last_analyze_time, now, online_analyze_min_interval) &&
+		TimestampDifferenceExceeds(rstat->last_autoanalyze_time, now, online_analyze_min_interval) &&
+		/* do not analyze too small tables */
+		rstat->n_tuples + rstat->mod_since_analyze + naffected > online_analyze_lower_limit &&
+		/* be in sync with relation_needs_vacanalyze */
+		((double)(rstat->mod_since_analyze + naffected)) >=
+			 online_analyze_scale_factor * ((double)rstat->n_tuples) +
+			 (double)online_analyze_threshold))
+	{
+#if PG_VERSION_NUM < 90500
+		VacuumStmt				vacstmt;
+#else
+		VacuumParams			vacstmt;
+#endif
+		TimestampTz				startStamp, endStamp;
+		int						flags;
+
+#ifdef PGPRO_EE
+		/* ATX is not compatible with online_analyze */
+		if (getNestLevelATX() != 0)
+			return;
+#endif
+
+		memset(&startStamp, 0, sizeof(startStamp)); /* keep compiler quiet */
+
+		memset(&vacstmt, 0, sizeof(vacstmt));
+
+		vacstmt.freeze_min_age = -1;
+		vacstmt.freeze_table_age = -1; /* ??? */
+
+#if PG_VERSION_NUM < 90500
+		vacstmt.type = T_VacuumStmt;
+		vacstmt.relation = NULL;
+		vacstmt.va_cols = NIL;
+#if PG_VERSION_NUM >= 90000
+		vacstmt.options = VACOPT_ANALYZE;
+		if (online_analyze_verbose)
+			vacstmt.options |= VACOPT_VERBOSE;
+#else
+		vacstmt.vacuum = vacstmt.full = false;
+		vacstmt.analyze = true;
+		vacstmt.verbose = online_analyze_verbose;
+#endif
+#else
+		vacstmt.multixact_freeze_min_age = -1;
+		vacstmt.multixact_freeze_table_age = -1;
+		vacstmt.log_min_duration = -1;
+#endif
+
+		if (online_analyze_verbose)
+			startStamp = GetCurrentTimestamp();
+
+		flags = VACOPT_ANALYZE | VACOPT_NOWAIT |
+					((online_analyze_verbose) ? VACOPT_VERBOSE : 0);
+
+#if PG_VERSION_NUM >= 120000
+		vacstmt.options = flags;
+#endif
+		analyze_rel(relOid,
+#if PG_VERSION_NUM < 90500
+			&vacstmt
+#if PG_VERSION_NUM >= 90018
+			, true
+#endif
+			, GetAccessStrategy(BAS_VACUUM)
+#if (PG_VERSION_NUM >= 90000) && (PG_VERSION_NUM < 90004)
+			, true
+#endif
+#else
+			makeRangeVarFromOid(relOid),
+#if PG_VERSION_NUM < 120000
+			flags,
+#endif
+			&vacstmt, NULL, true, GetAccessStrategy(BAS_VACUUM)
+#endif
+		);
+
+		/* Make changes visible to subsequent calls */
+		CommandCounterIncrement();
+
+		if (online_analyze_verbose)
+		{
+			long	secs;
+			int		microsecs;
+
+			endStamp = GetCurrentTimestamp();
+			TimestampDifference(startStamp, endStamp, &secs, &microsecs);
+			elog(INFO, "analyze \"%s\" took %.02f seconds",
+				get_rel_name(relOid),
+				((double)secs) + ((double)microsecs)/1.0e6);
+		}
+
+		rstat->last_autoanalyze_time = now;
+		rstat->mod_since_analyze = 0;
+
+		switch(operation)
+		{
+			case CK_CREATE:
+			case CK_INSERT:
+			case CK_UPDATE:
+				rstat->n_tuples += naffected;
+				/* FALLTHROUGH */
+			case CK_DELETE:
+				rstat->rereadStat = (reltype == OATT_PERSISTENT);
+				break;
+			case CK_TRUNCATE:
+			case CK_FASTTRUNCATE:
+				rstat->rereadStat = false;
+				rstat->n_tuples = 0;
+				break;
+			default:
+				break;
+		}
+
+		/* update last analyze timestamp in local memory of backend */
+		if (tabentry)
+		{
+#if PG_VERSION_NUM >= 160000
+			tabentry->last_analyze_time = now;
+			tabentry->mod_since_analyze = 0;
+#else
+			tabentry->analyze_timestamp = now;
+			tabentry->changes_since_analyze = 0;
+#endif
+		}
+	}
+	else
+	{
+#if PG_VERSION_NUM >= 90000
+		if (tabentry)
+#if PG_VERSION_NUM >= 160000
+			tabentry->mod_since_analyze += naffected;
+#else
+			tabentry->changes_since_analyze += naffected;
+#endif
+#endif
+		switch(operation)
+		{
+			case CK_CREATE:
+			case CK_INSERT:
+				rstat->mod_since_analyze += naffected;
+				rstat->n_tuples += naffected;
+				break;
+			case CK_UPDATE:
+				rstat->mod_since_analyze += 2 * naffected;
+				rstat->n_tuples += naffected;
+				break;
+			case CK_DELETE:
+				rstat->mod_since_analyze += naffected;
+				break;
+			case CK_TRUNCATE:
+			case CK_FASTTRUNCATE:
+				rstat->mod_since_analyze = 0;
+				rstat->n_tuples = 0;
+				break;
+			default:
+				break;
+		}
+	}
+
+	/* Reset local cache if we are over limit */
+	if (hash_get_num_entries(relstats) > online_analyze_capacity_threshold)
+		relstatsInit();
+}
+
+/*
+ * Detect a plan of the form "SELECT fasttruncate('<table>')": one target
+ * entry that calls a non-set-returning, non-variadic void function with
+ * OID >= FirstNormalObjectId named "fasttruncate" on one text constant.
+ * Returns that constant, or NULL when the plan has a different shape.
+ */
+static Const*
+isFastTruncateCall(QueryDesc *queryDesc)
+{
+	PlannedStmt *stmt = queryDesc->plannedstmt;
+	TargetEntry *te;
+	FuncExpr   *fe;
+	Const	   *constval;
+
+	/* only a SELECT whose plan has exactly one output column qualifies */
+	if (stmt == NULL || queryDesc->operation != CMD_SELECT)
+		return NULL;
+	if (stmt->planTree == NULL || stmt->planTree->targetlist == NIL ||
+		list_length(stmt->planTree->targetlist) != 1)
+		return NULL;
+
+	te = linitial(stmt->planTree->targetlist);
+	if (!IsA(te, TargetEntry))
+		return NULL;
+
+	/* that column must be a function call of the expected signature */
+	fe = (FuncExpr*)te->expr;
+	if (fe == NULL || !IsA(fe, FuncExpr))
+		return NULL;
+	if (fe->funcid < FirstNormalObjectId || fe->funcretset ||
+		fe->funcresulttype != VOIDOID || fe->funcvariadic ||
+		list_length(fe->args) != 1)
+		return NULL;
+
+	/* its only argument must be a text constant */
+	constval = linitial(fe->args);
+	if (!IsA(constval,Const) || constval->consttype != TEXTOID)
+		return NULL;
+	/* the function name is checked last, after all structural tests */
+	if (strcmp(get_func_name(fe->funcid), "fasttruncate") != 0)
+		return NULL;
+
+	return constval;
+}
+
+/* ExecutorEnd hook: report tables changed by the finished query to makeAnalyze(). */
+extern PGDLLIMPORT void onlineAnalyzeHooker(QueryDesc *queryDesc);
+void
+onlineAnalyzeHooker(QueryDesc *queryDesc)
+{
+ int64 naffected = -1;
+ Const *constval;
+
+ if (queryDesc->estate)
+ naffected = queryDesc->estate->es_processed;
+ /* resolve the include/exclude table lists if that had to be postponed */
+ lateInit();
+
+#if PG_VERSION_NUM >= 90200
+ if (online_analyze_enable &&
+ (constval = isFastTruncateCall(queryDesc)) != NULL)
+ {
+ Datum tblnamed = constval->constvalue;
+ char *tblname = text_to_cstring(DatumGetTextP(tblnamed));
+#if PG_VERSION_NUM >= 160000
+ RangeVar *tblvar =
+ makeRangeVarFromNameList(stringToQualifiedNameList(tblname, NULL));
+#else
+ RangeVar *tblvar =
+ makeRangeVarFromNameList(stringToQualifiedNameList(tblname));
+#endif
+ /* the row count is passed as -1 (unknown) for a fasttruncate call */
+ makeAnalyze(RangeVarGetRelid(tblvar,
+ NoLock,
+ false),
+ CK_FASTTRUNCATE, -1);
+ }
+#endif
+
+ if (online_analyze_enable && queryDesc->plannedstmt &&
+ (queryDesc->operation == CMD_INSERT ||
+ queryDesc->operation == CMD_UPDATE ||
+ queryDesc->operation == CMD_DELETE
+#if PG_VERSION_NUM < 90200
+ || (queryDesc->operation == CMD_SELECT &&
+ queryDesc->plannedstmt->intoClause)
+#endif
+ ))
+ {
+#if PG_VERSION_NUM < 90200
+ if (queryDesc->operation == CMD_SELECT)
+ {
+ Oid relOid = RangeVarGetRelid(queryDesc->plannedstmt->intoClause->rel, true);
+
+ makeAnalyze(relOid, queryDesc->operation, naffected);
+ }
+ else
+#endif
+ if (queryDesc->plannedstmt->resultRelations &&
+ queryDesc->plannedstmt->rtable)
+ {
+ ListCell *l;
+
+ foreach(l, queryDesc->plannedstmt->resultRelations)
+ {
+ int n = lfirst_int(l);
+ RangeTblEntry *rte = list_nth(queryDesc->plannedstmt->rtable, n-1);
+ /* resultRelations entries are 1-based positions in rtable, hence n-1 */
+ if (rte->rtekind == RTE_RELATION)
+ makeAnalyze(rte->relid, (CmdKind)queryDesc->operation, naffected);
+ }
+ }
+ }
+ /* finally hand over to the previously installed hook or the standard implementation */
+ if (oldExecutorEndHook)
+ oldExecutorEndHook(queryDesc);
+ else
+ standard_ExecutorEnd(queryDesc);
+}
+
+static List *toremove = NIL;	/* OIDs of tables named by DROP TABLE in this transaction */
+
+/*
+ * Transaction callback (see RegisterXactCallback() below): on commit forget
+ * the cached statistics of dropped tables.  The list is allocated in
+ * TopTransactionContext, so it is also forgotten on ABORT and PREPARE;
+ * otherwise the pointer would dangle into the next transaction.
+ */
+static void
+removeTable(XactEvent event, void *arg)
+{
+	ListCell   *cell;
+
+	/*
+	 * Other events (e.g. the PRE_COMMIT ones) must leave the list intact.
+	 */
+	if (event == XACT_EVENT_ABORT || event == XACT_EVENT_PREPARE)
+		toremove = NIL;
+	if (event != XACT_EVENT_COMMIT)
+		return;
+
+	foreach(cell, toremove)
+	{
+		Oid			relOid = lfirst_oid(cell);
+
+		hash_search(relstats, &relOid, HASH_REMOVE, NULL);
+	}
+	toremove = NIL;
+}
+
+#if PG_VERSION_NUM >= 120000
+/* Translate the option list of a VACUUM/ANALYZE statement into VACOPT_* bits. */
+static int
+parse_vacuum_opt(VacuumStmt *vacstmt)
+{
+	static const struct { const char *name; int bit; } known[] = {
+		{"verbose", VACOPT_VERBOSE},
+		{"skip_locked", VACOPT_SKIP_LOCKED},
+		{"analyze", VACOPT_ANALYZE},
+		{"freeze", VACOPT_FREEZE},
+		{"full", VACOPT_FULL},
+		{"disable_page_skipping", VACOPT_DISABLE_PAGE_SKIPPING},
+	};
+	int			result = vacstmt->is_vacuumcmd ? VACOPT_VACUUM : VACOPT_ANALYZE;
+	ListCell   *lc;
+
+	foreach(lc, vacstmt->options)
+	{
+		const char *optname = ((DefElem *) lfirst(lc))->defname;
+		int			k;
+
+		/* only the option name is looked at; unknown names are ignored */
+		for (k = 0; k < lengthof(known); k++)
+			if (strcmp(optname, known[k].name) == 0)
+				result |= known[k].bit;
+	}
+	return result;
+}
+#endif
+
+/* ProcessUtility hook: tracks CREATE TABLE AS, TRUNCATE, DROP TABLE, VACUUM and ANALYZE. */
+#if PG_VERSION_NUM >= 90200
+static void
+onlineAnalyzeHookerUtility(
+#if PG_VERSION_NUM >= 100000
+ PlannedStmt *pstmt,
+#else
+ Node *parsetree,
+#endif
+ const char *queryString,
+#if PG_VERSION_NUM >= 140000
+ bool readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ ProcessUtilityContext context, ParamListInfo params,
+#if PG_VERSION_NUM >= 100000
+ QueryEnvironment *queryEnv,
+#endif
+#else
+ ParamListInfo params, bool isTopLevel,
+#endif
+ DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+ QueryCompletion *completionTag
+#else
+ char *completionTag
+#endif
+) {
+ List *tblnames = NIL;
+ CmdKind op = CK_INSERT;
+#if PG_VERSION_NUM >= 100000
+ Node *parsetree = NULL;
+
+ if (pstmt->commandType == CMD_UTILITY)
+ parsetree = pstmt->utilityStmt;
+#endif
+
+ lateInit();
+ /* step 1, before execution: decide which tables to report and as which CmdKind */
+ if (parsetree && online_analyze_enable)
+ {
+ if (IsA(parsetree, CreateTableAsStmt) &&
+ ((CreateTableAsStmt*)parsetree)->into)
+ {
+ tblnames =
+ list_make1((RangeVar*)copyObject(((CreateTableAsStmt*)parsetree)->into->rel));
+ op = CK_CREATE;
+ }
+ else if (IsA(parsetree, TruncateStmt))
+ {
+ tblnames = list_copy(((TruncateStmt*)parsetree)->relations);
+ op = CK_TRUNCATE;
+ }
+ else if (IsA(parsetree, DropStmt) &&
+ ((DropStmt*)parsetree)->removeType == OBJECT_TABLE)
+ {
+ ListCell *cell;
+
+ foreach(cell, ((DropStmt*)parsetree)->objects)
+ {
+ List *relname = (List *) lfirst(cell);
+ RangeVar *rel = makeRangeVarFromNameList(relname);
+ Oid relOid = RangeVarGetRelid(rel, NoLock, true);
+
+ if (OidIsValid(relOid))
+ {
+ MemoryContext ctx;
+ /* append in TopTransactionContext; removeTable() reads the list on commit */
+ ctx = MemoryContextSwitchTo(TopTransactionContext);
+ toremove = lappend_oid(toremove, relOid);
+ MemoryContextSwitchTo(ctx);
+ }
+ }
+ }
+ else if (IsA(parsetree, VacuumStmt))
+ {
+ VacuumStmt *vac = (VacuumStmt*)parsetree;
+ int options =
+#if PG_VERSION_NUM >= 120000
+ parse_vacuum_opt(vac)
+#else
+ vac->options
+#endif
+ ;
+
+ /* collect the tables named in the statement; NIL means none was named */
+#if PG_VERSION_NUM >= 110000
+ tblnames = vac->rels;
+#else
+ if (vac->relation)
+ tblnames = list_make1(vac->relation);
+#endif
+
+ if (options & (VACOPT_VACUUM | VACOPT_FULL | VACOPT_FREEZE))
+ {
+ /* optionally with analyze */
+ op = CK_VACUUM;
+
+ /* drop all collected stat */
+ if (tblnames == NIL)
+ relstatsInit();
+ }
+ else if (options & VACOPT_ANALYZE)
+ {
+ op = CK_ANALYZE;
+
+ /* should reset all counters */
+ if (tblnames == NIL)
+ {
+ HASH_SEQ_STATUS hs;
+ OnlineAnalyzeTableStat *rstat;
+ TimestampTz now = GetCurrentTimestamp();
+
+ hash_seq_init(&hs, relstats);
+
+ while((rstat = hash_seq_search(&hs)) != NULL)
+ {
+ rstat->mod_since_analyze = 0;
+ rstat->last_analyze_time = now;
+ }
+ }
+ }
+ else
+ tblnames = NIL;
+ }
+ }
+ /* step 2: run the statement; on PG >= 10 the hooks are passed the PlannedStmt */
+#if PG_VERSION_NUM >= 100000
+#define parsetree pstmt
+#endif
+
+ if (oldProcessUtilityHook)
+ oldProcessUtilityHook(parsetree, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ context, params,
+#if PG_VERSION_NUM >= 100000
+ queryEnv,
+#endif
+#else
+ params, isTopLevel,
+#endif
+ dest, completionTag);
+ else
+ standard_ProcessUtility(parsetree, queryString,
+#if PG_VERSION_NUM >= 140000
+ readOnlyTree,
+#endif
+#if PG_VERSION_NUM >= 90300
+ context, params,
+#if PG_VERSION_NUM >= 100000
+ queryEnv,
+#endif
+#else
+ params, isTopLevel,
+#endif
+ dest, completionTag);
+
+#if PG_VERSION_NUM >= 100000
+#undef parsetree
+#endif
+ /* step 3, after execution: report every collected table to makeAnalyze() */
+ if (tblnames) {
+ ListCell *l;
+
+ foreach(l, tblnames)
+ {
+ RangeVar *tblname =
+#if PG_VERSION_NUM >= 110000
+ (IsA(lfirst(l), VacuumRelation)) ?
+ ((VacuumRelation*)lfirst(l))->relation :
+#endif
+ (RangeVar*)lfirst(l);
+ Oid tblOid;
+
+ Assert(IsA(tblname, RangeVar));
+
+ tblOid = RangeVarGetRelid(tblname, NoLock, true);
+ makeAnalyze(tblOid, op, -1);
+ }
+ }
+}
+#endif
+
+
+/*
+ * (Re)create the per-backend hash of table statistics.  The first call
+ * creates a dedicated memory context under CacheMemoryContext; later
+ * calls reset that context before a fresh, empty table is created with
+ * it as the table's context.
+ */
+static void
+relstatsInit(void)
+{
+	HASHCTL		ctl;
+
+	if (onlineAnalyzeMemoryContext == NULL)
+	{
+		Assert(relstats == NULL);
+
+#if PG_VERSION_NUM < 90600
+		onlineAnalyzeMemoryContext =
+			AllocSetContextCreate(CacheMemoryContext,
+			"online_analyze storage context",
+			ALLOCSET_DEFAULT_MINSIZE,
+			ALLOCSET_DEFAULT_INITSIZE,
+			ALLOCSET_DEFAULT_MAXSIZE
+			);
+#else
+		onlineAnalyzeMemoryContext =
+			AllocSetContextCreate(CacheMemoryContext,
+			"online_analyze storage context", ALLOCSET_DEFAULT_SIZES);
+#endif
+	}
+	else
+	{
+		Assert(relstats != NULL);
+		MemoryContextReset(onlineAnalyzeMemoryContext);
+	}
+
+	/* Oid keys hashed with oid_hash; the table uses our own context */
+	MemSet(&ctl, 0, sizeof(ctl));
+	ctl.keysize = sizeof(Oid);
+	ctl.entrysize = sizeof(OnlineAnalyzeTableStat);
+	ctl.hash = oid_hash;
+	ctl.hcxt = onlineAnalyzeMemoryContext;
+
+	relstats = hash_create("online_analyze storage", 1024, &ctl,
+						   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+}
+
+void _PG_init(void); /* sets up the stat cache, installs both hooks, defines the online_analyze.* GUCs */
+void
+_PG_init(void)
+{
+ relstatsInit();
+ /* save whatever hook was installed before and put ours in front of it */
+ oldExecutorEndHook = ExecutorEnd_hook;
+
+ ExecutorEnd_hook = onlineAnalyzeHooker;
+
+#if PG_VERSION_NUM >= 90200
+ oldProcessUtilityHook = ProcessUtility_hook;
+
+ ProcessUtility_hook = onlineAnalyzeHookerUtility;
+#endif
+
+ /* each GUC below uses its backing variable's initial value as its default */
+ DefineCustomBoolVariable( /* master switch, tested by both hooks */
+ "online_analyze.enable",
+ "Enable on-line analyze",
+ "Enables analyze of table directly after insert/update/delete/select into",
+ &online_analyze_enable,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_enable,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomBoolVariable( /* when on, temp tables are tracked without rereading pgstat */
+ "online_analyze.local_tracking",
+ "Per backend tracking",
+ "Per backend tracking for temp tables (do not use system statistic)",
+ &online_analyze_local_tracking,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_local_tracking,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomBoolVariable( /* adds VACOPT_VERBOSE and an INFO line with the elapsed time */
+ "online_analyze.verbose",
+ "Verbosity of on-line analyze",
+ "Make ANALYZE VERBOSE after table's changes",
+ &online_analyze_verbose,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_verbose,
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomRealVariable( /* allowed range 0.0 .. 1.0 */
+ "online_analyze.scale_factor",
+ "fraction of table size to start on-line analyze",
+ "fraction of table size to start on-line analyze",
+ &online_analyze_scale_factor,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_scale_factor,
+#endif
+ 0.0,
+ 1.0,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomIntVariable( /* allowed range 0 .. INT_MAX */
+ "online_analyze.threshold",
+ "min number of row updates before on-line analyze",
+ "min number of row updates before on-line analyze",
+ &online_analyze_threshold,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_threshold,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomIntVariable( /* makeAnalyze() rebuilds relstats once it holds more entries than this */
+ "online_analyze.capacity_threshold",
+ "Max local cache table capacity",
+ "Max local cache table capacity",
+ &online_analyze_capacity_threshold,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_capacity_threshold,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomRealVariable( /* real-valued, in milliseconds */
+ "online_analyze.min_interval",
+ "minimum time interval between analyze call (in milliseconds)",
+ "minimum time interval between analyze call (in milliseconds)",
+ &online_analyze_min_interval,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_min_interval,
+#endif
+ 0.0,
+ 1e30,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomEnumVariable( /* values come from online_analyze_table_type_options */
+ "online_analyze.table_type",
+ "Type(s) of table for online analyze: all(default), persistent, temporary, none",
+ NULL,
+ &online_analyze_table_type,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_table_type,
+#endif
+ online_analyze_table_type_options,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ DefineCustomStringVariable( /* string GUC with its own check/assign/show hooks */
+ "online_analyze.exclude_tables",
+ "List of tables which will not online analyze",
+ NULL,
+ &excludeTables.tableStr,
+#if PG_VERSION_NUM >= 80400
+ "",
+#endif
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ excludeTablesCheck,
+ excludeTablesAssign,
+#else
+ excludeTablesAssign,
+#endif
+ excludeTablesShow
+ );
+
+ DefineCustomStringVariable( /* string GUC with its own check/assign/show hooks */
+ "online_analyze.include_tables",
+ "List of tables which will online analyze",
+ NULL,
+ &includeTables.tableStr,
+#if PG_VERSION_NUM >= 80400
+ "",
+#endif
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ includeTablesCheck,
+ includeTablesAssign,
+#else
+ includeTablesAssign,
+#endif
+ includeTablesShow
+ );
+
+ DefineCustomIntVariable( /* allowed range 0 .. INT_MAX */
+ "online_analyze.lower_limit",
+ "min number of rows in table to analyze",
+ "min number of rows in table to analyze",
+ &online_analyze_lower_limit,
+#if PG_VERSION_NUM >= 80400
+ online_analyze_lower_limit,
+#endif
+ 0,
+ 0x7fffffff,
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ RegisterXactCallback(removeTable, NULL); /* removeTable() is called on transaction events */
+}
+
+#if PG_VERSION_NUM < 150000
+void _PG_fini(void);
+/* Restore the hooks saved by _PG_init() and release both OID arrays. */
+void
+_PG_fini(void)
+{
+#if PG_VERSION_NUM >= 90200
+	ProcessUtility_hook = oldProcessUtilityHook;
+#endif
+	ExecutorEnd_hook = oldExecutorEndHook;
+
+	free(excludeTables.tables);		/* free(NULL) is harmless */
+	excludeTables.tables = NULL;
+	excludeTables.nTables = 0;
+	free(includeTables.tables);
+	includeTables.tables = NULL;
+	includeTables.nTables = 0;
+}
+#endif
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index c5c4edce423..4017c9a3a02 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -881,7 +881,7 @@ pgss_planner(Query *parse,
int cursorOptions,
ParamListInfo boundParams)
{
- PlannedStmt *result;
+ PlannedStmt *result = NULL;
/*
* We can't process the query if no query_string is provided, as
Submodule contrib/pg_wait_sampling 00000000000...45d2627523f (new submodule)
diff --git a/contrib/pg_wait_sampling/.gitignore b/contrib/pg_wait_sampling/.gitignore
new file mode 100644
index 0000000..e066fb5
--- /dev/null
+++ b/contrib/pg_wait_sampling/.gitignore
@@ -0,0 +1,6 @@
+*.o
+*.so
+/.deps/
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_wait_sampling/.travis.yml b/contrib/pg_wait_sampling/.travis.yml
new file mode 100644
index 0000000..1fce04a
--- /dev/null
+++ b/contrib/pg_wait_sampling/.travis.yml
@@ -0,0 +1,21 @@
+dist: jammy
+language: c
+env:
+- PG_MAJOR=17 BETA=1
+- PG_MAJOR=16
+- PG_MAJOR=15
+- PG_MAJOR=14
+- PG_MAJOR=13
+- PG_MAJOR=12
+before_script:
+- curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
+- echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee -a /etc/apt/sources.list
+- if [ -n "${BETA}" ]; then echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main ${PG_MAJOR}" | sudo tee -a /etc/apt/sources.list; fi
+- sudo apt-get update
+- sudo systemctl stop postgresql
+- sudo apt-get install -y --no-install-recommends postgresql-client-${PG_MAJOR} postgresql-${PG_MAJOR} postgresql-server-dev-${PG_MAJOR}
+- sudo systemctl stop postgresql
+script: ./run-tests.sh
+after_script:
+- cat regression.diffs
+- cat logfile
diff --git a/contrib/pg_wait_sampling/LICENSE b/contrib/pg_wait_sampling/LICENSE
new file mode 100644
index 0000000..f4b38be
--- /dev/null
+++ b/contrib/pg_wait_sampling/LICENSE
@@ -0,0 +1,11 @@
+pg_wait_sampling is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses.
+
+Copyright (c) 2015-2017, Postgres Professional
+Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+Portions Copyright (c) 1994, The Regents of the University of California
+
+Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies.
+
+IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
diff --git a/contrib/pg_wait_sampling/Makefile b/contrib/pg_wait_sampling/Makefile
new file mode 100644
index 0000000..32711a3
--- /dev/null
+++ b/contrib/pg_wait_sampling/Makefile
@@ -0,0 +1,22 @@
+# contrib/pg_wait_sampling/Makefile
+
+MODULE_big = pg_wait_sampling
+OBJS = pg_wait_sampling.o collector.o
+
+EXTENSION = pg_wait_sampling
+DATA = pg_wait_sampling--1.1.sql pg_wait_sampling--1.0--1.1.sql
+
+REGRESS = load queries
+
+EXTRA_REGRESS_OPTS=--temp-config=$(top_srcdir)/$(subdir)/conf.add
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_wait_sampling
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_wait_sampling/README.md b/contrib/pg_wait_sampling/README.md
new file mode 100644
index 0000000..c9ef3f9
--- /dev/null
+++ b/contrib/pg_wait_sampling/README.md
@@ -0,0 +1,183 @@
+[Build Status](https://app.travis-ci.com/postgrespro/pg_wait_sampling)
+[License](https://raw.githubusercontent.com/postgrespro/pg_wait_sampling/master/LICENSE)
+
+`pg_wait_sampling` – sampling based statistics of wait events
+=============================================================
+
+Introduction
+------------
+
+PostgreSQL provides information about the current wait event of a particular
+process. However, in order to gather descriptive statistics of server
+behavior, the user has to sample the current wait event multiple times.
+`pg_wait_sampling` is an extension for collecting sampling statistics of wait
+events.
+
+The module must be loaded by adding `pg_wait_sampling` to
+`shared_preload_libraries` in postgresql.conf, because it requires additional
+shared memory and launches background worker. This means that a server restart
+is needed to add or remove the module.
+
+When `pg_wait_sampling` is enabled, it collects two kinds of statistics.
+
+ * History of wait events. It's implemented as an in-memory ring buffer where
+ samples of each process's wait events are written with a given (configurable)
+ period. Therefore, for each running process the user can see some number of
+ recent samples depending on the history size (configurable). Assuming there is
+ a client who periodically reads this history and dumps it somewhere, the user
+ can have a continuous history.
+ * Waits profile. It's implemented as an in-memory hash table where counts
+ of samples are accumulated per process and per wait event
+ (and per query with `pg_stat_statements`). This hash
+ table can be reset by user request. Assuming there is a client who
+ periodically dumps the profile and resets it, the user can have statistics of
+ the intensity of wait events over time.
+
+In combination with `pg_stat_statements` this extension can also provide
+per query statistics.
+
+`pg_wait_sampling` launches special background worker for gathering the
+statistics above.
+
+Availability
+------------
+
+`pg_wait_sampling` is implemented as an extension and not available in default
+PostgreSQL installation. It is available from
+[github](https://github.com/postgrespro/pg_wait_sampling)
+under the same license as
+[PostgreSQL](http://www.postgresql.org/about/licence/)
+and supports PostgreSQL 12+.
+
+Installation
+------------
+
+Pre-built `pg_wait_sampling` packages are provided in official PostgreSQL
+repository: https://download.postgresql.org/pub/repos/
+
+Manual build
+------------
+
+`pg_wait_sampling` is PostgreSQL extension which requires PostgreSQL 12 or
+higher. Before build and install you should ensure following:
+
+ * PostgreSQL version is 12 or higher.
+ * You have development package of PostgreSQL installed or you built
+ PostgreSQL from source.
+ * Your PATH variable is configured so that `pg_config` command available, or
+ set PG_CONFIG variable.
+
+Typical installation procedure may look like this:
+
+ $ git clone https://github.com/postgrespro/pg_wait_sampling.git
+ $ cd pg_wait_sampling
+ $ make USE_PGXS=1
+ $ sudo make USE_PGXS=1 install
+
+Then add `shared_preload_libraries = pg_wait_sampling` to `postgresql.conf` and
+restart the server.
+
+To test your installation:
+
+ $ make USE_PGXS=1 installcheck
+
+To create the extension in the target database:
+
+ CREATE EXTENSION pg_wait_sampling;
+
+Compilation on Windows is not supported, since the extension uses symbols from PostgreSQL
+that are not exported.
+
+Usage
+-----
+
+`pg_wait_sampling` interacts with user by set of views and functions.
+
+`pg_wait_sampling_current` view – information about current wait events for
+all processes including background workers.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+
+`pg_wait_sampling_get_current(pid int4)` returns the same table for single given
+process.
+
+`pg_wait_sampling_history` view – history of wait events obtained by sampling into
+in-memory ring buffer.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| ts | timestamptz | Sample timestamp |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+
+`pg_wait_sampling_profile` view – profile of wait events obtained by sampling into
+in-memory hash table.
+
+| Column name | Column type | Description |
+| ----------- | ----------- | ----------------------- |
+| pid | int4 | Id of process |
+| event_type | text | Name of wait event type |
+| event | text | Name of wait event |
+| queryid | int8 | Id of query |
+| count       | int8        | Count of samples        |
+
+`pg_wait_sampling_reset_profile()` function resets the profile.
+
+The work of wait event statistics collector worker is controlled by following
+GUCs.
+
+| Parameter name | Data type | Description | Default value |
+|----------------------------------| --------- |---------------------------------------------|--------------:|
+| pg_wait_sampling.history_size | int4 | Size of history in-memory ring buffer | 5000 |
+| pg_wait_sampling.history_period | int4 | Period for history sampling in milliseconds | 10 |
+| pg_wait_sampling.profile_period | int4 | Period for profile sampling in milliseconds | 10 |
+| pg_wait_sampling.profile_pid | bool | Whether profile should be per pid | true |
+| pg_wait_sampling.profile_queries | enum | Whether profile should be per query | top |
+| pg_wait_sampling.sample_cpu | bool | Whether on CPU backends should be sampled | true |
+
+If `pg_wait_sampling.profile_pid` is set to false, the sampling profile won't be
+collected per process. In this case the value of pid will always be
+zero, and the corresponding row contains samples from all the processes.
+
+If `pg_wait_sampling.profile_queries` is set to `none`, `queryid` field in
+views will be zero. If it is set to `top`, queryIds only of top level statements
+are recorded. If it is set to `all`, queryIds of nested statements are recorded.
+
+If `pg_wait_sampling.sample_cpu` is set to true then processes that are not
+waiting on anything are also sampled. The wait event columns for such processes
+will be NULL.
+
+These GUCs can be changed only by a superuser. Also, they are placed into
+shared memory. Thus, they can be changed from any backend and affect the
+worker at runtime.
+
+See
+[PostgreSQL documentation](http://www.postgresql.org/docs/devel/static/monitoring-stats.html#WAIT-EVENT-TABLE)
+for list of possible wait events.
+
+Contribution
+------------
+
+Please note that `pg_wait_sampling` is still under development and, while
+it's stable and tested, it may contain some bugs. Don't hesitate to raise
+[issues at github](https://github.com/postgrespro/pg_wait_sampling/issues) with
+your bug reports.
+
+If you're missing some functionality in `pg_wait_sampling` and feel able
+to implement it, you're welcome to make pull requests.
+
+Authors
+-------
+
+ * Alexander Korotkov <a.korotkov@postgrespro.ru>, Postgres Professional,
+ Moscow, Russia
+ * Ildus Kurbangaliev <i.kurbangaliev@gmail.com>, Postgres Professional,
+ Moscow, Russia
+
diff --git a/contrib/pg_wait_sampling/collector.c b/contrib/pg_wait_sampling/collector.c
new file mode 100644
index 0000000..235d95f
--- /dev/null
+++ b/contrib/pg_wait_sampling/collector.c
@@ -0,0 +1,498 @@
+/*
+ * collector.c
+ * Collector of wait event history and profile.
+ *
+ * Copyright (c) 2015-2016, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/collector.c
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#if PG_VERSION_NUM >= 130000
+#include "common/hashfn.h"
+#endif
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "storage/procarray.h"
+#include "storage/procsignal.h"
+#include "storage/shm_mq.h"
+#include "storage/shm_toc.h"
+#include "storage/spin.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "pgstat.h"
+
+#include "compat.h"
+#include "pg_wait_sampling.h"
+
+static volatile sig_atomic_t shutdown_requested = false;
+
+static void handle_sigterm(SIGNAL_ARGS);
+
+/*
+ * Fill in a BackgroundWorker descriptor for the collector and register it.
+ */
+void
+pgws_register_wait_collector(void)
+{
+	BackgroundWorker bgw;
+
+	/* Start from an all-zero descriptor, then fill in what we need */
+	memset(&bgw, 0, sizeof(bgw));
+	snprintf(bgw.bgw_name, BGW_MAXLEN, "pg_wait_sampling collector");
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "pg_wait_sampling");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, CppAsString(pgws_collector_main));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
+	bgw.bgw_start_time = BgWorkerStart_ConsistentState;
+	bgw.bgw_restart_time = 1;
+	bgw.bgw_main_arg = (Datum) 0;
+	bgw.bgw_notify_pid = 0;
+	RegisterBackgroundWorker(&bgw);
+}
+
+/*
+ * Set up an empty, non-wrapped history buffer with zeroed room for "count" items.
+ */
+static void
+alloc_history(History *observations, int count)
+{
+	observations->items = (HistoryItem *) palloc0(count * sizeof(HistoryItem));
+	observations->wraparound = false;
+	observations->count = count;
+	observations->index = 0;
+}
+
+/*
+ * Resize the history buffer to hold "count" items.
+ *
+ * The most recent samples are copied into the new array in chronological
+ * order; when the buffer shrinks, the oldest samples that no longer fit are
+ * dropped.  The buffer is left in the non-wrapped state.
+ */
+static void
+realloc_history(History *observations, int count)
+{
+	HistoryItem *newitems;
+	int			nvalid,
+				copyCount,
+				i,
+				j;
+
+	/* Allocate new array for history */
+	newitems = (HistoryItem *) palloc0(sizeof(HistoryItem) * count);
+
+	/* Number of valid samples in the old array, and how many of them fit */
+	nvalid = observations->wraparound ? observations->count : observations->index;
+	copyCount = Min(nvalid, count);
+
+	/*
+	 * get_next_observation() leaves "index" at the next slot to write, which
+	 * is the oldest sample once the ring has wrapped.  Start there, skipping
+	 * the oldest samples that do not fit into the new array.
+	 */
+	j = (observations->wraparound ? observations->index : 0) + (nvalid - copyCount);
+	for (i = 0; i < copyCount; i++, j++)
+	{
+		/* The loop only runs when the old array is non-empty */
+		j %= observations->count;
+		newitems[i] = observations->items[j];
+	}
+
+	/* Switch to new history array */
+	pfree(observations->items);
+	observations->items = newitems;
+	observations->index = copyCount;
+	observations->count = count;
+	observations->wraparound = false;
+}
+
+static void
+handle_sigterm(SIGNAL_ARGS)
+{
+	int			saved_errno = errno;	/* put back before returning */
+	shutdown_requested = true;			/* checked by the collector main loop */
+	if (MyProc != NULL)
+		SetLatch(&MyProc->procLatch);	/* the main loop waits on this latch */
+	errno = saved_errno;
+}
+
+/*
+ * Hand out the slot for the next sample, reusing the ring from the start
+ * once every slot has been filled.
+ */
+static HistoryItem *
+get_next_observation(History *observations)
+{
+	/* Past the last slot: wrap and remember that old entries are reused */
+	if (observations->index >= observations->count)
+	{
+		observations->wraparound = true;
+		observations->index = 0;
+	}
+
+	/* Return the current slot and advance the write position */
+	return &observations->items[observations->index++];
+}
+
+/*
+ * Sample the current wait event of every PGPROC accepted by
+ * pgws_should_sample_proc() and record it in the history ring and/or the
+ * profile hash table.
+ * observations  - history ring buffer samples are appended to
+ * profile_hash  - hash table holding one sample counter per profile key
+ * write_history - store this round's samples in the history
+ * write_profile - count this round's samples in the profile
+ * profile_pid   - keep the real pid in profile keys; otherwise pid 0 is used
+ */
+static void
+probe_waits(History *observations, HTAB *profile_hash,
+			bool write_history, bool write_profile, bool profile_pid)
+{
+	TimestampTz ts = GetCurrentTimestamp();
+	int			wanted_size = pgws_collector_hdr->historySize;
+	int			procno;
+
+	/* Pick up a changed history size before taking any samples */
+	if (observations->count != wanted_size)
+		realloc_history(observations, wanted_size);
+
+	/* Walk all PGPROC entries while holding ProcArrayLock in shared mode */
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+	for (procno = 0; procno < ProcGlobal->allProcCount; procno++)
+	{
+		PGPROC	   *proc = &ProcGlobal->allProcs[procno];
+		HistoryItem sample;
+
+		if (!pgws_should_sample_proc(proc))
+			continue;
+
+		/* Build the sample; all samples of one round share a timestamp */
+		sample.ts = ts;
+		sample.pid = proc->pid;
+		sample.wait_event_info = proc->wait_event_info;
+		sample.queryId = 0;
+		if (pgws_collector_hdr->profileQueries)
+			sample.queryId = pgws_proc_queryids[procno];
+
+		/* Append to the history ring if this round samples history */
+		if (write_history)
+			*get_next_observation(observations) = sample;
+
+		/* Bump the matching profile counter if this round samples profile */
+		if (write_profile)
+		{
+			ProfileItem *entry;
+			bool		found;
+
+			/* Without per-pid profiling every sample is filed under pid 0 */
+			if (!profile_pid)
+				sample.pid = 0;
+
+			entry = (ProfileItem *) hash_search(profile_hash, &sample,
+												HASH_ENTER, &found);
+			/* A new entry starts at one, an existing one is incremented */
+			if (!found)
+				entry->count = 0;
+			entry->count++;
+		}
+	}
+	LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * Ship the stored history over the message queue: first the number of
+ * items, then every item as a separate message.
+ *
+ * Gives up with a WARNING as soon as a send reports that the receiver has
+ * detached from the queue.
+ */
+static void
+send_history(History *observations, shm_mq_handle *mqh)
+{
+	Size		nitems,
+				pos = 0;
+	bool		detached;
+
+	/* Only the filled part of the array is sent */
+	if (observations->wraparound)
+		nitems = observations->count;
+	else
+		nitems = observations->index;
+
+	/* Announce how many items follow */
+	detached = (shm_mq_send_compat(mqh, sizeof(nitems), &nitems, false, true)
+				== SHM_MQ_DETACHED);
+
+	/* Then the items themselves, in array order */
+	while (!detached && pos < nitems)
+	{
+		detached = (shm_mq_send_compat(mqh, sizeof(HistoryItem),
+									   &observations->items[pos],
+									   false, true) == SHM_MQ_DETACHED);
+		pos++;
+	}
+
+	/* One warning covers a detach seen at any point above */
+	if (detached)
+		ereport(WARNING,
+				(errmsg("pg_wait_sampling collector: "
+						"receiver of message queue has been detached")));
+}
+
+/*
+ * Send the profile over the message queue: the entry count first, then one
+ * message per hash entry.  A detached receiver ends it with a WARNING.
+ */
+static void
+send_profile(HTAB *profile_hash, shm_mq_handle *mqh)
+{
+	HASH_SEQ_STATUS scan;
+	ProfileItem *entry;
+	Size		nentries = hash_get_num_entries(profile_hash);
+	bool		detached;
+
+	/* Announce how many entries follow */
+	detached = (shm_mq_send_compat(mqh, sizeof(nentries), &nentries, false, true) == SHM_MQ_DETACHED);
+
+	if (!detached)
+	{
+		hash_seq_init(&scan, profile_hash);
+		while ((entry = (ProfileItem *) hash_seq_search(&scan)) != NULL)
+		{
+			if (shm_mq_send_compat(mqh, sizeof(ProfileItem), entry, false, true) != SHM_MQ_DETACHED)
+				continue;
+			/* Leaving early: the unfinished scan is closed with hash_seq_term() */
+			hash_seq_term(&scan);
+			detached = true;
+			break;
+		}
+	}
+
+	if (detached)
+		ereport(WARNING,
+				(errmsg("pg_wait_sampling collector: "
+						"receiver of message queue has been detached")));
+}
+
+/*
+ * Create an empty wait profile hash table in TopMemoryContext.
+ * The key is the leading part of ProfileItem: up to (excluding) "count" when
+ * query ids are profiled, otherwise up to (excluding) "queryId".
+ */
+static HTAB *
+make_profile_hash(void)
+{
+	HASHCTL		hash_ctl;
+
+	hash_ctl.hash = tag_hash;
+	hash_ctl.hcxt = TopMemoryContext;
+	hash_ctl.entrysize = sizeof(ProfileItem);
+	hash_ctl.keysize = pgws_collector_hdr->profileQueries ?
+		offsetof(ProfileItem, count) : offsetof(ProfileItem, queryId);
+
+	/* HASH_CONTEXT is what makes hash_create() look at hcxt set above */
+	return hash_create("Waits profile hash", 1024, &hash_ctl,
+					   HASH_FUNCTION | HASH_ELEM | HASH_CONTEXT);
+}
+
+/*
+ * Time elapsed from tz1 to tz2, in whole milliseconds.
+ */
+static int64
+millisecs_diff(TimestampTz tz1, TimestampTz tz2)
+{
+	long		secs;
+	int			microsecs;
+
+	TimestampDifference(tz1, tz2, &secs, &microsecs);
+
+	/* Widen before multiplying: "long" may be only 32 bits wide */
+	return (int64) secs * 1000 + microsecs / 1000;
+}
+
+/*
+ * Collector worker entry point: sample waits periodically, serve requests.
+ */
+void
+pgws_collector_main(Datum main_arg)
+{
+ HTAB *profile_hash = NULL;
+ History observations;
+ MemoryContext old_context,
+ collector_context;
+ TimestampTz current_ts,
+ history_ts,
+ profile_ts;
+
+ /*
+ * Establish signal handlers.
+ *
+ * SIGTERM is handled by handle_sigterm(), which only sets
+ * shutdown_requested and the process latch; the main loop below notices
+ * the flag and leaves the loop.
+ *
+ * SIGUSR1 is routed to procsignal_sigusr1_handler so that this process
+ * responds to ProcSignal notifications; they are serviced by the
+ * explicit CHECK_FOR_INTERRUPTS() call in the main loop.
+ *
+ * NOTE(review): the original comment states that upstream installs this
+ * handler automatically only for workers that connect to a database,
+ * while this worker calls InitPostgres without a database and therefore
+ * still participates in the ProcSignal infrastructure -- confirm against
+ * the targeted PostgreSQL versions.
+ */
+ pqsignal(SIGTERM, handle_sigterm);
+ pqsignal(SIGUSR1, procsignal_sigusr1_handler);
+ BackgroundWorkerUnblockSignals();
+ InitPostgresCompat(NULL, InvalidOid, NULL, InvalidOid, false, false, NULL);
+ SetProcessingMode(NormalProcessing);
+
+ /* Make pg_wait_sampling recognisable in pg_stat_activity */
+ pgstat_report_appname("pg_wait_sampling collector");
+
+ profile_hash = make_profile_hash();
+ pgws_collector_hdr->latch = &MyProc->procLatch;
+
+ CurrentResourceOwner = ResourceOwnerCreate(NULL, "pg_wait_sampling collector");
+ collector_context = AllocSetContextCreate(TopMemoryContext,
+ "pg_wait_sampling context", ALLOCSET_DEFAULT_SIZES);
+ old_context = MemoryContextSwitchTo(collector_context);
+ alloc_history(&observations, pgws_collector_hdr->historySize);
+ MemoryContextSwitchTo(old_context);
+
+ ereport(LOG, (errmsg("pg_wait_sampling collector started")));
+
+ /* Start counting time for history and profile samples */
+ profile_ts = history_ts = GetCurrentTimestamp();
+
+ while (1)
+ {
+ int rc;
+ shm_mq_handle *mqh;
+ int64 history_diff,
+ profile_diff;
+ int history_period,
+ profile_period;
+ bool write_history,
+ write_profile;
+
+ /* We need an explicit call for at least ProcSignal notifications. */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Work out how long ago the last history and profile samples were taken */
+ current_ts = GetCurrentTimestamp();
+
+ history_diff = millisecs_diff(history_ts, current_ts);
+ profile_diff = millisecs_diff(profile_ts, current_ts);
+ history_period = pgws_collector_hdr->historyPeriod;
+ profile_period = pgws_collector_hdr->profilePeriod;
+
+ write_history = (history_diff >= (int64)history_period);
+ write_profile = (profile_diff >= (int64)profile_period);
+
+ if (write_history || write_profile)
+ {
+ probe_waits(&observations, profile_hash,
+ write_history, write_profile, pgws_collector_hdr->profilePid);
+
+ if (write_history)
+ {
+ history_ts = current_ts;
+ history_diff = 0;
+ }
+
+ if (write_profile)
+ {
+ profile_ts = current_ts;
+ profile_diff = 0;
+ }
+ }
+
+ /* Shutdown if requested */
+ if (shutdown_requested)
+ break;
+
+ /*
+ * Wait until next sample time or request to do something through
+ * shared memory.
+ */
+ rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+ Min(history_period - (int)history_diff,
+ profile_period - (int)profile_diff), PG_WAIT_EXTENSION);
+
+ if (rc & WL_POSTMASTER_DEATH)
+ proc_exit(1);
+
+ ResetLatch(&MyProc->procLatch);
+
+ /* Handle request if any */
+ if (pgws_collector_hdr->request != NO_REQUEST)
+ {
+ LOCKTAG tag;
+ SHMRequest request;
+
+ pgws_init_lock_tag(&tag, PGWS_COLLECTOR_LOCK);
+ /* The collector lock is held while the request is consumed and answered */
+ LockAcquire(&tag, ExclusiveLock, false, false);
+ request = pgws_collector_hdr->request;
+ pgws_collector_hdr->request = NO_REQUEST;
+
+ if (request == HISTORY_REQUEST || request == PROFILE_REQUEST)
+ {
+ shm_mq_result mq_result;
+
+ /* Send history or profile */
+ shm_mq_set_sender(pgws_collector_mq, MyProc);
+ mqh = shm_mq_attach(pgws_collector_mq, NULL, NULL);
+ mq_result = shm_mq_wait_for_attach(mqh);
+ switch (mq_result)
+ {
+ case SHM_MQ_SUCCESS:
+ switch (request)
+ {
+ case HISTORY_REQUEST:
+ send_history(&observations, mqh);
+ break;
+ case PROFILE_REQUEST:
+ send_profile(profile_hash, mqh);
+ break;
+ default:
+ Assert(false);
+ }
+ break;
+ case SHM_MQ_DETACHED:
+ ereport(WARNING,
+ (errmsg("pg_wait_sampling collector: "
+ "receiver of message queue have been "
+ "detached")));
+ break;
+ default:
+ Assert(false);
+ }
+ shm_mq_detach(mqh);
+ }
+ else if (request == PROFILE_RESET)
+ {
+ /* Reset profile hash */
+ hash_destroy(profile_hash);
+ profile_hash = make_profile_hash();
+ }
+ LockRelease(&tag, ExclusiveLock, false);
+ }
+ }
+
+ MemoryContextReset(collector_context);
+
+ /*
+ * We're done.  NOTE(review): nothing is detached explicitly at this
+ * point -- the message queue handle is already detached after each
+ * request inside the loop -- so the remaining cleanup is presumably left
+ * to proc_exit(); confirm.
+ */
+ ereport(LOG, (errmsg("pg_wait_sampling collector shutting down")));
+ proc_exit(0);
+}
diff --git a/contrib/pg_wait_sampling/compat.h b/contrib/pg_wait_sampling/compat.h
new file mode 100644
index 0000000..72a63ab
--- /dev/null
+++ b/contrib/pg_wait_sampling/compat.h
@@ -0,0 +1,64 @@
+/*
+ * compat.h
+ * Definitions for function wrappers compatible between PG versions.
+ *
+ * Copyright (c) 2015-2022, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/compat.h
+ */
+#ifndef __COMPAT_H__
+#define __COMPAT_H__
+
+#include "postgres.h"
+
+#include "access/tupdesc.h"
+#include "miscadmin.h"
+#include "storage/shm_mq.h"
+#include "utils/guc_tables.h"
+
+static inline shm_mq_result
+shm_mq_send_compat(shm_mq_handle *mqh, Size nbytes, const void *data,
+ bool nowait, bool force_flush)
+{
+#if PG_VERSION_NUM >= 150000 /* shm_mq_send() takes force_flush here */
+ return shm_mq_send(mqh, nbytes, data, nowait, force_flush);
+#else /* older shm_mq_send() has no such argument; force_flush is ignored */
+ return shm_mq_send(mqh, nbytes, data, nowait);
+#endif
+}
+
+static inline void
+InitPostgresCompat(const char *in_dbname, Oid dboid,
+ const char *username, Oid useroid,
+ bool load_session_libraries,
+ bool override_allow_connections,
+ char *out_dbname)
+{
+#if PG_VERSION_NUM >= 170000 /* the two booleans are folded into INIT_PG_* flag bits */
+ InitPostgres(in_dbname, dboid, username, useroid, (load_session_libraries ? INIT_PG_LOAD_SESSION_LIBS : 0) |
+ (override_allow_connections ? INIT_PG_OVERRIDE_ALLOW_CONNS : 0), out_dbname);
+#elif PG_VERSION_NUM >= 150000 /* the two booleans are passed as separate arguments */
+ InitPostgres(in_dbname, dboid, username, useroid, load_session_libraries,
+ override_allow_connections, out_dbname);
+#else /* load_session_libraries is not passed; out_dbname precedes the flag */
+ InitPostgres(in_dbname, dboid, username, useroid, out_dbname,
+ override_allow_connections);
+#endif
+}
+
+static inline void
+get_guc_variables_compat(struct config_generic ***vars, int *num_vars)
+{
+ Assert(vars != NULL);
+ Assert(num_vars != NULL);
+ /* Both outputs are always filled: the variable array and its length */
+#if PG_VERSION_NUM >= 160000 /* the count comes back through the argument */
+ *vars = get_guc_variables(num_vars);
+#else /* the count has to be fetched with a separate call */
+ *vars = get_guc_variables();
+ *num_vars = GetNumConfigOptions();
+#endif
+}
+
+#endif
diff --git a/contrib/pg_wait_sampling/conf.add b/contrib/pg_wait_sampling/conf.add
new file mode 100644
index 0000000..54c013d
--- /dev/null
+++ b/contrib/pg_wait_sampling/conf.add
@@ -0,0 +1 @@
+shared_preload_libraries = 'pg_wait_sampling'
diff --git a/contrib/pg_wait_sampling/expected/load.out b/contrib/pg_wait_sampling/expected/load.out
new file mode 100644
index 0000000..b7de0ac
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/load.out
@@ -0,0 +1,31 @@
+CREATE EXTENSION pg_wait_sampling;
+\d pg_wait_sampling_current
+View "public.pg_wait_sampling_current"
+ Column | Type | Modifiers
+------------+---------+-----------
+ pid | integer |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+
+\d pg_wait_sampling_history
+ View "public.pg_wait_sampling_history"
+ Column | Type | Modifiers
+------------+--------------------------+-----------
+ pid | integer |
+ ts | timestamp with time zone |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+
+\d pg_wait_sampling_profile
+View "public.pg_wait_sampling_profile"
+ Column | Type | Modifiers
+------------+---------+-----------
+ pid | integer |
+ event_type | text |
+ event | text |
+ queryid | bigint |
+ count | bigint |
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/expected/load_1.out b/contrib/pg_wait_sampling/expected/load_1.out
new file mode 100644
index 0000000..1a1358a
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/load_1.out
@@ -0,0 +1,31 @@
+CREATE EXTENSION pg_wait_sampling;
+\d pg_wait_sampling_current
+ View "public.pg_wait_sampling_current"
+ Column | Type | Collation | Nullable | Default
+------------+---------+-----------+----------+---------
+ pid | integer | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+
+\d pg_wait_sampling_history
+ View "public.pg_wait_sampling_history"
+ Column | Type | Collation | Nullable | Default
+------------+--------------------------+-----------+----------+---------
+ pid | integer | | |
+ ts | timestamp with time zone | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+
+\d pg_wait_sampling_profile
+ View "public.pg_wait_sampling_profile"
+ Column | Type | Collation | Nullable | Default
+------------+---------+-----------+----------+---------
+ pid | integer | | |
+ event_type | text | | |
+ event | text | | |
+ queryid | bigint | | |
+ count | bigint | | |
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/expected/queries.out b/contrib/pg_wait_sampling/expected/queries.out
new file mode 100644
index 0000000..722df5f
--- /dev/null
+++ b/contrib/pg_wait_sampling/expected/queries.out
@@ -0,0 +1,48 @@
+CREATE EXTENSION pg_wait_sampling;
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_current)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_history)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+ sum
+-----
+ 0
+(1 row)
+
+-- Some dummy checks just to be sure that all our functions work and return something.
+SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid());
+ test
+------
+ t
+(1 row)
+
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile();
+ test
+------
+ t
+(1 row)
+
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history();
+ test
+------
+ t
+(1 row)
+
+SELECT pg_wait_sampling_reset_profile();
+ pg_wait_sampling_reset_profile
+--------------------------------
+
+(1 row)
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/meson.build b/contrib/pg_wait_sampling/meson.build
new file mode 100644
index 0000000..c1041cf
--- /dev/null
+++ b/contrib/pg_wait_sampling/meson.build
@@ -0,0 +1,39 @@
+pg_wait_sampling_sources = files(
+ 'pg_wait_sampling.c',
+ 'collector.c',
+)
+
+
+if host_system == 'windows'
+ pg_wait_sampling_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_wait_sampling',
+ '--FILEDESC', 'pg_wait_sampling',])
+endif
+
+pg_wait_sampling = shared_module('pg_wait_sampling',
+ pg_wait_sampling_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_wait_sampling
+
+install_data(
+ 'pg_wait_sampling--1.1.sql',
+ 'pg_wait_sampling--1.0--1.1.sql',
+ 'pg_wait_sampling.control',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'pg_wait_sampling',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'load',
+ 'queries',
+ ],
+ 'regress_args': [
+ '--temp-config', files('conf.add')
+ ]
+ },
+}
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql b/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql
new file mode 100644
index 0000000..3831394
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql
@@ -0,0 +1,70 @@
+/* contrib/pg_wait_sampling/pg_wait_sampling--1.0--1.1.sql */
+
+DROP FUNCTION pg_wait_sampling_get_current (
+ pid int4,
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text
+) CASCADE;
+
+DROP FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text
+) CASCADE;
+
+DROP FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT count bigint
+) CASCADE;
+
+CREATE FUNCTION pg_wait_sampling_get_current (
+ pid int4,
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE CALLED ON NULL INPUT;
+
+CREATE VIEW pg_wait_sampling_current AS
+ SELECT * FROM pg_wait_sampling_get_current(NULL::integer);
+
+GRANT SELECT ON pg_wait_sampling_current TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_history AS
+ SELECT * FROM pg_wait_sampling_get_history();
+
+GRANT SELECT ON pg_wait_sampling_history TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8,
+ OUT count int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_profile AS
+ SELECT * FROM pg_wait_sampling_get_profile();
+
+GRANT SELECT ON pg_wait_sampling_profile TO PUBLIC;
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql b/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql
new file mode 100644
index 0000000..e1bdf6a
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql
@@ -0,0 +1,60 @@
+/* contrib/pg_wait_sampling/pg_wait_sampling--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_wait_sampling" to load this file. \quit
+
+CREATE FUNCTION pg_wait_sampling_get_current (
+ pid int4, -- the view below passes NULL here
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE CALLED ON NULL INPUT; -- not STRICT: must run for the NULL pid used by the view
+
+CREATE VIEW pg_wait_sampling_current AS
+ SELECT * FROM pg_wait_sampling_get_current(NULL::integer);
+
+GRANT SELECT ON pg_wait_sampling_current TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_history (
+ OUT pid int4,
+ OUT ts timestamptz,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_history AS
+ SELECT * FROM pg_wait_sampling_get_history();
+
+GRANT SELECT ON pg_wait_sampling_history TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_get_profile (
+ OUT pid int4,
+ OUT event_type text,
+ OUT event text,
+ OUT queryid int8,
+ OUT count int8
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+CREATE VIEW pg_wait_sampling_profile AS
+ SELECT * FROM pg_wait_sampling_get_profile();
+
+GRANT SELECT ON pg_wait_sampling_profile TO PUBLIC;
+
+CREATE FUNCTION pg_wait_sampling_reset_profile() -- implemented in pg_wait_sampling.c
+RETURNS void
+AS 'MODULE_PATHNAME'
+LANGUAGE C VOLATILE STRICT;
+
+-- Don't want this to be available to non-superusers.
+REVOKE ALL ON FUNCTION pg_wait_sampling_reset_profile() FROM PUBLIC;
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.c b/contrib/pg_wait_sampling/pg_wait_sampling.c
new file mode 100644
index 0000000..068b553
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.c
@@ -0,0 +1,1184 @@
+/*
+ * pg_wait_sampling.c
+ * Track information about wait events.
+ *
+ * Copyright (c) 2015-2017, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/pg_wait_sampling.c
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/twophase.h"
+#include "catalog/pg_type.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "optimizer/planner.h"
+#include "pgstat.h"
+#include "postmaster/autovacuum.h"
+#include "replication/walsender.h"
+#include "storage/ipc.h"
+#include "storage/pg_shmem.h"
+#include "storage/procarray.h"
+#include "storage/shm_mq.h"
+#include "storage/shm_toc.h"
+#include "storage/spin.h"
+#include "tcop/utility.h"
+#include "utils/builtins.h"
+#include "utils/datetime.h"
+#include "utils/guc_tables.h"
+#include "utils/guc.h"
+#include "utils/memutils.h" /* TopMemoryContext. Actually for PG 9.6 only,
+ * but there should be no harm for others. */
+
+#include "compat.h"
+#include "pg_wait_sampling.h"
+
+PG_MODULE_MAGIC;
+
+void _PG_init(void);
+
+static bool shmem_initialized = false; /* set to true by pgws_shmem_startup() */
+
+/* Previous values of the hooks installed in _PG_init(), kept for chaining */
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorRun_hook_type prev_ExecutorRun = NULL;
+static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
+static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
+static planner_hook_type planner_hook_next = NULL;
+static ProcessUtility_hook_type prev_ProcessUtility = NULL;
+
+/* Current nesting depth of planner/Executor/ProcessUtility calls */
+static int nesting_level = 0;
+
+/* Pointers to shared memory objects, set up in pgws_shmem_startup() */
+shm_mq *pgws_collector_mq = NULL;
+uint64 *pgws_proc_queryids = NULL; /* one queryId slot per process, indexed by position in ProcGlobal->allProcs */
+CollectorShmqHeader *pgws_collector_hdr = NULL;
+
+/* Receiver (backend) local shm_mq pointers and the queue lock tag */
+static shm_mq *recv_mq = NULL;
+static shm_mq_handle *recv_mqh = NULL;
+static LOCKTAG queueTag;
+
+#if PG_VERSION_NUM >= 150000
+static shmem_request_hook_type prev_shmem_request_hook = NULL;
+#endif
+static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
+static PGPROC * search_proc(int backendPid);
+static PlannedStmt *pgws_planner_hook(Query *parse,
+#if PG_VERSION_NUM >= 130000
+ const char *query_string,
+#endif
+ int cursorOptions, ParamListInfo boundParams);
+static void pgws_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void pgws_ExecutorRun(QueryDesc *queryDesc,
+ ScanDirection direction,
+ uint64 count, bool execute_once);
+static void pgws_ExecutorFinish(QueryDesc *queryDesc);
+static void pgws_ExecutorEnd(QueryDesc *queryDesc);
+static void pgws_ProcessUtility(PlannedStmt *pstmt,
+ const char *queryString,
+#if PG_VERSION_NUM >= 140000
+ bool readOnlyTree,
+#endif
+ ProcessUtilityContext context,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv,
+ DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+ QueryCompletion *qc
+#else
+ char *completionTag
+#endif
+ );
+
+/*---- GUC variables ----*/
+
+typedef enum /* values of the pg_wait_sampling.profile_queries GUC */
+{
+ PGWS_PROFILE_QUERIES_NONE, /* profile no statements */
+ PGWS_PROFILE_QUERIES_TOP, /* only top level statements (nesting level 0) */
+ PGWS_PROFILE_QUERIES_ALL /* all statements, including nested ones */
+} PGWSTrackLevel;
+
+static const struct config_enum_entry pgws_profile_queries_options[] = /* accepted spellings for profile_queries */
+{
+ {"none", PGWS_PROFILE_QUERIES_NONE, false},
+ {"off", PGWS_PROFILE_QUERIES_NONE, false}, /* boolean-style spellings map to "none" ... */
+ {"no", PGWS_PROFILE_QUERIES_NONE, false},
+ {"false", PGWS_PROFILE_QUERIES_NONE, false},
+ {"0", PGWS_PROFILE_QUERIES_NONE, false},
+ {"top", PGWS_PROFILE_QUERIES_TOP, false},
+ {"on", PGWS_PROFILE_QUERIES_TOP, false}, /* ... and to "top" */
+ {"yes", PGWS_PROFILE_QUERIES_TOP, false},
+ {"true", PGWS_PROFILE_QUERIES_TOP, false},
+ {"1", PGWS_PROFILE_QUERIES_TOP, false},
+ {"all", PGWS_PROFILE_QUERIES_ALL, false},
+ {NULL, 0, false} /* list terminator */
+};
+
+#define pgws_enabled(level) /* does profile_queries cover nesting depth 'level'? */ \
+ ((pgws_collector_hdr->profileQueries == PGWS_PROFILE_QUERIES_ALL) || \
+ (pgws_collector_hdr->profileQueries == PGWS_PROFILE_QUERIES_TOP && (level) == 0))
+
+/*
+ * Calculate max processes count; used to size the pgws_proc_queryids array.
+ *
+ * The value has to be in sync with ProcGlobal->allProcCount, initialized in
+ * InitProcGlobal() (proc.c), because that array is indexed by a process's
+ * position in ProcGlobal->allProcs.
+ */
+static int
+get_max_procs_count(void)
+{
+ int count = 0;
+
+ /* First, add the maximum number of backends (MaxBackends). */
+#if PG_VERSION_NUM >= 150000
+ /*
+ * On pg15+, we can directly access the MaxBackends variable, as it will
+ * have already been initialized in shmem_request_hook.
+ */
+ Assert(MaxBackends > 0);
+ count += MaxBackends;
+#else
+ /*
+ * On older versions, we need to compute MaxBackends: bgworkers, autovacuum
+ * workers and launcher.
+ * This has to be in sync with the value computed in
+ * InitializeMaxBackends() (postinit.c)
+ *
+ * Note that we need to calculate the value as it won't be initialized when
+ * we need it during _PG_init().
+ *
+ * Note also that the value returned during _PG_init() might be different
+ * from the value returned later if some third-party modules change one of
+ * the underlying GUCs. This isn't ideal but can't lead to a crash, as the
+ * value returned during _PG_init() is only used to ask for additional
+ * shmem with RequestAddinShmemSpace(), and postgres has an extra 100kB of
+ * shmem to compensate for some small unaccounted usage. So if the value
+ * later changes, we will allocate and initialize the new (and correct)
+ * memory size, which will either work thanks to the extra 100kB of shmem,
+ * or fail (and prevent postgres startup) due to an out of shared memory
+ * error.
+ */
+ count += MaxConnections + autovacuum_max_workers + 1
+ + max_worker_processes;
+
+ /*
+ * Starting with pg12, wal senders aren't part of MaxConnections anymore
+ * and have to be accounted for.
+ */
+ count += max_wal_senders;
+#endif /* pg 15- */
+ /* End of MaxBackends calculation. */
+
+ /* Add AuxiliaryProcs */
+ count += NUM_AUXILIARY_PROCS;
+
+ return count;
+}
+
+/*
+ * Compute how much shared memory the extension needs: a shm_toc holding the
+ * collector header, the collector message queue and one queryId slot per
+ * process counted by get_max_procs_count().
+ */
+static Size
+pgws_shmem_size(void)
+{
+	shm_toc_estimator estimator;
+	Size		queryids_bytes = sizeof(uint64) * get_max_procs_count();
+
+	shm_toc_initialize_estimator(&estimator);
+
+	/* One chunk per object placed into the TOC by pgws_shmem_startup(). */
+	shm_toc_estimate_chunk(&estimator, sizeof(CollectorShmqHeader));
+	shm_toc_estimate_chunk(&estimator, (Size) COLLECTOR_QUEUE_SIZE);
+	shm_toc_estimate_chunk(&estimator, queryids_bytes);
+
+	/* ... and one TOC key for each of those three chunks. */
+	shm_toc_estimate_keys(&estimator, 3);
+
+	return shm_toc_estimate(&estimator);
+}
+
+/* Check hook for int GUCs: reject the value while UsedShmemSegAddr is unset. */
+static bool
+shmem_int_guc_check_hook(int *newval, void **extra, GucSource source)
+{
+	/* The GUC variable itself lives in shared memory (see setup_gucs()). */
+	return UsedShmemSegAddr != NULL;
+}
+
+/* Check hook for enum GUCs: reject the value while UsedShmemSegAddr is unset. */
+static bool
+shmem_enum_guc_check_hook(int *newval, void **extra, GucSource source)
+{
+	/* The GUC variable itself lives in shared memory (see setup_gucs()). */
+	return UsedShmemSegAddr != NULL;
+}
+
+/* Check hook for bool GUCs: reject the value while UsedShmemSegAddr is unset. */
+static bool
+shmem_bool_guc_check_hook(bool *newval, void **extra, GucSource source)
+{
+	/* The GUC variable itself lives in shared memory (see setup_gucs()). */
+	return UsedShmemSegAddr != NULL;
+}
+
+/*
+ * Overlay of the GUC record structs, used by setup_gucs() to reach the
+ * type-specific 'variable' field of an entry obtained as a config_generic.
+ */
+typedef union
+{
+ struct config_generic generic;
+ struct config_bool _bool;
+ struct config_real real;
+ struct config_int integer;
+ struct config_string string;
+ struct config_enum _enum;
+} mixedStruct;
+
+/*
+ * Set up the module's GUCs so that their values live in the shared-memory
+ * header: variables that already exist are re-pointed at the header and set
+ * to their defaults, the others are defined from scratch.
+ */
+static void
+setup_gucs(void)
+{
+	struct config_generic **guc_vars;
+	int			numOpts, i;
+	bool		history_size_found = false,
+				history_period_found = false,
+				profile_period_found = false,
+				profile_pid_found = false,
+				profile_queries_found = false,
+				sample_cpu_found = false;
+
+	get_guc_variables_compat(&guc_vars, &numOpts);
+
+	for (i = 0; i < numOpts; i++)
+	{
+		mixedStruct *var = (mixedStruct *) guc_vars[i];
+		const char *name = var->generic.name;
+
+		if (var->generic.flags & GUC_CUSTOM_PLACEHOLDER)
+			continue;
+
+		if (!strcmp(name, "pg_wait_sampling.history_size"))
+		{
+			history_size_found = true;
+			var->integer.variable = &pgws_collector_hdr->historySize;
+			pgws_collector_hdr->historySize = 5000;
+		}
+		else if (!strcmp(name, "pg_wait_sampling.history_period"))
+		{
+			history_period_found = true;
+			var->integer.variable = &pgws_collector_hdr->historyPeriod;
+			pgws_collector_hdr->historyPeriod = 10;
+		}
+		else if (!strcmp(name, "pg_wait_sampling.profile_period"))
+		{
+			profile_period_found = true;
+			var->integer.variable = &pgws_collector_hdr->profilePeriod;
+			pgws_collector_hdr->profilePeriod = 10;
+		}
+		else if (!strcmp(name, "pg_wait_sampling.profile_pid"))
+		{
+			profile_pid_found = true;
+			var->_bool.variable = &pgws_collector_hdr->profilePid;
+			pgws_collector_hdr->profilePid = true;
+		}
+		else if (!strcmp(name, "pg_wait_sampling.profile_queries"))
+		{
+			profile_queries_found = true;
+			var->_enum.variable = &pgws_collector_hdr->profileQueries;
+			pgws_collector_hdr->profileQueries = PGWS_PROFILE_QUERIES_TOP;
+		}
+		else if (!strcmp(name, "pg_wait_sampling.sample_cpu"))
+		{
+			sample_cpu_found = true;
+			var->_bool.variable = &pgws_collector_hdr->sampleCpu;
+			pgws_collector_hdr->sampleCpu = true;
+		}
+	}
+
+	/* Define whichever variables were not found among the existing GUCs. */
+	if (!history_size_found)
+		DefineCustomIntVariable("pg_wait_sampling.history_size",
+				"Sets size of waits history.", NULL,
+				&pgws_collector_hdr->historySize, 5000, 100, INT_MAX,
+				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
+
+	if (!history_period_found)
+		DefineCustomIntVariable("pg_wait_sampling.history_period",
+				"Sets period of waits history sampling.", NULL,
+				&pgws_collector_hdr->historyPeriod, 10, 1, INT_MAX,
+				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
+
+	if (!profile_period_found)
+		DefineCustomIntVariable("pg_wait_sampling.profile_period",
+				"Sets period of waits profile sampling.", NULL,
+				&pgws_collector_hdr->profilePeriod, 10, 1, INT_MAX,
+				PGC_SUSET, 0, shmem_int_guc_check_hook, NULL, NULL);
+
+	if (!profile_pid_found)
+		DefineCustomBoolVariable("pg_wait_sampling.profile_pid",
+				"Sets whether profile should be collected per pid.", NULL,
+				&pgws_collector_hdr->profilePid, true,
+				PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL);
+
+	if (!profile_queries_found)
+		DefineCustomEnumVariable("pg_wait_sampling.profile_queries",
+				"Sets whether profile should be collected per query.", NULL,
+				&pgws_collector_hdr->profileQueries, PGWS_PROFILE_QUERIES_TOP, pgws_profile_queries_options,
+				PGC_SUSET, 0, shmem_enum_guc_check_hook, NULL, NULL);
+
+	if (!sample_cpu_found)
+		DefineCustomBoolVariable("pg_wait_sampling.sample_cpu",
+				"Sets whether not waiting backends should be sampled.", NULL,
+				&pgws_collector_hdr->sampleCpu, true,
+				PGC_SUSET, 0, shmem_bool_guc_check_hook, NULL, NULL);
+
+	/*
+	 * Variables that already existed were reset to their defaults above, so
+	 * process the configuration file again to re-apply configured values.
+	 */
+	if (history_size_found || history_period_found || profile_period_found ||
+		profile_pid_found || profile_queries_found || sample_cpu_found)
+		ProcessConfigFile(PGC_SIGHUP);
+}
+
+#if PG_VERSION_NUM >= 150000
+/*
+ * shmem_request hook (pg15+): reserve the shared memory this module needs,
+ * after giving any previously installed hook its turn.
+ *
+ * Keep in sync with the equivalent request made in _PG_init() for pg14-.
+ */
+static void
+pgws_shmem_request(void)
+{
+	if (prev_shmem_request_hook != NULL)
+		(*prev_shmem_request_hook) ();
+
+	RequestAddinShmemSpace(pgws_shmem_size());
+}
+#endif
+
+/*
+ * shmem_startup hook: create the module's shared memory area and its table
+ * of contents on first use, or attach to the already existing one.
+ */
+static void
+pgws_shmem_startup(void)
+{
+	Size		total_size = pgws_shmem_size();
+	bool		already_exists;
+	void	   *area;
+	shm_toc    *toc;
+
+	area = ShmemInitStruct("pg_wait_sampling", total_size, &already_exists);
+
+	if (already_exists)
+	{
+		/* The area is already set up: look our three objects up by key. */
+		toc = shm_toc_attach(PG_WAIT_SAMPLING_MAGIC, area);
+		pgws_collector_hdr = shm_toc_lookup(toc, 0, false);
+		pgws_collector_mq = shm_toc_lookup(toc, 1, false);
+		pgws_proc_queryids = shm_toc_lookup(toc, 2, false);
+	}
+	else
+	{
+		Size		queryids_size = sizeof(uint64) * get_max_procs_count();
+
+		toc = shm_toc_create(PG_WAIT_SAMPLING_MAGIC, area, total_size);
+		pgws_collector_hdr = shm_toc_allocate(toc, sizeof(CollectorShmqHeader));
+		shm_toc_insert(toc, 0, pgws_collector_hdr);
+		/* needed to please check_GUC_init */
+		pgws_collector_hdr->profileQueries = PGWS_PROFILE_QUERIES_TOP;
+		pgws_collector_mq = shm_toc_allocate(toc, COLLECTOR_QUEUE_SIZE);
+		shm_toc_insert(toc, 1, pgws_collector_mq);
+		pgws_proc_queryids = shm_toc_allocate(toc, queryids_size);
+		shm_toc_insert(toc, 2, pgws_proc_queryids);
+		MemSet(pgws_proc_queryids, 0, queryids_size);
+		setup_gucs();			/* bind the GUCs to the new shared header */
+	}
+
+	shmem_initialized = true;
+
+	if (prev_shmem_startup_hook != NULL)
+		prev_shmem_startup_hook();
+}
+
+/*
+ * Raise an ERROR unless pgws_shmem_startup() has set shmem_initialized.
+ */
+static void
+check_shmem(void)
+{
+	if (shmem_initialized)
+		return;
+
+	ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("pg_wait_sampling shared memory wasn't initialized yet")));
+}
+
+static void /* error-cleanup callback registered by receive_array(); both arguments are unused */
+pgws_cleanup_callback(int code, Datum arg)
+{
+ elog(DEBUG3, "pg_wait_sampling cleanup: detaching shm_mq and releasing queue lock");
+ shm_mq_detach(recv_mqh); /* handle attached in receive_array() */
+ LockRelease(&queueTag, ExclusiveLock, false); /* queue lock taken in receive_array() */
+}
+
+/*
+ * Module load callback; a no-op unless loading via shared_preload_libraries.
+ */
+void
+_PG_init(void)
+{
+ if (!process_shared_preload_libraries_in_progress)
+ return;
+
+#if PG_VERSION_NUM < 150000
+ /*
+ * Request additional shared resources. (These are no-ops if we're not in
+ * the postmaster process.) We'll allocate or attach to the shared
+ * resources in pgws_shmem_startup().
+ *
+ * If you change code here, don't forget to also report the modifications
+ * in pgws_shmem_request() for pg15 and later.
+ */
+ RequestAddinShmemSpace(pgws_shmem_size());
+#endif
+
+ pgws_register_wait_collector(); /* declared in pg_wait_sampling.h as living in collector.c */
+
+ /*
+ * Install hooks, remembering each previous value so ours can chain to it.
+ */
+#if PG_VERSION_NUM >= 150000
+ prev_shmem_request_hook = shmem_request_hook;
+ shmem_request_hook = pgws_shmem_request;
+#endif
+ prev_shmem_startup_hook = shmem_startup_hook;
+ shmem_startup_hook = pgws_shmem_startup;
+ planner_hook_next = planner_hook;
+ planner_hook = pgws_planner_hook;
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = pgws_ExecutorStart;
+ prev_ExecutorRun = ExecutorRun_hook;
+ ExecutorRun_hook = pgws_ExecutorRun;
+ prev_ExecutorFinish = ExecutorFinish_hook;
+ ExecutorFinish_hook = pgws_ExecutorFinish;
+ prev_ExecutorEnd = ExecutorEnd_hook;
+ ExecutorEnd_hook = pgws_ExecutorEnd;
+ prev_ProcessUtility = ProcessUtility_hook;
+ ProcessUtility_hook = pgws_ProcessUtility;
+}
+
+/*
+ * Return the PGPROC whose pid matches 'pid' (MyProc when pid is 0), or raise
+ * an ERROR if there is none.  The caller must already hold ProcArrayLock.
+ */
+static PGPROC *
+search_proc(int pid)
+{
+	int			procno = 0;
+
+	/* pid 0 is shorthand for the calling backend itself. */
+	if (pid == 0)
+		return MyProc;
+
+	while (procno < ProcGlobal->allProcCount)
+	{
+		PGPROC	   *candidate = &ProcGlobal->allProcs[procno++];
+
+		if (candidate->pid != 0 && candidate->pid == pid)
+			return candidate;
+	}
+
+	ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+			errmsg("backend with pid=%d not found", pid)));
+	return NULL;				/* not reached; keeps the compiler quiet */
+}
+
+/*
+ * Tell whether a PGPROC slot belongs in profiles and output views: it must be
+ * a live process other than ourselves, and either waiting or sampleCpu is on.
+ */
+bool
+pgws_should_sample_proc(PGPROC *proc)
+{
+	bool		is_waiting = (proc->wait_event_info != 0);
+
+	/* Non-waiting processes are reported only when sampleCpu is set. */
+	if (!is_waiting && !pgws_collector_hdr->sampleCpu)
+		return false;
+
+	/*
+	 * PostgreSQL < 17 does not reset PGPROC->pid on process exit, which would
+	 * count dead processes under a null wait event.  DisownLatch() does reset
+	 * owner_pid during ProcKill(), so test that field as well.
+	 */
+	return !(proc->pid == 0 ||
+			 proc->procLatch.owner_pid == 0 || proc->pid == MyProcPid);
+}
+
+typedef struct /* cross-call state of pg_wait_sampling_get_current() */
+{
+ HistoryItem *items; /* snapshot entries, one per row to return */
+ TimestampTz ts; /* GetCurrentTimestamp() at the first call */
+} WaitCurrentContext;
+
+/*
+ * SQL-callable SRF: report the current wait event of one backend (non-NULL
+ * pid argument) or of every process accepted by pgws_should_sample_proc().
+ * The snapshot is taken once, on the first call, under ProcArrayLock.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_current);
+Datum
+pg_wait_sampling_get_current(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	WaitCurrentContext *params;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext oldcontext;
+		TupleDesc	tupdesc;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		params = (WaitCurrentContext *) palloc0(sizeof(WaitCurrentContext));
+		params->ts = GetCurrentTimestamp();
+
+		funcctx->user_fctx = params;
+		tupdesc = CreateTemplateTupleDesc(4);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "pid", INT4OID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "type", TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "event", TEXTOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "queryid", INT8OID, -1, 0);
+
+		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+		LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+		if (!PG_ARGISNULL(0))
+		{
+			HistoryItem *item;
+			PGPROC	   *proc;
+
+			/* The SQL argument is int4 and search_proc() takes an int. */
+			proc = search_proc(PG_GETARG_INT32(0));
+			params->items = (HistoryItem *) palloc0(sizeof(HistoryItem));
+			item = &params->items[0];
+			item->pid = proc->pid;
+			item->wait_event_info = proc->wait_event_info;
+			item->queryId = pgws_proc_queryids[proc - ProcGlobal->allProcs];
+			funcctx->max_calls = 1;
+		}
+		else
+		{
+			int			procCount = ProcGlobal->allProcCount;
+			int			i, j = 0;	/* j counts the items[] entries filled */
+
+			params->items = (HistoryItem *) palloc0(sizeof(HistoryItem) * procCount);
+			for (i = 0; i < procCount; i++)
+			{
+				PGPROC	   *proc = &ProcGlobal->allProcs[i];
+
+				if (!pgws_should_sample_proc(proc))
+					continue;
+
+				params->items[j].pid = proc->pid;
+				params->items[j].wait_event_info = proc->wait_event_info;
+				params->items[j].queryId = pgws_proc_queryids[i];
+				j++;
+			}
+			funcctx->max_calls = j;
+		}
+
+		LWLockRelease(ProcArrayLock);
+
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	/* stuff done on every call of the function */
+	funcctx = SRF_PERCALL_SETUP();
+	params = (WaitCurrentContext *) funcctx->user_fctx;
+
+	if (funcctx->call_cntr < funcctx->max_calls)
+	{
+		HeapTuple	tuple;
+		Datum		values[4];
+		bool		nulls[4];
+		const char *event_type;
+		const char *event;
+		HistoryItem *item;
+
+		item = &params->items[funcctx->call_cntr];
+
+		/* Make and return next tuple to caller */
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		/* Either lookup may yield NULL, which is returned as SQL NULL. */
+		event_type = pgstat_get_wait_event_type(item->wait_event_info);
+		event = pgstat_get_wait_event(item->wait_event_info);
+		values[0] = Int32GetDatum(item->pid);
+		if (event_type)
+			values[1] = PointerGetDatum(cstring_to_text(event_type));
+		else
+			nulls[1] = true;
+		if (event)
+			values[2] = PointerGetDatum(cstring_to_text(event));
+		else
+			nulls[2] = true;
+
+		values[3] = UInt64GetDatum(item->queryId);
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+	else
+		SRF_RETURN_DONE(funcctx);
+}
+
+typedef struct /* cross-call state of pg_wait_sampling_get_profile() */
+{
+ Size count; /* number of entries in items[], as filled in by receive_array() */
+ ProfileItem *items; /* array returned by receive_array(PROFILE_REQUEST, ...) */
+} Profile;
+
+/* Fill 'tag' with the user lock tag for pg_wait_sampling lock number 'lock'. */
+void
+pgws_init_lock_tag(LOCKTAG *tag, uint32 lock)
+{
+	tag->locktag_type = LOCKTAG_USERLOCK;
+	tag->locktag_lockmethodid = USER_LOCKMETHOD;
+	tag->locktag_field1 = PG_WAIT_SAMPLING_MAGIC;
+	tag->locktag_field2 = lock;
+	tag->locktag_field3 = tag->locktag_field4 = 0;
+}
+
+static void * /* returns a palloc'd array of *count items, item_size bytes each */
+receive_array(SHMRequest request, Size item_size, Size *count)
+{
+ LOCKTAG collectorTag;
+ shm_mq_result res;
+ Size len,
+ i;
+ void *data;
+ Pointer result,
+ ptr;
+ MemoryContext oldctx;
+
+ /* Ensure nobody else is trying to send a request to the queue */
+ pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
+ LockAcquire(&queueTag, ExclusiveLock, false, false);
+
+ pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
+ LockAcquire(&collectorTag, ExclusiveLock, false, false);
+ recv_mq = shm_mq_create(pgws_collector_mq, COLLECTOR_QUEUE_SIZE); /* re-initialize the shared queue */
+ pgws_collector_hdr->request = request; /* published under the collector lock */
+ LockRelease(&collectorTag, ExclusiveLock, false);
+
+ if (!pgws_collector_hdr->latch)
+ ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("pg_wait_sampling collector wasn't started")));
+
+ SetLatch(pgws_collector_hdr->latch); /* wake the collector */
+
+ shm_mq_set_receiver(recv_mq, MyProc);
+
+ /*
+ * We switch to TopMemoryContext, so that recv_mqh is allocated there
+ * and is guaranteed to survive until before_shmem_exit callbacks are
+ * fired. Anyway, shm_mq_detach() will free the handle on its own.
+ *
+ * NB: we do not pass `seg` to shm_mq_attach(), so it won't set its own
+ * callback, i.e. we do not interfere here with shm_mq_detach_callback().
+ */
+ oldctx = MemoryContextSwitchTo(TopMemoryContext);
+ recv_mqh = shm_mq_attach(recv_mq, NULL, NULL);
+ MemoryContextSwitchTo(oldctx);
+
+ /*
+ * Now we are surely attached to the shm_mq and have the collector's
+ * attention. If anything goes wrong (e.g. Ctrl+C received from the client)
+ * we have to clean up, i.e. detach from the shm_mq, so that the collector
+ * is able to continue responding to other requests.
+ *
+ * PG_ENSURE_ERROR_CLEANUP() guarantees that the cleanup callback will be
+ * fired for both ERROR and FATAL.
+ */
+ PG_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
+ {
+ res = shm_mq_receive(recv_mqh, &len, &data, false); /* first message: the item count */
+ if (res != SHM_MQ_SUCCESS || len != sizeof(*count))
+ elog(ERROR, "error reading mq");
+
+ memcpy(count, data, sizeof(*count));
+
+ result = palloc(item_size * (*count));
+ ptr = result;
+
+ for (i = 0; i < *count; i++) /* then one message per item */
+ {
+ res = shm_mq_receive(recv_mqh, &len, &data, false);
+ if (res != SHM_MQ_SUCCESS || len != item_size)
+ elog(ERROR, "error reading mq");
+
+ memcpy(ptr, data, item_size);
+ ptr += item_size;
+ }
+ }
+ PG_END_ENSURE_ERROR_CLEANUP(pgws_cleanup_callback, 0);
+
+ /* We still have to detach and release lock during normal operation. */
+ shm_mq_detach(recv_mqh);
+ LockRelease(&queueTag, ExclusiveLock, false);
+
+ return result;
+}
+
+
+/*
+ * SQL-callable SRF returning the wait profile.  The first call fetches every
+ * ProfileItem through receive_array(PROFILE_REQUEST, ...); each call then
+ * turns one item into a (pid, type, event, queryid, count) row, with queryid
+ * forced to 0 when profileQueries is off.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_profile);
+Datum
+pg_wait_sampling_get_profile(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	Profile    *profile;
+	ProfileItem *row;
+	const char *type_name;
+	const char *event_name;
+	Datum		values[5];
+	bool		nulls[5];
+	HeapTuple	tuple;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext caller_ctx;
+		TupleDesc	desc;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		caller_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* Fetch the whole profile from the collector in one go. */
+		profile = (Profile *) palloc0(sizeof(Profile));
+		profile->items = (ProfileItem *) receive_array(PROFILE_REQUEST,
+													   sizeof(ProfileItem),
+													   &profile->count);
+
+		funcctx->user_fctx = profile;
+		funcctx->max_calls = profile->count;
+
+		/* Describe the result row. */
+		desc = CreateTemplateTupleDesc(5);
+		TupleDescInitEntry(desc, (AttrNumber) 1,
+						   "pid", INT4OID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 2,
+						   "type", TEXTOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 3,
+						   "event", TEXTOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 4,
+						   "queryid", INT8OID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 5,
+						   "count", INT8OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(desc);
+
+		MemoryContextSwitchTo(caller_ctx);
+	}
+
+	/* Per-call part: pick up the state saved by the first call. */
+	funcctx = SRF_PERCALL_SETUP();
+	profile = (Profile *) funcctx->user_fctx;
+
+	/* All items handed out already? */
+	if (funcctx->call_cntr >= funcctx->max_calls)
+		SRF_RETURN_DONE(funcctx);
+
+	row = &profile->items[funcctx->call_cntr];
+	MemSet(values, 0, sizeof(values));
+	MemSet(nulls, 0, sizeof(nulls));
+
+	type_name = pgstat_get_wait_event_type(row->wait_event_info);
+	event_name = pgstat_get_wait_event(row->wait_event_info);
+
+	values[0] = Int32GetDatum(row->pid);
+
+	/* A NULL name from pgstat is returned as SQL NULL. */
+	if (type_name == NULL)
+		nulls[1] = true;
+	else
+		values[1] = PointerGetDatum(cstring_to_text(type_name));
+
+	if (event_name == NULL)
+		nulls[2] = true;
+	else
+		values[2] = PointerGetDatum(cstring_to_text(event_name));
+
+	/* The queryId is only reported while profileQueries is non-zero. */
+	values[3] = pgws_collector_hdr->profileQueries ?
+		UInt64GetDatum(row->queryId) : (Datum) 0;
+	values[4] = UInt64GetDatum(row->count);
+
+	tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+	SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+}
+
+PG_FUNCTION_INFO_V1(pg_wait_sampling_reset_profile);
+Datum
+pg_wait_sampling_reset_profile(PG_FUNCTION_ARGS)
+{
+ LOCKTAG collectorTag;
+
+ check_shmem();
+
+ pgws_init_lock_tag(&queueTag, PGWS_QUEUE_LOCK);
+
+ LockAcquire(&queueTag, ExclusiveLock, false, false);
+
+ pgws_init_lock_tag(&collectorTag, PGWS_COLLECTOR_LOCK);
+ LockAcquire(&collectorTag, ExclusiveLock, false, false);
+ pgws_collector_hdr->request = PROFILE_RESET;
+ LockRelease(&collectorTag, ExclusiveLock, false);
+
+ SetLatch(pgws_collector_hdr->latch);
+
+ LockRelease(&queueTag, ExclusiveLock, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable SRF returning the wait history.  The first call fetches every
+ * HistoryItem through receive_array(HISTORY_REQUEST, ...); each call then
+ * emits the item at history->index as a (pid, sample_ts, type, event,
+ * queryid) row and advances that index.
+ */
+PG_FUNCTION_INFO_V1(pg_wait_sampling_get_history);
+Datum
+pg_wait_sampling_get_history(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	History    *history;
+
+	check_shmem();
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext caller_ctx;
+		TupleDesc	desc;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		caller_ctx = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* palloc0 leaves history->index at 0, i.e. at the first item. */
+		history = (History *) palloc0(sizeof(History));
+		history->items = (HistoryItem *) receive_array(HISTORY_REQUEST,
+													   sizeof(HistoryItem),
+													   &history->count);
+
+		funcctx->user_fctx = history;
+		funcctx->max_calls = history->count;
+
+		/* Describe the result row. */
+		desc = CreateTemplateTupleDesc(5);
+		TupleDescInitEntry(desc, (AttrNumber) 1, "pid", INT4OID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 2, "sample_ts", TIMESTAMPTZOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 3, "type", TEXTOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 4, "event", TEXTOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 5, "queryid", INT8OID, -1, 0);
+		funcctx->tuple_desc = BlessTupleDesc(desc);
+
+		MemoryContextSwitchTo(caller_ctx);
+	}
+
+	/* Per-call part: pick up the state saved by the first call. */
+	funcctx = SRF_PERCALL_SETUP();
+	history = (History *) funcctx->user_fctx;
+
+	if (history->index < history->count)
+	{
+		HistoryItem *entry = &history->items[history->index];
+		const char *type_name = pgstat_get_wait_event_type(entry->wait_event_info);
+		const char *event_name = pgstat_get_wait_event(entry->wait_event_info);
+		Datum		values[5];
+		bool		nulls[5];
+		HeapTuple	tuple;
+
+		MemSet(values, 0, sizeof(values));
+		MemSet(nulls, 0, sizeof(nulls));
+
+		values[0] = Int32GetDatum(entry->pid);
+		values[1] = TimestampTzGetDatum(entry->ts);
+		values[4] = UInt64GetDatum(entry->queryId);
+
+		/* A NULL name from pgstat is returned as SQL NULL. */
+		if (type_name != NULL)
+			values[2] = PointerGetDatum(cstring_to_text(type_name));
+		else
+			nulls[2] = true;
+
+		if (event_name != NULL)
+			values[3] = PointerGetDatum(cstring_to_text(event_name));
+		else
+			nulls[3] = true;
+
+		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+		/* This function keeps its own cursor rather than using call_cntr. */
+		history->index++;
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+	else
+	{
+		/* Every item has been returned. */
+		SRF_RETURN_DONE(funcctx);
+	}
+
+	PG_RETURN_VOID();			/* not reached */
+}
+
+/*
+ * planner_hook: while profiling applies at the current nesting level, publish
+ * the queryId of the statement being planned in this backend's
+ * pgws_proc_queryids slot.  Afterwards the slot is zeroed if we are back at
+ * top level, otherwise the value found on entry is put back.
+ */
+static PlannedStmt *
+pgws_planner_hook(Query *parse,
+#if PG_VERSION_NUM >= 130000
+				  const char *query_string,
+#endif
+				  int cursorOptions,
+				  ParamListInfo boundParams)
+{
+	planner_hook_type planner;
+	PlannedStmt *plan;
+	int			procno = MyProc - ProcGlobal->allProcs;
+	uint64		outer_queryid = 0;
+
+	/* Chain to the previously installed hook, else to the core planner. */
+	planner = planner_hook_next ? planner_hook_next : standard_planner;
+
+	if (pgws_enabled(nesting_level))
+	{
+		outer_queryid = pgws_proc_queryids[procno];
+		pgws_proc_queryids[procno] = parse->queryId;
+	}
+
+	/* Statements started from within this call run one level deeper. */
+	nesting_level++;
+	PG_TRY();
+	{
+		plan = planner(parse,
+#if PG_VERSION_NUM >= 130000
+					   query_string,
+#endif
+					   cursorOptions, boundParams);
+
+		if (--nesting_level == 0)
+			pgws_proc_queryids[procno] = UINT64CONST(0);
+		else if (pgws_enabled(nesting_level))
+			pgws_proc_queryids[procno] = outer_queryid;
+	}
+	PG_CATCH();
+	{
+		/* Identical bookkeeping on the error path, then re-raise. */
+		if (--nesting_level == 0)
+			pgws_proc_queryids[procno] = UINT64CONST(0);
+		else if (pgws_enabled(nesting_level))
+			pgws_proc_queryids[procno] = outer_queryid;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	return plan;
+}
+
+/*
+ * ExecutorStart hook: publish the statement's queryId in this backend's slot
+ * (when profiling applies at this nesting level), then chain onwards.
+ */
+static void
+pgws_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+	ExecutorStart_hook_type next = prev_ExecutorStart ? prev_ExecutorStart : standard_ExecutorStart;
+
+	if (pgws_enabled(nesting_level))
+		pgws_proc_queryids[MyProc - ProcGlobal->allProcs] = queryDesc->plannedstmt->queryId;
+
+	next(queryDesc, eflags);
+}
+
+/*
+ * ExecutorRun hook: run the plan one nesting level deeper; afterwards, on
+ * success and on error alike, put back the queryId found on entry, or zero
+ * the slot once back at top level.
+ */
+static void
+pgws_ExecutorRun(QueryDesc *queryDesc,
+				 ScanDirection direction,
+				 uint64 count, bool execute_once)
+{
+	int			procno = MyProc - ProcGlobal->allProcs;
+	uint64		outer_queryid = pgws_proc_queryids[procno];
+
+	nesting_level++;
+	PG_TRY();
+	{
+		if (prev_ExecutorRun != NULL)
+			prev_ExecutorRun(queryDesc, direction, count, execute_once);
+		else
+			standard_ExecutorRun(queryDesc, direction, count, execute_once);
+
+		nesting_level--;
+		pgws_proc_queryids[procno] = (nesting_level == 0) ? UINT64CONST(0) : outer_queryid;
+	}
+	PG_CATCH();
+	{
+		nesting_level--;
+		pgws_proc_queryids[procno] = (nesting_level == 0) ? UINT64CONST(0) : outer_queryid;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+}
+
+/* ExecutorFinish hook: keep nesting_level and the queryId slot up to date. */
+static void
+pgws_ExecutorFinish(QueryDesc *queryDesc)
+{
+	int			i = MyProc - ProcGlobal->allProcs;
+	uint64		save_queryId = pgws_proc_queryids[i];
+
+	nesting_level++;
+	PG_TRY();
+	{
+		if (prev_ExecutorFinish)
+			prev_ExecutorFinish(queryDesc);
+		else
+			standard_ExecutorFinish(queryDesc);
+		nesting_level--;
+		pgws_proc_queryids[i] = (nesting_level == 0) ? UINT64CONST(0) : save_queryId;
+	}
+	PG_CATCH();
+	{
+		nesting_level--;
+		/* As in pgws_ExecutorRun(): at top level clear the slot, so that a
+		 * failed statement's queryId is not left behind in it. */
+		pgws_proc_queryids[i] = (nesting_level == 0) ? UINT64CONST(0) : save_queryId;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+}
+
+/*
+ * ExecutorEnd hook: when called at top level, zero this backend's queryId
+ * slot; then chain to the next ExecutorEnd handler.
+ */
+static void
+pgws_ExecutorEnd(QueryDesc *queryDesc)
+{
+	if (nesting_level == 0)
+		pgws_proc_queryids[MyProc - ProcGlobal->allProcs] = UINT64CONST(0);
+
+	if (prev_ExecutorEnd != NULL)
+		prev_ExecutorEnd(queryDesc);
+	else
+		standard_ExecutorEnd(queryDesc);
+}
+
+/*
+ * ProcessUtility hook: the utility-statement counterpart of
+ * pgws_planner_hook().
+ *
+ * While profiling applies at the current nesting level, the statement's
+ * queryId is published in this backend's pgws_proc_queryids slot.  After the
+ * statement has run (or failed) the slot is zeroed if we are back at top
+ * level, otherwise the value found on entry is put back.
+ */
+static void
+pgws_ProcessUtility(PlannedStmt *pstmt,
+					const char *queryString,
+#if PG_VERSION_NUM >= 140000
+					bool readOnlyTree,
+#endif
+					ProcessUtilityContext context,
+					ParamListInfo params,
+					QueryEnvironment *queryEnv,
+					DestReceiver *dest,
+#if PG_VERSION_NUM >= 130000
+					QueryCompletion *qc
+#else
+					char *completionTag
+#endif
+					)
+{
+	ProcessUtility_hook_type next_utility;
+	int			procno = MyProc - ProcGlobal->allProcs;
+	uint64		outer_queryid = 0;
+
+	/* Chain to the previously installed hook, else to the core routine. */
+	next_utility = prev_ProcessUtility ? prev_ProcessUtility : standard_ProcessUtility;
+
+	if (pgws_enabled(nesting_level))
+	{
+		outer_queryid = pgws_proc_queryids[procno];
+		pgws_proc_queryids[procno] = pstmt->queryId;
+	}
+
+	/* Statements started from within this call run one level deeper. */
+	nesting_level++;
+	PG_TRY();
+	{
+		next_utility(pstmt, queryString,
+#if PG_VERSION_NUM >= 140000
+					 readOnlyTree,
+#endif
+					 context, params,
+					 queryEnv, dest,
+#if PG_VERSION_NUM >= 130000
+					 qc
+#else
+					 completionTag
+#endif
+					 );
+
+		if (--nesting_level == 0)
+			pgws_proc_queryids[procno] = UINT64CONST(0);
+		else if (pgws_enabled(nesting_level))
+			pgws_proc_queryids[procno] = outer_queryid;
+	}
+	PG_CATCH();
+	{
+		/* Identical bookkeeping on the error path, then re-raise. */
+		if (--nesting_level == 0)
+			pgws_proc_queryids[procno] = UINT64CONST(0);
+		else if (pgws_enabled(nesting_level))
+			pgws_proc_queryids[procno] = outer_queryid;
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+}
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.control b/contrib/pg_wait_sampling/pg_wait_sampling.control
new file mode 100644
index 0000000..97d9a34
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.control
@@ -0,0 +1,5 @@
+# pg_wait_sampling extension
+comment = 'sampling based statistics of wait events'
+default_version = '1.1'
+module_pathname = '$libdir/pg_wait_sampling'
+relocatable = true
diff --git a/contrib/pg_wait_sampling/pg_wait_sampling.h b/contrib/pg_wait_sampling/pg_wait_sampling.h
new file mode 100644
index 0000000..a8a550f
--- /dev/null
+++ b/contrib/pg_wait_sampling/pg_wait_sampling.h
@@ -0,0 +1,80 @@
+/*
+ * pg_wait_sampling.h
+ * Headers for pg_wait_sampling extension.
+ *
+ * Copyright (c) 2015-2016, Postgres Professional
+ *
+ * IDENTIFICATION
+ * contrib/pg_wait_sampling/pg_wait_sampling.h
+ */
+#ifndef __PG_WAIT_SAMPLING_H__
+#define __PG_WAIT_SAMPLING_H__
+
+#include "postgres.h"
+
+#include "storage/proc.h"
+#include "storage/shm_mq.h"
+#include "utils/timestamp.h"
+
+#define PG_WAIT_SAMPLING_MAGIC 0xCA94B107
+#define COLLECTOR_QUEUE_SIZE (16 * 1024) /* presumably the shm_mq size in bytes - TODO confirm */
+#define HISTORY_TIME_MULTIPLIER 10
+#define PGWS_QUEUE_LOCK 0 /* presumably a 'lock' value for pgws_init_lock_tag() - TODO confirm */
+#define PGWS_COLLECTOR_LOCK 1 /* presumably a 'lock' value for pgws_init_lock_tag() - TODO confirm */
+
+typedef struct /* profile entry; presumably keyed by (pid, wait_event_info, queryId) - TODO confirm */
+{
+ uint32 pid;
+ uint32 wait_event_info;
+ uint64 queryId;
+ uint64 count; /* presumably the number of samples seen for this key - TODO confirm */
+} ProfileItem;
+
+typedef struct /* history entry: same identifying fields as ProfileItem plus a timestamp */
+{
+ uint32 pid;
+ uint32 wait_event_info;
+ uint64 queryId;
+ TimestampTz ts;
+} HistoryItem;
+
+typedef struct /* NOTE(review): field names suggest a ring buffer of HistoryItem - confirm in collector.c */
+{
+ bool wraparound;
+ Size index;
+ Size count;
+ HistoryItem *items;
+} History;
+
+typedef enum /* values stored in CollectorShmqHeader.request */
+{
+ NO_REQUEST,
+ HISTORY_REQUEST,
+ PROFILE_REQUEST,
+ PROFILE_RESET
+} SHMRequest;
+
+typedef struct /* header reachable through pgws_collector_hdr */
+{
+ Latch *latch;
+ SHMRequest request;
+ int historySize;
+ int historyPeriod;
+ int profilePeriod;
+ bool profilePid;
+ int profileQueries;
+ bool sampleCpu;
+} CollectorShmqHeader;
+
+/* pg_wait_sampling.c */
+extern CollectorShmqHeader *pgws_collector_hdr;
+extern shm_mq *pgws_collector_mq;
+extern uint64 *pgws_proc_queryids; /* indexed by MyProc - ProcGlobal->allProcs in the executor/utility hooks */
+extern void pgws_init_lock_tag(LOCKTAG *tag, uint32 lock);
+extern bool pgws_should_sample_proc(PGPROC *proc);
+
+/* collector.c */
+extern void pgws_register_wait_collector(void);
+extern PGDLLEXPORT void pgws_collector_main(Datum main_arg);
+
+#endif
diff --git a/contrib/pg_wait_sampling/run-tests.sh b/contrib/pg_wait_sampling/run-tests.sh
new file mode 100755
index 0000000..f3f1bba
--- /dev/null
+++ b/contrib/pg_wait_sampling/run-tests.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+set -ev
+# CI driver: build with PGXS, install, then run installcheck against a scratch cluster.
+PATH="/usr/lib/postgresql/${PG_MAJOR:?PG_MAJOR must be set}/bin:$PATH"
+export PGDATA="/var/lib/postgresql/$PG_MAJOR/test"
+export COPT=-Werror
+export USE_PGXS=1
+
+sudo chmod 1777 "/var/lib/postgresql/$PG_MAJOR"
+sudo chmod 1777 /var/run/postgresql
+
+make clean
+make
+
+sudo -E env "PATH=$PATH" make install
+
+initdb
+echo "shared_preload_libraries = pg_wait_sampling" >> "$PGDATA/postgresql.conf"
+
+pg_ctl -l logfile start
+make installcheck
+pg_ctl stop
diff --git a/contrib/pg_wait_sampling/sql/load.sql b/contrib/pg_wait_sampling/sql/load.sql
new file mode 100644
index 0000000..d772e3e
--- /dev/null
+++ b/contrib/pg_wait_sampling/sql/load.sql
@@ -0,0 +1,7 @@
+CREATE EXTENSION pg_wait_sampling;
+
+\d pg_wait_sampling_current
+\d pg_wait_sampling_history
+\d pg_wait_sampling_profile
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/pg_wait_sampling/sql/queries.sql b/contrib/pg_wait_sampling/sql/queries.sql
new file mode 100644
index 0000000..de44c6d
--- /dev/null
+++ b/contrib/pg_wait_sampling/sql/queries.sql
@@ -0,0 +1,18 @@
+CREATE EXTENSION pg_wait_sampling;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_current)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_history)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+WITH t as (SELECT sum(0) FROM pg_wait_sampling_profile)
+ SELECT sum(0) FROM generate_series(1, 2), t;
+
+-- Some dummy checks just to be sure that all our functions work and return something.
+SELECT count(*) = 1 as test FROM pg_wait_sampling_get_current(pg_backend_pid());
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_profile();
+SELECT count(*) >= 0 as test FROM pg_wait_sampling_get_history();
+SELECT pg_wait_sampling_reset_profile();
+
+DROP EXTENSION pg_wait_sampling;
diff --git a/contrib/plantuner/COPYRIGHT b/contrib/plantuner/COPYRIGHT
new file mode 100644
index 00000000000..6e4705bc561
--- /dev/null
+++ b/contrib/plantuner/COPYRIGHT
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2009 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
diff --git a/contrib/plantuner/Makefile b/contrib/plantuner/Makefile
new file mode 100644
index 00000000000..f2e8350e84c
--- /dev/null
+++ b/contrib/plantuner/Makefile
@@ -0,0 +1,15 @@
+MODULE_big = plantuner
+DOCS = README.plantuner
+REGRESS = plantuner
+OBJS=plantuner.o
+
+ifdef USE_PGXS
+PGXS = $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/plantuner
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/plantuner/README.plantuner b/contrib/plantuner/README.plantuner
new file mode 100644
index 00000000000..17c8ba010b8
--- /dev/null
+++ b/contrib/plantuner/README.plantuner
@@ -0,0 +1,99 @@
+Plantuner - enable planner hints
+
+ contrib/plantuner is a contribution module for PostgreSQL 8.4+, which
+ enables planner hints.
+
+ All work was done by Teodor Sigaev (teodor@sigaev.ru) and Oleg Bartunov
+ (oleg@sai.msu.su).
+
+ Sponsor: Nomao project (http://www.nomao.com)
+
+Motivation
+
+ Whether or not one thinks it is a good idea, it is sometimes very
+ useful to be able to control the planner (provide hints which tell the
+ optimizer to ignore its algorithm in part), which is currently
+ impossible in PostgreSQL. Oracle, for example, has over 120 hints, and
+ SQL Server also provides hints.
+
+ This first version of plantuner provides a possibility to hide
+ specified indexes from PostgreSQL planner, so it will not use them.
+
+ There are many situations when a developer wants to temporarily disable
+ specific index(es) without dropping them, or to instruct the planner to
+ use a specific index.
+
+ Next, for some workloads PostgreSQL can be too pessimistic about
+ newly created tables and assume many more rows in a table than
+ it actually has. If the plantuner.fix_empty_table GUC variable is set
+ to true, the module resets the planner's estimate to one page and zero
+ tuples for a table whose file has no blocks.
+
+Installation
+
+ * Get latest source of plantuner from CVS Repository
+ * gmake && gmake install && gmake installcheck
+
+Syntax
+ plantuner.forbid_index (deprecated)
+ plantuner.disable_index
+ List of indexes invisible to planner
+ plantuner.enable_index
+ List of indexes visible to planner even if they are hidden
+ by plantuner.disable_index.
+ plantuner.only_index
+ List of explicitly enabled indexes (overrides plantuner.disable_index
+ and plantuner.enable_index), so only indexes in this list are allowed.
+
+Usage
+
+ To enable the module you can either load shared library 'plantuner' in
+ psql session or specify 'shared_preload_libraries' option in
+ postgresql.conf.
+=# LOAD 'plantuner';
+=# create table test(id int);
+=# create index id_idx on test(id);
+=# create index id_idx2 on test(id);
+=# \d test
+ Table "public.test"
+ Column | Type | Modifiers
+--------+---------+-----------
+ id | integer |
+Indexes:
+ "id_idx" btree (id)
+ "id_idx2" btree (id)
+=# explain select id from test where id=1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx2 (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+=# set enable_seqscan=off;
+=# set plantuner.disable_index='id_idx2';
+=# explain select id from test where id=1;
+ QUERY PLAN
+----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+=# set plantuner.disable_index='id_idx2,id_idx';
+=# explain select id from test where id=1;
+ QUERY PLAN
+-------------------------------------------------------------------------
+ Seq Scan on test (cost=10000000000.00..10000000040.00 rows=12 width=4)
+ Filter: (id = 1)
+(2 rows)
+=# set plantuner.enable_index='id_idx';
+=# explain select id from test where id=1;
+ QUERY PLAN
+-----------------------------------------------------------------------
+ Bitmap Heap Scan on test (cost=4.34..15.03 rows=12 width=4)
+ Recheck Cond: (id = 1)
+ -> Bitmap Index Scan on id_idx (cost=0.00..4.34 rows=12 width=0)
+ Index Cond: (id = 1)
+(4 rows)
+
diff --git a/contrib/plantuner/expected/plantuner.out b/contrib/plantuner/expected/plantuner.out
new file mode 100644
index 00000000000..70d2bcaaef2
--- /dev/null
+++ b/contrib/plantuner/expected/plantuner.out
@@ -0,0 +1,96 @@
+LOAD 'plantuner';
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+-------------------------
+
+(1 row)
+
+CREATE TABLE wow (i int, j int);
+CREATE INDEX i_idx ON wow (i);
+CREATE INDEX j_idx ON wow (j);
+CREATE INDEX i1 ON WOW (i);
+CREATE INDEX i2 ON WOW (i);
+CREATE INDEX i3 ON WOW (i);
+SET enable_seqscan=off;
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+SET plantuner.disable_index="i_idx, j_idx";
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+----------------------------
+ public.i_idx, public.j_idx
+(1 row)
+
+SET plantuner.disable_index="i_idx, nonexistent, public.j_idx, wow";
+WARNING: 'nonexistent' does not exist
+WARNING: 'wow' is not an index
+SHOW plantuner.disable_index;
+ plantuner.disable_index
+----------------------------
+ public.i_idx, public.j_idx
+(1 row)
+
+SET plantuner.enable_index="i_idx";
+SHOW plantuner.enable_index;
+ plantuner.enable_index
+------------------------
+ public.i_idx
+(1 row)
+
+SELECT * FROM wow;
+ i | j
+---+---
+(0 rows)
+
+--test only index
+RESET plantuner.disable_index;
+RESET plantuner.enable_index;
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+SET enable_indexonlyscan=off;
+SET plantuner.only_index="i1";
+SHOW plantuner.only_index;
+ plantuner.only_index
+----------------------
+ public.i1
+(1 row)
+
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i1 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+SET plantuner.disable_index="i1,i2,i3";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i1 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+SET plantuner.only_index="i2";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+----------------------------
+ Index Scan using i2 on wow
+ Index Cond: (i = 0)
+(2 rows)
+
+RESET plantuner.only_index;
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+ QUERY PLAN
+-------------------------------
+ Index Scan using i_idx on wow
+ Index Cond: (i = 0)
+(2 rows)
+
diff --git a/contrib/plantuner/meson.build b/contrib/plantuner/meson.build
new file mode 100644
index 00000000000..cf1b1ae8cdb
--- /dev/null
+++ b/contrib/plantuner/meson.build
@@ -0,0 +1,34 @@
+plantuner_sources = files(
+ 'plantuner.c'
+)
+
+if host_system == 'windows'
+ plantuner_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'plantuner',
+ '--FILEDESC', 'plantuner',])
+endif
+
+plantuner = shared_module('plantuner',
+ plantuner_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += plantuner
+
+install_data(
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'plantuner',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'plantuner',
+ ]
+ },
+}
+
+# TODO: DOCS = README.plantuner
\ No newline at end of file
diff --git a/contrib/plantuner/plantuner.c b/contrib/plantuner/plantuner.c
new file mode 100644
index 00000000000..29f7627f8c8
--- /dev/null
+++ b/contrib/plantuner/plantuner.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2009 Teodor Sigaev <teodor@sigaev.ru>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <postgres.h>
+
+#include <fmgr.h>
+#include <miscadmin.h>
+#include <access/heapam.h>
+#include <access/xact.h>
+#include <catalog/namespace.h>
+#include <catalog/pg_class.h>
+#if PG_VERSION_NUM >= 160000
+#include <nodes/miscnodes.h>
+#endif
+#include <nodes/pg_list.h>
+#include <optimizer/plancat.h>
+#include <storage/bufmgr.h>
+#include <utils/builtins.h>
+#include <utils/guc.h>
+#include <utils/lsyscache.h>
+#include <utils/rel.h>
+#if PG_VERSION_NUM >= 100000
+#include <utils/regproc.h>
+#include <utils/varlena.h>
+#endif
+
+PG_MODULE_MAGIC;
+
+#if PG_VERSION_NUM >= 130000
+#define heap_open(r, l) table_open(r, l)
+#define heap_close(r, l) table_close(r, l)
+#endif
+
+static int nDisabledIndexes = 0;	/* valid entries in disabledIndexes */
+static Oid *disabledIndexes = NULL;	/* malloc'ed and replaced by indexesAssign() */
+static char *disableIndexesOutStr = "";	/* storage for plantuner.disable_index and forbid_index */
+
+static int nEnabledIndexes = 0;	/* valid entries in enabledIndexes */
+static Oid *enabledIndexes = NULL;	/* malloc'ed and replaced by indexesAssign() */
+static char *enableIndexesOutStr = "";	/* storage for plantuner.enable_index */
+
+static int nOnlyIndexes = 0;	/* valid entries in onlyIndexes */
+static Oid *onlyIndexes = NULL;	/* malloc'ed and replaced by indexesAssign() */
+static char *onlyIndexesOutStr = "";	/* storage for plantuner.only_index */
+
+static get_relation_info_hook_type prevHook = NULL;	/* file-local so it cannot clash with other modules */
+static bool fix_empty_table = false;	/* plantuner.fix_empty_table */
+
+static bool plantuner_enable_inited = false;	/* false: lateInit() must rebuild enabledIndexes */
+static bool plantuner_only_inited = false;	/* false: lateInit() must rebuild onlyIndexes */
+static bool plantuner_disable_inited = false;	/* false: lateInit() must rebuild disabledIndexes */
+
+typedef enum IndexListKind {	/* selects which of the three lists indexesAssign() works on */
+	EnabledKind,
+	DisabledKind,
+	OnlyKind
+} IndexListKind;
+
+/*
+ * Parse newval as a comma-separated index list for 'kind'. With doit, install the
+ * resolved OID array; otherwise only validate. Returns newval, or NULL on bad syntax.
+ */
+static const char *
+indexesAssign(const char * newval, bool doit, GucSource source,
+			  IndexListKind kind)
+{
+	char	   *rawname;
+	List	   *namelist;
+	ListCell   *l;
+	Oid		   *newOids = NULL;
+	int			nOids = 0, i = 0;
+
+	rawname = pstrdup(newval);
+
+	if (!SplitIdentifierString(rawname, ',', &namelist))
+		goto cleanup;
+
+	/*
+	 * The rest needs system catalog access, which is only possible in normal
+	 * processing inside a transaction; otherwise leave the work to lateInit().
+	 */
+	if (MyProcNumber == INVALID_PROC_NUMBER || !IsUnderPostmaster ||
+		!IsTransactionState())
+	{
+		/* reset init state */
+		switch(kind)
+		{
+			case EnabledKind:
+				plantuner_enable_inited = false;
+				break;
+			case DisabledKind:
+				plantuner_disable_inited = false;
+				break;
+			case OnlyKind:
+				plantuner_only_inited = false;
+				break;
+			default:
+				elog(ERROR, "wrong kind");
+		}
+		pfree(rawname);		/* release the scratch copy and list on this exit too */
+		list_free(namelist);
+		return newval;
+	}
+
+	if (doit)
+	{
+		nOids = list_length(namelist);
+		newOids = malloc(sizeof(Oid) * (nOids+1));
+		if (!newOids)
+			elog(ERROR,"could not allocate %d bytes",
+				 (int)(sizeof(Oid) * (nOids+1)));
+	}
+
+	switch(kind)
+	{
+		case EnabledKind:
+			plantuner_enable_inited = true;
+			break;
+		case DisabledKind:
+			plantuner_disable_inited = true;
+			break;
+		case OnlyKind:
+			plantuner_only_inited = true;
+			break;
+		default:
+			elog(ERROR, "wrong kind");
+	}
+
+	foreach(l, namelist)
+	{
+		char	   *curname = (char *) lfirst(l);
+#if PG_VERSION_NUM >= 90200
+		List	   *cur_namelist;
+		Oid			indexOid;
+
+#if PG_VERSION_NUM >= 160000
+		ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+		cur_namelist = stringToQualifiedNameList(curname, (Node *) &escontext);
+
+		/* bad name list syntax */
+		if (cur_namelist == NIL)
+			continue;
+#else
+		cur_namelist = stringToQualifiedNameList(curname);
+#endif
+
+		indexOid = RangeVarGetRelid(makeRangeVarFromNameList(cur_namelist),
+									NoLock, true);
+#else
+		Oid			indexOid = RangeVarGetRelid(
+				makeRangeVarFromNameList(stringToQualifiedNameList(curname)),
+											true);
+#endif
+
+		if (indexOid == InvalidOid)
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+				elog(WARNING,"'%s' does not exist", curname);
+			continue;
+		}
+		else if ( get_rel_relkind(indexOid) != RELKIND_INDEX )
+		{
+#if PG_VERSION_NUM >= 90100
+			if (doit == false)
+#endif
+				elog(WARNING,"'%s' is not an index", curname);
+			continue;
+		}
+		else if (doit)
+		{
+			newOids[i++] = indexOid;
+		}
+	}
+
+	if (doit)
+	{
+		switch(kind)
+		{
+			case EnabledKind:
+				nEnabledIndexes = i;
+				free(enabledIndexes);	/* free(NULL) is a no-op */
+				enabledIndexes = newOids;
+				break;
+			case DisabledKind:
+				nDisabledIndexes = i;
+				free(disabledIndexes);
+				disabledIndexes = newOids;
+				break;
+			case OnlyKind:
+				nOnlyIndexes = i;
+				free(onlyIndexes);
+				onlyIndexes = newOids;
+				break;
+			default:
+				elog(ERROR, "wrong kind");
+		}
+	}
+
+	pfree(rawname);
+	list_free(namelist);
+
+	return newval;
+
+cleanup:
+	free(newOids);
+	pfree(rawname);
+	list_free(namelist);
+	return NULL;
+}
+
+static const char *
+assignDisabledIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, DisabledKind); /* plantuner.disable_index / forbid_index list */
+}
+
+static const char *
+assignEnabledIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, EnabledKind); /* plantuner.enable_index list */
+}
+
+static const char *
+assignOnlyIndexes(const char * newval, bool doit, GucSource source)
+{
+ return indexesAssign(newval, doit, source, OnlyKind); /* plantuner.only_index list */
+}
+
+static void
+lateInit(void)	/* rebuild every OID list whose last assignment could not look up the catalogs */
+{
+	if (!plantuner_only_inited)
+		indexesAssign(onlyIndexesOutStr, true, PGC_S_USER, OnlyKind);
+	if (!plantuner_enable_inited)
+		indexesAssign(enableIndexesOutStr, true, PGC_S_USER, EnabledKind);
+	if (!plantuner_disable_inited)
+		indexesAssign(disableIndexesOutStr, true, PGC_S_USER, DisabledKind);
+}
+
+#if PG_VERSION_NUM >= 90100
+
+static bool
+checkOnlyIndexes(char **newval, void **extra, GucSource source)
+{
+	/* doit = false: validate the list without replacing the installed OID array */
+	char	   *accepted = (char *) indexesAssign(*newval, false, source, OnlyKind);
+
+	/* a NULL result means the list could not be split: reject the value */
+	if (accepted == NULL)
+		return false;
+
+	/* on success indexesAssign hands back the string it was given */
+	*newval = accepted;
+
+	return true;
+}
+
+static bool
+checkDisabledIndexes(char **newval, void **extra, GucSource source)
+{
+	/* doit = false: validate the list without replacing the installed OID array */
+	char	   *accepted = (char *) indexesAssign(*newval, false, source, DisabledKind);
+
+	/* a NULL result means the list could not be split: reject the value */
+	if (accepted == NULL)
+		return false;
+
+	/* on success indexesAssign hands back the string it was given */
+	*newval = accepted;
+
+	return true;
+}
+
+static bool
+checkEnabledIndexes(char **newval, void **extra, GucSource source)
+{
+	/* doit = false: validate the list without replacing the installed OID array */
+	char	   *accepted = (char *) indexesAssign(*newval, false, source, EnabledKind);
+
+	/* a NULL result means the list could not be split: reject the value */
+	if (accepted == NULL)
+		return false;
+
+	/* on success indexesAssign hands back the string it was given */
+	*newval = accepted;
+
+	return true;
+}
+
+static void
+assignDisabledIndexesNew(const char *newval, void *extra) /* assign hook: forwards with doit = true; 'extra' is unused */
+{
+ assignDisabledIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+static void
+assignEnabledIndexesNew(const char *newval, void *extra) /* assign hook: forwards with doit = true; 'extra' is unused */
+{
+ assignEnabledIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+static void
+assignOnlyIndexesNew(const char *newval, void *extra) /* assign hook: forwards with doit = true; 'extra' is unused */
+{
+ assignOnlyIndexes(newval, true, PGC_S_USER /* doesn't matter */);
+}
+
+#endif
+
+/* Tell whether oid is one of the first n entries of oids (oids is not read when n is 0). */
+static bool
+oidListed(const Oid *oids, int n, Oid oid)
+{
+	while (n-- > 0)
+		if (oids[n] == oid)
+			return true;
+	return false;
+}
+
+/*
+ * Prune rel->indexlist according to the plantuner lists: a non-empty "only"
+ * list keeps nothing else; otherwise indexes that are disabled and not
+ * re-enabled are removed. root, relationObjectId and inhparent are unused.
+ */
+static void
+indexFilter(PlannerInfo *root, Oid relationObjectId, bool inhparent,
+			RelOptInfo *rel)
+{
+	ListCell   *cell;
+	int			k;
+
+	lateInit();
+
+	if (nOnlyIndexes > 0)
+	{
+rescan:
+		foreach(cell, rel->indexlist)
+		{
+			IndexOptInfo *idx = (IndexOptInfo *) lfirst(cell);
+
+			if (oidListed(onlyIndexes, nOnlyIndexes, idx->indexoid))
+				continue;
+			/* the list changes under the iterator, so start the walk again */
+			rel->indexlist = list_delete_ptr(rel->indexlist, idx);
+			goto rescan;
+		}
+
+		return;
+	}
+
+	for (k = 0; k < nDisabledIndexes; k++)
+	{
+		foreach(cell, rel->indexlist)
+		{
+			IndexOptInfo *idx = (IndexOptInfo *) lfirst(cell);
+
+			if (idx->indexoid != disabledIndexes[k])
+				continue;
+			/* disabled: drop it unless plantuner.enable_index lists it too */
+			if (!oidListed(enabledIndexes, nEnabledIndexes, idx->indexoid))
+				rel->indexlist = list_delete_ptr(rel->indexlist, idx);
+			break;
+		}
+	}
+}
+
+/*
+ * get_relation_info hook: tune estimates and the index list of plain tables, then chain on.
+ */
+static void
+execPlantuner(PlannerInfo *root, Oid relationObjectId, bool inhparent,
+			  RelOptInfo *rel)
+{
+	Relation	target = heap_open(relationObjectId, NoLock);
+	bool		plainTable = (target->rd_rel->relkind == RELKIND_RELATION);
+
+	/*
+	 * With plantuner.fix_empty_table on, a table whose file has no blocks is
+	 * given one page and no tuples instead of estimate_rel_size()'s guess.
+	 */
+	if (plainTable && fix_empty_table && RelationGetNumberOfBlocks(target) == 0)
+	{
+		rel->pages = 1.0;
+		rel->tuples = 0.0;
+	}
+
+	if (plainTable)
+		indexFilter(root, relationObjectId, inhparent, rel);
+
+	heap_close(target, NoLock);
+
+	/* hand over to whichever hook was installed before plantuner, if any */
+	if (prevHook != NULL)
+		prevHook(root, relationObjectId, inhparent, rel);
+}
+
+/*
+ * Render nIndexes OIDs as "schema.index, ..." in palloc'd memory; caller runs lateInit() first.
+ */
+static const char*
+IndexFilterShow(Oid* indexes, int nIndexes)
+{
+	char	   *val, *ptr;
+	int			i, len;
+
+	len = 1 /* \0 */ + nIndexes * (2 * NAMEDATALEN + 2 /* ', ' */ + 1 /* . */);
+	ptr = val = palloc(len);
+	*ptr = '\0';
+	for(i=0; i<nIndexes; i++)
+	{
+		char	   *relname = get_rel_name(indexes[i]);
+		Oid			nspOid = get_rel_namespace(indexes[i]);
+		char	   *nspname = get_namespace_name(nspOid);
+
+		if ( relname == NULL || nspOid == InvalidOid || nspname == NULL )
+			continue;
+		/* separator only once something was written, even if earlier OIDs were skipped */
+		ptr += snprintf(ptr, len - (ptr - val), "%s%s.%s",
+						(ptr == val) ? "" : ", ", nspname, relname);
+	}
+
+	return val;
+}
+
+static const char*
+disabledIndexFilterShow(void)
+{
+	lateInit();	/* may replace disabledIndexes, so it must finish before the array is read */
+	return IndexFilterShow(disabledIndexes, nDisabledIndexes);
+}
+
+static const char*
+enabledIndexFilterShow(void)
+{
+	lateInit();	/* may replace enabledIndexes, so it must finish before the array is read */
+	return IndexFilterShow(enabledIndexes, nEnabledIndexes);
+}
+
+static const char*
+onlyIndexFilterShow(void)
+{
+	lateInit();	/* may replace onlyIndexes, so it must finish before the array is read */
+	return IndexFilterShow(onlyIndexes, nOnlyIndexes);
+}
+
+void _PG_init(void); /* defines the plantuner.* settings and installs the get_relation_info hook */
+void
+_PG_init(void)
+{
+ DefineCustomStringVariable(
+ "plantuner.forbid_index",
+ "List of forbidden indexes (deprecated)",
+ "Listed indexes will not be used in queries (deprecated, use plantuner.disable_index)",
+ &disableIndexesOutStr, /* NOTE(review): same storage variable as plantuner.disable_index below - confirm two GUCs may share it */
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkDisabledIndexes,
+ assignDisabledIndexesNew,
+#else
+ assignDisabledIndexes,
+#endif
+ disabledIndexFilterShow
+ );
+
+ DefineCustomStringVariable(
+ "plantuner.disable_index",
+ "List of disabled indexes",
+ "Listed indexes will not be used in queries",
+ &disableIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkDisabledIndexes,
+ assignDisabledIndexesNew,
+#else
+ assignDisabledIndexes,
+#endif
+ disabledIndexFilterShow
+ );
+
+ DefineCustomStringVariable(
+ "plantuner.enable_index",
+ "List of enabled indexes (overload plantuner.disable_index)",
+ "Listed indexes which could be used in queries even they are listed in plantuner.disable_index",
+ &enableIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkEnabledIndexes,
+ assignEnabledIndexesNew,
+#else
+ assignEnabledIndexes,
+#endif
+ enabledIndexFilterShow
+ );
+
+ DefineCustomStringVariable(
+ "plantuner.only_index",
+ "List of explicitly enabled indexes (overload plantuner.disable_index and plantuner.enable_index)",
+ "Only indexes in this list are allowed",
+ &onlyIndexesOutStr,
+ "",
+ PGC_USERSET,
+ 0,
+#if PG_VERSION_NUM >= 90100
+ checkOnlyIndexes,
+ assignOnlyIndexesNew,
+#else
+ assignOnlyIndexes,
+#endif
+ onlyIndexFilterShow
+ );
+
+ DefineCustomBoolVariable(
+ "plantuner.fix_empty_table",
+ "Sets to zero estimations for empty tables",
+ "Sets to zero estimations for empty or newly created tables",
+ &fix_empty_table,
+#if PG_VERSION_NUM >= 80400
+ fix_empty_table, /* boot value: the variable's static initializer, i.e. false */
+#endif
+ PGC_USERSET,
+#if PG_VERSION_NUM >= 80400
+ GUC_NOT_IN_SAMPLE,
+#if PG_VERSION_NUM >= 90100
+ NULL,
+#endif
+#endif
+ NULL,
+ NULL
+ );
+
+ if (get_relation_info_hook != execPlantuner ) /* skip if already installed, so prevHook never points back at execPlantuner */
+ {
+ prevHook = get_relation_info_hook;
+ get_relation_info_hook = execPlantuner;
+ }
+}
diff --git a/contrib/plantuner/sql/plantuner.sql b/contrib/plantuner/sql/plantuner.sql
new file mode 100644
index 00000000000..ddd6fcc94f1
--- /dev/null
+++ b/contrib/plantuner/sql/plantuner.sql
@@ -0,0 +1,51 @@
+LOAD 'plantuner';
+
+SHOW plantuner.disable_index;
+
+CREATE TABLE wow (i int, j int);
+CREATE INDEX i_idx ON wow (i);
+CREATE INDEX j_idx ON wow (j);
+CREATE INDEX i1 ON WOW (i);
+CREATE INDEX i2 ON WOW (i);
+CREATE INDEX i3 ON WOW (i);
+
+SET enable_seqscan=off;
+
+SELECT * FROM wow;
+
+SET plantuner.disable_index="i_idx, j_idx";
+
+SELECT * FROM wow;
+
+SHOW plantuner.disable_index;
+
+SET plantuner.disable_index="i_idx, nonexistent, public.j_idx, wow";
+
+SHOW plantuner.disable_index;
+
+SET plantuner.enable_index="i_idx";
+
+SHOW plantuner.enable_index;
+
+SELECT * FROM wow;
+--test only index
+RESET plantuner.disable_index;
+RESET plantuner.enable_index;
+
+SET enable_seqscan=off;
+SET enable_bitmapscan=off;
+SET enable_indexonlyscan=off;
+
+SET plantuner.only_index="i1";
+SHOW plantuner.only_index;
+
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+SET plantuner.disable_index="i1,i2,i3";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+SET plantuner.only_index="i2";
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
+
+RESET plantuner.only_index;
+EXPLAIN (COSTS OFF) SELECT * FROM wow WHERE i = 0;
diff --git a/contrib/postgres_fdw/connection.c b/contrib/postgres_fdw/connection.c
index 2fc52b81e13..7201b221986 100644
--- a/contrib/postgres_fdw/connection.c
+++ b/contrib/postgres_fdw/connection.c
@@ -1043,6 +1043,9 @@ pgfdw_xact_callback(XactEvent event, void *arg)
else
pgfdw_abort_cleanup(entry, true);
break;
+ case XACT_EVENT_PRE_ABORT:
+ case XACT_EVENT_PARALLEL_PRE_ABORT:
+ break;
}
}
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 81e752877ce..ced94704d69 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2616,22 +2616,19 @@ SELECT * FROM local_tbl LEFT JOIN (SELECT ft1.* FROM ft1 INNER JOIN ft2 ON (ft1.
-> Sort
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
Sort Key: ft1.c3
- -> Merge Join
+ -> Hash Join
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*, ft2.*, ft2.c2
- Merge Cond: (ft1.c1 = ft2.c1)
+ Hash Cond: (ft1.c1 = ft2.c1)
Join Filter: ((ft1.c1 - postgres_fdw_abs(ft2.c2)) = 0)
- -> Sort
+ -> Foreign Scan on public.ft1
Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Sort Key: ft1.c1
- -> Foreign Scan on public.ft1
- Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
- Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
- -> Materialize
+ Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100))
+ -> Hash
Output: ft2.*, ft2.c1, ft2.c2
-> Foreign Scan on public.ft2
Output: ft2.*, ft2.c1, ft2.c2
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
-(32 rows)
+(29 rows)
ALTER SERVER loopback OPTIONS (DROP fdw_startup_cost);
ALTER SERVER loopback OPTIONS (ADD extensions 'postgres_fdw');
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1ec048ac31b..c0996bfa15b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -5629,6 +5629,22 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
</listitem>
</varlistentry>
+ <varlistentry id="guc-enable_self_join_removal" xreflabel="enable_self_join_removal">
+ <term><varname>enable_self_join_removal</varname> (<type>boolean</type>)
+ <indexterm>
+ <primary><varname>enable_self_join_removal</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Enables or disables the query planner's optimization which analyses
+ the query tree and replaces self joins with semantically equivalent
+ single scans. Takes into consideration only plain tables.
+ The default is <literal>on</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-enable-seqscan" xreflabel="enable_seqscan">
<term><varname>enable_seqscan</varname> (<type>boolean</type>)
<indexterm>
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index 7f89cd5e826..64b513659b3 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -117,6 +117,7 @@ ginInitBA(BuildAccumulator *accum)
ginCombineData,
ginAllocEntryAccumulator,
NULL, /* no freefunc needed */
+ NULL, /* no fixfunc needed */
(void *) accum);
}
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index af0bd1888e5..8ff528148a4 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -20,6 +20,8 @@ OBJS = \
hio.o \
pruneheap.o \
rewriteheap.o \
+ tempcat.o \
+ tupmap.o \
vacuumlazy.o \
visibilitymap.o
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index cce38f482bd..296b22be62e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -43,6 +43,7 @@
#include "access/syncscan.h"
#include "access/sysattr.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/transam.h"
#include "access/valid.h"
#include "access/visibilitymap.h"
@@ -73,6 +74,10 @@
#include "utils/spccache.h"
#include "utils/syscache.h"
+#include "nodes/execnodes.h"
+#include "catalog/index.h"
+#include "utils/memutils.h"
+#include "access/tempcat.h"
static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
TransactionId xid, CommandId cid, int options);
@@ -6438,6 +6443,13 @@ heap_inplace_update_and_unlock(Relation relation,
uint32 newlen;
Assert(ItemPointerEquals(&oldtup->t_self, &tuple->t_self));
+
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update_inplace(relation, tuple);
+ return;
+ }
+
oldlen = oldtup->t_len - htup->t_hoff;
newlen = tuple->t_len - tuple->t_data->t_hoff;
if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
@@ -6530,6 +6542,12 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
uint32 oldlen;
uint32 newlen;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update_inplace(relation, tuple);
+ return;
+ }
+
/*
* For now, we don't allow parallel updates. Unlike a regular update,
* this should never create a combo CID, so it might be possible to relax
diff --git a/src/backend/access/heap/meson.build b/src/backend/access/heap/meson.build
index e00d5b4f0de..d0635b2f928 100644
--- a/src/backend/access/heap/meson.build
+++ b/src/backend/access/heap/meson.build
@@ -8,6 +8,8 @@ backend_sources += files(
'hio.c',
'pruneheap.c',
'rewriteheap.c',
+ 'tempcat.c',
+ 'tupmap.c',
'vacuumlazy.c',
'visibilitymap.c',
)
diff --git a/src/backend/access/heap/tempcat.c b/src/backend/access/heap/tempcat.c
new file mode 100644
index 00000000000..dc5c7381ffb
--- /dev/null
+++ b/src/backend/access/heap/tempcat.c
@@ -0,0 +1,962 @@
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "access/table.h"
+#include "access/tempcat.h"
+#include "access/xact.h"
+#include "catalog/index.h"
+#include "catalog/namespace.h"
+#include "common/hashfn.h"
+#include "lib/rbtree.h"
+#include "nodes/execnodes.h"
+#include "pgstat.h"
+#include "utils/catcache.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/typcache.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+/*
+ * One in-memory catalog tuple together with its deformed attribute arrays.
+ */
+typedef struct TupleItem{
+ /* link in RelationItem::allTuples */
+ dlist_node node;
+ /* link in RelationItem::{unstaged,staged}CreatedTuples; detached otherwise */
+ dlist_node stageCreatedNode;
+ /* link in RelationItem::{unstaged,staged}DeletedTuples; detached otherwise */
+ dlist_node stageDeletedNode;
+ /* private copy of the tuple (made with heap_copytuple) */
+ HeapTuple tuple;
+ /* per-attribute values extracted from "tuple" with heap_getattr */
+ Datum *values;
+ /* per-attribute null flags matching "values" */
+ bool *nulls;
+} TupleItem;
+
+
+/*
+ * Red-black tree node of an index: one node per distinct key, holding the
+ * list of tuples that share that key.  "node" must stay the first member
+ * because the code casts between RBTNode* and TreeItem*.
+ */
+typedef struct TreeItem{
+ RBTNode node;
+ dlist_head tuples; /* TuplePointerItem::node */
+}TreeItem;
+
+/*
+ * Reference from one index tree node to one TupleItem.
+ */
+typedef struct TuplePointerItem{
+ /* link in TreeItem::tuples */
+ dlist_node node;
+ /* link in IndexItem::tuplePointersScheduledForDeletion; detached while live */
+ dlist_node deletionNode;
+ /* the tuple this entry points at */
+ TupleItem* tupleItem;
+ /* tree node whose "tuples" list currently holds this entry */
+ TreeItem* owner;
+}TuplePointerItem;
+
+/*
+ * Per-catalog-relation state: the tuples held in memory, the indexes built
+ * over them, and the lists that track changes made by the current
+ * (sub)transaction so they can be committed or reverted.
+ */
+typedef struct RelationItem{
+ /* link in the file-level temp_rels list */
+ dlist_node node;
+ /* taken from relation->rd_rel->oid when the entry is created */
+ Oid relid;
+ dlist_head indexes; /* IndexItem::node */
+ dlist_head indexesScheduledForDeletion; /* IndexItem::node */
+ dlist_head allTuples; /* TupleItem::node */
+ dlist_head unstagedCreatedTuples; /* TupleItem::stageCreatedNode */
+ dlist_head unstagedDeletedTuples; /* TupleItem::stageDeletedNode */
+ dlist_head stagedCreatedTuples; /* TupleItem::stageCreatedNode */
+ dlist_head stagedDeletedTuples; /* TupleItem::stageDeletedNode */
+} RelationItem;
+
+/*
+ * An in-memory index over one relation's tuples, keyed by the first
+ * "nkeys" entries of attrNumbers.
+ */
+typedef struct IndexItem{
+ /* link in RelationItem::indexes or ::indexesScheduledForDeletion */
+ dlist_node node;
+ /* owning relation entry */
+ RelationItem* rel;
+ /* number of valid entries in the three arrays below */
+ int nkeys;
+ /* 1-based attribute numbers of the key columns */
+ AttrNumber attrNumbers[INDEX_MAX_KEYS];
+ /* comparison function of each key column's type (from the type cache) */
+ FmgrInfo keyCmpFuncs[INDEX_MAX_KEYS];
+ /* collation passed to each comparison function */
+ Oid keyCollations[INDEX_MAX_KEYS];
+ RBTree* tree; /* TreeItem::node */
+ dlist_head scans; /* TempCatScanData::node */
+ dlist_head tuplePointersScheduledForDeletion; /* TuplePointerItem::deletionNode */
+} IndexItem;
+
+/*
+ * State of one scan over an in-memory catalog index.
+ */
+struct TempCatScanData{
+ /* link in IndexItem::scans while the scan is open */
+ dlist_node node;
+ /* caller's scan key array (stored by pointer, not copied) */
+ ScanKey key;
+ /* number of entries in "key" */
+ int nkeys;
+ /* set once a matching tuple has been returned */
+ bool started;
+ /* set once the scan has run out of tuples */
+ bool finished;
+ RelationItem* rel;
+ IndexItem* index;
+ /* position in the index tree */
+ RBTreeIterator treeIter;
+ /* position in the current tree node's tuple list */
+ dlist_iter listIter;
+ /* recorded by temp_catalog_beginscan; not read by the scan code in this file */
+ RBTNode* endNode;
+ /* when true, the first non-matching tuple ends the scan */
+ bool continuous;
+ /* number of tuple pointers examined so far */
+ uint64_t count;
+};
+
+
+/* All relations that currently have an in-memory catalog entry. */
+static dlist_head temp_rels; /* RelationItem::node */
+/* Set by temp_catalog_init() once the list and callbacks are set up. */
+static bool initialized = false;
+/* Switch tested by callers before routing catalog access to this module. */
+bool enable_temp_memory_catalog = false;
+
+
+/*
+ * rbt_destroy
+ *      Tear down an index tree: free every TuplePointerItem hanging off each
+ *      tree node, delete the nodes one at a time, then free the tree itself.
+ */
+static void
+rbt_destroy(RBTree* tree)
+{
+    RBTNode    *leftmost;
+
+    while ((leftmost = rbt_leftmost(tree)) != NULL)
+    {
+        TreeItem   *treeItem = (TreeItem *) leftmost;
+
+        /* Drain the node's tuple list before the node goes away. */
+        while (!dlist_is_empty(&treeItem->tuples))
+        {
+            dlist_node *listNode = dlist_pop_head_node(&treeItem->tuples);
+            TuplePointerItem *pointerItem = dlist_container(TuplePointerItem, node, listNode);
+
+            /* Unhook it from the pending-deletion list if it is queued there. */
+            if (!dlist_node_is_detached(&pointerItem->deletionNode))
+                dlist_delete(&pointerItem->deletionNode);
+
+            pfree(pointerItem);
+        }
+
+        rbt_delete(tree, leftmost);
+    }
+
+    pfree(tree);
+}
+
+
+/*
+ * LocalInvalidateCatCache
+ *      Callback handed to PrepareToInvalidateCacheTuple: invalidates the
+ *      matching syscache entries in this backend and fires the registered
+ *      syscache callbacks.  dbId is accepted for signature compatibility
+ *      and is not used.
+ */
+static void
+LocalInvalidateCatCache(int cacheId, uint32 hashValue, Oid dbId)
+{
+    SysCacheInvalidate(cacheId, hashValue);
+    CallSyscacheCallbacks(cacheId, hashValue);
+}
+
+
+/*
+ * LocalInvalidateCatCacheTupleNow
+ *      Invalidate the catalog snapshot and, unless the relation only
+ *      invalidates snapshots, the catcache entries derived from the given
+ *      tuple(s), calling LocalInvalidateCatCache directly.
+ *
+ * The results of GetTopTransactionId() and GetCurrentCommandId(true) are
+ * discarded; presumably they are called for their side effects (XID
+ * assignment, marking the command id as used) -- TODO confirm.
+ */
+static void
+LocalInvalidateCatCacheTupleNow(Oid relid, HeapTuple tuple, HeapTuple newTuple)
+{
+    GetTopTransactionId();
+    GetCurrentCommandId(true);
+    InvalidateCatalogSnapshot();
+
+    if (RelationInvalidatesSnapshotsOnly(relid))
+        return;
+
+    PrepareToInvalidateCacheTuple(relid, tuple, newTuple, LocalInvalidateCatCache);
+}
+
+
+/*
+ * LocalInvalidateCatCacheTuple
+ *      Register a cache invalidation for the tuple through
+ *      CacheInvalidateHeapTuple, then call AcceptInvalidationMessages().
+ *
+ * As in LocalInvalidateCatCacheTupleNow, the transaction id and command id
+ * calls are made only for their side effects (presumably -- TODO confirm).
+ */
+static void
+LocalInvalidateCatCacheTuple(Relation rel, HeapTuple tuple, HeapTuple newTuple)
+{
+    GetTopTransactionId();
+    GetCurrentCommandId(true);
+
+    CacheInvalidateHeapTuple(rel, tuple, newTuple);
+    AcceptInvalidationMessages();
+}
+
+
+/*
+ * find_index_tree_item
+ *      Look up a tree node in the index relative to the key values of
+ *      tupleItem.
+ *
+ * cmp < 0 calls rbt_find_less, cmp > 0 calls rbt_find_great (both with the
+ * "equal" argument false), and cmp == 0 calls rbt_find.  Returns NULL when
+ * the lookup finds nothing.
+ *
+ * The comparator reads the key through the head of a node's tuple list, so
+ * a throwaway TreeItem holding a single pointer item is built on the stack
+ * to act as the search probe.
+ */
+static TreeItem*
+find_index_tree_item(IndexItem* indexItem, TupleItem* tupleItem, int cmp)
+{
+    TuplePointerItem probePointer;
+    TreeItem    probe;
+    RBTNode    *found;
+
+    probePointer.tupleItem = tupleItem;
+    dlist_init(&probe.tuples);
+    dlist_push_tail(&probe.tuples, &probePointer.node);
+
+    if (cmp == 0)
+        found = rbt_find(indexItem->tree, (RBTNode *) &probe);
+    else if (cmp < 0)
+        found = rbt_find_less(indexItem->tree, (RBTNode *) &probe, false);
+    else
+        found = rbt_find_great(indexItem->tree, (RBTNode *) &probe, false);
+
+    return (TreeItem *) found;
+}
+
+
+
+
+/*
+ * delete_pending_key_items
+ *      Physically remove every tuple pointer queued on the index's
+ *      pending-deletion list, and drop tree nodes that become empty.
+ */
+static void
+delete_pending_key_items(IndexItem* idxItem)
+{
+    dlist_head *pending = &idxItem->tuplePointersScheduledForDeletion;
+
+    while (!dlist_is_empty(pending))
+    {
+        dlist_node *queued = dlist_pop_head_node(pending);
+        TuplePointerItem *pointerItem = dlist_container(TuplePointerItem, deletionNode, queued);
+        TreeItem   *treeItem = pointerItem->owner;
+
+        /* Unlink from the owning node's tuple list, then release it. */
+        dlist_delete(&pointerItem->node);
+        pfree(pointerItem);
+
+        /* A node with no tuples left has no reason to stay in the tree. */
+        if (dlist_is_empty(&treeItem->tuples))
+            rbt_delete(idxItem->tree, &treeItem->node);
+    }
+}
+
+
+/*
+ * cleanup
+ *      Perform deferred deletions for a relation.  Indexes that still have
+ *      open scans are left untouched.
+ *
+ * For live indexes this flushes the pending tuple-pointer deletions; for
+ * indexes scheduled for deletion it destroys the tree and frees the index.
+ */
+static void
+cleanup( RelationItem* relEntry )
+{
+    dlist_iter  liveIter;
+    dlist_mutable_iter doomedIter;
+
+    dlist_foreach(liveIter, &relEntry->indexes)
+    {
+        IndexItem  *liveIndex = dlist_container(IndexItem, node, liveIter.cur);
+
+        if (dlist_is_empty(&liveIndex->scans))
+            delete_pending_key_items(liveIndex);
+    }
+
+    dlist_foreach_modify(doomedIter, &relEntry->indexesScheduledForDeletion)
+    {
+        IndexItem  *doomedIndex = dlist_container(IndexItem, node, doomedIter.cur);
+
+        /* Don't delete (yet) indexes that are currently used for scans */
+        if (dlist_is_empty(&doomedIndex->scans))
+        {
+            rbt_destroy(doomedIndex->tree);
+            dlist_delete(&doomedIndex->node);
+            pfree(doomedIndex);
+        }
+    }
+}
+
+
+/*
+ * insert_tuple_entry_for_index
+ *      Add a pointer to tupItem to the given index.
+ *
+ * A new TuplePointerItem is allocated and wrapped in a stack TreeItem that
+ * is passed to rbt_insert.  "owner" starts out NULL here; rbt_combine sets
+ * it when the key already exists, and rbt_fix sets it for every pointer in
+ * the node it is invoked on (presumably the tree calls rbt_fix after
+ * copying a new node -- TODO confirm against the patched rbtree).
+ */
+static void
+insert_tuple_entry_for_index(IndexItem* idxItem, TupleItem* tupItem)
+{
+    TreeItem    carrier;
+    bool        isNew;
+    TuplePointerItem *pointerItem = palloc_object(TuplePointerItem);
+
+    pointerItem->tupleItem = tupItem;
+    pointerItem->owner = NULL;
+    dlist_node_init(&pointerItem->deletionNode);
+
+    dlist_init(&carrier.tuples);
+    dlist_push_tail(&carrier.tuples, &pointerItem->node);
+
+    /* rbt_insert requires the isNew out-parameter; its value is not used. */
+    rbt_insert(idxItem->tree, &carrier.node, &isNew);
+}
+
+
+/*
+ * create_tuple_item
+ *      Build a TupleItem holding a private copy of htup plus its attribute
+ *      values and null flags.
+ *
+ * Allocations happen in the current memory context.  The staging nodes are
+ * initialised as detached; "node" is left for the caller to link.  The ptr
+ * argument is not used.
+ */
+static TupleItem*
+create_tuple_item(HeapTuple htup, TupleDesc tupdesc, ItemPointer ptr)
+{
+    int         natts = tupdesc->natts;
+    TupleItem  *item = palloc_object(TupleItem);
+
+    item->values = palloc_array(Datum, natts);
+    item->nulls = palloc_array(bool, natts);
+    item->tuple = heap_copytuple(htup);
+    dlist_node_init(&item->stageCreatedNode);
+    dlist_node_init(&item->stageDeletedNode);
+
+    /* Extract from the copy, not from the caller's tuple. */
+    for (int att = 0; att < natts; att++)
+        item->values[att] = heap_getattr(item->tuple, att + 1, tupdesc, &item->nulls[att]);
+
+    return item;
+}
+
+
+/*
+ * add_tuple_entry
+ *      Make tupItem visible in the relation: add it to every live index and
+ *      to allTuples, optionally record it as created by the current
+ *      (sub)transaction, and give it a freshly assigned tuple pointer.
+ */
+static void
+add_tuple_entry(RelationItem* relItem, TupleItem* tupItem, bool addToUnstaged)
+{
+    dlist_iter  idxIter;
+
+    dlist_foreach(idxIter, &relItem->indexes)
+        insert_tuple_entry_for_index(dlist_container(IndexItem, node, idxIter.cur), tupItem);
+
+    dlist_push_head(&relItem->allTuples, &tupItem->node);
+
+    if (addToUnstaged)
+        dlist_push_tail(&relItem->unstagedCreatedTuples, &tupItem->stageCreatedNode);
+
+    /* A NULL pointer argument asks the map to generate a new identifier. */
+    tupItem->tuple->t_self = temp_catalog_tupmap_assign(NULL, tupItem);
+}
+
+
+/*
+ * remove_tuple_entry
+ *      Take tupItem out of the relation.
+ *
+ * In every live index the tuple's pointer entry is queued for deletion.
+ * With staging == false the queue is flushed immediately; with staging ==
+ * true the entries stay queued and the tuple is recorded on the relation's
+ * unstagedDeletedTuples list.  In both cases the tuple is unlinked from
+ * allTuples and its tuple-pointer mapping is released.
+ */
+static void
+remove_tuple_entry(RelationItem* relItem, TupleItem* tupItem, bool staging)
+{
+    dlist_iter  idxIter;
+
+    dlist_foreach(idxIter, &relItem->indexes)
+    {
+        IndexItem  *index = dlist_container(IndexItem, node, idxIter.cur);
+        TreeItem   *bucket = find_index_tree_item(index, tupItem, 0);
+
+        if (bucket != NULL)
+        {
+            dlist_iter  ptrIter;
+
+            dlist_foreach(ptrIter, &bucket->tuples)
+            {
+                TuplePointerItem *pointerItem = dlist_container(TuplePointerItem, node, ptrIter.cur);
+
+                if (pointerItem->tupleItem != tupItem)
+                    continue;
+
+                /* Queue it only once. */
+                if (dlist_node_is_detached(&pointerItem->deletionNode))
+                    dlist_push_tail(&index->tuplePointersScheduledForDeletion, &pointerItem->deletionNode);
+                break;
+            }
+        }
+
+        if (!staging)
+            delete_pending_key_items(index);
+    }
+
+    dlist_delete(&tupItem->node);
+    dlist_node_init(&tupItem->node);
+
+    if (staging)
+        dlist_push_tail(&relItem->unstagedDeletedTuples, &tupItem->stageDeletedNode);
+
+    temp_catalog_tupmap_unassign(&tupItem->tuple->t_self, tupItem);
+}
+
+
+
+/*
+ * rbt_alloc
+ *      Node allocator for index trees: allocates a TreeItem in the current
+ *      memory context and returns its embedded RBTNode.  arg is not used.
+ */
+static RBTNode*
+rbt_alloc(void *arg)
+{
+    TreeItem   *item = palloc_object(TreeItem);
+
+    return &item->node;
+}
+
+
+/*
+ * rbt_free
+ *      Node deallocator for index trees; the counterpart of rbt_alloc.
+ *      arg is not used.
+ */
+static void
+rbt_free(RBTNode *x, void *arg)
+{
+    pfree(x);
+}
+
+
+/*
+ * rbt_compare
+ *      Tree comparator: orders two tree nodes by the key columns of the
+ *      index, using the first tuple in each node's list as representative.
+ *
+ * Returns the first non-zero result of the per-column comparison functions,
+ * or 0 when all index->nkeys columns compare equal.  Only the "values"
+ * arrays are consulted; the null flags are not examined.
+ */
+static int
+rbt_compare(TreeItem* a, TreeItem* b, IndexItem* index)
+{
+    TupleItem  *lhs = dlist_head_element(TuplePointerItem, node, &a->tuples)->tupleItem;
+    TupleItem  *rhs = dlist_head_element(TuplePointerItem, node, &b->tuples)->tupleItem;
+    int         k;
+
+    for (k = 0; k < index->nkeys; k++)
+    {
+        int         att = index->attrNumbers[k] - 1;
+        Datum       res = DirectFunctionCall2Coll(index->keyCmpFuncs[k].fn_addr,
+                                                  index->keyCollations[k],
+                                                  lhs->values[att],
+                                                  rhs->values[att]);
+        int         order = DatumGetInt32(res);
+
+        if (order != 0)
+            return order;
+    }
+
+    return 0;
+}
+
+/*
+ * rbt_combine
+ *      Tree combiner, used when an inserted key already exists: move every
+ *      tuple pointer from the incoming node onto the tail of the existing
+ *      node's list and re-point its owner.  index is not used.
+ */
+static void
+rbt_combine(TreeItem* existing, TreeItem* newdata, IndexItem* index)
+{
+    while (!dlist_is_empty(&newdata->tuples))
+    {
+        dlist_node *moved = dlist_pop_head_node(&newdata->tuples);
+        TuplePointerItem *pointerItem = dlist_container(TuplePointerItem, node, moved);
+
+        dlist_push_tail(&existing->tuples, &pointerItem->node);
+        pointerItem->owner = existing;
+    }
+}
+
+
+/*
+ * rbt_fix
+ *      "Fix" callback registered with rbt_create for index trees.
+ *
+ * A TreeItem embeds a circular dlist head, so the list is only consistent
+ * if its neighbours point at the head's current address.  This routine
+ * re-links the list to the head stored inside x and makes every pointer
+ * item name x as its owner.  Presumably the tree invokes it after copying
+ * node contents to a new address -- TODO confirm against the patched
+ * rbtree.  arg is not used.
+ */
+static void
+rbt_fix(RBTNode *x, void *arg)
+{
+ dlist_iter tupIter;
+ TreeItem* item = (TreeItem*)x;
+ dlist_node* head = &item->tuples.head;
+
+ /* Fix old head element address. */
+ /*
+ * If next == prev and that node links to itself, the copied list was
+ * empty (its head pointed at its old self): re-initialise it in place.
+ */
+ if (head->next == head->prev && head->next == head->next->next)
+ {
+ head->next = head;
+ head->prev = head;
+ }
+ else
+ {
+ /* Non-empty list: point the first and last elements back at this head. */
+ head->next->prev = head;
+ head->prev->next = head;
+ }
+
+ /* Every pointer item in the list now belongs to this node. */
+ dlist_foreach(tupIter, &item->tuples)
+ {
+ TuplePointerItem* key = (TuplePointerItem*) dlist_container(TuplePointerItem, node, tupIter.cur);
+ key->owner = item;
+ }
+}
+
+
+/*
+ * compare_keyItem_with_scanKey
+ *      Test whether the tuple behind keyItem satisfies all nkeys scan keys.
+ *
+ * Scan key i is checked against index column i, using the index's
+ * comparison function and collation.  Only the five btree strategies
+ * (<, <=, =, >=, >) are understood; any other strategy counts as a
+ * mismatch.
+ */
+static bool
+compare_keyItem_with_scanKey(IndexItem* indexItem, TuplePointerItem* keyItem, ScanKey keys, int nkeys)
+{
+    int         k;
+
+    for (k = 0; k < nkeys; k++)
+    {
+        int         att = indexItem->attrNumbers[k] - 1;
+        Datum       stored = keyItem->tupleItem->values[att];
+        int         order;
+        bool        satisfied;
+
+        order = DatumGetInt32(DirectFunctionCall2Coll(indexItem->keyCmpFuncs[k].fn_addr,
+                                                      indexItem->keyCollations[k],
+                                                      stored,
+                                                      keys[k].sk_argument));
+
+        switch (keys[k].sk_strategy)
+        {
+            case BTLessStrategyNumber:
+                satisfied = (order < 0);
+                break;
+            case BTLessEqualStrategyNumber:
+                satisfied = (order <= 0);
+                break;
+            case BTEqualStrategyNumber:
+                satisfied = (order == 0);
+                break;
+            case BTGreaterEqualStrategyNumber:
+                satisfied = (order >= 0);
+                break;
+            case BTGreaterStrategyNumber:
+                satisfied = (order > 0);
+                break;
+            default:
+                satisfied = false;
+                break;
+        }
+
+        if (!satisfied)
+            return false;
+    }
+
+    return true;
+}
+
+
+/*
+ * get_index_entry
+ *      Return an index of relEntry usable for a scan on the given key
+ *      columns, building one if necessary.
+ *
+ * An existing index is reused when its leading columns equal attrNumbers
+ * (it may have more columns than requested).  An existing index whose
+ * columns are a strict prefix of the request is moved to the
+ * scheduled-for-deletion list.  A new index is populated from allTuples and
+ * appended to the relation's index list.
+ *
+ * NOTE(review): when a wider index is reused, rbt_compare still compares
+ * all of that index's columns; callers that build a search probe from fewer
+ * keys should be checked for this.
+ */
+static IndexItem*
+get_index_entry(RelationItem* relEntry, Relation relation, AttrNumber* attrNumbers, int numKeys)
+{
+    IndexItem  *created;
+    dlist_mutable_iter idxIter;
+    dlist_iter  tupIter;
+    int         k;
+
+    dlist_foreach_modify(idxIter, &relEntry->indexes)
+    {
+        IndexItem  *candidate = dlist_container(IndexItem, node, idxIter.cur);
+
+        if (candidate->nkeys >= numKeys)
+        {
+            if (memcmp(candidate->attrNumbers, attrNumbers, sizeof(AttrNumber) * numKeys) == 0)
+                return candidate;
+        }
+        else if (memcmp(candidate->attrNumbers, attrNumbers, sizeof(AttrNumber) * candidate->nkeys) == 0)
+        {
+            /* Superseded by the wider index built below. */
+            dlist_delete(&candidate->node);
+            dlist_push_tail(&relEntry->indexesScheduledForDeletion, &candidate->node);
+        }
+    }
+
+    created = palloc_object(IndexItem);
+    created->rel = relEntry;
+    created->nkeys = numKeys;
+    created->tree = rbt_create( sizeof(TreeItem), (rbt_comparator)rbt_compare, (rbt_combiner)rbt_combine, rbt_alloc, rbt_free, rbt_fix, created);
+    dlist_init(&created->scans);
+    dlist_init(&created->tuplePointersScheduledForDeletion);
+
+    /* Resolve comparison function and collation for each key column. */
+    for (k = 0; k < numKeys; k++)
+    {
+        FormData_pg_attribute *attribute = &relation->rd_att->attrs[attrNumbers[k]-1];
+        TypeCacheEntry *typeEntry = lookup_type_cache(attribute->atttypid, TYPECACHE_CMP_PROC_FINFO);
+
+        Assert(OidIsValid(typeEntry->cmp_proc_finfo.fn_oid));
+
+        created->keyCmpFuncs[k] = typeEntry->cmp_proc_finfo;
+        created->keyCollations[k] = attribute->attcollation;
+        created->attrNumbers[k] = attribute->attnum;
+    }
+
+    /* Index everything the relation already holds. */
+    dlist_foreach(tupIter, &relEntry->allTuples)
+        insert_tuple_entry_for_index(created, dlist_container(TupleItem, node, tupIter.cur));
+
+    dlist_push_tail(&relEntry->indexes, &created->node);
+
+    return created;
+}
+
+
+/*
+ * find_relation_entry
+ *      Linear search of temp_rels for the entry whose relid equals
+ *      rel->rd_rel->oid.  Returns NULL when the relation has no entry.
+ */
+static RelationItem*
+find_relation_entry(Relation rel)
+{
+    Oid         wanted = rel->rd_rel->oid;
+    dlist_iter  relIter;
+
+    dlist_foreach(relIter, &temp_rels)
+    {
+        RelationItem *candidate = dlist_container(RelationItem, node, relIter.cur);
+
+        if (candidate->relid == wanted)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+
+/*
+ * get_relation_entry
+ *      Like find_relation_entry, but creates and registers an empty entry
+ *      (allocated in the current memory context) when none exists yet.
+ */
+static RelationItem*
+get_relation_entry(Relation relation)
+{
+    RelationItem *entry = find_relation_entry(relation);
+
+    if (entry != NULL)
+        return entry;
+
+    entry = palloc_object(RelationItem);
+    entry->relid = relation->rd_rel->oid;
+
+    dlist_init(&entry->indexes);
+    dlist_init(&entry->allTuples);
+    dlist_init(&entry->indexesScheduledForDeletion);
+    dlist_init(&entry->unstagedCreatedTuples);
+    dlist_init(&entry->unstagedDeletedTuples);
+    dlist_init(&entry->stagedCreatedTuples);
+    dlist_init(&entry->stagedDeletedTuples);
+
+    dlist_push_tail(&temp_rels, &entry->node);
+
+    return entry;
+}
+
+
+/*
+ * temp_catalog_insert
+ *      Store a copy of htup in the in-memory catalog of the relation,
+ *      creating the relation entry on first use.
+ *
+ * All allocations are made in TopMemoryContext.  On return htup->t_self
+ * holds the tuple pointer assigned to the stored copy.
+ */
+void
+temp_catalog_insert(Relation relation, HeapTuple htup)
+{
+    MemoryContext callerCtx = MemoryContextSwitchTo(TopMemoryContext);
+    RelationItem *relEntry = get_relation_entry(relation);
+    TupleItem  *stored = create_tuple_item(htup, relation->rd_att, NULL);
+
+    add_tuple_entry(relEntry, stored, true);
+
+    LocalInvalidateCatCacheTuple(relation, stored->tuple, NULL);
+
+    /* Hand the generated identifier back to the caller. */
+    htup->t_self = stored->tuple->t_self;
+
+    MemoryContextSwitchTo(callerCtx);
+}
+
+
+/*
+ * temp_catalog_delete
+ *      Remove the in-memory tuple identified by ptr from the relation.
+ *
+ * Does nothing when the relation has no in-memory entry or ptr is not a
+ * known tuple pointer.  The tuple itself is kept on the relation's
+ * deleted list so the removal can be reverted on abort.
+ */
+void
+temp_catalog_delete(Relation relation, ItemPointer ptr)
+{
+    RelationItem *relEntry = find_relation_entry(relation);
+    TupleItem  *victim;
+
+    if (relEntry == NULL)
+        return;
+
+    victim = temp_catalog_tupmap_get(ptr);
+    if (victim == NULL)
+        return;
+
+    remove_tuple_entry(relEntry, victim, true);
+    LocalInvalidateCatCacheTuple(relation, victim->tuple, NULL);
+    pgstat_count_heap_delete(relation);
+    cleanup(relEntry);
+}
+
+
+/*
+ * temp_catalog_update
+ *      Replace the in-memory tuple identified by ptr with a copy of htup.
+ *
+ * The old tuple is removed in staging mode (kept on the deleted list so the
+ * change can be reverted) and the new copy is added as a freshly created
+ * tuple with its own tuple pointer.  Allocations go to TopMemoryContext.
+ *
+ * Does nothing when the relation has no in-memory entry or when ptr does
+ * not identify a known in-memory tuple, matching temp_catalog_delete.
+ */
+void
+temp_catalog_update(Relation relation, ItemPointer ptr, HeapTuple htup)
+{
+    RelationItem *relEntry;
+    TupleItem  *oldTupleEntry;
+    TupleItem  *newTupleEntry;
+    MemoryContext oldctx;
+
+    relEntry = find_relation_entry(relation);
+    if (!relEntry)
+        return;
+
+    /*
+     * Bail out on an unknown pointer instead of handing NULL to
+     * remove_tuple_entry(), which dereferences it unconditionally.
+     */
+    oldTupleEntry = (TupleItem *) temp_catalog_tupmap_get(ptr);
+    if (!oldTupleEntry)
+        return;
+
+    oldctx = MemoryContextSwitchTo(TopMemoryContext);
+
+    remove_tuple_entry(relEntry, oldTupleEntry, true);
+
+    newTupleEntry = create_tuple_item(htup, relation->rd_att, ptr);
+    add_tuple_entry(relEntry, newTupleEntry, true);
+
+    LocalInvalidateCatCacheTuple(relation, oldTupleEntry->tuple, newTupleEntry->tuple);
+
+    cleanup(relEntry);
+
+    MemoryContextSwitchTo(oldctx);
+}
+
+
+/*
+ * temp_catalog_update_inplace
+ *      In-place update entry point: implemented as an ordinary update of
+ *      the tuple identified by htup's own t_self.
+ */
+void
+temp_catalog_update_inplace(Relation relation, HeapTuple htup)
+{
+    ItemPointer self = &htup->t_self;
+
+    temp_catalog_update(relation, self, htup);
+}
+
+
+/*
+ * temp_catalog_beginscan_guts
+ *      Does the actual work of temp_catalog_beginscan(); the caller owns
+ *      the re-entrancy guard.
+ */
+static TempCatScanData *
+temp_catalog_beginscan_guts(Relation relation, int nkeys, ScanKey key)
+{
+    RelationItem *relEntry;
+    IndexItem  *indexEntry;
+    AttrNumber  attrNumbers[INDEX_MAX_KEYS];
+    MemoryContext oldctx;
+    TempCatScanData *scan;
+    int         strategy = InvalidStrategy;
+    int         walkDir = LeftRightWalk;
+    TreeItem   *lastItem = NULL;
+    TreeItem   *endItem = NULL;
+    bool        continuous = false;
+
+    relEntry = find_relation_entry(relation);
+    if (!relEntry)
+        return NULL;
+
+    for (int c = 0; c < nkeys; c++)
+        attrNumbers[c] = key[c].sk_attno;
+
+    oldctx = MemoryContextSwitchTo(TopMemoryContext);
+
+    indexEntry = get_index_entry(relEntry, relation, attrNumbers, nkeys);
+
+    scan = palloc_object(TempCatScanData);
+    scan->rel = relEntry;
+    scan->index = indexEntry;
+    scan->key = key;
+    scan->nkeys = nkeys;
+    scan->started = false;
+    scan->finished = false;
+    scan->listIter.cur = NULL;
+    scan->listIter.end = NULL;
+    scan->count = 0;
+
+    /*
+     * The scan can be positioned inside the tree only when every key but
+     * the last is an equality and the last one is a btree comparison.
+     * Otherwise the whole tree is walked and each tuple is filtered.
+     */
+    if (nkeys > 0)
+    {
+        strategy = key[nkeys - 1].sk_strategy;
+        for (int c = 0; c < nkeys - 1; c++)
+        {
+            if (key[c].sk_strategy != BTEqualStrategyNumber)
+            {
+                strategy = InvalidStrategy;
+                break;
+            }
+        }
+    }
+
+    if (strategy == BTEqualStrategyNumber ||
+        strategy == BTGreaterStrategyNumber ||
+        strategy == BTGreaterEqualStrategyNumber ||
+        strategy == BTLessStrategyNumber ||
+        strategy == BTLessEqualStrategyNumber)
+    {
+        TupleItem   tempTuple;
+        Datum      *attrValues;
+        int         maxAtt = 0;
+
+        /*
+         * Build a probe tuple that carries only the scan key values.
+         *
+         * NOTE(review): get_index_entry may return an index with more key
+         * columns than nkeys, and rbt_compare reads all of them from this
+         * probe -- verify that this cannot read unset slots.
+         */
+        for (int c = 0; c < nkeys; c++)
+            maxAtt = Max(maxAtt, attrNumbers[c]);
+
+        attrValues = palloc_array(Datum, maxAtt);
+        for (int c = 0; c < nkeys; c++)
+            attrValues[attrNumbers[c] - 1] = key[c].sk_argument;
+
+        tempTuple.values = attrValues;
+
+        continuous = true;
+
+        /*
+         * The iterator resumes after "lastItem".  Inclusive strategies
+         * start just outside the key so that an equal node is visited.
+         * Strict strategies must start on the equal node itself when one
+         * exists; otherwise that node would be visited first, fail the
+         * strict comparison, and end the continuous scan with no rows.
+         */
+        switch (strategy)
+        {
+            case BTEqualStrategyNumber:
+                lastItem = find_index_tree_item(indexEntry, &tempTuple, -1);
+                endItem = find_index_tree_item(indexEntry, &tempTuple, +1);
+                break;
+            case BTGreaterEqualStrategyNumber:
+                lastItem = find_index_tree_item(indexEntry, &tempTuple, -1);
+                break;
+            case BTGreaterStrategyNumber:
+                lastItem = find_index_tree_item(indexEntry, &tempTuple, 0);
+                if (!lastItem)
+                    lastItem = find_index_tree_item(indexEntry, &tempTuple, -1);
+                break;
+            case BTLessEqualStrategyNumber:
+                walkDir = RightLeftWalk;
+                lastItem = find_index_tree_item(indexEntry, &tempTuple, +1);
+                break;
+            case BTLessStrategyNumber:
+                walkDir = RightLeftWalk;
+                lastItem = find_index_tree_item(indexEntry, &tempTuple, 0);
+                if (!lastItem)
+                    lastItem = find_index_tree_item(indexEntry, &tempTuple, +1);
+                break;
+        }
+
+        pfree(attrValues);
+    }
+
+    rbt_begin_iterate(indexEntry->tree, walkDir, &scan->treeIter);
+    /* NULL means "start from the first node in walk order". */
+    scan->treeIter.last_visited = lastItem ? &lastItem->node : NULL;
+    scan->endNode = endItem ? &endItem->node : NULL;
+    scan->continuous = continuous;
+
+    /* An open scan pins the index against deferred cleanup. */
+    dlist_push_tail(&indexEntry->scans, &scan->node);
+
+    MemoryContextSwitchTo(oldctx);
+
+    return scan;
+}
+
+
+/*
+ * temp_catalog_beginscan
+ *      Open a scan over the in-memory catalog of "relation".
+ *
+ * Returns NULL when the relation has no in-memory entry, or when called
+ * re-entrantly (building an index can itself trigger catalog scans; those
+ * nested scans must not come back here).  The returned scan is allocated in
+ * TopMemoryContext and must be closed with temp_catalog_endscan().  The key
+ * array is referenced, not copied, and must outlive the scan.
+ *
+ * The re-entrancy flag is reset in a PG_FINALLY block so that an ERROR
+ * raised while the scan is being set up cannot leave it stuck and silently
+ * disable the in-memory catalog for the rest of the session.
+ */
+struct TempCatScanData*
+temp_catalog_beginscan(Relation relation, int nkeys, ScanKey key)
+{
+    static bool nested = false;
+    TempCatScanData *volatile scan = NULL;
+
+    if (nested)
+        return NULL;
+
+    nested = true;
+
+    PG_TRY();
+    {
+        scan = temp_catalog_beginscan_guts(relation, nkeys, key);
+    }
+    PG_FINALLY();
+    {
+        nested = false;
+    }
+    PG_END_TRY();
+
+    return scan;
+}
+
+
+/*
+ * temp_catalog_endscan
+ *      Close a scan opened by temp_catalog_beginscan.  A NULL scan is
+ *      accepted and ignored.
+ *
+ * The scan is unregistered from its index first, so the cleanup pass can
+ * process deletions that were deferred while the scan was open.
+ */
+void
+temp_catalog_endscan(TempCatScanData* scan)
+{
+    if (scan != NULL)
+    {
+        dlist_delete(&scan->node);
+        cleanup(scan->rel);
+        pfree(scan);
+    }
+}
+
+
+/*
+ * temp_catalog_getnext
+ *      Return the next in-memory tuple matching the scan keys, or NULL when
+ *      the scan is NULL, already finished, or has no more matches.
+ *
+ * The returned tuple is the stored copy itself (not a new copy); it is also
+ * written into bslot->base.tuple.
+ */
+HeapTuple
+temp_catalog_getnext(TempCatScanData* scan, BufferHeapTupleTableSlot* bslot)
+{
+ if (!scan || scan->finished)
+ return NULL;
+
+ for(;;){
+ TuplePointerItem* key;
+
+ /* Current node's list is exhausted (or not started): advance the tree. */
+ while (scan->listIter.cur == scan->listIter.end)
+ {
+ TreeItem* nextNode = (TreeItem*)rbt_iterate(&scan->treeIter);
+ if (!nextNode)
+ {
+ scan->finished = true;
+ return NULL;
+ }
+
+ /* Iterate the circular list: "end" is its head, "cur" its first element. */
+ scan->listIter.end = &nextNode->tuples.head;
+ scan->listIter.cur = scan->listIter.end->next ? scan->listIter.end->next : scan->listIter.end;
+ }
+
+ key = dlist_container(TuplePointerItem,node,scan->listIter.cur);
+
+ /* Step past the element before examining it. */
+ scan->listIter.cur = scan->listIter.cur->next;
+
+ scan->count++;
+
+ /* Skip entries queued for deletion; they are logically gone already. */
+ if (!dlist_node_is_detached(&key->deletionNode))
+ continue;
+
+ if (!compare_keyItem_with_scanKey(scan->index, key, scan->key, scan->nkeys))
+ {
+ /* A positioned scan ends at the first tuple that fails the keys. */
+ if (scan->continuous)
+ {
+ scan->finished = true;
+ return NULL;
+ }
+
+ continue;
+ }
+
+ scan->started = true;
+ bslot->base.tuple = key->tupleItem->tuple;
+ return key->tupleItem->tuple;
+ }
+}
+
+
+/*
+ * temp_catalog_is_fetched
+ *      True when the scan exists, has returned at least one tuple, and has
+ *      not yet reported end-of-scan.
+ */
+bool
+temp_catalog_is_fetched(TempCatScanData* scan)
+{
+    if (scan == NULL)
+        return false;
+
+    return scan->started && !scan->finished;
+}
+
+
+/*
+ * dlist_move
+ *      Append all elements of src to the tail of dst in one splice, leaving
+ *      src empty.  Element order is preserved.
+ */
+static void
+dlist_move(dlist_head *dst, dlist_head *src)
+{
+    dlist_node *first;
+    dlist_node *last;
+
+    /* convert NULL header to circular */
+    if (dst->head.next == NULL)
+        dlist_init(dst);
+
+    if (dlist_is_empty(src))
+        return;
+
+    first = dlist_head_node(src);
+    last = dlist_tail_node(src);
+
+    /* Hook the src chain between dst's current tail and dst's head. */
+    last->next = &dst->head;
+    first->prev = dst->head.prev;
+    dst->head.prev->next = first;
+    dst->head.prev = last;
+
+    dlist_init(src);
+}
+
+
+/*
+ * free_tuple
+ *      Release a TupleItem and everything it owns.  The item must already
+ *      be unlinked from all lists.
+ */
+static void
+free_tuple(TupleItem* item)
+{
+    pfree(item->values);
+    pfree(item->nulls);
+    heap_freetuple(item->tuple);
+
+    pfree(item);
+}
+
+
+/*
+ * free_deleted_tuples
+ *      Permanently free every tuple on a deleted-tuples list (linked
+ *      through stageDeletedNode) and leave the list empty.
+ */
+static void
+free_deleted_tuples(dlist_head* list)
+{
+    for (;;)
+    {
+        dlist_node *popped;
+
+        if (dlist_is_empty(list))
+            break;
+
+        popped = dlist_pop_head_node(list);
+        free_tuple(dlist_container(TupleItem, stageDeletedNode, popped));
+    }
+
+    dlist_init(list);
+}
+
+
+/*
+ * revert_created_tuples
+ *      Undo tuple creations recorded on "list" (linked through
+ *      stageCreatedNode) and free the tuples.
+ *
+ * A tuple that is still live is removed from the relation right away.  A
+ * tuple that was also deleted in the meantime is already out of the
+ * relation, so it only needs to be taken off the deleted list.
+ */
+static void
+revert_created_tuples(RelationItem* relEntry, dlist_head* list)
+{
+    while (!dlist_is_empty(list))
+    {
+        dlist_node *popped = dlist_pop_head_node(list);
+        TupleItem  *created = dlist_container(TupleItem, stageCreatedNode, popped);
+
+        if (!dlist_node_is_detached(&created->stageDeletedNode))
+            dlist_delete(&created->stageDeletedNode);
+        else
+            remove_tuple_entry(relEntry, created, false);
+
+        LocalInvalidateCatCacheTupleNow(relEntry->relid, created->tuple, NULL);
+
+        free_tuple(created);
+    }
+}
+
+
+/*
+ * revert_deleted_tuples
+ *      Undo tuple deletions recorded on "list" (linked through
+ *      stageDeletedNode) by putting each tuple back into the relation.
+ *
+ * The tuple is re-added without being recorded as newly created, and it
+ * receives a new tuple pointer in the process.
+ */
+static void
+revert_deleted_tuples(RelationItem* relEntry, dlist_head* list)
+{
+    while (!dlist_is_empty(list))
+    {
+        dlist_node *popped = dlist_pop_head_node(list);
+        TupleItem  *revived = dlist_container(TupleItem, stageDeletedNode, popped);
+
+        /* Mark it as no longer deleted before it becomes visible again. */
+        dlist_node_init(&revived->stageDeletedNode);
+        add_tuple_entry(relEntry, revived, false);
+
+        LocalInvalidateCatCacheTupleNow(relEntry->relid, revived->tuple, NULL);
+    }
+}
+
+
+/*
+ * detach_created_tuples
+ *      Empty a created-tuples list without touching the tuples themselves;
+ *      each popped link is reset to the detached state.
+ */
+static void
+detach_created_tuples(dlist_head* list)
+{
+    while (!dlist_is_empty(list))
+        dlist_node_init(dlist_pop_head_node(list));
+}
+
+
+/*
+ * temp_cat_xact_cb
+ *      Transaction callback.
+ *
+ * On pre-commit, the changes of every relation become permanent: created
+ * tuples are simply forgotten as "pending" and deleted tuples are freed.
+ * On pre-abort, they are rolled back: created tuples are removed and
+ * deleted tuples are restored (allocating in TopMemoryContext).  Other
+ * events are ignored.  arg is not used.
+ */
+static void
+temp_cat_xact_cb(XactEvent event, void *arg)
+{
+    dlist_iter  relIter;
+
+    switch (event)
+    {
+        case XACT_EVENT_PRE_COMMIT:
+        case XACT_EVENT_PARALLEL_PRE_COMMIT:
+            dlist_foreach(relIter, &temp_rels)
+            {
+                RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+
+                detach_created_tuples(&rel->unstagedCreatedTuples);
+                detach_created_tuples(&rel->stagedCreatedTuples);
+                free_deleted_tuples(&rel->unstagedDeletedTuples);
+                free_deleted_tuples(&rel->stagedDeletedTuples);
+                cleanup(rel);
+            }
+            break;
+
+        case XACT_EVENT_PRE_ABORT:
+        case XACT_EVENT_PARALLEL_PRE_ABORT:
+            dlist_foreach(relIter, &temp_rels)
+            {
+                RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+                MemoryContext callerCtx = MemoryContextSwitchTo(TopMemoryContext);
+
+                revert_created_tuples(rel, &rel->unstagedCreatedTuples);
+                revert_created_tuples(rel, &rel->stagedCreatedTuples);
+                revert_deleted_tuples(rel, &rel->unstagedDeletedTuples);
+                revert_deleted_tuples(rel, &rel->stagedDeletedTuples);
+                cleanup(rel);
+
+                MemoryContextSwitchTo(callerCtx);
+            }
+            break;
+
+        default:
+            break;
+    }
+}
+
+
+/*
+ * temp_cat_subxact_cb
+ *      Subtransaction callback.
+ *
+ * On subcommit, each relation's unstaged created/deleted lists are appended
+ * to the corresponding staged lists.  On sub pre-abort, only the unstaged
+ * changes are reverted.  Other events are ignored; the subtransaction ids
+ * and arg are not used.
+ */
+static void
+temp_cat_subxact_cb(SubXactEvent event, SubTransactionId mySubid,
+                    SubTransactionId parentSubid, void *arg)
+{
+    dlist_iter  relIter;
+
+    switch (event)
+    {
+        case SUBXACT_EVENT_COMMIT_SUB:
+            dlist_foreach(relIter, &temp_rels)
+            {
+                RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+
+                dlist_move(&rel->stagedCreatedTuples, &rel->unstagedCreatedTuples);
+                dlist_move(&rel->stagedDeletedTuples, &rel->unstagedDeletedTuples);
+            }
+            break;
+
+        case SUBXACT_EVENT_PRE_ABORT_SUB:
+            dlist_foreach(relIter, &temp_rels)
+            {
+                RelationItem *rel = dlist_container(RelationItem, node, relIter.cur);
+                MemoryContext callerCtx = MemoryContextSwitchTo(TopMemoryContext);
+
+                revert_created_tuples(rel, &rel->unstagedCreatedTuples);
+                revert_deleted_tuples(rel, &rel->unstagedDeletedTuples);
+                cleanup(rel);
+
+                MemoryContextSwitchTo(callerCtx);
+            }
+            break;
+
+        default:
+            break;
+    }
+}
+
+
+/*
+ * temp_catalog_init
+ *      One-time module setup: initialise the relation list and register the
+ *      transaction and subtransaction callbacks.  Later calls are no-ops.
+ */
+void
+temp_catalog_init(void)
+{
+    if (initialized)
+        return;
+
+    dlist_init(&temp_rels);
+    RegisterSubXactCallback(temp_cat_subxact_cb, NULL);
+    RegisterXactCallback(temp_cat_xact_cb, NULL);
+
+    initialized = true;
+}
diff --git a/src/backend/access/heap/tupmap.c b/src/backend/access/heap/tupmap.c
new file mode 100644
index 00000000000..a86c675e274
--- /dev/null
+++ b/src/backend/access/heap/tupmap.c
@@ -0,0 +1,136 @@
+#include "postgres.h"
+#include "storage/itemptr.h"
+#include "lib/rbtree.h"
+#include "access/tempcat.h"
+
+/*
+ * One entry of the tuple-pointer map: associates an ItemPointerData with an
+ * opaque payload.  "node" must stay first because the code casts between
+ * RBTNode* and MapItem*.
+ */
+typedef struct MapItem{
+ RBTNode node;
+ /* map key */
+ ItemPointerData pointer;
+ /* caller-supplied payload returned by temp_catalog_tupmap_get */
+ void* data;
+}MapItem;
+
+/* The map itself; created lazily by temp_catalog_tupmap_assign. */
+static RBTree* tree;
+/* Next value to turn into a generated tuple pointer; 0 is never handed out. */
+static uint64_t counter;
+/* Tells tupmap_rbt_combine whether an insert may replace existing data. */
+static bool overwrite = false;
+
+/* Upper bound of "counter" (0xFFFF * 2^32); the counter wraps when it reaches this. */
+#define COUNTER_MAX ( ((((uint64_t)1)<<16)-1) * (((uint64_t)1)<<32) )
+
+/*
+ * ItemPointerToInt
+ *      Pack an item pointer into one integer: (ip_posid - 1) in the bits
+ *      above 32, the 32-bit block number (bi_hi:bi_lo) below.
+ *
+ * The block number is assembled in an unsigned 32-bit value.  Shifting the
+ * uint16 bi_hi directly would be done in "int", which overflows for
+ * bi_hi >= 0x8000 and sign-extends over the offset bits.  The offset part
+ * is scaled by multiplication so that a zero ip_posid (offset -1) does not
+ * left-shift a negative value.
+ */
+static int64_t
+ItemPointerToInt(ItemPointer ptr)
+{
+    int64_t     offset = (int64_t) ptr->ip_posid - 1;
+    uint32_t    block = ((uint32_t) ptr->ip_blkid.bi_hi << 16) | (uint32_t) ptr->ip_blkid.bi_lo;
+
+    return offset * ((int64_t) 1 << 32) + (int64_t) block;
+}
+
+/*
+ * IntToItemPointer
+ *      Inverse of ItemPointerToInt: the low 32 bits become the block number
+ *      (split into bi_hi/bi_lo) and the bits above 32, plus one, become
+ *      ip_posid.  Each field keeps only the bits that fit its type.
+ */
+static ItemPointerData
+IntToItemPointer(int64_t i)
+{
+    ItemPointerData result;
+
+    result.ip_blkid.bi_lo = i;
+    result.ip_blkid.bi_hi = i >> 16;
+    result.ip_posid = (i >> 32) + 1;
+
+    return result;
+}
+
+/*
+ * tupmap_rbt_compare
+ *      Tree comparator for the tuple-pointer map: orders entries by the
+ *      integer form of their item pointer.  Returns -1, 0 or 1.
+ *
+ * The two 64-bit keys are compared explicitly.  Returning their difference
+ * would truncate it to "int", making keys that differ by a multiple of
+ * 2^32 look equal and giving the wrong sign for other large differences.
+ * arg is not used.
+ */
+static int
+tupmap_rbt_compare(const RBTNode *a, const RBTNode *b, void *arg)
+{
+    int64_t     lhs = ItemPointerToInt(&((MapItem *) a)->pointer);
+    int64_t     rhs = ItemPointerToInt(&((MapItem *) b)->pointer);
+
+    if (lhs < rhs)
+        return -1;
+    if (lhs > rhs)
+        return 1;
+    return 0;
+}
+
+/*
+ * tupmap_rbt_combine
+ *      Tree combiner for the tuple-pointer map, used when the key already
+ *      exists.  The existing payload is replaced only while the file-level
+ *      "overwrite" flag is set; otherwise the existing entry is left alone.
+ *      arg is not used.
+ */
+static void
+tupmap_rbt_combine(RBTNode *existing, const RBTNode *newdata, void *arg)
+{
+    MapItem    *current = (MapItem *) existing;
+
+    if (!overwrite)
+        return;
+
+    current->data = ((MapItem *) newdata)->data;
+}
+
+/*
+ * tupmap_rbt_alloc
+ *      Node allocator for the tuple-pointer map: one MapItem from the
+ *      current memory context.  arg is not used.
+ */
+static RBTNode*
+tupmap_rbt_alloc(void *arg)
+{
+    MapItem    *item = palloc_object(MapItem);
+
+    return (RBTNode *) item;
+}
+
+/*
+ * tupmap_rbt_free
+ *      Node deallocator for the tuple-pointer map; the counterpart of
+ *      tupmap_rbt_alloc.  arg is not used.
+ */
+static void
+tupmap_rbt_free(RBTNode *x, void *arg)
+{
+    pfree(x);
+}
+
+
+/*
+ * temp_catalog_tupmap_assign
+ *      Register "data" in the tuple-pointer map and return the item pointer
+ *      under which it is stored.
+ *
+ * With a non-NULL ptr, that exact pointer is used and any payload already
+ * stored under it is replaced.  With a NULL ptr, a pointer is generated
+ * from a running counter; values already in use are skipped by trying the
+ * next one.  The map is created on first use.
+ */
+ItemPointerData
+temp_catalog_tupmap_assign(ItemPointer ptr, void* data)
+{
+    if (tree == NULL)
+        tree = rbt_create( sizeof(MapItem), tupmap_rbt_compare, tupmap_rbt_combine, tupmap_rbt_alloc, tupmap_rbt_free, NULL, NULL);
+
+    for (;;)
+    {
+        MapItem     candidate;
+        MapItem    *stored;
+        bool        isNew;
+
+        candidate.data = data;
+
+        /* The combiner consults this flag if the key turns out to exist. */
+        overwrite = (ptr != NULL);
+
+        if (overwrite)
+            candidate.pointer = *ptr;
+        else
+        {
+            /* Zero is skipped; the counter wraps at COUNTER_MAX. */
+            if (unlikely(!counter))
+                counter = 1;
+            candidate.pointer = IntToItemPointer(counter);
+            counter++;
+            if (unlikely(counter >= COUNTER_MAX))
+                counter = 0;
+        }
+
+        stored = (MapItem *) rbt_insert(tree, (RBTNode *) &candidate, &isNew);
+
+        /* A generated key that collided is retried with the next value. */
+        if (isNew || overwrite)
+            return stored->pointer;
+    }
+}
+
+
+/*
+ * temp_catalog_tupmap_unassign
+ *      Remove the mapping for ptr, but only if it currently maps to "data".
+ *
+ * Returns true when an entry was removed; false when the map does not
+ * exist, the pointer is unknown, or it maps to a different payload.
+ */
+bool
+temp_catalog_tupmap_unassign(ItemPointer ptr, void* data)
+{
+    MapItem     probe;
+    MapItem    *found;
+
+    if (tree == NULL)
+        return false;
+
+    probe.pointer = *ptr;
+    found = (MapItem *) rbt_find(tree, (RBTNode *) &probe);
+
+    if (found == NULL || found->data != data)
+        return false;
+
+    rbt_delete(tree, (RBTNode *) found);
+    return true;
+}
+
+
+/*
+ * temp_catalog_tupmap_get
+ *      Return the payload stored under ptr, or NULL when the map has not
+ *      been created yet or holds no entry for ptr.
+ */
+void*
+temp_catalog_tupmap_get(ItemPointer ptr)
+{
+    MapItem     searchItem;
+    MapItem    *item;
+
+    /* This function returns a pointer, so "not found" is NULL, not false. */
+    if (!tree)
+        return NULL;
+
+    searchItem.pointer = *ptr;
+    item = (MapItem *) rbt_find(tree, (RBTNode *) &searchItem);
+    if (!item)
+        return NULL;
+
+    return item->data;
+}
\ No newline at end of file
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 8b24e7bc33c..944a922b393 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -626,7 +626,8 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
* to keep checking for creation or extension of the file, which happens
* infrequently.
*/
- CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
+ if (!RELATION_IS_LOCAL(rel))
+ CacheInvalidateSmgr(RelationGetSmgr(rel)->smgr_rlocator);
return buf;
}
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index b123acc5a60..78c0f790710 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -23,6 +23,7 @@
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "lib/stringinfo.h"
@@ -404,6 +405,7 @@ systable_beginscan(Relation heapRelation,
sysscan->heap_rel = heapRelation;
sysscan->irel = irel;
sysscan->slot = table_slot_create(heapRelation, NULL);
+ sysscan->tempscan = NULL;
if (snapshot == NULL)
{
@@ -418,6 +420,9 @@ systable_beginscan(Relation heapRelation,
sysscan->snapshot = NULL;
}
+ if (enable_temp_memory_catalog)
+ sysscan->tempscan = temp_catalog_beginscan(heapRelation, nkeys, key);
+
if (irel)
{
int i;
@@ -467,6 +472,7 @@ systable_beginscan(Relation heapRelation,
if (TransactionIdIsValid(CheckXidAlive))
bsysscan = true;
+
return sysscan;
}
@@ -506,6 +512,16 @@ systable_getnext(SysScanDesc sysscan)
{
HeapTuple htup = NULL;
+ if (sysscan->tempscan)
+ {
+ htup = temp_catalog_getnext(sysscan->tempscan, (BufferHeapTupleTableSlot *) sysscan->slot);
+ if(htup)
+ {
+ HandleConcurrentAbort();
+ return htup;
+ }
+ }
+
if (sysscan->irel)
{
if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
@@ -566,6 +582,9 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
Snapshot freshsnap;
bool result;
+ if (sysscan->tempscan && temp_catalog_is_fetched(sysscan->tempscan))
+ return true;
+
Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
/*
@@ -597,6 +616,9 @@ systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
void
systable_endscan(SysScanDesc sysscan)
{
+ if (sysscan->tempscan)
+ temp_catalog_endscan(sysscan->tempscan);
+
if (sysscan->slot)
{
ExecDropSingleTupleTableSlot(sysscan->slot);
@@ -855,6 +877,10 @@ systable_inplace_update_begin(Relation relation,
slot = scan->slot;
Assert(TTS_IS_BUFFERTUPLE(slot));
bslot = (BufferHeapTupleTableSlot *) slot;
+
+ /* With the in-memory temp catalog the tuple has no backing buffer, so bslot->buffer is zero */
+ if (!bslot->buffer)
+ break;
} while (!heap_inplace_lock(scan->heap_rel,
bslot->base.tuple, bslot->buffer,
(void (*) (void *)) systable_endscan, scan));
@@ -898,6 +924,8 @@ systable_inplace_update_cancel(void *state)
HeapTuple oldtup = bslot->base.tuple;
Buffer buffer = bslot->buffer;
- heap_inplace_unlock(relation, oldtup, buffer);
+ if (buffer)
+ heap_inplace_unlock(relation, oldtup, buffer);
+
systable_endscan(scan);
}
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4cecf630060..777a87076c3 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2815,6 +2815,9 @@ AbortTransaction(void)
TransStateAsString(s->state));
Assert(s->parent == NULL);
+ CallXactCallbacks(is_parallel_worker ? XACT_EVENT_PARALLEL_PRE_ABORT
+ : XACT_EVENT_PRE_ABORT);
+
/*
* set the current transaction state information appropriately during the
* abort processing
@@ -5248,6 +5251,9 @@ AbortSubTransaction(void)
AtEOSubXact_Parallel(false, s->subTransactionId);
s->parallelModeLevel = 0;
+ CallSubXactCallbacks(SUBXACT_EVENT_PRE_ABORT_SUB, s->subTransactionId,
+ s->parent->subTransactionId);
+
/*
* We can skip all this stuff if the subxact failed before creating a
* ResourceOwner...
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ffea4993177..a315e13a952 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -98,6 +98,7 @@
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
+#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
@@ -166,6 +167,8 @@ static double PrevCheckPointDistance = 0;
*/
static bool check_wal_consistency_checking_deferred = false;
+static bool have_non_temp_records = false;
+
/*
* GUC support
*/
@@ -780,6 +783,9 @@ XLogInsertRecord(XLogRecData *rdata,
if (!XLogInsertAllowed())
elog(ERROR, "cannot make new WAL entries during recovery");
+ if (!IsTempTableScope() && rechdr->xl_rmid != RM_XACT_ID)
+ have_non_temp_records = true;
+
/*
* Given that we're not in recovery, InsertTimeLineID is set and can't
* change, so we can read it without a lock.
@@ -2603,6 +2609,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Assert(Insert >= Write);
}
#endif
+
+ have_non_temp_records = false;
}
/*
@@ -8703,6 +8711,9 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Assert(tli != 0);
+ if (MyBackendType == B_BACKEND && !have_non_temp_records)
+ return;
+
/*
* Quick exit if fsync is disabled or write() has already synced the WAL
* file.
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 8aefbcd6c74..2f0b54ef09b 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -579,6 +579,14 @@ GetNewRelFileNumber(Oid reltablespace, Relation pg_class, char relpersistence)
else
rlocator.locator.relNumber = GetNewObjectId();
+ /* There is no chance that temporary table name will collide, because
+ * name contains backend id which is unique among all backends */
+ if (rlocator.backend != INVALID_PROC_NUMBER)
+ {
+ collides = false;
+ break;
+ }
+
/* Check for existing file of same name */
rpath = relpath(rlocator, MAIN_FORKNUM);
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 0489cbabcb8..f8f5a9ba0f2 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -276,6 +276,27 @@ performDeletion(const ObjectAddress *object,
Relation depRel;
ObjectAddresses *targetObjects;
+ if (flags & PERFORM_DELETION_CONCURRENTLY)
+ {
+ /*
+ * We must commit our transaction in order to make the first pg_index
+ * state update visible to other sessions. If the DROP machinery has
+ * already performed any other actions (removal of other objects,
+ * pg_depend entries, etc), the commit would make those actions
+ * permanent, which would leave us with inconsistent catalog state if
+ * we fail partway through the following sequence. Since DROP INDEX
+ * CONCURRENTLY is restricted to dropping just one index that has no
+ * dependencies, we should get here before anything's been done ---
+ * but let's check that to be sure. We can verify that the current
+ * transaction has not executed any transactional updates by checking
+ * that no XID has been assigned.
+ */
+ if (GetTopTransactionIdIfAny() != InvalidTransactionId)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
+ }
+
/*
* We save some cycles by opening pg_depend just once and passing the
* Relation pointer down to all the recursive deletion steps.
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 2d66639a209..b96c7e1c8f9 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1284,6 +1284,11 @@ heap_create_with_catalog(const char *relname,
else
relacl = NULL;
+ /*
+ * This prevents sending cache invalidation messages for temporary tables.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Create the relcache entry (mostly dummy at this point) and the physical
* disk file. (If we fail further down, it's the smgr's responsibility to
@@ -1510,6 +1515,8 @@ heap_create_with_catalog(const char *relname,
table_close(new_rel_desc, NoLock); /* do not unlock till end of xact */
table_close(pg_class_desc, RowExclusiveLock);
+ END_TEMP_TABLE_SCOPE();
+
return relid;
}
@@ -3118,6 +3125,8 @@ heap_truncate_one_rel(Relation rel)
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
return;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/* Truncate the underlying relation */
table_relation_nontransactional_truncate(rel);
@@ -3135,6 +3144,8 @@ heap_truncate_one_rel(Relation rel)
/* keep the lock... */
table_close(toastrel, NoLock);
}
+
+ END_TEMP_TABLE_SCOPE();
}
/*
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index abd8eef0865..f710b05d630 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -28,6 +28,7 @@
#include "access/multixact.h"
#include "access/relscan.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/toast_compression.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
@@ -56,6 +57,7 @@
#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/trigger.h"
+#include "commands/typecmds.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -131,7 +133,7 @@ static void SetReindexProcessing(Oid heapOid, Oid indexOid);
static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
-
+static void IndexTypeCreate(Relation indexRelation);
/*
* relationHasPrimaryKey
@@ -672,6 +674,112 @@ UpdateIndexRelation(Oid indexoid,
heap_freetuple(tuple);
}
+/*
+ * A reltype is created only for multicolumn, non-system B-tree indexes that
+ * do not have one yet (in binary upgrade, only when a type OID is preset).
+ */
+#define INDEX_NEEDS_RELTYPE(indexRelation, indexInfo, accessMethodOid) ( \
+ !IsSystemRelation(indexRelation) \
+ && (indexInfo)->ii_NumIndexKeyAttrs > 1 \
+ && (accessMethodOid) == BTREE_AM_OID \
+ && (indexRelation)->rd_rel->reltype == InvalidOid \
+ && (!IsBinaryUpgrade || binary_upgrade_next_pg_type_oid != InvalidOid))
+
+/*
+ * IndexTypeCreate
+ * Create a composite row type and its array type for the given index,
+ * and store the row type's OID in the index's relcache reltype field.
+ */
+static void
+IndexTypeCreate(Relation indexRelation)
+{
+ Oid ownerId = GetUserId();
+ Oid namespaceId = RelationGetNamespace(indexRelation);
+ Oid new_array_oid = AssignTypeArrayOid();
+ ObjectAddress new_type_addr;
+ char *relarrayname;
+
+ /* Index must not have a reltype yet */
+ Assert(indexRelation->rd_rel->reltype == InvalidOid);
+
+ /*
+ * Build a composite type for the multicolumn index so that it can be used
+ * in statistics. Collecting statistics on whole index rows allows better
+ * selectivity estimates for multicolumn joins.
+ */
+ new_type_addr = TypeCreate(InvalidOid, /* no predetermined type OID */
+ RelationGetRelationName(indexRelation),
+ namespaceId,
+ RelationGetRelid(indexRelation),
+ RELKIND_INDEX,
+ ownerId, /* owner's ID */
+ -1, /* internal size (varlena) */
+ TYPTYPE_COMPOSITE, /* type-type (composite) */
+ TYPCATEGORY_COMPOSITE, /* type-category (ditto) */
+ false, /* composite types are never preferred */
+ DEFAULT_TYPDELIM, /* default array delimiter */
+ F_RECORD_IN, /* input procedure */
+ F_RECORD_OUT, /* output procedure */
+ F_RECORD_RECV, /* receive procedure */
+ F_RECORD_SEND, /* send procedure */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ InvalidOid, /* analyze procedure - default */
+ InvalidOid, /* subscript procedure - none */
+ InvalidOid, /* array element type - irrelevant */
+ false, /* this is not an array type */
+ new_array_oid, /* array type if any */
+ InvalidOid, /* domain base type - irrelevant */
+ NULL, /* default value - none */
+ NULL, /* default binary representation */
+ false, /* passed by reference */
+ TYPALIGN_DOUBLE, /* alignment - must be the largest! */
+ TYPSTORAGE_EXTENDED, /* fully TOASTable */
+ -1, /* typmod */
+ 0, /* array dimensions for typBaseType */
+ false, /* Type NOT NULL */
+ InvalidOid); /* rowtypes never have a collation */
+
+ indexRelation->rd_rel->reltype = new_type_addr.objectId;
+
+ relarrayname = makeArrayTypeName(RelationGetRelationName(indexRelation),
+ namespaceId);
+
+ TypeCreate(new_array_oid, /* force the type's OID to this */
+ relarrayname, /* Array type name */
+ namespaceId, /* Same namespace as parent */
+ InvalidOid, /* Not composite, no relationOid */
+ 0, /* relkind, also N/A here */
+ ownerId, /* owner's ID */
+ -1, /* Internal size (varlena) */
+ TYPTYPE_BASE, /* Not composite - typelem is */
+ TYPCATEGORY_ARRAY, /* type-category (array) */
+ false, /* array types are never preferred */
+ DEFAULT_TYPDELIM, /* default array delimiter */
+ F_ARRAY_IN, /* array input proc */
+ F_ARRAY_OUT, /* array output proc */
+ F_ARRAY_RECV, /* array recv (bin) proc */
+ F_ARRAY_SEND, /* array send (bin) proc */
+ InvalidOid, /* typmodin procedure - none */
+ InvalidOid, /* typmodout procedure - none */
+ F_ARRAY_TYPANALYZE, /* array analyze procedure */
+ F_ARRAY_SUBSCRIPT_HANDLER, /* array subscript procedure */
+ indexRelation->rd_rel->reltype, /* array element type - the rowtype */
+ true, /* yes, this is an array type */
+ InvalidOid, /* this has no array type */
+ InvalidOid, /* domain base type - irrelevant */
+ NULL, /* default value - none */
+ NULL, /* default binary representation */
+ false, /* passed by reference */
+ TYPALIGN_DOUBLE, /* alignment - must be the largest! */
+ TYPSTORAGE_EXTENDED, /* fully TOASTable */
+ -1, /* typmod */
+ 0, /* array dimensions for typBaseType */
+ false, /* Type NOT NULL */
+ InvalidOid); /* rowtypes never have a collation */
+
+ pfree(relarrayname);
+}
/*
* index_create
@@ -757,6 +865,7 @@ index_create(Relation heapRelation,
bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
+ bool withoutType = (flags & INDEX_CREATE_WITHOUT_TYPE) != 0;
char relkind;
TransactionId relfrozenxid;
MultiXactId relminmxid;
@@ -914,6 +1023,11 @@ index_create(Relation heapRelation,
indexRelationName, RelationGetRelationName(heapRelation))));
}
+ /*
+ * Don't send cache invalidation messages for indexes on temp tables
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(heapRelation->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* construct tuple descriptor for index tuples
*/
@@ -991,6 +1105,11 @@ index_create(Relation heapRelation,
Assert(relminmxid == InvalidMultiXactId);
Assert(indexRelationId == RelationGetRelid(indexRelation));
+ /* Create a reltype for index if it is needed */
+ if (withoutType == false && INDEX_NEEDS_RELTYPE(indexRelation, indexInfo, accessMethodId)
+ && !is_internal)
+ IndexTypeCreate(indexRelation);
+
/*
* Obtain exclusive lock on it. Although no other transactions can see it
* until we commit, this prevents deadlock-risk complaints from lock
@@ -1282,6 +1401,8 @@ index_create(Relation heapRelation,
*/
index_close(indexRelation, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return indexRelationId;
}
@@ -1455,7 +1576,7 @@ index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId,
indcoloptions->values,
stattargets,
reloptionsDatum,
- INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
+ INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT | INDEX_CREATE_WITHOUT_TYPE,
0,
true, /* allow table to be a system catalog? */
false, /* is_internal? */
@@ -1597,6 +1718,32 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
newClassForm->relispartition = oldClassForm->relispartition;
oldClassForm->relispartition = isPartition;
+ /* copy index type to new index */
+ newClassForm->reltype = oldClassForm->reltype;
+
+ if (OidIsValid(oldClassForm->reltype))
+ {
+ Relation pg_type;
+ HeapTuple typeTuple;
+ Form_pg_type typeForm;
+
+ pg_type = table_open(TypeRelationId, RowExclusiveLock);
+
+ typeTuple = SearchSysCacheCopy1(TYPEOID,
+ ObjectIdGetDatum(oldClassForm->reltype));
+ if (!HeapTupleIsValid(typeTuple))
+ elog(ERROR, "could not find tuple for type %u", oldClassForm->reltype);
+
+ typeForm = (Form_pg_type) GETSTRUCT(typeTuple);
+
+ typeForm->typrelid = newIndexId;
+
+ CatalogTupleUpdate(pg_type, &typeTuple->t_self, typeTuple);
+
+ heap_freetuple(typeTuple);
+ table_close(pg_type, RowExclusiveLock);
+ }
+
CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
@@ -1785,8 +1932,9 @@ index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
* vice-versa. Note that a call to CommandCounterIncrement() would cause
* duplicate entries in pg_depend, so this should not be done.
*/
- changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
- changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
+ //changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
+ //changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
+ deleteDependencyRecordsFor(RelationRelationId, newIndexId, false);
changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
@@ -2119,6 +2267,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
Relation indexRelation;
HeapTuple tuple;
bool hasexprs;
+ bool remove_statistics;
LockRelId heaprelid,
indexrelid;
LOCKTAG heaplocktag;
@@ -2195,24 +2344,6 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
*/
if (concurrent)
{
- /*
- * We must commit our transaction in order to make the first pg_index
- * state update visible to other sessions. If the DROP machinery has
- * already performed any other actions (removal of other objects,
- * pg_depend entries, etc), the commit would make those actions
- * permanent, which would leave us with inconsistent catalog state if
- * we fail partway through the following sequence. Since DROP INDEX
- * CONCURRENTLY is restricted to dropping just one index that has no
- * dependencies, we should get here before anything's been done ---
- * but let's check that to be sure. We can verify that the current
- * transaction has not executed any transactional updates by checking
- * that no XID has been assigned.
- */
- if (GetTopTransactionIdIfAny() != InvalidTransactionId)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
-
/*
* Mark index invalid by updating its pg_index entry
*/
@@ -2312,6 +2443,16 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
/* ensure that stats are dropped if transaction commits */
pgstat_drop_relation(userIndexRelation);
+ /*
+ * We might have stored multicolumn statistics for btree indexes. They are
+ * created only for non-system and non-TOAST indexes, so check only for
+ * such indexes.
+ */
+ remove_statistics =
+ IndexRelationGetNumberOfKeyAttributes(userIndexRelation) > 1 &&
+ userIndexRelation->rd_rel->relam == BTREE_AM_OID &&
+ !IsSystemRelation(userIndexRelation);
+
/*
* Close and flush the index's relcache entry, to ensure relcache doesn't
* try to rebuild it while we're deleting catalog entries. We keep the
@@ -2339,10 +2480,10 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
table_close(indexRelation, RowExclusiveLock);
/*
- * if it has any expression columns, we might have stored statistics about
- * them.
+ * if it has any expression columns or whole index stat, we might have
+ * stored statistics about them.
*/
- if (hasexprs)
+ if (hasexprs || remove_statistics)
RemoveStatistics(indexId, 0);
/*
@@ -2880,6 +3021,14 @@ index_update_stats(Relation rel,
dirty = true;
}
+ /* If index's reltype has been created, update it in pg_class. */
+ if (rel->rd_rel->relkind == RELKIND_INDEX &&
+ rd_rel->reltype != rel->rd_rel->reltype)
+ {
+ rd_rel->reltype = rel->rd_rel->reltype;
+ dirty = true;
+ }
+
if (update_stats)
{
if (rd_rel->relpages != (int32) relpages)
@@ -3706,6 +3855,8 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
*/
CheckTableNotInUse(iRel, "REINDEX INDEX");
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(iRel->rd_islocaltemp);
+
/* Set new tablespace, if requested */
if (set_tablespace)
{
@@ -3748,6 +3899,10 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
/* Create a new physical relation for the index */
RelationSetNewRelfilenumber(iRel, persistence);
+ /* Create a reltype for index if it is needed */
+ if (INDEX_NEEDS_RELTYPE(iRel, indexInfo, iRel->rd_rel->relam))
+ IndexTypeCreate(iRel);
+
/* Initialize the index and rebuild */
/* Note: we do not need to re-establish pkey setting */
index_build(heapRelation, iRel, indexInfo, true, true);
@@ -3840,6 +3995,8 @@ reindex_index(const ReindexStmt *stmt, Oid indexId,
/* Restore userid and security context */
SetUserIdAndSecContext(save_userid, save_sec_context);
+ END_TEMP_TABLE_SCOPE();
+
/* Close rels, but keep locks */
index_close(iRel, NoLock);
table_close(heapRelation, NoLock);
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index d0d1abda58a..82bdae6ab31 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -18,10 +18,12 @@
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
+#include "access/tempcat.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "executor/executor.h"
+#include "utils/inval.h"
#include "utils/rel.h"
@@ -234,6 +236,12 @@ CatalogTupleInsert(Relation heapRel, HeapTuple tup)
{
CatalogIndexState indstate;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_insert(heapRel, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
@@ -256,6 +264,12 @@ void
CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup,
CatalogIndexState indstate)
{
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_insert(heapRel, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
simple_heap_insert(heapRel, tup);
@@ -277,6 +291,14 @@ CatalogTuplesMultiInsertWithInfo(Relation heapRel, TupleTableSlot **slot,
if (ntuples <= 0)
return;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ for (int i = 0; i < ntuples; i++)
+ temp_catalog_insert(heapRel, ExecFetchSlotHeapTuple(slot[i], true, NULL));
+ return;
+ }
+
+
heap_multi_insert(heapRel, slot, ntuples,
GetCurrentCommandId(true), 0, NULL);
@@ -315,6 +337,12 @@ CatalogTupleUpdate(Relation heapRel, ItemPointer otid, HeapTuple tup)
CatalogIndexState indstate;
TU_UpdateIndexes updateIndexes = TU_All;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update(heapRel, otid, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
indstate = CatalogOpenIndexes(heapRel);
@@ -339,6 +367,12 @@ CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup,
{
TU_UpdateIndexes updateIndexes = TU_All;
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_update(heapRel, otid, tup);
+ return;
+ }
+
CatalogTupleCheckConstraints(heapRel, tup);
simple_heap_update(heapRel, otid, tup, &updateIndexes);
@@ -364,5 +398,11 @@ CatalogTupleUpdateWithInfo(Relation heapRel, ItemPointer otid, HeapTuple tup,
void
CatalogTupleDelete(Relation heapRel, ItemPointer tid)
{
+ if (enable_temp_memory_catalog && IsTempTableScope())
+ {
+ temp_catalog_delete(heapRel, tid);
+ return;
+ }
+
simple_heap_delete(heapRel, tid);
}
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 30807f91904..7faf92d5ad2 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -4610,11 +4610,18 @@ RemoveTempRelations(Oid tempNamespaceId)
object.objectId = tempNamespaceId;
object.objectSubId = 0;
+ /*
+ * Don't bother sending invalidation messages when deleting temp relations
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(true);
+
performDeletion(&object, DROP_CASCADE,
PERFORM_DELETION_INTERNAL |
PERFORM_DELETION_QUIETLY |
PERFORM_DELETION_SKIP_ORIGINAL |
PERFORM_DELETION_SKIP_EXTENSIONS);
+
+ END_TEMP_TABLE_SCOPE();
}
/*
diff --git a/src/backend/catalog/pg_namespace.c b/src/backend/catalog/pg_namespace.c
index 3ce6e380091..4a34b43e950 100644
--- a/src/backend/catalog/pg_namespace.c
+++ b/src/backend/catalog/pg_namespace.c
@@ -22,6 +22,7 @@
#include "catalog/objectaccess.h"
#include "catalog/pg_namespace.h"
#include "utils/builtins.h"
+#include "utils/inval.h"
#include "utils/rel.h"
#include "utils/syscache.h"
@@ -69,6 +70,11 @@ NamespaceCreate(const char *nspName, Oid ownerId, bool isTemp)
else
nspacl = NULL;
+ /*
+ * Don't send invalidation messages related to temporary namespaces.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(isTemp);
+
nspdesc = table_open(NamespaceRelationId, RowExclusiveLock);
tupDesc = nspdesc->rd_att;
@@ -116,5 +122,7 @@ NamespaceCreate(const char *nspName, Oid ownerId, bool isTemp)
/* Post creation hook for new schema */
InvokeObjectPostCreateHook(NamespaceRelationId, nspoid, 0);
+ END_TEMP_TABLE_SCOPE();
+
return nspoid;
}
diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c
index a3e554e372e..6c95c60851a 100644
--- a/src/backend/catalog/storage.c
+++ b/src/backend/catalog/storage.c
@@ -19,6 +19,7 @@
#include "postgres.h"
+#include "access/tempcat.h"
#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
@@ -30,6 +31,7 @@
#include "storage/bulk_write.h"
#include "storage/freespace.h"
#include "storage/proc.h"
+#include "storage/rd.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -436,6 +438,13 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
*/
if (need_fsm_vacuum)
FreeSpaceMapVacuumRange(rel, nblocks, InvalidBlockNumber);
+
+ if (enable_temp_rd_buffers
+ && nblocks == 0
+ && rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ rd_reset(RelationGetSmgr(rel));
+ }
}
/*
diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c
index ad3082c62ac..2c9357676d8 100644
--- a/src/backend/catalog/toasting.c
+++ b/src/backend/catalog/toasting.c
@@ -30,6 +30,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "utils/fmgroids.h"
+#include "utils/inval.h"
#include "utils/rel.h"
#include "utils/syscache.h"
@@ -200,6 +201,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
snprintf(toast_idxname, sizeof(toast_idxname),
"pg_toast_%u_index", relOid);
+ /*
+ * Don't send shared invalidation messages for TOAST tables created for temporary
+ * tables, because those TOAST tables cannot be accessed from other sessions anyway.
+ */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/* this is pretty painful... need a tuple descriptor */
tupdesc = CreateTemplateTupleDesc(3);
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
@@ -391,6 +398,8 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
*/
CommandCounterIncrement();
+ END_TEMP_TABLE_SCOPE();
+
return true;
}
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index c590a2adc35..08c180dba01 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -36,8 +36,11 @@
#include "common/pg_prng.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
+#include "funcapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/makefuncs.h"
+#include "nodes/pg_list.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
@@ -49,6 +52,7 @@
#include "utils/attoptcache.h"
#include "utils/datum.h"
#include "utils/guc.h"
+#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
@@ -57,6 +61,7 @@
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
+#include "utils/typcache.h"
/* Per-index data for ANALYZE */
@@ -66,6 +71,7 @@ typedef struct AnlIndexData
double tupleFract; /* fraction of rows for partial index */
VacAttrStats **vacattrstats; /* index attrs to analyze */
int attr_cnt;
+ bool multicolumn; /* Collect compound row statistic for multicolumn index */
} AnlIndexData;
@@ -243,6 +249,8 @@ analyze_rel(Oid relid, RangeVar *relation,
pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
RelationGetRelid(onerel));
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(onerel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Do the normal non-recursive ANALYZE. We can skip this for partitioned
* tables, which don't contain any rows.
@@ -266,6 +274,8 @@ analyze_rel(Oid relid, RangeVar *relation,
*/
relation_close(onerel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
pgstat_progress_end_command();
}
@@ -308,6 +318,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
int64 AnalyzePageDirty = VacuumPageDirty;
PgStat_Counter startreadtime = 0;
PgStat_Counter startwritetime = 0;
+ int rowsAttrPitch;
+ Datum *rowsAttrValues;
+ bool *rowsAttrNulls;
if (inh)
ereport(elevel,
@@ -320,6 +333,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel))));
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(RelationUsesLocalBuffers(onerel));
+
/*
* Set up a working context so that we can easily free whatever junk gets
* created.
@@ -476,6 +491,21 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
}
thisdata->attr_cnt = tcnt;
}
+ else if (indexInfo->ii_NumIndexAttrs > 1 && va_cols == NIL &&
+ Irel[ind]->rd_rel->reltype != InvalidOid)
+ {
+ /* Collect statistics for the multicolumn index to better predict selectivity of multicolumn joins */
+ RowExpr* row = makeNode(RowExpr);
+ row->row_typeid = Irel[ind]->rd_rel->reltype;
+ row->row_format = COERCE_EXPLICIT_CAST;
+ row->location = -1;
+ row->colnames = NULL;
+ thisdata->vacattrstats = (VacAttrStats **)palloc(sizeof(VacAttrStats *));
+ thisdata->vacattrstats[0] = examine_attribute(Irel[ind], 1, (Node*)row);
+ thisdata->vacattrstats[0]->tupDesc = lookup_type_cache(row->row_typeid, TYPECACHE_TUPDESC)->tupDesc;
+ thisdata->attr_cnt = 1;
+ thisdata->multicolumn = true;
+ }
}
}
@@ -528,6 +558,25 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
rows, targrows,
&totalrows, &totaldeadrows);
+
+ /* Pre-deform sampled rows; the size check is done in 64 bits so numrows * natts cannot overflow int */
+ if (va_cols == NIL && (uint64) numrows * onerel->rd_att->natts * sizeof(Datum) <= MaxAllocSize)
+ {
+ rowsAttrPitch = onerel->rd_att->natts;
+ rowsAttrValues = (Datum *) palloc((Size) numrows * rowsAttrPitch * sizeof(Datum));
+ rowsAttrNulls = (bool *) palloc((Size) numrows * rowsAttrPitch * sizeof(bool));
+ for (i = 0; i < numrows; i++)
+ {
+ heap_deform_tuple(rows[i], onerel->rd_att, rowsAttrValues + (Size) i * rowsAttrPitch, rowsAttrNulls + (Size) i * rowsAttrPitch);
+ }
+ }
+ else
+ {
+ rowsAttrPitch = 0;
+ rowsAttrValues = NULL;
+ rowsAttrNulls = NULL;
+ }
+
/*
* Compute the statistics. Temporary results during the calculations for
* each column are stored in a child context. The calc routines are
@@ -553,6 +602,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
AttributeOpts *aopt;
stats->rows = rows;
+ stats->rowsAttrPitch = rowsAttrPitch;
+ stats->rowsAttrValues = rowsAttrValues + (stats->tupattnum - 1);
+ stats->rowsAttrNulls = rowsAttrNulls + (stats->tupattnum - 1);
+
stats->tupDesc = onerel->rd_att;
stats->compute_stats(stats,
std_fetch_func,
@@ -819,6 +872,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
MemoryContextSwitchTo(caller_context);
MemoryContextDelete(anl_context);
anl_context = NULL;
+
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -919,28 +974,41 @@ compute_index_stats(Relation onerel, double totalrows,
values,
isnull);
- /*
- * Save just the columns we care about. We copy the values
- * into ind_context from the estate's per-tuple context.
- */
- for (i = 0; i < attr_cnt; i++)
+ if (thisdata->multicolumn)
{
- VacAttrStats *stats = thisdata->vacattrstats[i];
- int attnum = stats->tupattnum;
-
- if (isnull[attnum - 1])
- {
- exprvals[tcnt] = (Datum) 0;
- exprnulls[tcnt] = true;
- }
- else
+ /* For multicolumn index construct compound value */
+ VacAttrStats *stats = thisdata->vacattrstats[0];
+ exprvals[tcnt] = HeapTupleGetDatum(heap_form_tuple(stats->tupDesc,
+ values,
+ isnull));
+ exprnulls[tcnt] = false;
+ tcnt++;
+ }
+ else
+ {
+ /*
+ * Save just the columns we care about. We copy the values
+ * into ind_context from the estate's per-tuple context.
+ */
+ for (i = 0; i < attr_cnt; i++)
{
- exprvals[tcnt] = datumCopy(values[attnum - 1],
- stats->attrtype->typbyval,
- stats->attrtype->typlen);
- exprnulls[tcnt] = false;
+ VacAttrStats *stats = thisdata->vacattrstats[i];
+ int attnum = stats->tupattnum;
+
+ if (isnull[attnum - 1])
+ {
+ exprvals[tcnt] = (Datum) 0;
+ exprnulls[tcnt] = true;
+ }
+ else
+ {
+ exprvals[tcnt] = datumCopy(values[attnum - 1],
+ stats->attrtype->typbyval,
+ stats->attrtype->typlen);
+ exprnulls[tcnt] = false;
+ }
+ tcnt++;
}
- tcnt++;
}
}
}
@@ -1751,11 +1819,22 @@ update_attstats(Oid relid, bool inh, int natts, VacAttrStats **vacattrstats)
static Datum
std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
{
- int attnum = stats->tupattnum;
- HeapTuple tuple = stats->rows[rownum];
- TupleDesc tupDesc = stats->tupDesc;
+ if (stats->rowsAttrPitch)
+ {
+ /* Pre-deformed sample: row stride is rowsAttrPitch; widen before multiplying */
+ size_t index = (size_t) rownum * stats->rowsAttrPitch;
+ *isNull = stats->rowsAttrNulls[index];
+ return stats->rowsAttrValues[index];
+ }
+ else
+ {
+ int attnum = stats->tupattnum;
+ HeapTuple tuple = stats->rows[rownum];
+ TupleDesc tupDesc = stats->tupDesc;
+
+ return heap_getattr(tuple, attnum, tupDesc, isNull);
+ }
- return heap_getattr(tuple, attnum, tupDesc, isNull);
}
/*
@@ -2696,6 +2775,7 @@ compute_scalar_stats(VacAttrStatsP stats,
* histogram won't collapse to empty or a singleton.)
*/
num_hist = ndistinct - num_mcv;
+
if (num_hist > num_bins)
num_hist = num_bins + 1;
if (num_hist >= 2)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 8086607710e..50791b592d5 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -181,7 +181,7 @@ static SerializeMetrics GetSerializationMetrics(DestReceiver *dest);
*/
void
ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
- ParamListInfo params, DestReceiver *dest)
+ ParamListInfo params, DestReceiver *dest, uint64 *processed)
{
ExplainState *es = NewExplainState();
TupOutputState *tstate;
@@ -192,6 +192,9 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
bool timing_set = false;
bool summary_set = false;
+ if (processed)
+ *processed = 0;
+
/* Parse options list. */
foreach(lc, stmt->options)
{
@@ -363,6 +366,9 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
end_tup_output(tstate);
pfree(es->str->data);
+
+ if (processed)
+ *processed = es->es_processed;
}
/*
@@ -774,6 +780,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
*/
INSTR_TIME_SET_CURRENT(starttime);
+ es->es_processed += queryDesc->estate->es_processed;
+
ExecutorEnd(queryDesc);
FreeQueryDesc(queryDesc);
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 2b64a480e16..cf47f7326ee 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -48,6 +48,7 @@
#include "utils/resowner.h"
#include "utils/syscache.h"
#include "utils/varlena.h"
+#include "utils/inval.h"
/*
@@ -205,6 +206,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
stmt->tablespacename = NULL;
stmt->if_not_exists = seq->if_not_exists;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(stmt->relation->relpersistence == RELPERSISTENCE_TEMP);
+
address = DefineRelation(stmt, RELKIND_SEQUENCE, seq->ownerId, NULL, NULL);
seqoid = address.objectId;
Assert(seqoid != InvalidOid);
@@ -243,6 +246,8 @@ DefineSequence(ParseState *pstate, CreateSeqStmt *seq)
heap_freetuple(tuple);
table_close(rel, RowExclusiveLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
@@ -466,6 +471,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
init_sequence(relid, &elm, &seqrel);
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(seqrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
rel = table_open(SequenceRelationId, RowExclusiveLock);
seqtuple = SearchSysCacheCopy1(SEQRELID,
ObjectIdGetDatum(relid));
@@ -534,6 +541,8 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt)
table_close(rel, RowExclusiveLock);
sequence_close(seqrel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index fb64730a7e1..c2698c0a697 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -980,6 +980,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
accessMethodId = get_table_am_oid(default_table_access_method, false);
}
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(stmt->relation->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Create the relation. Inherited defaults and constraints are passed in
* for immediate handling --- since they don't need parsing, they can be
@@ -1277,6 +1279,8 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
*/
relation_close(rel, NoLock);
+ END_TEMP_TABLE_SCOPE();
+
return address;
}
@@ -1472,6 +1476,7 @@ RemoveRelations(DropStmt *drop)
ListCell *cell;
int flags = 0;
LOCKMODE lockmode = AccessExclusiveLock;
+ bool haveNonTempRelations = false;
/* DROP CONCURRENTLY uses a weaker lock, and has some restrictions */
if (drop->concurrent)
@@ -1615,10 +1620,18 @@ RemoveRelations(DropStmt *drop)
obj.objectSubId = 0;
add_exact_object_address(&obj, objects);
+
+ if (get_rel_persistence(relOid) != RELPERSISTENCE_TEMP)
+ haveNonTempRelations = true;
}
+ /* Don't send invalidation messages if and only if all relations being deleted are temporary */
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(!haveNonTempRelations);
+
performMultipleDeletions(objects, drop->behavior, flags);
+ END_TEMP_TABLE_SCOPE();
+
free_object_addresses(objects);
}
@@ -2121,6 +2134,8 @@ ExecuteTruncateGuts(List *explicit_rels,
continue;
}
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Normally, we need a transaction-safe truncation here. However, if
* the table was either created in the current (sub)transaction or has
@@ -2181,6 +2196,8 @@ ExecuteTruncateGuts(List *explicit_rels,
}
pgstat_count_truncate(rel);
+
+ END_TEMP_TABLE_SCOPE();
}
/* Now go through the hash table, and truncate foreign tables */
@@ -4154,6 +4171,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo
targetrelation = relation_open(myrelid, is_index ? ShareUpdateExclusiveLock : AccessExclusiveLock);
namespaceId = RelationGetNamespace(targetrelation);
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(targetrelation->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
/*
* Find relation's pg_class tuple, and make sure newrelname isn't in use.
*/
@@ -4219,6 +4238,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo
* Close rel, but keep lock!
*/
relation_close(targetrelation, NoLock);
+
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -5236,6 +5257,8 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
ObjectAddress address = InvalidObjectAddress;
Relation rel = tab->rel;
+ BEGIN_TEMP_TABLE_SCOPE_SHARED(rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP);
+
switch (cmd->subtype)
{
case AT_AddColumn: /* ADD COLUMN */
@@ -5537,6 +5560,8 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
break;
}
+ END_TEMP_TABLE_SCOPE();
+
/*
* Report the subcommand to interested event triggers.
*/
diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c
index aee6bbd4f44..505c15646b3 100644
--- a/src/backend/commands/variable.c
+++ b/src/backend/commands/variable.c
@@ -1210,10 +1210,12 @@ check_default_with_oids(bool *newval, void **extra, GucSource source)
if (*newval)
{
/* check the GUC's definition for an explanation */
- GUC_check_errcode(ERRCODE_FEATURE_NOT_SUPPORTED);
- GUC_check_errmsg("tables declared WITH OIDS are not supported");
+ /* warn and force the setting off instead of rejecting it; the hook still returns true */
+ ereport(WARNING,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tables declared WITH OIDS are not supported, ignored")));
- return false;
+ *newval = false;
}
return true;
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 1e3b93a69d8..a664804d657 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -258,12 +258,32 @@ ExecInitQual(List *qual, PlanState *parent)
foreach_ptr(Expr, node, qual)
{
+ ExprEvalStep *lastStep;
/* first evaluate expression */
ExecInitExprRec(node, state, &state->resvalue, &state->resnull);
/* then emit EEOP_QUAL to detect if it's false (or null) */
scratch.d.qualexpr.jumpdone = -1;
+
+ lastStep = &state->steps[state->steps_len-1];
+ if (list_length(qual) == 1 &&
+ (lastStep->opcode == EEOP_BOOL_OR_STEP_LAST ||
+ lastStep->opcode == EEOP_BOOL_AND_STEP_LAST))
+ scratch.d.qualexpr.guaranteed_empty =
+ lastStep->d.boolexpr.guaranteed_empty;
+ else if (list_length(qual) == 1 &&
+ lastStep->opcode == EEOP_SUBPLAN)
+ {
+ scratch.d.qualexpr.guaranteed_empty =
+ lastStep->d.subplan.guaranteed_empty =
+ palloc(sizeof(bool));
+ *scratch.d.qualexpr.guaranteed_empty = false;
+ }
+ else
+ scratch.d.qualexpr.guaranteed_empty = NULL;
+
ExprEvalPushStep(state, &scratch);
+
adjust_jumps = lappend_int(adjust_jumps,
state->steps_len - 1);
}
@@ -1334,8 +1354,15 @@ ExecInitExprRec(Expr *node, ExprState *state,
ListCell *lc;
/* allocate scratch memory used by all steps of AND/OR */
+ scratch.d.boolexpr.guaranteed_empty = NULL;
if (boolexpr->boolop != NOT_EXPR)
+ {
scratch.d.boolexpr.anynull = (bool *) palloc(sizeof(bool));
+ scratch.d.boolexpr.guaranteed_empty = (bool *) palloc(sizeof(bool));
+ scratch.d.boolexpr.count_guaranteed_empty = (int *) palloc(sizeof(int));
+ *scratch.d.boolexpr.guaranteed_empty = false;
+ scratch.d.boolexpr.nargs = nargs;
+ }
/*
* For each argument evaluate the argument itself, then
@@ -1354,10 +1381,16 @@ ExecInitExprRec(Expr *node, ExprState *state,
foreach(lc, boolexpr->args)
{
Expr *arg = (Expr *) lfirst(lc);
+ ExprEvalStep *lastStep;
/* Evaluate argument into our output variable */
ExecInitExprRec(arg, state, resv, resnull);
+ lastStep = &state->steps[state->steps_len-1];
+ if (lastStep->opcode == EEOP_SUBPLAN)
+ lastStep->d.subplan.guaranteed_empty =
+ scratch.d.boolexpr.guaranteed_empty;
+
/* Perform the appropriate step type */
switch (boolexpr->boolop)
{
@@ -1442,6 +1475,7 @@ ExecInitExprRec(Expr *node, ExprState *state,
scratch.opcode = EEOP_SUBPLAN;
scratch.d.subplan.sstate = sstate;
+ scratch.d.subplan.guaranteed_empty = NULL; /* pointer field; no flag attached yet */
ExprEvalPushStep(state, &scratch);
break;
@@ -4067,6 +4101,7 @@ ExecBuildGroupingEqual(TupleDesc ldesc, TupleDesc rdesc,
/* then emit EEOP_QUAL to detect if result is false (or null) */
scratch.opcode = EEOP_QUAL;
+ scratch.d.qualexpr.guaranteed_empty = NULL;
scratch.d.qualexpr.jumpdone = -1;
scratch.resvalue = &state->resvalue;
scratch.resnull = &state->resnull;
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 366975dad68..8afcb5dbe1a 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -820,6 +820,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_JUMP(op->d.boolexpr.jumpdone);
}
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
+
EEO_NEXT();
}
@@ -828,6 +831,8 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
if (*op->resnull)
{
/* result is already set to NULL, need not change it */
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
}
else if (!DatumGetBool(*op->resvalue))
{
@@ -843,10 +848,15 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
{
*op->resvalue = (Datum) 0;
*op->resnull = true;
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
}
else
{
/* result is already set to TRUE, need not change it */
+ /* reset */
+ *op->d.boolexpr.guaranteed_empty = false;
+
}
EEO_NEXT();
@@ -865,6 +875,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP_FIRST)
{
*op->d.boolexpr.anynull = false;
+ *op->d.boolexpr.count_guaranteed_empty = 0;
/*
* EEOP_BOOL_OR_STEP_FIRST resets anynull, otherwise it's the same
@@ -876,6 +887,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP)
{
+ *op->d.boolexpr.count_guaranteed_empty +=
+ (int) (*op->d.boolexpr.guaranteed_empty);
+ *op->d.boolexpr.guaranteed_empty = false;
+
if (*op->resnull)
{
*op->d.boolexpr.anynull = true;
@@ -892,6 +907,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
EEO_CASE(EEOP_BOOL_OR_STEP_LAST)
{
+ *op->d.boolexpr.count_guaranteed_empty +=
+ (int) (*op->d.boolexpr.guaranteed_empty);
+ *op->d.boolexpr.guaranteed_empty = false;
+
if (*op->resnull)
{
/* result is already set to NULL, need not change it */
@@ -913,6 +932,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
}
else
{
+ if (*op->d.boolexpr.count_guaranteed_empty == op->d.boolexpr.nargs)
+ *op->d.boolexpr.guaranteed_empty = true;
+ else
+ *op->d.boolexpr.guaranteed_empty = false;
/* result is already set to FALSE, need not change it */
}
@@ -943,6 +966,9 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
/* ... bail out early, returning FALSE */
*op->resnull = false;
*op->resvalue = BoolGetDatum(false);
+ if (op->d.qualexpr.guaranteed_empty &&
+ op - state->steps == state->steps_len - 2 /* + EEOP_DONE */)
+ state->guaranteed_empty = *op->d.qualexpr.guaranteed_empty;
EEO_JUMP(op->d.qualexpr.jumpdone);
}
@@ -4757,7 +4783,16 @@ ExecEvalSubPlan(ExprState *state, ExprEvalStep *op, ExprContext *econtext)
/* could potentially be nested, so make sure there's enough stack */
check_stack_depth();
- *op->resvalue = ExecSubPlan(sstate, econtext, op->resnull);
+ if (!sstate->guaranteed_empty)
+ *op->resvalue = ExecSubPlan(sstate, econtext, op->resnull);
+ else
+ {
+ *op->resvalue = BoolGetDatum(false);
+ *op->resnull = false;
+ }
+ /* hand the emptiness flag to the enclosing step, if one registered a slot */
+ if (op->opcode == EEOP_SUBPLAN && op->d.subplan.guaranteed_empty && sstate->guaranteed_empty)
+ *op->d.subplan.guaranteed_empty = sstate->guaranteed_empty;
}
/*
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index e35ddd0e898..e9330a7dd87 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -40,6 +40,7 @@
#include "access/sysattr.h"
#include "access/table.h"
#include "access/tableam.h"
+#include "access/tempcat.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
@@ -59,7 +60,7 @@
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
-
+#include "utils/syscache.h"
/* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */
ExecutorStart_hook_type ExecutorStart_hook = NULL;
@@ -611,6 +612,15 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos,
RTEPermissionInfo *perminfo = lfirst_node(RTEPermissionInfo, l);
Assert(OidIsValid(perminfo->relid));
+
+ if (enable_temp_memory_catalog && IsParallelWorker())
+ {
+ HeapTuple htup = SearchSysCache1(RELOID, ObjectIdGetDatum(perminfo->relid));
+ if (!htup)
+ continue;
+ ReleaseSysCache(htup);
+ }
+
result = ExecCheckOneRelPerms(perminfo);
if (!result)
{
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index f52e28c58e9..34f5b5a3cd4 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -185,6 +185,7 @@ ExecScan(ScanState *node,
* storage allocated in the previous tuple cycle.
*/
ResetExprContext(econtext);
+ node->ps.guaranteed_empty = false;
/*
* get a tuple from the access method. Loop until we obtain a tuple that
@@ -243,7 +244,14 @@ ExecScan(ScanState *node,
return slot;
}
}
- else
+ else if (qual && qual->guaranteed_empty)
+ {
+ /* Qual guarantees the absence of results */
+ node->ps.guaranteed_empty = true;
+ ExecClearTuple(slot);
+
+ return slot;
+ } else
InstrCountFiltered1(node, 1);
/*
diff --git a/src/backend/executor/nodeMaterial.c b/src/backend/executor/nodeMaterial.c
index 22e1787fbdd..0b79854b1f3 100644
--- a/src/backend/executor/nodeMaterial.c
+++ b/src/backend/executor/nodeMaterial.c
@@ -135,6 +135,8 @@ ExecMaterial(PlanState *pstate)
if (TupIsNull(outerslot))
{
node->eof_underlying = true;
+ if (tuplestore_tuple_count(tuplestorestate) == 0)
+ node->ss.ps.guaranteed_empty = true;
return NULL;
}
@@ -358,6 +360,9 @@ ExecReScanMaterial(MaterialState *node)
*/
if (outerPlan->chgParam == NULL)
ExecReScan(outerPlan);
+ else
+ node->ss.ps.guaranteed_empty = false;
+
node->eof_underlying = false;
}
}
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index 7f4bf6c4dbb..52e2bea2e3e 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -163,6 +163,11 @@ ExecNestLoop(PlanState *pstate)
{
ENL1_printf("no inner tuple, need new outer tuple");
+ if (innerPlan->guaranteed_empty &&
+ (node->js.jointype == JOIN_INNER ||
+ node->js.jointype == JOIN_SEMI))
+ return NULL;
+
node->nl_NeedNewOuter = true;
if (!node->nl_MatchedOuter &&
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 9697b1f396d..50a84a60b49 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -84,7 +84,11 @@ ExecSubPlan(SubPlanState *node,
/* Select appropriate evaluation strategy */
if (subplan->useHashTable)
+ {
retval = ExecHashSubPlan(node, econtext, isNull);
+ if (node->planstate->guaranteed_empty)
+ node->guaranteed_empty = true;
+ }
else
retval = ExecScanSubPlan(node, econtext, isNull);
@@ -105,6 +109,9 @@ ExecHashSubPlan(SubPlanState *node,
SubPlan *subplan = node->subplan;
PlanState *planstate = node->planstate;
TupleTableSlot *slot;
+ bool hasParam = (planstate->plan->extParam != NULL ||
+ subplan->setParam != NIL ||
+ planstate->chgParam != NULL);
/* Shouldn't have any direct correlation Vars */
if (subplan->parParam != NIL || node->args != NIL)
@@ -122,8 +129,11 @@ ExecHashSubPlan(SubPlanState *node,
* lefthand side.
*/
*isNull = false;
- if (!node->havehashrows && !node->havenullrows)
+ if (!node->havehashrows && !node->havenullrows) {
+ if (hasParam == false)
+ node->planstate->guaranteed_empty = true;
return BoolGetDatum(false);
+ }
/*
* Evaluate lefthand expressions and form a projection tuple. First we
diff --git a/src/backend/lib/rbtree.c b/src/backend/lib/rbtree.c
index 19ae7482321..da7ae550765 100644
--- a/src/backend/lib/rbtree.c
+++ b/src/backend/lib/rbtree.c
@@ -50,6 +50,7 @@ struct RBTree
rbt_combiner combiner;
rbt_allocfunc allocfunc;
rbt_freefunc freefunc;
+ rbt_fixfunc fixfunc;
/* Passthrough arg passed to all manipulation functions */
void *arg;
};
@@ -104,6 +105,7 @@ rbt_create(Size node_size,
rbt_combiner combiner,
rbt_allocfunc allocfunc,
rbt_freefunc freefunc,
+ rbt_fixfunc fixfunc,
void *arg)
{
RBTree *tree = (RBTree *) palloc(sizeof(RBTree));
@@ -116,6 +118,7 @@ rbt_create(Size node_size,
tree->combiner = combiner;
tree->allocfunc = allocfunc;
tree->freefunc = freefunc;
+ tree->fixfunc = fixfunc;
tree->arg = arg;
@@ -127,6 +130,8 @@ static inline void
rbt_copy_data(RBTree *rbt, RBTNode *dest, const RBTNode *src)
{
memcpy(dest + 1, src + 1, rbt->node_size - sizeof(RBTNode));
+ if (rbt->fixfunc)
+ rbt->fixfunc(dest, rbt->arg);
}
/**********************************************************************
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index bdda4fbb918..01354e480d1 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -107,7 +107,7 @@ my @nodetag_only_files = qw(
# ABI stability during development.
my $last_nodetag = 'WindowObjectData';
-my $last_nodetag_no = 474;
+my $last_nodetag_no = 476;
# output file names
my @output_files;
diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index d2e2af4f811..acdab625c4a 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -2932,16 +2932,25 @@ range_table_entry_walker_impl(RangeTblEntry *rte,
Node *
expression_tree_mutator_impl(Node *node,
tree_mutator_callback mutator,
- void *context)
+ void *context, int flags)
{
/*
* The mutator has already decided not to modify the current node, but we
* must call the mutator for any sub-nodes.
*/
-#define FLATCOPY(newnode, node, nodetype) \
- ( (newnode) = (nodetype *) palloc(sizeof(nodetype)), \
- memcpy((newnode), (node), sizeof(nodetype)) )
+#define FLATCOPY(newnode, node, nodetype, flags) \
+ do { \
+ if ((flags) & QTW_DONT_COPY_DEFAULT) \
+ { \
+ (newnode) = (node); \
+ } \
+ else \
+ { \
+ (newnode) = (nodetype *) palloc(sizeof(nodetype)); \
+ memcpy((newnode), (node), sizeof(nodetype)); \
+ } \
+ } while(0)
#define MUTATE(newfield, oldfield, fieldtype) \
( (newfield) = (fieldtype) mutator((Node *) (oldfield), context) )
@@ -2964,7 +2973,7 @@ expression_tree_mutator_impl(Node *node,
Var *var = (Var *) node;
Var *newnode;
- FLATCOPY(newnode, var, Var);
+ FLATCOPY(newnode, var, Var, flags);
/* Assume we need not copy the varnullingrels bitmapset */
return (Node *) newnode;
}
@@ -2974,7 +2983,7 @@ expression_tree_mutator_impl(Node *node,
Const *oldnode = (Const *) node;
Const *newnode;
- FLATCOPY(newnode, oldnode, Const);
+ FLATCOPY(newnode, oldnode, Const, flags);
/* XXX we don't bother with datumCopy; should we? */
return (Node *) newnode;
}
@@ -2997,7 +3006,7 @@ expression_tree_mutator_impl(Node *node,
WithCheckOption *wco = (WithCheckOption *) node;
WithCheckOption *newnode;
- FLATCOPY(newnode, wco, WithCheckOption);
+ FLATCOPY(newnode, wco, WithCheckOption, flags);
MUTATE(newnode->qual, wco->qual, Node *);
return (Node *) newnode;
}
@@ -3006,7 +3015,7 @@ expression_tree_mutator_impl(Node *node,
Aggref *aggref = (Aggref *) node;
Aggref *newnode;
- FLATCOPY(newnode, aggref, Aggref);
+ FLATCOPY(newnode, aggref, Aggref, flags);
/* assume mutation doesn't change types of arguments */
newnode->aggargtypes = list_copy(aggref->aggargtypes);
MUTATE(newnode->aggdirectargs, aggref->aggdirectargs, List *);
@@ -3022,7 +3031,7 @@ expression_tree_mutator_impl(Node *node,
GroupingFunc *grouping = (GroupingFunc *) node;
GroupingFunc *newnode;
- FLATCOPY(newnode, grouping, GroupingFunc);
+ FLATCOPY(newnode, grouping, GroupingFunc, flags);
MUTATE(newnode->args, grouping->args, List *);
/*
@@ -3045,7 +3054,7 @@ expression_tree_mutator_impl(Node *node,
WindowFunc *wfunc = (WindowFunc *) node;
WindowFunc *newnode;
- FLATCOPY(newnode, wfunc, WindowFunc);
+ FLATCOPY(newnode, wfunc, WindowFunc, flags);
MUTATE(newnode->args, wfunc->args, List *);
MUTATE(newnode->aggfilter, wfunc->aggfilter, Expr *);
return (Node *) newnode;
@@ -3056,7 +3065,7 @@ expression_tree_mutator_impl(Node *node,
WindowFuncRunCondition *wfuncrc = (WindowFuncRunCondition *) node;
WindowFuncRunCondition *newnode;
- FLATCOPY(newnode, wfuncrc, WindowFuncRunCondition);
+ FLATCOPY(newnode, wfuncrc, WindowFuncRunCondition, flags);
MUTATE(newnode->arg, wfuncrc->arg, Expr *);
return (Node *) newnode;
}
@@ -3066,7 +3075,7 @@ expression_tree_mutator_impl(Node *node,
SubscriptingRef *sbsref = (SubscriptingRef *) node;
SubscriptingRef *newnode;
- FLATCOPY(newnode, sbsref, SubscriptingRef);
+ FLATCOPY(newnode, sbsref, SubscriptingRef, flags);
MUTATE(newnode->refupperindexpr, sbsref->refupperindexpr,
List *);
MUTATE(newnode->reflowerindexpr, sbsref->reflowerindexpr,
@@ -3084,7 +3093,7 @@ expression_tree_mutator_impl(Node *node,
FuncExpr *expr = (FuncExpr *) node;
FuncExpr *newnode;
- FLATCOPY(newnode, expr, FuncExpr);
+ FLATCOPY(newnode, expr, FuncExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3094,7 +3103,7 @@ expression_tree_mutator_impl(Node *node,
NamedArgExpr *nexpr = (NamedArgExpr *) node;
NamedArgExpr *newnode;
- FLATCOPY(newnode, nexpr, NamedArgExpr);
+ FLATCOPY(newnode, nexpr, NamedArgExpr, flags);
MUTATE(newnode->arg, nexpr->arg, Expr *);
return (Node *) newnode;
}
@@ -3104,7 +3113,7 @@ expression_tree_mutator_impl(Node *node,
OpExpr *expr = (OpExpr *) node;
OpExpr *newnode;
- FLATCOPY(newnode, expr, OpExpr);
+ FLATCOPY(newnode, expr, OpExpr, flags & ~QTW_DONT_COPY_DEFAULT);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3114,7 +3123,7 @@ expression_tree_mutator_impl(Node *node,
DistinctExpr *expr = (DistinctExpr *) node;
DistinctExpr *newnode;
- FLATCOPY(newnode, expr, DistinctExpr);
+ FLATCOPY(newnode, expr, DistinctExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3124,7 +3133,7 @@ expression_tree_mutator_impl(Node *node,
NullIfExpr *expr = (NullIfExpr *) node;
NullIfExpr *newnode;
- FLATCOPY(newnode, expr, NullIfExpr);
+ FLATCOPY(newnode, expr, NullIfExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3134,7 +3143,7 @@ expression_tree_mutator_impl(Node *node,
ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
ScalarArrayOpExpr *newnode;
- FLATCOPY(newnode, expr, ScalarArrayOpExpr);
+ FLATCOPY(newnode, expr, ScalarArrayOpExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3144,7 +3153,7 @@ expression_tree_mutator_impl(Node *node,
BoolExpr *expr = (BoolExpr *) node;
BoolExpr *newnode;
- FLATCOPY(newnode, expr, BoolExpr);
+ FLATCOPY(newnode, expr, BoolExpr, flags);
MUTATE(newnode->args, expr->args, List *);
return (Node *) newnode;
}
@@ -3154,7 +3163,7 @@ expression_tree_mutator_impl(Node *node,
SubLink *sublink = (SubLink *) node;
SubLink *newnode;
- FLATCOPY(newnode, sublink, SubLink);
+ FLATCOPY(newnode, sublink, SubLink, flags);
MUTATE(newnode->testexpr, sublink->testexpr, Node *);
/*
@@ -3170,7 +3179,7 @@ expression_tree_mutator_impl(Node *node,
SubPlan *subplan = (SubPlan *) node;
SubPlan *newnode;
- FLATCOPY(newnode, subplan, SubPlan);
+ FLATCOPY(newnode, subplan, SubPlan, flags);
/* transform testexpr */
MUTATE(newnode->testexpr, subplan->testexpr, Node *);
/* transform args list (params to be passed to subplan) */
@@ -3184,7 +3193,7 @@ expression_tree_mutator_impl(Node *node,
AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
AlternativeSubPlan *newnode;
- FLATCOPY(newnode, asplan, AlternativeSubPlan);
+ FLATCOPY(newnode, asplan, AlternativeSubPlan, flags);
MUTATE(newnode->subplans, asplan->subplans, List *);
return (Node *) newnode;
}
@@ -3194,7 +3203,7 @@ expression_tree_mutator_impl(Node *node,
FieldSelect *fselect = (FieldSelect *) node;
FieldSelect *newnode;
- FLATCOPY(newnode, fselect, FieldSelect);
+ FLATCOPY(newnode, fselect, FieldSelect, flags);
MUTATE(newnode->arg, fselect->arg, Expr *);
return (Node *) newnode;
}
@@ -3204,7 +3213,7 @@ expression_tree_mutator_impl(Node *node,
FieldStore *fstore = (FieldStore *) node;
FieldStore *newnode;
- FLATCOPY(newnode, fstore, FieldStore);
+ FLATCOPY(newnode, fstore, FieldStore, flags);
MUTATE(newnode->arg, fstore->arg, Expr *);
MUTATE(newnode->newvals, fstore->newvals, List *);
newnode->fieldnums = list_copy(fstore->fieldnums);
@@ -3216,7 +3225,7 @@ expression_tree_mutator_impl(Node *node,
RelabelType *relabel = (RelabelType *) node;
RelabelType *newnode;
- FLATCOPY(newnode, relabel, RelabelType);
+ FLATCOPY(newnode, relabel, RelabelType, flags);
MUTATE(newnode->arg, relabel->arg, Expr *);
return (Node *) newnode;
}
@@ -3226,7 +3235,7 @@ expression_tree_mutator_impl(Node *node,
CoerceViaIO *iocoerce = (CoerceViaIO *) node;
CoerceViaIO *newnode;
- FLATCOPY(newnode, iocoerce, CoerceViaIO);
+ FLATCOPY(newnode, iocoerce, CoerceViaIO, flags);
MUTATE(newnode->arg, iocoerce->arg, Expr *);
return (Node *) newnode;
}
@@ -3236,7 +3245,7 @@ expression_tree_mutator_impl(Node *node,
ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
ArrayCoerceExpr *newnode;
- FLATCOPY(newnode, acoerce, ArrayCoerceExpr);
+ FLATCOPY(newnode, acoerce, ArrayCoerceExpr, flags);
MUTATE(newnode->arg, acoerce->arg, Expr *);
MUTATE(newnode->elemexpr, acoerce->elemexpr, Expr *);
return (Node *) newnode;
@@ -3247,7 +3256,7 @@ expression_tree_mutator_impl(Node *node,
ConvertRowtypeExpr *convexpr = (ConvertRowtypeExpr *) node;
ConvertRowtypeExpr *newnode;
- FLATCOPY(newnode, convexpr, ConvertRowtypeExpr);
+ FLATCOPY(newnode, convexpr, ConvertRowtypeExpr, flags);
MUTATE(newnode->arg, convexpr->arg, Expr *);
return (Node *) newnode;
}
@@ -3257,7 +3266,7 @@ expression_tree_mutator_impl(Node *node,
CollateExpr *collate = (CollateExpr *) node;
CollateExpr *newnode;
- FLATCOPY(newnode, collate, CollateExpr);
+ FLATCOPY(newnode, collate, CollateExpr, flags);
MUTATE(newnode->arg, collate->arg, Expr *);
return (Node *) newnode;
}
@@ -3267,7 +3276,7 @@ expression_tree_mutator_impl(Node *node,
CaseExpr *caseexpr = (CaseExpr *) node;
CaseExpr *newnode;
- FLATCOPY(newnode, caseexpr, CaseExpr);
+ FLATCOPY(newnode, caseexpr, CaseExpr, flags);
MUTATE(newnode->arg, caseexpr->arg, Expr *);
MUTATE(newnode->args, caseexpr->args, List *);
MUTATE(newnode->defresult, caseexpr->defresult, Expr *);
@@ -3279,7 +3288,7 @@ expression_tree_mutator_impl(Node *node,
CaseWhen *casewhen = (CaseWhen *) node;
CaseWhen *newnode;
- FLATCOPY(newnode, casewhen, CaseWhen);
+ FLATCOPY(newnode, casewhen, CaseWhen, flags);
MUTATE(newnode->expr, casewhen->expr, Expr *);
MUTATE(newnode->result, casewhen->result, Expr *);
return (Node *) newnode;
@@ -3290,7 +3299,7 @@ expression_tree_mutator_impl(Node *node,
ArrayExpr *arrayexpr = (ArrayExpr *) node;
ArrayExpr *newnode;
- FLATCOPY(newnode, arrayexpr, ArrayExpr);
+ FLATCOPY(newnode, arrayexpr, ArrayExpr, flags);
MUTATE(newnode->elements, arrayexpr->elements, List *);
return (Node *) newnode;
}
@@ -3300,7 +3309,7 @@ expression_tree_mutator_impl(Node *node,
RowExpr *rowexpr = (RowExpr *) node;
RowExpr *newnode;
- FLATCOPY(newnode, rowexpr, RowExpr);
+ FLATCOPY(newnode, rowexpr, RowExpr, flags);
MUTATE(newnode->args, rowexpr->args, List *);
/* Assume colnames needn't be duplicated */
return (Node *) newnode;
@@ -3311,7 +3320,7 @@ expression_tree_mutator_impl(Node *node,
RowCompareExpr *rcexpr = (RowCompareExpr *) node;
RowCompareExpr *newnode;
- FLATCOPY(newnode, rcexpr, RowCompareExpr);
+ FLATCOPY(newnode, rcexpr, RowCompareExpr, flags);
MUTATE(newnode->largs, rcexpr->largs, List *);
MUTATE(newnode->rargs, rcexpr->rargs, List *);
return (Node *) newnode;
@@ -3322,7 +3331,7 @@ expression_tree_mutator_impl(Node *node,
CoalesceExpr *coalesceexpr = (CoalesceExpr *) node;
CoalesceExpr *newnode;
- FLATCOPY(newnode, coalesceexpr, CoalesceExpr);
+ FLATCOPY(newnode, coalesceexpr, CoalesceExpr, flags);
MUTATE(newnode->args, coalesceexpr->args, List *);
return (Node *) newnode;
}
@@ -3332,7 +3341,7 @@ expression_tree_mutator_impl(Node *node,
MinMaxExpr *minmaxexpr = (MinMaxExpr *) node;
MinMaxExpr *newnode;
- FLATCOPY(newnode, minmaxexpr, MinMaxExpr);
+ FLATCOPY(newnode, minmaxexpr, MinMaxExpr, flags);
MUTATE(newnode->args, minmaxexpr->args, List *);
return (Node *) newnode;
}
@@ -3342,7 +3351,7 @@ expression_tree_mutator_impl(Node *node,
XmlExpr *xexpr = (XmlExpr *) node;
XmlExpr *newnode;
- FLATCOPY(newnode, xexpr, XmlExpr);
+ FLATCOPY(newnode, xexpr, XmlExpr, flags);
MUTATE(newnode->named_args, xexpr->named_args, List *);
/* assume mutator does not care about arg_names */
MUTATE(newnode->args, xexpr->args, List *);
@@ -3354,7 +3363,7 @@ expression_tree_mutator_impl(Node *node,
JsonReturning *jr = (JsonReturning *) node;
JsonReturning *newnode;
- FLATCOPY(newnode, jr, JsonReturning);
+ FLATCOPY(newnode, jr, JsonReturning, flags);
MUTATE(newnode->format, jr->format, JsonFormat *);
return (Node *) newnode;
@@ -3364,7 +3373,7 @@ expression_tree_mutator_impl(Node *node,
JsonValueExpr *jve = (JsonValueExpr *) node;
JsonValueExpr *newnode;
- FLATCOPY(newnode, jve, JsonValueExpr);
+ FLATCOPY(newnode, jve, JsonValueExpr, flags);
MUTATE(newnode->raw_expr, jve->raw_expr, Expr *);
MUTATE(newnode->formatted_expr, jve->formatted_expr, Expr *);
MUTATE(newnode->format, jve->format, JsonFormat *);
@@ -3376,7 +3385,7 @@ expression_tree_mutator_impl(Node *node,
JsonConstructorExpr *jce = (JsonConstructorExpr *) node;
JsonConstructorExpr *newnode;
- FLATCOPY(newnode, jce, JsonConstructorExpr);
+ FLATCOPY(newnode, jce, JsonConstructorExpr, flags);
MUTATE(newnode->args, jce->args, List *);
MUTATE(newnode->func, jce->func, Expr *);
MUTATE(newnode->coercion, jce->coercion, Expr *);
@@ -3389,7 +3398,7 @@ expression_tree_mutator_impl(Node *node,
JsonIsPredicate *pred = (JsonIsPredicate *) node;
JsonIsPredicate *newnode;
- FLATCOPY(newnode, pred, JsonIsPredicate);
+ FLATCOPY(newnode, pred, JsonIsPredicate, flags);
MUTATE(newnode->expr, pred->expr, Node *);
MUTATE(newnode->format, pred->format, JsonFormat *);
@@ -3400,7 +3409,7 @@ expression_tree_mutator_impl(Node *node,
JsonExpr *jexpr = (JsonExpr *) node;
JsonExpr *newnode;
- FLATCOPY(newnode, jexpr, JsonExpr);
+ FLATCOPY(newnode, jexpr, JsonExpr, flags);
MUTATE(newnode->formatted_expr, jexpr->formatted_expr, Node *);
MUTATE(newnode->path_spec, jexpr->path_spec, Node *);
MUTATE(newnode->passing_values, jexpr->passing_values, List *);
@@ -3415,7 +3424,7 @@ expression_tree_mutator_impl(Node *node,
JsonBehavior *behavior = (JsonBehavior *) node;
JsonBehavior *newnode;
- FLATCOPY(newnode, behavior, JsonBehavior);
+ FLATCOPY(newnode, behavior, JsonBehavior, flags);
MUTATE(newnode->expr, behavior->expr, Node *);
return (Node *) newnode;
}
@@ -3425,7 +3434,7 @@ expression_tree_mutator_impl(Node *node,
NullTest *ntest = (NullTest *) node;
NullTest *newnode;
- FLATCOPY(newnode, ntest, NullTest);
+ FLATCOPY(newnode, ntest, NullTest, flags);
MUTATE(newnode->arg, ntest->arg, Expr *);
return (Node *) newnode;
}
@@ -3435,7 +3444,7 @@ expression_tree_mutator_impl(Node *node,
BooleanTest *btest = (BooleanTest *) node;
BooleanTest *newnode;
- FLATCOPY(newnode, btest, BooleanTest);
+ FLATCOPY(newnode, btest, BooleanTest, flags);
MUTATE(newnode->arg, btest->arg, Expr *);
return (Node *) newnode;
}
@@ -3445,7 +3454,7 @@ expression_tree_mutator_impl(Node *node,
CoerceToDomain *ctest = (CoerceToDomain *) node;
CoerceToDomain *newnode;
- FLATCOPY(newnode, ctest, CoerceToDomain);
+ FLATCOPY(newnode, ctest, CoerceToDomain, flags);
MUTATE(newnode->arg, ctest->arg, Expr *);
return (Node *) newnode;
}
@@ -3455,7 +3464,7 @@ expression_tree_mutator_impl(Node *node,
TargetEntry *targetentry = (TargetEntry *) node;
TargetEntry *newnode;
- FLATCOPY(newnode, targetentry, TargetEntry);
+ FLATCOPY(newnode, targetentry, TargetEntry, flags & ~QTW_DONT_COPY_DEFAULT);
MUTATE(newnode->expr, targetentry->expr, Expr *);
return (Node *) newnode;
}
@@ -3468,7 +3477,7 @@ expression_tree_mutator_impl(Node *node,
WindowClause *wc = (WindowClause *) node;
WindowClause *newnode;
- FLATCOPY(newnode, wc, WindowClause);
+ FLATCOPY(newnode, wc, WindowClause, flags);
MUTATE(newnode->partitionClause, wc->partitionClause, List *);
MUTATE(newnode->orderClause, wc->orderClause, List *);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
@@ -3481,7 +3490,7 @@ expression_tree_mutator_impl(Node *node,
CTECycleClause *cc = (CTECycleClause *) node;
CTECycleClause *newnode;
- FLATCOPY(newnode, cc, CTECycleClause);
+ FLATCOPY(newnode, cc, CTECycleClause, flags);
MUTATE(newnode->cycle_mark_value, cc->cycle_mark_value, Node *);
MUTATE(newnode->cycle_mark_default, cc->cycle_mark_default, Node *);
return (Node *) newnode;
@@ -3492,7 +3501,7 @@ expression_tree_mutator_impl(Node *node,
CommonTableExpr *cte = (CommonTableExpr *) node;
CommonTableExpr *newnode;
- FLATCOPY(newnode, cte, CommonTableExpr);
+ FLATCOPY(newnode, cte, CommonTableExpr, flags);
/*
* Also invoke the mutator on the CTE's Query node, so it can
@@ -3511,7 +3520,7 @@ expression_tree_mutator_impl(Node *node,
PartitionBoundSpec *pbs = (PartitionBoundSpec *) node;
PartitionBoundSpec *newnode;
- FLATCOPY(newnode, pbs, PartitionBoundSpec);
+ FLATCOPY(newnode, pbs, PartitionBoundSpec, flags);
MUTATE(newnode->listdatums, pbs->listdatums, List *);
MUTATE(newnode->lowerdatums, pbs->lowerdatums, List *);
MUTATE(newnode->upperdatums, pbs->upperdatums, List *);
@@ -3523,7 +3532,7 @@ expression_tree_mutator_impl(Node *node,
PartitionRangeDatum *prd = (PartitionRangeDatum *) node;
PartitionRangeDatum *newnode;
- FLATCOPY(newnode, prd, PartitionRangeDatum);
+ FLATCOPY(newnode, prd, PartitionRangeDatum, flags);
MUTATE(newnode->value, prd->value, Node *);
return (Node *) newnode;
}
@@ -3553,7 +3562,7 @@ expression_tree_mutator_impl(Node *node,
FromExpr *from = (FromExpr *) node;
FromExpr *newnode;
- FLATCOPY(newnode, from, FromExpr);
+ FLATCOPY(newnode, from, FromExpr, flags);
MUTATE(newnode->fromlist, from->fromlist, List *);
MUTATE(newnode->quals, from->quals, Node *);
return (Node *) newnode;
@@ -3564,7 +3573,7 @@ expression_tree_mutator_impl(Node *node,
OnConflictExpr *oc = (OnConflictExpr *) node;
OnConflictExpr *newnode;
- FLATCOPY(newnode, oc, OnConflictExpr);
+ FLATCOPY(newnode, oc, OnConflictExpr, flags);
MUTATE(newnode->arbiterElems, oc->arbiterElems, List *);
MUTATE(newnode->arbiterWhere, oc->arbiterWhere, Node *);
MUTATE(newnode->onConflictSet, oc->onConflictSet, List *);
@@ -3579,7 +3588,7 @@ expression_tree_mutator_impl(Node *node,
MergeAction *action = (MergeAction *) node;
MergeAction *newnode;
- FLATCOPY(newnode, action, MergeAction);
+ FLATCOPY(newnode, action, MergeAction, flags);
MUTATE(newnode->qual, action->qual, Node *);
MUTATE(newnode->targetList, action->targetList, List *);
@@ -3591,7 +3600,7 @@ expression_tree_mutator_impl(Node *node,
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
PartitionPruneStepOp *newnode;
- FLATCOPY(newnode, opstep, PartitionPruneStepOp);
+ FLATCOPY(newnode, opstep, PartitionPruneStepOp, flags);
MUTATE(newnode->exprs, opstep->exprs, List *);
return (Node *) newnode;
@@ -3605,7 +3614,7 @@ expression_tree_mutator_impl(Node *node,
JoinExpr *join = (JoinExpr *) node;
JoinExpr *newnode;
- FLATCOPY(newnode, join, JoinExpr);
+ FLATCOPY(newnode, join, JoinExpr, flags);
MUTATE(newnode->larg, join->larg, Node *);
MUTATE(newnode->rarg, join->rarg, Node *);
MUTATE(newnode->quals, join->quals, Node *);
@@ -3618,7 +3627,7 @@ expression_tree_mutator_impl(Node *node,
SetOperationStmt *setop = (SetOperationStmt *) node;
SetOperationStmt *newnode;
- FLATCOPY(newnode, setop, SetOperationStmt);
+ FLATCOPY(newnode, setop, SetOperationStmt, flags);
MUTATE(newnode->larg, setop->larg, Node *);
MUTATE(newnode->rarg, setop->rarg, Node *);
/* We do not mutate groupClauses by default */
@@ -3630,7 +3639,7 @@ expression_tree_mutator_impl(Node *node,
IndexClause *iclause = (IndexClause *) node;
IndexClause *newnode;
- FLATCOPY(newnode, iclause, IndexClause);
+ FLATCOPY(newnode, iclause, IndexClause, flags);
MUTATE(newnode->rinfo, iclause->rinfo, RestrictInfo *);
MUTATE(newnode->indexquals, iclause->indexquals, List *);
return (Node *) newnode;
@@ -3641,7 +3650,7 @@ expression_tree_mutator_impl(Node *node,
PlaceHolderVar *phv = (PlaceHolderVar *) node;
PlaceHolderVar *newnode;
- FLATCOPY(newnode, phv, PlaceHolderVar);
+ FLATCOPY(newnode, phv, PlaceHolderVar, flags);
MUTATE(newnode->phexpr, phv->phexpr, Expr *);
/* Assume we need not copy the relids bitmapsets */
return (Node *) newnode;
@@ -3652,7 +3661,7 @@ expression_tree_mutator_impl(Node *node,
InferenceElem *inferenceelemdexpr = (InferenceElem *) node;
InferenceElem *newnode;
- FLATCOPY(newnode, inferenceelemdexpr, InferenceElem);
+ FLATCOPY(newnode, inferenceelemdexpr, InferenceElem, flags);
MUTATE(newnode->expr, newnode->expr, Node *);
return (Node *) newnode;
}
@@ -3662,7 +3671,7 @@ expression_tree_mutator_impl(Node *node,
AppendRelInfo *appinfo = (AppendRelInfo *) node;
AppendRelInfo *newnode;
- FLATCOPY(newnode, appinfo, AppendRelInfo);
+ FLATCOPY(newnode, appinfo, AppendRelInfo, flags);
MUTATE(newnode->translated_vars, appinfo->translated_vars, List *);
/* Assume nothing need be done with parent_colnos[] */
return (Node *) newnode;
@@ -3673,7 +3682,7 @@ expression_tree_mutator_impl(Node *node,
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) node;
PlaceHolderInfo *newnode;
- FLATCOPY(newnode, phinfo, PlaceHolderInfo);
+ FLATCOPY(newnode, phinfo, PlaceHolderInfo, flags);
MUTATE(newnode->ph_var, phinfo->ph_var, PlaceHolderVar *);
/* Assume we need not copy the relids bitmapsets */
return (Node *) newnode;
@@ -3684,7 +3693,7 @@ expression_tree_mutator_impl(Node *node,
RangeTblFunction *rtfunc = (RangeTblFunction *) node;
RangeTblFunction *newnode;
- FLATCOPY(newnode, rtfunc, RangeTblFunction);
+ FLATCOPY(newnode, rtfunc, RangeTblFunction, flags);
MUTATE(newnode->funcexpr, rtfunc->funcexpr, Node *);
/* Assume we need not copy the coldef info lists */
return (Node *) newnode;
@@ -3695,7 +3704,7 @@ expression_tree_mutator_impl(Node *node,
TableSampleClause *tsc = (TableSampleClause *) node;
TableSampleClause *newnode;
- FLATCOPY(newnode, tsc, TableSampleClause);
+ FLATCOPY(newnode, tsc, TableSampleClause, flags);
MUTATE(newnode->args, tsc->args, List *);
MUTATE(newnode->repeatable, tsc->repeatable, Expr *);
return (Node *) newnode;
@@ -3706,7 +3715,7 @@ expression_tree_mutator_impl(Node *node,
TableFunc *tf = (TableFunc *) node;
TableFunc *newnode;
- FLATCOPY(newnode, tf, TableFunc);
+ FLATCOPY(newnode, tf, TableFunc, flags);
MUTATE(newnode->ns_uris, tf->ns_uris, List *);
MUTATE(newnode->docexpr, tf->docexpr, Node *);
MUTATE(newnode->rowexpr, tf->rowexpr, Node *);
@@ -3758,7 +3767,7 @@ query_tree_mutator_impl(Query *query,
{
Query *newquery;
- FLATCOPY(newquery, query, Query);
+ FLATCOPY(newquery, query, Query, flags);
query = newquery;
}
@@ -3802,7 +3811,7 @@ query_tree_mutator_impl(Query *query,
WindowClause *wc = lfirst_node(WindowClause, temp);
WindowClause *newnode;
- FLATCOPY(newnode, wc, WindowClause);
+ FLATCOPY(newnode, wc, WindowClause, flags);
MUTATE(newnode->startOffset, wc->startOffset, Node *);
MUTATE(newnode->endOffset, wc->endOffset, Node *);
@@ -3851,7 +3860,7 @@ range_table_mutator_impl(List *rtable,
RangeTblEntry *rte = (RangeTblEntry *) lfirst(rt);
RangeTblEntry *newrte;
- FLATCOPY(newrte, rte, RangeTblEntry);
+ FLATCOPY(newrte, rte, RangeTblEntry, flags);
switch (rte->rtekind)
{
case RTE_RELATION:
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 1e199ff66f7..06dd07f3270 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -14,6 +14,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = \
allpaths.o \
+ appendorpath.o \
clausesel.o \
costsize.o \
equivclass.o \
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 4895cee9944..e5014dd4690 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -782,6 +782,9 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/* Consider index scans */
create_index_paths(root, rel);
+	/* Consider index scans with rewritten quals */
+ keybased_rewrite_index_paths(root, rel);
+
/* Consider TID scans */
create_tidscan_paths(root, rel);
}
diff --git a/src/backend/optimizer/path/appendorpath.c b/src/backend/optimizer/path/appendorpath.c
new file mode 100644
index 00000000000..b703f960e33
--- /dev/null
+++ b/src/backend/optimizer/path/appendorpath.c
@@ -0,0 +1,1049 @@
+/*
+ * support Append plan for ORed clauses
+ * Teodor Sigaev <teodor@sigaev.ru>
+ */
+#include "postgres.h"
+
+#include "access/skey.h"
+#include "catalog/pg_am.h"
+#include "optimizer/cost.h"
+#include "optimizer/clauses.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/paths.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
+#include "utils/lsyscache.h"
+
+typedef struct CKey {
+ RestrictInfo *rinfo; /* original rinfo */
+ int n; /* IndexPath's number in bitmapquals */
+ OpExpr *normalizedexpr; /* expression with Var on left */
+ Var *var; /* index-key operand, with varattno reset to varattnosyn */
+ Node *value; /* second argument of normalizedexpr */
+ Oid opfamily; /* operator family of the matched index column */
+ int strategy; /* strategy number of the operator within opfamily */
+ uint8 strategyMask; /* BTMASK() bits of all strategies merged into this key */
+} CKey;
+#define BTMASK(x) ( 1<<(x) ) /* bit for strategy number x in strategyMask */
+
+static List* find_common_quals( BitmapOrPath *path );
+static RestrictInfo* unionOperation(PlannerInfo *root, CKey *key);
+static BitmapOrPath* cleanup_nested_quals( PlannerInfo *root, RelOptInfo *rel, BitmapOrPath *path );
+static List* sortIndexScans( List* ipaths );
+static List* reverseScanDirIdxPaths(List *indexPaths);
+static IndexPath* reverseScanDirIdxPath(IndexPath *ipath);
+static bool checkSameIndex(Path *path, Oid *indexoid);
+
+#define IS_LESS(a) ( (a) == BTLessStrategyNumber || (a)== BTLessEqualStrategyNumber ) /* "<" or "<=" */
+#define IS_GREATER(a) ( (a) == BTGreaterStrategyNumber || (a) == BTGreaterEqualStrategyNumber ) /* ">" or ">=" */
+#define IS_ONE_DIRECTION(a,b) ( /* both strategies "less" or both "greater" */ \
+ ( IS_LESS(a) && IS_LESS(b) ) \
+ || \
+ ( IS_GREATER(a) && IS_GREATER(b) ) \
+)
+
+typedef struct ExExpr {
+ OpExpr *expr; /* private copy of the qual, index key on the left */
+ Oid opfamily; /* opfamily of index column attno */
+ Oid lefttype; /* operator's left input type within opfamily */
+ Oid righttype; /* operator's right input type within opfamily */
+ int strategy; /* operator's strategy number within opfamily */
+ int attno; /* 1-based index column number the qual applies to */
+} ExExpr;
+
+
+typedef struct IndexPathEx {
+ IndexPath *path; /* index scan being ordered by sortIndexScans() */
+ List *preparedquals; /* list of ExExpr */
+} IndexPathEx;
+
+/* Flatten a list of IndexClause into the list of their indexqual expressions. */
+static List *
+clauses_get_exprs(List *listIndexClause)
+{
+	List	   *result = NIL;
+	ListCell   *iclauseCell;
+
+	foreach(iclauseCell, listIndexClause)
+	{
+		IndexClause *iclause = (IndexClause *) lfirst(iclauseCell);
+		ListCell   *qualCell;
+
+		/* each indexquals entry is treated as a RestrictInfo; keep its clause */
+		foreach(qualCell, iclause->indexquals)
+			result = lappend(result,
+							 ((RestrictInfo *) lfirst(qualCell))->clause);
+	}
+
+	return result;
+}
+
+
+/*----------
+ * keybased_rewrite_index_paths
+ *	  Examine OR-of-AND restriction quals to see if any useful common
+ *	  clauses can be extracted. If so, try to use them to create new index paths.
+ *
+ * For example consider
+ *		WHERE ( a.x=5 and a.y>10 ) OR a.x>5
+ *	and there is an index on a.x or (a.x, a.y). So, the plan
+ *	will be a seqscan or BitmapOr(IndexPath,IndexPath).
+ *	So, we can add a restriction:
+ *		WHERE (( a.x=5 and a.y>10 ) OR a.x>5) AND a.x>=5
+ *	and the plan may become
+ *		Index Scan (a.x>=5)
+ *			Filter( (( a.x=5 and a.y>10 ) OR a.x>5) )
+ *
+ * We don't want to add new clauses to baserestrictinfo, we just
+ * use them as index quals.
+ *
+ * The next thing that can be tried is an Append of
+ * index searches instead of the OR.
+ * For example consider
+ *		WHERE ( a.x=5 and a.y>10 ) OR a.x>6
+ *	and there is an index on (a.x) or (a.x, a.y).
+ *	So, we can suggest the following plan:
+ *		Append
+ *			Filter ( a.x=5 and a.y>10 ) OR (a.x>6)
+ *				Index Scan (a.x=5)	--in case of index on (a.x)
+ *				Index Scan (a.x>6)
+ * For that we must prove that the index quals do not overlap;
+ * also, some index quals may be contained in others, so they can be eliminated.
+ */
+
+void
+keybased_rewrite_index_paths(PlannerInfo *root, RelOptInfo *rel)
+{
+ BitmapOrPath *bestpath = NULL;
+ ListCell *i;
+ List *commonquals;
+ AppendPath *appendidxpath;
+ List *indexPaths;
+ IndexOptInfo *index;
+
+ foreach(i, rel->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(i);
+
+ if (restriction_is_or_clause(rinfo))
+ {
+ /*
+ * Use the generate_bitmap_or_paths() machinery to estimate the
+ * value of each OR clause. We can use regular restriction
+ * clauses along with the OR clause contents to generate
+ * indexquals: the whole baserestrictinfo list is passed as
+ * the last argument for that purpose.
+ */
+ List *orpaths;
+ ListCell *k;
+
+ orpaths = generate_bitmap_or_paths(root, rel,
+ list_make1(rinfo),
+ rel->baserestrictinfo);
+
+ /* Locate the cheapest OR path whose scans all use one btree index */
+ foreach(k, orpaths)
+ {
+ BitmapOrPath *path = (BitmapOrPath *) lfirst(k);
+ Oid indexoid = InvalidOid;
+
+ Assert(IsA(path, BitmapOrPath));
+
+ if (checkSameIndex((Path*)path, &indexoid) == false)
+ continue;
+
+ if (bestpath == NULL ||
+ path->path.total_cost < bestpath->path.total_cost)
+ {
+ bestpath = path;
+ }
+ }
+ }
+ }
+
+ /* Fail if no suitable clauses found */
+ if (bestpath == NULL)
+ return;
+
+ commonquals = find_common_quals(bestpath);
+ /* Found quals with the same arguments but possibly different
+ operators */
+ if ( commonquals != NULL ) {
+ List *addon=NIL;
+
+ foreach(i, commonquals) {
+ CKey *key = (CKey*)lfirst(i);
+ RestrictInfo *rinfo;
+
+ /*
+ * get 'union' of the operations for this key
+ */
+ rinfo = unionOperation(root, key);
+ if ( rinfo )
+ addon = lappend(addon, rinfo);
+ }
+
+ /*
+ * OK, we found common quals and unioned them; temporarily append them
+ * to baserestrictinfo so create_index_paths() can build paths with them
+ */
+ if ( addon ) {
+ List *origbaserestrictinfo = list_copy(rel->baserestrictinfo);
+
+ rel->baserestrictinfo = list_concat(rel->baserestrictinfo, addon);
+
+ create_index_paths(root, rel);
+
+ rel->baserestrictinfo = origbaserestrictinfo;
+ }
+ }
+
+ /*
+ * Drop nested quals, then require that the remaining index quals do
+ * not overlap and that all index scans are on the same index.
+ */
+ if ( (bestpath = cleanup_nested_quals( root, rel, bestpath )) == NULL )
+ return;
+
+ if (IsA(bestpath, IndexPath)) {
+ IndexPath *ipath = (IndexPath*)bestpath;
+
+ /*
+ * It's possible to do only one index scan :)
+ */
+ index = ipath->indexinfo;
+
+ if ( root->query_pathkeys != NIL && index->sortopfamily && OidIsValid(index->sortopfamily[0]) )
+ {
+ List *pathkeys;
+
+ pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ ipath->path.pathkeys = pathkeys;
+ add_path(rel, (Path *) ipath);
+
+ /*
+ * also add a path ordered in backward direction if the forward
+ * pathkeys are not useful for ordering...
+ */
+ if ( pathkeys == NULL || pathkeys_useful_for_ordering(root, pathkeys) == 0 )
+ {
+ pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ ipath = reverseScanDirIdxPath( ipath );
+
+ ipath->path.pathkeys = pathkeys;
+ add_path(rel, (Path *) ipath);
+ }
+ } else
+ add_path(rel, (Path *) ipath);
+ return;
+ }
+
+ /* re-estimate rows and cost of every remaining index scan */
+ foreach(i, bestpath->bitmapquals ) {
+ IndexPath *ipath = (IndexPath*)lfirst(i);
+
+ Assert( IsA(ipath, IndexPath) );
+ ipath->path.rows = rel->tuples * clauselist_selectivity(root,
+ clauses_get_exprs(ipath->indexclauses),
+ rel->relid,
+ JOIN_INNER,
+ NULL);
+ ipath->path.rows = clamp_row_est(ipath->path.rows);
+ cost_index(ipath, root, 1, false);
+ }
+
+ /*
+ * Check if the append of index scans can provide an ordering of the result
+ *
+ * Also, AppendPath must be told about the targetlist:
+ * the target list will be taken from the index scans
+ */
+ index = ((IndexPath*)linitial(bestpath->bitmapquals))->indexinfo;
+ if ( root->query_pathkeys != NIL && index->sortopfamily && OidIsValid(index->sortopfamily[0]) &&
+ (indexPaths = sortIndexScans( bestpath->bitmapquals )) !=NULL ) {
+ List *pathkeys;
+
+ pathkeys = build_index_pathkeys(root, index,
+ ForwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ appendidxpath = create_append_path_ext(root, rel, indexPaths, NIL, pathkeys, NULL, 0,
+ false, -1.0, true);
+ add_path(rel, (Path *) appendidxpath);
+
+ /*
+ * also add a path ordered in backward direction if the forward
+ * pathkeys are not useful for ordering...
+ */
+ if ( pathkeys == NULL || pathkeys_useful_for_ordering(root, pathkeys) == 0 ) {
+
+ pathkeys = build_index_pathkeys(root, index,
+ BackwardScanDirection);
+ pathkeys = truncate_useless_pathkeys(root, rel,
+ pathkeys);
+
+ indexPaths = reverseScanDirIdxPaths(indexPaths);
+ appendidxpath = create_append_path_ext(root, rel, indexPaths, NIL,
+ pathkeys, NULL,
+ 0, false, -1.0,
+ true);
+ add_path(rel, (Path *) appendidxpath);
+ }
+ } else {
+ appendidxpath = create_append_path_ext(root, rel, bestpath->bitmapquals,
+ NIL, NIL, NULL,
+ 0, false, -1.0, true);
+ add_path(rel, (Path *) appendidxpath);
+ }
+}
+
+/*
+ * checkSameIndex
+ *	  Walk "path" through any BitmapOr/BitmapAnd nodes and report whether
+ *	  every IndexPath underneath scans one and the same btree index.
+ *
+ * *indexoid carries the index seen so far: pass InvalidOid initially; it is
+ * set from the first IndexPath encountered.  Any other path type raises an
+ * error.
+ */
+static bool
+checkSameIndex(Path *path, Oid *indexoid)
+{
+	List	   *children = NIL;
+	ListCell   *lc;
+
+	switch (nodeTag(path))
+	{
+		case T_IndexPath:
+			{
+				IndexOptInfo *info = ((IndexPath *) path)->indexinfo;
+
+				/* only btree indexes are acceptable */
+				if (info->relam != BTREE_AM_OID)
+					return false;
+				/* remember the first index, compare all later ones to it */
+				if (*indexoid == InvalidOid)
+					*indexoid = info->indexoid;
+
+				return *indexoid == info->indexoid;
+			}
+
+		case T_BitmapOrPath:
+			children = ((BitmapOrPath *) path)->bitmapquals;
+			break;
+
+		case T_BitmapAndPath:
+			children = ((BitmapAndPath *) path)->bitmapquals;
+			break;
+
+		default:
+			elog(ERROR, "unexpected path type: %d", nodeTag(path));
+	}
+
+	Assert(list_length(children) > 0);
+
+	foreach(lc, children)
+	{
+		if (!checkSameIndex((Path *) lfirst(lc), indexoid))
+			return false;
+	}
+	return true;
+}
+
+/*
+ * transformToCkey - transform a RestrictInfo into a CKey struct.
+ * The function checks whether the RestrictInfo can be used as a common key
+ * (a two-argument OpExpr without mutable functions; NULL is returned
+ * otherwise), normalizes a copy of the expression and "caches" some
+ * information. Note that the original RestrictInfo isn't touched.
+ */
+
+static CKey*
+transformToCkey( IndexOptInfo *index, RestrictInfo* rinfo, int indexcol) {
+ CKey *key;
+ OpExpr *expr = (OpExpr*)rinfo->clause;
+
+ if ( !IsA(expr, OpExpr) )
+ return NULL;
+
+ if ( contain_mutable_functions((Node*)expr) )
+ return NULL;
+
+ if ( list_length( expr->args ) != 2 )
+ return NULL;
+
+ key = (CKey*)palloc(sizeof(CKey)); /* key->n is left for the caller to fill in */
+ key->rinfo = rinfo;
+
+ key->normalizedexpr = (OpExpr*)copyObject( expr );
+ if (!bms_equal(rinfo->left_relids, index->rel->relids)) /* left side is not this rel */
+ CommuteOpExpr(key->normalizedexpr);
+
+ /*
+ * fix_indexqual_operand returns a copy of the object
+ */
+ key->var = (Var*)fix_indexqual_operand(linitial(key->normalizedexpr->args), index, indexcol);
+ Assert( IsA(key->var, Var) );
+
+ key->opfamily = index->opfamily[ key->var->varattno - 1 ];
+
+ /* restore varattno, because it may be different in a different index */
+ key->var->varattno = key->var->varattnosyn;
+
+ key->value = (Node*)lsecond(key->normalizedexpr->args);
+
+ key->strategy = get_op_opfamily_strategy( key->normalizedexpr->opno, key->opfamily);
+ Assert( key->strategy != InvalidStrategy );
+
+ key->strategyMask = BTMASK(key->strategy);
+
+ return key;
+}
+
+/* get_index_quals - indexquals of one IndexPath in CKey form, tagged with cnt */
+static List *
+get_index_quals(IndexPath *path, int cnt)
+{
+	List	   *ckeys = NIL;
+	ListCell   *clauseCell;
+
+	foreach(clauseCell, path->indexclauses)
+	{
+		IndexClause *iclause = (IndexClause *) lfirst(clauseCell);
+		ListCell   *qualCell;
+
+		foreach(qualCell, iclause->indexquals)
+		{
+			CKey	   *ckey = transformToCkey(path->indexinfo,
+											   (RestrictInfo *) lfirst(qualCell),
+											   iclause->indexcol);
+			if (ckey == NULL)
+				continue;		/* qual is not usable as a common key */
+			ckey->n = cnt;
+			ckeys = lappend(ckeys, ckey);
+		}
+	}
+	return ckeys;
+}
+
+/*
+ * find_all_quals - CKeys of all indexquals under the arms of a BitmapOr path;
+ * NIL unless each arm is a btree IndexPath or a BitmapAnd of such paths.
+ */
+static List *
+find_all_quals(BitmapOrPath *path, int *counter)
+{
+	List	   *collected = NIL;
+	ListCell   *armCell;
+
+	*counter = 0;
+	foreach(armCell, path->bitmapquals)
+	{
+		Path	   *arm = (Path *) lfirst(armCell);
+		List	   *leaves;
+		ListCell   *leafCell;
+
+		/* view the arm as a list of leaf paths */
+		if (IsA(arm, BitmapAndPath))
+			leaves = ((BitmapAndPath *) arm)->bitmapquals;
+		else if (IsA(arm, IndexPath))
+			leaves = list_make1(arm);
+		else
+			return NIL;
+		foreach(leafCell, leaves)
+		{
+			IndexPath  *leaf = (IndexPath *) lfirst(leafCell);
+			if (!IsA(leaf, IndexPath) ||
+				leaf->indexinfo->relam != BTREE_AM_OID)
+				return NIL;
+			collected = list_concat(collected,
+									get_index_quals(leaf, *counter));
+		}
+		(*counter)++;			/* CKey.n is the zero-based arm number */
+	}
+	return collected;
+}
+
+/*
+ * iseqCKeyArgs - do two CKeys refer to the same operands?
+ *
+ * True when both keys use the same operator family and have equal() value
+ * and Var nodes; the operators (strategies) themselves may differ.
+ */
+static bool
+iseqCKeyArgs(CKey *a, CKey *b)
+{
+	bool		sameFamily = (a->opfamily == b->opfamily);
+
+	/* short-circuit evaluation keeps the original order of the checks */
+	return sameFamily &&
+		equal(a->value, b->value) &&
+		equal(a->var, b->var);
+}
+
+/* count_entry - number of leading OR arms (numbered by CKey.n) that hold a key
+ * with tocmp's arguments, or -1 on a gap; strategy bits are merged into tocmp */
+static int
+count_entry(List *allquals, CKey *tocmp)
+{
+	int			lastMatched = 0;	/* highest arm number matched so far */
+	ListCell   *cell;
+
+	foreach(cell, allquals)
+	{
+		CKey	   *candidate = (CKey *) lfirst(cell);
+
+		if (candidate->n == lastMatched)
+			continue;			/* this arm is already accounted for */
+		if (candidate->n != lastMatched + 1)
+			return -1;			/* an arm in between had no match */
+		if (iseqCKeyArgs(candidate, tocmp))
+		{
+			tocmp->strategyMask |= candidate->strategyMask;
+			lastMatched++;
+		}
+	}
+	return lastMatched + 1;
+}
+
+/*
+ * find_common_quals - return the CKeys of the first OR arm whose arguments
+ * (see iseqCKeyArgs) are found by count_entry() in every arm of the path.
+ */
+static List *
+find_common_quals(BitmapOrPath *path)
+{
+	int			narms;
+	List	   *everything = find_all_quals(path, &narms);
+	List	   *shared = NIL;
+	ListCell   *cell;
+
+	if (everything == NIL)
+		return NIL;
+	foreach(cell, everything)
+	{
+		CKey	   *candidate = (CKey *) lfirst(cell);
+		/* only keys of arm 0 are candidates; stop at the first other key */
+		if (candidate->n != 0)
+			break;
+		if (narms == count_entry(everything, candidate))
+			shared = lappend(shared, candidate);
+	}
+	return shared;
+}
+
+/*
+ * unionOperation - make RestrictInfo with combined operation:
+ * one operator covering all strategies in key->strategyMask, or NULL.
+ */
+static RestrictInfo*
+unionOperation(PlannerInfo *root, CKey *key) {
+ RestrictInfo *rinfo;
+ Oid lefttype, righttype;
+ int strategy;
+
+ switch( key->strategyMask ) {
+ case BTMASK(BTLessStrategyNumber):
+ case BTMASK(BTLessEqualStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTGreaterEqualStrategyNumber):
+ case BTMASK(BTGreaterStrategyNumber):
+ /* trivial case: a single strategy, key->strategy stays as it is */
+ break;
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTLessEqualStrategyNumber):
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTLessEqualStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTLessStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ case BTMASK(BTLessEqualStrategyNumber) | BTMASK(BTEqualStrategyNumber):
+ /* any subset of <, <=, = can be unioned with <= */
+ key->strategy = BTLessEqualStrategyNumber;
+ break;
+ case BTMASK(BTGreaterEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterStrategyNumber):
+ case BTMASK(BTEqualStrategyNumber) | BTMASK(BTGreaterEqualStrategyNumber):
+ /* any subset of >, >=, = can be unioned with >= */
+ key->strategy = BTGreaterEqualStrategyNumber;
+ break;
+ default:
+ /*
+ * Can't make a common restrict qual
+ */
+ return NULL;
+ }
+ /* look up strategy and input types of the key's current operator */
+ get_op_opfamily_properties(key->normalizedexpr->opno, key->opfamily, false,
+ &strategy, &lefttype, &righttype);
+
+ if ( strategy != key->strategy ) {
+ /*
+ * We should check because it's possible to have "strange"
+ * opfamilies - without some strategies...
+ */
+ key->normalizedexpr->opno = get_opfamily_member(key->opfamily, lefttype, righttype, key->strategy);
+
+ if ( key->normalizedexpr->opno == InvalidOid )
+ return NULL;
+
+ key->normalizedexpr->opfuncid = get_opcode( key->normalizedexpr->opno );
+ Assert ( key->normalizedexpr->opfuncid != InvalidOid );
+ }
+
+ rinfo = make_simple_restrictinfo(root, (Expr*)key->normalizedexpr);
+
+ return rinfo;
+}
+
+/*
+ * make_predicate - keep only the IndexClauses usable by predicate_*_by():
+ * those whose indexquals are all two-argument OpExprs.  The filtered list is
+ * returned through *preds (NIL if nothing qualifies).
+ */
+static void
+make_predicate(List *indexclauses, List **preds)
+{
+	List	   *kept = NIL;
+	ListCell   *clauseCell;
+
+	foreach(clauseCell, indexclauses)
+	{
+		IndexClause *iclause = (IndexClause *) lfirst(clauseCell);
+		bool		usable = true;
+		ListCell   *qualCell;
+
+		foreach(qualCell, iclause->indexquals)
+		{
+			OpExpr	   *op = (OpExpr *) ((RestrictInfo *) lfirst(qualCell))->clause;
+
+			if (!IsA(op, OpExpr) || list_length(op->args) != 2)
+			{
+				usable = false;
+				break;
+			}
+		}
+		if (usable)
+			kept = lappend(kept, iclause);
+	}
+	*preds = kept;
+}
+
+#define CELL_GET_CLAUSES(x) ( ((IndexPath*)lfirst(x))->indexclauses )
+
+/*
+ * contained_quals - extend "nested" with every path of "quals" whose index
+ * quals imply those of the path in cell "check", i.e. whose result is
+ * contained in check's result.  Returns the (possibly extended) list.
+ */
+static List *
+contained_quals(List *nested, List *quals, ListCell *check)
+{
+	List	   *outerExprs;
+	ListCell   *cell;
+
+	/* a path already marked as nested is not used to absorb others */
+	if (list_member_ptr(nested, lfirst(check)))
+		return nested;
+
+	outerExprs = clauses_get_exprs(CELL_GET_CLAUSES(check));
+	if (contain_mutable_functions((Node *) outerExprs))
+		return nested;
+
+	foreach(cell, quals)
+	{
+		List	   *innerExprs;
+
+		if (cell == check || list_member_ptr(nested, lfirst(cell)))
+			continue;
+		innerExprs = clauses_get_exprs(CELL_GET_CLAUSES(cell));
+		if (predicate_implied_by(outerExprs, innerExprs, false))
+			nested = lappend(nested, lfirst(cell));
+	}
+	return nested;
+}
+
+/*
+ * is_intersect - true unless the quals of path "check" are proven (via
+ * predicate_refuted_by) disjoint from those of every path after it in
+ * "quals", i.e. true when some row might satisfy two of the quals.
+ */
+static bool
+is_intersect(List *quals, ListCell *check)
+{
+	List	   *ownExprs = clauses_get_exprs(CELL_GET_CLAUSES(check));
+	ListCell   *cell;
+
+	Assert(ownExprs != NULL);
+	/* the walk starts at "check" itself, which must be skipped */
+	for_each_cell(cell, quals, check)
+	{
+		if (cell == check)
+			continue;
+		if (!predicate_refuted_by(ownExprs,
+								  clauses_get_exprs(CELL_GET_CLAUSES(cell)),
+								  false))
+			return true;
+	}
+	return false;
+}
+
+/*
+ * Removes nested quals and guarantees that quals do not intersect,
+ * i.e. one row can't satisfy several quals. That opens the possibility of
+ * using an Append node instead of BitmapOr. Returns NULL on failure.
+ */
+static BitmapOrPath*
+cleanup_nested_quals( PlannerInfo *root, RelOptInfo *rel, BitmapOrPath *path ) {
+ ListCell *i;
+ IndexOptInfo *index=NULL;
+ List *nested = NULL;
+
+ /*
+ * check that all paths are IndexPaths using only one (btree) index
+ */
+ foreach(i, path->bitmapquals )
+ {
+
+ if ( IsA(lfirst(i), IndexPath) ) {
+ List *preds;
+ IndexPath *subpath = (IndexPath *) lfirst(i);
+
+ if ( subpath->indexinfo->relam != BTREE_AM_OID )
+ return NULL;
+
+ if ( index == NULL )
+ index = subpath->indexinfo;
+ else if ( index->indexoid != subpath->indexinfo->indexoid )
+ return NULL;
+
+ /*
+ * work only with optimizable quals
+ */
+ make_predicate(subpath->indexclauses, &preds);
+ if (preds == NIL)
+ return NULL;
+ subpath->indexclauses = preds; /* note: modifies the IndexPath in place */
+ } else
+ return NULL;
+ }
+
+ /*
+ * eliminate nested quals
+ */
+ foreach(i, path->bitmapquals ) {
+ nested = contained_quals(nested, path->bitmapquals, i);
+ }
+
+ if ( nested != NIL ) {
+ path->bitmapquals = list_difference_ptr( path->bitmapquals, nested );
+
+ Assert( list_length( path->bitmapquals )>0 );
+
+ /*
+ * Only one qual is left after eliminating nested quals
+ */
+ if (list_length( path->bitmapquals ) == 1)
+ return (BitmapOrPath*)linitial(path->bitmapquals); /* really an IndexPath; caller tests with IsA() */
+ }
+
+ /*
+ * Checks for intersection
+ */
+ foreach(i, path->bitmapquals ) {
+ if ( is_intersect( path->bitmapquals, i ) )
+ return NULL;
+ }
+
+ return path;
+}
+
+/*
+ * simpleCmpExpr - check whether the whole result of one simple operation
+ * is contained in the other: returns 1 if b implies a (a's result contains
+ * b's), -1 if a implies b, and 0 if neither can be proven.
+ */
+static int
+simpleCmpExpr(ExExpr *a, ExExpr *b)
+{
+	/* predicate_implied_by() expects Lists: build real one-element lists */
+	/* instead of casting OpExpr* to List* (that relied on struct layout) */
+	List	   *apred = list_make1(a->expr);
+	List	   *bpred = list_make1(b->expr);
+
+	if (predicate_implied_by(apred, bpred, false))
+		return 1;				/* a:( Var < 15 ) > b:( Var <= 10 ) */
+	if (predicate_implied_by(bpred, apred, false))
+		return -1;				/* a:( Var <= 10 ) < b:( Var < 15 ) */
+	return 0;
+}
+
+/*
+ * Tries to determine on which side the equality is - on the left or right
+ * a(< 10) b(=11) - on right
+ * a(> 10) b(=9) - on left
+ * a(= 10) b(=11) - on right
+ * a(= 10) b(=9) - on left
+ * Any other - result is 0;
+ */
+static int
+cmpEqExpr( ExExpr *a, ExExpr *b ) {
+ Oid oldop = b->expr->opno; /* saved so that b's operator can be restored below */
+ int res=0;
+ /* temporarily replace b's operator by the opfamily's "<" member and compare */
+ b->expr->opno = get_opfamily_member(b->opfamily, b->lefttype, b->righttype, BTLessStrategyNumber);
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = simpleCmpExpr(a,b);
+ }
+
+ if ( res == 0 ) { /* undecided: retry with the ">" member, sign inverted */
+ b->expr->opno = get_opfamily_member(b->opfamily, b->lefttype, b->righttype, BTGreaterStrategyNumber);
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = -simpleCmpExpr(a,b);
+ }
+ }
+ /* put b's original operator back */
+ b->expr->opno = oldop;
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+
+ return res;
+}
+
+/*
+ * Is the result of a contained in the result of negated b, or vice versa?
+ */
+static int
+cmpNegCmp( ExExpr *a, ExExpr *b ) {
+ Oid oldop = b->expr->opno; /* saved so that b's operator can be restored below */
+ int res = 0;
+ /* compare a against b with b's operator temporarily replaced by its negator */
+ b->expr->opno = get_negator( b->expr->opno );
+ if ( b->expr->opno != InvalidOid ) {
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+ res = simpleCmpExpr(a,b);
+ }
+ /* put b's original operator back */
+ b->expr->opno = oldop;
+ b->expr->opfuncid = get_opcode( b->expr->opno );
+
+ return ( IS_LESS(a->strategy) ) ? res : -res; /* sign flips unless a is "<" or "<=" */
+}
+
+/*
+ * cmpExpr - returns 1 if the whole result of a is on the left of the result
+ * of b, -1 if it is on the right, and 0 if that cannot be determined or the
+ * results overlap.  Both expressions must be simple (a single operation with
+ * the index) and must use the same index attribute.
+ *
+ */
+static int
+cmpExpr(ExExpr *a, ExExpr *b)
+{
+	int			res;
+
+	/*
+	 * If a and b may overlap, we can't decide which one is further left.
+	 * predicate_refuted_by() takes Lists, so pass real one-element lists.
+	 */
+	if (IS_ONE_DIRECTION(a->strategy, b->strategy) ||
+		!predicate_refuted_by(list_make1(a->expr), list_make1(b->expr), false))
+		return 0;
+
+	/*
+	 * From here on no row can satisfy both a and b, so try to find the
+	 * relative position of the two results.
+	 */
+	if (a->strategy == BTEqualStrategyNumber &&
+		b->strategy == BTEqualStrategyNumber)
+		return cmpEqExpr(a, b);
+	if (b->strategy == BTEqualStrategyNumber)
+		return -cmpEqExpr(a, b);	/* Covers cases with any operations in a */
+	if (a->strategy == BTEqualStrategyNumber)
+		return cmpEqExpr(b, a);
+	res = cmpNegCmp(a, b);		/* so, a(<10) b(>20) */
+	if (res == 0)
+		res = -cmpNegCmp(b, a);
+	return res;
+}
+
+static IndexOptInfo *sortingIndex = NULL; /* index whose columns cmpIndexPathEx() walks */
+static bool volatile unableToDefine = false; /* set by the comparators before they bail out of qsort */
+
+/*
+ * Try to determine the relative position of the results satisfying the
+ * indexquals a and b (lists of ExExpr), looking at one index attribute.
+ */
+static int
+cmpColumnQuals( List *a, List *b, int attno ) {
+ int res = 0;
+ ListCell *ai, *bi;
+
+ foreach(ai, a) {
+ ExExpr *ae = (ExExpr*)lfirst(ai);
+
+ if ( attno != ae->attno )
+ continue;
+
+ foreach(bi, b) {
+ ExExpr *be = (ExExpr*)lfirst(bi);
+
+ if ( attno != be->attno )
+ continue;
+ /* the first pair with a defined order decides */
+ if ((res=cmpExpr(ae, be))!=0)
+ return res;
+
+ if (res == 0 && ae->strategy == be->strategy &&
+ be->strategy != BTEqualStrategyNumber &&
+ equal(ae->expr, be->expr))
+ {
+ /*
+ * It's impossible to get a defined order for identical non-equality clauses
+ */
+ unableToDefine = true;
+ PG_RE_THROW(); /* it should be PG_THROW(), but it's the same */
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Compare the results of two indexquals (qsort comparator over IndexPathEx).
+ * Warning: it uses PG_RE_THROW(), so any call should be wrapped with
+ * PG_TRY(). The try/catch construction is used here to minimize unneeded
+ * actions when sorting is impossible
+ */
+static int
+cmpIndexPathEx(const void *a, const void *b) {
+ IndexPathEx *aipe = (IndexPathEx*)a;
+ IndexPathEx *bipe = (IndexPathEx*)b;
+ int attno, res = 0;
+ /* compare column by column until some index column gives a defined order */
+ for(attno=1; res==0 && attno<=sortingIndex->ncolumns; attno++)
+ res=cmpColumnQuals(aipe->preparedquals, bipe->preparedquals, attno);
+
+ if ( res==0 ) { /* no column could order the two scans */
+ unableToDefine = true;
+ PG_RE_THROW(); /* it should be PG_THROW(), but it's the same */
+ }
+
+ return res;
+}
+
+/*
+ * Convert the indexquals of indexclauses into a list of ExExpr (NULL if unusable)
+ */
+static List*
+prepareQuals(IndexOptInfo *index, List *indexclauses) {
+ ListCell *i, *c;
+ List *res=NULL;
+ ExExpr *ex;
+
+ foreach(i, indexclauses)
+ {
+ IndexClause *ic = lfirst(i);
+
+ foreach(c, ic->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst(c);
+ OpExpr *expr = (OpExpr*)rinfo->clause;
+ /* any qual that is not a two-argument, non-mutable OpExpr aborts everything */
+ if ( !IsA(expr, OpExpr) )
+ return NULL;
+
+ if ( list_length( expr->args ) != 2 )
+ return NULL;
+
+ if ( contain_mutable_functions((Node*)expr) )
+ return NULL;
+ /* work on a private copy with the index key as the left operand */
+ ex = (ExExpr*)palloc(sizeof(ExExpr));
+ ex->expr = (OpExpr*)copyObject( expr );
+ if (!bms_equal(rinfo->left_relids, index->rel->relids))
+ CommuteOpExpr(ex->expr);
+ linitial(ex->expr->args) = fix_indexqual_operand(linitial(ex->expr->args), index, ic->indexcol);
+ ex->attno = ((Var*)linitial(ex->expr->args))->varattno;
+ ex->opfamily = index->opfamily[ ex->attno - 1 ];
+ get_op_opfamily_properties( ex->expr->opno, ex->opfamily, false,
+ &ex->strategy, &ex->lefttype, &ex->righttype);
+
+ res = lappend(res, ex);
+ }
+ }
+
+ return res;
+}
+
+/*
+ * sortIndexScans - sorts index scans to get sorted results.
+ * The function assumes that the index is the same for all
+ * index scans. Returns NULL if no order can be determined.
+ */
+static List*
+sortIndexScans( List* ipaths ) {
+ ListCell *i;
+ int j=0;
+ IndexPathEx *ipe = (IndexPathEx*)palloc( sizeof(IndexPathEx)*list_length(ipaths) );
+ List *orderedPaths = NIL;
+ IndexOptInfo *index = ((IndexPath*)linitial(ipaths))->indexinfo;
+
+ foreach(i, ipaths) {
+ ipe[j].path = (IndexPath*)lfirst(i);
+ ipe[j].preparedquals = prepareQuals(index, ipe[j].path->indexclauses);
+
+ if (ipe[j].preparedquals == NULL)
+ return NULL;
+ j++;
+ }
+ /* the comparator reads these file-level variables */
+ sortingIndex = index;
+ unableToDefine = false;
+ PG_TRY(); {
+ qsort(ipe, list_length(ipaths), sizeof(IndexPathEx), cmpIndexPathEx);
+ } PG_CATCH(); {
+ if ( unableToDefine == false )
+ PG_RE_THROW(); /* not our problem */
+ } PG_END_TRY();
+
+ if ( unableToDefine == true ) /* the comparator gave up */
+ return NULL;
+
+ for(j=0;j<list_length(ipaths);j++)
+ orderedPaths = lappend(orderedPaths, ipe[j].path);
+
+ return orderedPaths;
+}
+
+/* Return a shallow copy of ipath that scans the index backwards. */
+static IndexPath *
+reverseScanDirIdxPath(IndexPath *ipath)
+{
+	IndexPath  *reversed = makeNode(IndexPath);
+
+	*reversed = *ipath;			/* the original path is left untouched */
+	reversed->indexscandir = BackwardScanDirection;
+	return reversed;
+}
+
+/* New list of backward-scanning copies of indexPaths, in reversed order. */
+static List *
+reverseScanDirIdxPaths(List *indexPaths)
+{
+	List	   *reversed = NIL;
+	ListCell   *cell;
+	foreach(cell, indexPaths)
+		reversed = lcons(reverseScanDirIdxPath((IndexPath *) lfirst(cell)),
+						 reversed);
+	return reversed;
+}
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 0ab021c1e89..4b4650d0c63 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -12,8 +12,16 @@
*
*-------------------------------------------------------------------------
*/
+#include <math.h>
#include "postgres.h"
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "common/pg_prng.h"
+#include "commands/vacuum.h"
+#include "funcapi.h"
+#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/optimizer.h"
@@ -24,6 +32,17 @@
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
+#include "parser/parsetree.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+#define EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD (default_statistics_target/4)
+#define RANGE_IN_SELECTIVITY_THRESHOLD (default_statistics_target/20)
+#define MULTICOLUMN_STATISTIC_FALLBACK_DISTINCT_THRESHOLD (default_statistics_target)
+
/*
* Data structure for accumulating info about possible range-query
* clause pairs in clauselist_selectivity.
@@ -49,6 +68,1095 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
bool use_extended_stats);
+static bool treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
+ int varRelid, SpecialJoinInfo *sjinfo);
+/* How a clause's selectivity is combined; values index the Selectivity s[5] accumulators. */
+typedef enum CorrelationKind {
+ CKRestrict = 0, /* clause not treated as a join clause */
+ CKIndepend, /* unknown correlation */
+ CKLikelySelf, /* Likely close to fully correlated, e.g. an aggregate with
+ a self join */
+ CKSelf, /* 100% correlation because of self join */
+ CKMul /* accumulates the product of all CKLikelySelf and CKSelf selectivities */
+} CorrelationKind;
+static CorrelationKind get_correlation_kind(PlannerInfo *root, int varRelid,
+ OpExpr* expr);
+
+/*
+ * Get variable node, looking through one RelabelType. Returns NULL if node is not a Var.
+ */
+static inline Var*
+get_var(Node* node)
+{
+ if (IsA(node, RelabelType))
+ node = (Node *) ((RelabelType *) node)->arg;
+
+ return IsA(node, Var) ? (Var*)node : NULL;
+}
+
+/* Pick the index of relation varno whose leading key columns cover most of
+ * vars. Outputs: (*permutation)[i] = key position of vars[i] or -1 (palloc'd),
+ * *missed_vars = vars not usable with that index, *n_keys = usable prefix. */
+static IndexOptInfo*
+locate_inner_multicolumn_index(PlannerInfo *root, Index varno, List* vars,
+ int n_clauses,
+ int **permutation, List **missed_vars, int* n_keys)
+{
+ ListCell *ilist;
+ RelOptInfo *rel = find_base_rel(root, varno);
+ IndexOptInfo *index_opt = NULL; /* best partial match so far */
+ List *missed_vars_opt = NIL;
+ int *permutation_opt = NULL;
+ int n_index_cols_opt = 0;
+ bool used[INDEX_MAX_KEYS]; /* key position already matched by some var */
+ int posvars[INDEX_MAX_KEYS]; /* key position -> position in vars; valid only where used[] is set */
+
+ *n_keys = 0;
+ *missed_vars = NIL;
+ /* *permutation is written only when an index is returned */
+ Assert(list_length(vars) >= 1);
+ Assert(list_length(vars) <= n_clauses);
+
+ foreach(ilist, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+ ListCell *vlist;
+ int i, n_index_cols = 0;
+ List *missed = NIL;
+ int *perm = NULL;
+ int last_idx = 0;
+ /* perm[] has one slot per clause; -1 means the var matched no key column */
+ memset(used, 0, sizeof(used));
+ perm = palloc(n_clauses * sizeof(*perm));
+ for(i=0; i<n_clauses; i++)
+ perm[i] = -1;
+
+ i = 0;
+ foreach (vlist, vars)
+ {
+ Var* var = lfirst(vlist);
+ int pos;
+
+ for (pos = 0; pos < index->nkeycolumns; pos++)
+ {
+ if (index->indexkeys[pos] == var->varattno)
+ {
+ if (used[pos]) /* column already taken by an earlier var */
+ missed = lappend(missed, var);
+ else
+ {
+ used[pos] = true;
+ posvars[pos] = i;
+ perm[i] = pos;
+ n_index_cols++;
+ last_idx = Max(last_idx, pos);
+ break;
+ }
+ }
+ }
+
+ /* var isn't found in index columns */
+ if (pos == index->nkeycolumns && !list_member_ptr(missed, var))
+ missed = lappend(missed, var);
+
+ i += 1;
+ }
+
+ if (n_index_cols == 0)
+ continue;
+
+ /* check that found columns are first columns in index */
+ if (index->nkeycolumns != n_index_cols)
+ {
+ int old_n_index_cols = n_index_cols;
+
+ for (i = 0; i <= last_idx; i++)
+ {
+ if (n_index_cols != old_n_index_cols)
+ {
+ /*
+ * We will use only first n_index_cols columns instead of
+ * found old_n_index_cols, so, all other columns should be
+ * added to missed list
+ */
+ if (used[i])
+ {
+ Var *var = list_nth(vars, posvars[i]);
+
+ missed = lappend(missed, var);
+ }
+ }
+ else if (!used[i])
+ {
+ if (i==0)
+ /* there isn't useful prefix */
+ goto TryNextIndex;
+
+ /* we will use only first i columns, save as new n_index_cols */
+ n_index_cols = i;
+ }
+ }
+ }
+
+ /* found exact match vars - index, immediately return */
+ if (vlist == NULL && list_length(missed) == 0 && n_index_cols == index->nkeycolumns)
+ {
+ *permutation = perm;
+ *n_keys = n_index_cols;
+ return index;
+ }
+
+ /* keep the best partial match: more usable key columns, then fewer index key columns */
+ if (index_opt == NULL ||
+ n_index_cols > n_index_cols_opt ||
+ (n_index_cols == n_index_cols_opt && index->nkeycolumns < index_opt->nkeycolumns))
+ {
+ index_opt = index;
+ missed_vars_opt = missed;
+ if (permutation_opt)
+ pfree(permutation_opt);
+ permutation_opt = perm;
+ perm = NULL; /* ownership moved to permutation_opt */
+ n_index_cols_opt = n_index_cols;
+ }
+TryNextIndex:
+ if (perm)
+ pfree(perm);
+ }
+
+ if (index_opt)
+ {
+ *missed_vars = list_concat_unique(*missed_vars, missed_vars_opt);
+ *permutation = permutation_opt;
+ *n_keys = n_index_cols_opt;
+ }
+
+ return index_opt;
+}
+
+/*
+ * Return true if the index has exactly n_vars key columns, or if used[0..n_vars-1] are all set.
+ */
+static bool
+check_leading_vars_index(IndexOptInfo *index, int n_vars,
+ bool used[INDEX_MAX_KEYS])
+{
+ int i;
+
+ if (index->nkeycolumns == n_vars)
+ return true;
+
+ for(i=0; i<n_vars; i++)
+ if (used[i] == false)
+ return false;
+
+ return true;
+}
+
+
+/* Find an index on varno whose key at permutation[i] is vars[i] for every i.
+ * An index with exactly list_length(vars) keys is returned at once; otherwise
+ * the matching index with the fewest key columns is returned (NULL if none). */
+static IndexOptInfo*
+locate_outer_multicolumn_index(PlannerInfo *root, Index varno, List* vars,
+ int *permutation)
+{
+ ListCell *ilist;
+ RelOptInfo* rel = find_base_rel(root, varno);
+ int n_vars = list_length(vars);
+ bool used[INDEX_MAX_KEYS];
+ IndexOptInfo *index_opt = NULL;
+
+ Assert(n_vars >= 1);
+
+ foreach(ilist, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+ ListCell *vlist;
+ int i;
+
+ if (index->nkeycolumns < n_vars)
+ continue;
+
+ memset(used, 0, sizeof(used));
+
+ i = 0;
+ foreach (vlist, vars)
+ {
+ Var* var = lfirst(vlist);
+ /* stop at the first var that has no slot, or whose slot holds another column */
+ if (permutation[i] < 0 ||
+ index->nkeycolumns <= permutation[i] ||
+ index->indexkeys[permutation[i]] != var->varattno)
+ break;
+ /* NOTE(review): used[] is set by var position i, not key position permutation[i], so the check below always passes once this loop completes - confirm intent */
+ used[i] = true;
+ i += 1;
+ }
+
+ if (vlist == NULL && check_leading_vars_index(index, n_vars, used))
+ {
+ if (index->nkeycolumns == n_vars)
+ /* found exact match vars - index, immediately return */
+ return index;
+ else if (index_opt == NULL ||
+ index_opt->nkeycolumns > index->nkeycolumns)
+ /* found better candidate - store it */
+ index_opt = index;
+ }
+ }
+
+ return index_opt;
+}
+/* One "Var = ANY (array constant)" restriction enumerated by TupleIterator. */
+typedef struct InArrayClause
+{
+ ArrayType* array; /* the array constant */
+ Datum* elems; /* deconstructed elements, filled by initTupleIterator */
+ bool* nulls; /* per-element null flags, filled by initTupleIterator */
+ int index; /* position of this clause's constant in the restriction constant list */
+ int n_elems; /* number of deconstructed elements */
+ int curr_elem; /* element selected for the current variant */
+} InArrayClause;
+/* Enumerates index-key tuples built from plain constants and IN-array elements. */
+typedef struct TupleIterator
+{
+ Datum values [INDEX_MAX_KEYS]; /* current tuple, indexed by index key position */
+ bool isnull[INDEX_MAX_KEYS]; /* true for key positions with no value */
+ int n_variants; /* number of tuples one pass produces */
+ int i_variant; /* tuples still to be produced in this pass */
+ int *permutation; /* constant position -> index key position, or -1 */
+ List *in_clauses; /* list of InArrayClause */
+ bool isExhaustive; /* set when combinations exceed the threshold; elements are then picked at random */
+} TupleIterator;
+/* Fill *it from the constants and IN-array clauses; permutation maps constant position to key position. */
+static void
+initTupleIterator(TupleIterator *it, List *consts, int *permutation,
+ List *in_clauses)
+{
+ ListCell *l;
+ int i;
+ double n_variants = 1; /* product of all IN-array lengths */
+
+ it->n_variants = 1;
+ it->permutation = permutation;
+ it->in_clauses = in_clauses;
+ it->isExhaustive = false;
+ for(i = 0; i < INDEX_MAX_KEYS; i++)
+ it->isnull[i] = true;
+ /* place each constant at its key position; constants without one (-1) are skipped */
+ i = 0;
+ foreach (l, consts)
+ {
+ Const* c = (Const*) lfirst(l);
+ int j = permutation[i++];
+
+ if (j<0)
+ continue;
+ it->values[j] = c->constvalue;
+ it->isnull[j] = c->constisnull;
+ }
+ /* split every IN array into elements; iac->array is dereferenced here, so it must be non-NULL */
+ foreach (l, in_clauses)
+ {
+ InArrayClause* iac = (InArrayClause*) lfirst(l);
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+
+ get_typlenbyvalalign(iac->array->elemtype,
+ &elmlen, &elmbyval, &elmalign);
+ deconstruct_array(iac->array, iac->array->elemtype,
+ elmlen, elmbyval, elmalign,
+ &iac->elems, &iac->nulls, &iac->n_elems);
+ iac->curr_elem = 0;
+ n_variants *= (double)iac->n_elems;
+ }
+ /* too many combinations: cap the pass at the threshold and switch to random picks */
+ if (n_variants > EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD)
+ {
+ it->isExhaustive = true;
+ it->n_variants = EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD;
+ }
+ else
+ it->n_variants = n_variants;
+
+ it->i_variant = it->n_variants;
+}
+/* Rewind the iterator: restore the variant counter and every IN array's current element. */
+static void
+resetTupleIterator(TupleIterator *it)
+{
+ ListCell *l;
+
+ it->i_variant = it->n_variants;
+
+ foreach (l, it->in_clauses)
+ {
+ InArrayClause* iac = (InArrayClause*) lfirst(l);
+
+ iac->curr_elem = 0;
+ }
+}
+/* Load the next variant into it->values/it->isnull; returns false when the pass is used up. */
+static bool
+getTupleIterator(TupleIterator *it)
+{
+ ListCell *l;
+ int carry = 1; /* increment for the next array; stays 1 while arrays wrap around */
+
+ if (it->i_variant == 0)
+ return false;
+
+ it->i_variant--;
+
+ foreach (l, it->in_clauses)
+ {
+ InArrayClause* iac = (InArrayClause*) lfirst(l);
+ int j = it->permutation[iac->index];
+
+ if (j<0) /* this array's column is not part of the index */
+ continue;
+
+ if (it->isExhaustive)
+ {
+ /* use random subset of IN list(s) */
+ iac->curr_elem = pg_prng_uint64(&pg_global_prng_state) % iac->n_elems;
+ }
+ else if ((iac->curr_elem += carry) >= iac->n_elems)
+ {
+ iac->curr_elem = 0;
+ carry = 1;
+ }
+ else
+ carry = 0;
+ /* curr_elem was advanced above, before being used for this variant */
+ it->values[j] = iac->elems[iac->curr_elem];
+ it->isnull[j] = iac->nulls[iac->curr_elem];
+ }
+
+ return true;
+}
+/* Product of the ndistinct estimates of the first n_keys index columns, capped at index->tuples. */
+static double
+get_numdistinct(PlannerInfo *root, IndexOptInfo* index, int n_keys)
+{
+ double numdistinct = 1.0;
+ ListCell *lc;
+ int i = 0;
+
+ foreach(lc, index->indextlist)
+ {
+ TargetEntry *tle = lfirst(lc);
+ VariableStatData vardata;
+ bool isdefault; /* set by get_variable_numdistinct, not examined here */
+
+ examine_variable(root, (Node*)tle->expr, 0, &vardata);
+
+ numdistinct *= get_variable_numdistinct(&vardata, &isdefault);
+
+ ReleaseVariableStats(vardata);
+
+ if (++i >= n_keys)
+ break;
+ }
+
+ if (numdistinct > index->tuples)
+ numdistinct = index->tuples;
+
+ return numdistinct;
+}
+/* Selectivity of the key tuples built from consts/in_clauses, using the index statistics in vardata. */
+static Selectivity
+estimate_selectivity_by_index(PlannerInfo *root, IndexOptInfo* index,
+ VariableStatData *vardata,
+ List *consts, List** missed_vars, int *permutation,
+ List *in_clauses, int n_keys,
+ bool *usedEqSel,
+ bool *isWeakSelectivity)
+{
+ TupleIterator it;
+ Selectivity sum = 0.0;
+ TypeCacheEntry *typentry;
+ Datum constant;
+ int nBins;
+ double nDistinct = 0.0;
+ /* missed_vars is not referenced in this function; usedEqSel may be NULL */
+ *isWeakSelectivity = false;
+
+ if (n_keys < index->nkeycolumns )
+ {
+ double nd;
+ bool isdefault;
+
+ nDistinct = get_numdistinct(root, index, n_keys);
+ nd = get_variable_numdistinct(vardata, &isdefault);
+ /* blend the two estimates with a geometric mean when the prefix one is larger */
+ if (isdefault == false && nDistinct > nd)
+ nDistinct = sqrt(nDistinct * nd);
+ }
+
+ /*
+ * Assume that two compound types are coherent, so we can use equality
+ * function from one type to compare it with other type. Use >= and <= range
+ * definition.
+ */
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ initTupleIterator(&it, consts, permutation, in_clauses);
+
+ /*
+ * Try to simplify calculations: if all variants fall into a small number of
+ * histogram bins then we don't need to check tuples separately, it's enough
+ * to check the min and max tuples and compute selectivity by range of bins
+ */
+
+ if (n_keys != index->nkeycolumns &&
+ it.n_variants > RANGE_IN_SELECTIVITY_THRESHOLD)
+ {
+ Datum constantMax = 0, /* 0 doubles as "not set yet" */
+ constantMin = 0;
+ FmgrInfo opprocLT, opprocGT;
+
+ fmgr_info(F_RECORD_GT, &opprocGT);
+ fmgr_info(F_RECORD_LT, &opprocLT);
+
+ /*
+ * Find min and max tuples
+ */
+ while(getTupleIterator(&it))
+ {
+ /* we check cache invalidation message */
+ if (typentry->tupDesc == NULL)
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ constant = HeapTupleGetDatum(heap_form_tuple(typentry->tupDesc,
+ it.values, it.isnull));
+
+ if (constantMax == 0 ||
+ DatumGetBool(FunctionCall2Coll(&opprocGT,
+ DEFAULT_COLLATION_OID,
+ constant, constantMax)))
+ {
+ constantMax = constant;
+ if (constantMin != 0)
+ continue;
+ }
+ if (constantMin == 0 ||
+ DatumGetBool(FunctionCall2Coll(&opprocLT,
+ DEFAULT_COLLATION_OID,
+ constant, constantMin)))
+ {
+ constantMin = constant;
+ }
+ }
+
+ sum = prefix_record_histogram_selectivity(vardata,
+ constantMin, constantMax,
+ n_keys, nDistinct,
+ &nBins);
+
+ if (sum > 0 && (nBins == it.n_variants || nBins <=2))
+ /*
+ * conclude that all tuples are in the same, rather small, range of
+ * bins
+ */
+ goto finish;
+
+ /*
+ * let try tuples one by one
+ */
+ sum = 0.0;
+ resetTupleIterator(&it);
+ }
+
+ while(getTupleIterator(&it))
+ {
+ Selectivity s;
+
+ /* we check cache invalidation message */
+ if (typentry->tupDesc == NULL)
+ typentry = lookup_type_cache(vardata->atttype,
+ TYPECACHE_EQ_OPR | TYPECACHE_TUPDESC);
+ constant = HeapTupleGetDatum(heap_form_tuple(typentry->tupDesc,
+ it.values, it.isnull));
+
+ if (n_keys != index->nkeycolumns)
+ {
+ s = prefix_record_histogram_selectivity(vardata,
+ constant, constant,
+ n_keys,
+ nDistinct,
+ &nBins);
+
+ if (nBins == 0 && n_keys < index->nkeycolumns && nDistinct > MULTICOLUMN_STATISTIC_FALLBACK_DISTINCT_THRESHOLD)
+ *isWeakSelectivity = true;
+
+ if (s < 0)
+ {
+ /*
+ * There is no histogram, fallback to single available option
+ */
+ s = eqconst_selectivity(typentry->eq_opr, DEFAULT_COLLATION_OID, vardata,
+ constant, false, true, false,
+ n_keys);
+
+ if (usedEqSel)
+ *usedEqSel = true;
+ }
+ }
+ else
+ {
+ s = eqconst_selectivity(typentry->eq_opr, DEFAULT_COLLATION_OID, vardata,
+ constant, false, true, false,
+ -1);
+ }
+ /* accumulate as sum + s - s*sum, i.e. never above 1 for inputs in [0,1] */
+ sum += s - s*sum;
+ }
+ /* NOTE(review): initTupleIterator sets n_variants to this same threshold when isExhaustive, so the factor below is 1 - confirm intent */
+finish:
+ if (it.isExhaustive)
+ sum *= ((double)(it.n_variants))/EXHAUSTIVE_IN_SELECTIVITY_THRESHOLD;
+
+ return sum;
+}
+/* Associates a Var with the list position of the clause it was taken from. */
+typedef struct ClauseVarPair
+{
+ Var *var; /* looked up by pointer identity */
+ int idx; /* 0-based position of the clause in the clause list */
+} ClauseVarPair;
+/* Append a newly allocated (var, idx) pair to the list *cvp. */
+static void
+appendCVP(List **cvp, Var *var, int idx)
+{
+ ClauseVarPair *e;
+
+ e = palloc(sizeof(*e));
+ e->var = var;
+ e->idx = idx;
+
+ *cvp = lappend(*cvp, e);
+}
+/* Return the clause index stored for var (matched by pointer) in cvp, or -1 if absent. */
+static int
+findCVP(List* cvp, Var* var)
+{
+ ListCell* lc;
+ foreach(lc, cvp)
+ {
+ ClauseVarPair* c = (ClauseVarPair*)lfirst(lc);
+ if (c->var == var) /* first match wins */
+ return c->idx;
+ }
+ return -1;
+}
+/* Fill *vardata with statistics of the index; false if the index has no reltype or no stats row. */
+static bool
+initVarData(IndexOptInfo *index, VariableStatData *vardata)
+{
+ Relation indexRel = index_open(index->indexoid, AccessShareLock);
+
+ if (!indexRel->rd_rel->reltype)
+ {
+ index_close(indexRel, AccessShareLock);
+
+ return false;
+ }
+ /* the index's reltype is used as the type of the "variable" */
+ memset(vardata, 0, sizeof(*vardata));
+ vardata->isunique = index->unique;
+ vardata->atttype = indexRel->rd_rel->reltype;
+ vardata->rel = index->rel;
+ vardata->acl_ok = true;
+ vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(index->indexoid),
+ Int16GetDatum(1),
+ BoolGetDatum(false));
+ vardata->freefunc = ReleaseSysCache;
+
+ index_close(indexRel, AccessShareLock);
+
+ if (!HeapTupleIsValid(vardata->statsTuple))
+ {
+ ReleaseVariableStats(*vardata);
+ return false;
+ }
+ /* on success the caller must ReleaseVariableStats(*vardata) */
+ vardata->sslots = index->sslots;
+
+ return true;
+}
+/* Add to *estimatedclauses the clause index of every var in vars not in missed_vars; returns the count. */
+static int
+markEstimatedColumns(Bitmapset **estimatedclauses, List *pairs,
+ List *vars, List *missed_vars)
+{
+ ListCell *l;
+ int n_estimated = 0;
+
+ foreach(l, vars)
+ {
+ Var* var = (Var *) lfirst(l);
+ ListCell *ll;
+
+ if (list_member_ptr(missed_vars, var))
+ continue;
+ /* find the clause this var came from (pointer comparison) */
+ foreach(ll, pairs)
+ {
+ ClauseVarPair *cvp=(ClauseVarPair*)lfirst(ll);
+
+ if (cvp->var == var)
+ {
+ *estimatedclauses = bms_add_member(*estimatedclauses, cvp->idx);
+ n_estimated += 1;
+ break;
+ }
+ }
+ /* every var is expected to have an entry in pairs */
+ Assert(ll != NULL);
+ }
+
+ return n_estimated;
+}
+/* SET_VARNOS: store a non-zero varno in data[0], else in data[1] if free and different; uses local "data". */
+#define SET_VARNOS(vn) do { \
+ if ((vn) != 0) \
+ { \
+ if (data[0].varno == 0) \
+ data[0].varno = (vn); \
+ else if (data[1].varno == 0 && data[0].varno != (vn)) \
+ data[1].varno = (vn); \
+ } \
+} while(0)
+/* GET_RELBY_NO: address of the data[] entry whose varno is vn, or NULL (also NULL for vn == 0). */
+#define GET_RELBY_NO(vn) \
+((data[0].varno == (vn) && (vn) != 0) ? &data[0] : ((data[1].varno == (vn) && (vn) != 0) ? &data[1] : NULL))
+/* SET_CURDATA: point local "cur" at the entry for vn; evaluates to true if one exists. */
+#define SET_CURDATA(vn) ((cur = GET_RELBY_NO(vn)) != NULL)
+/* Return true if some clause not yet in estimatedclauses is a ScalarArrayOpExpr. */
+static bool
+hasSAOPRestriction(List *clauses, Bitmapset *estimatedclauses)
+{
+ ListCell *l;
+ int i = -1; /* 0-based position of the current clause */
+
+ foreach(l, clauses)
+ {
+ Node* clause = (Node *) lfirst(l);
+ RestrictInfo *rinfo = NULL;
+
+ i++;
+ if (bms_is_member(i, estimatedclauses))
+ continue;
+ /* look inside a RestrictInfo unless it carries an orclause */
+ if (IsA(clause, RestrictInfo))
+ {
+ rinfo = (RestrictInfo *) clause;
+ if (!rinfo->orclause)
+ clause = (Node*)rinfo->clause;
+ }
+
+ if (IsA(clause, ScalarArrayOpExpr))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Estimate equality restrictions (Var = Const, boolean Var, Var = ANY(array))
+ * and equi-joins over at most two relations from compound-index statistics.
+ * Sets *restrict_selectivity, *join_selectivity and *correlationKind, adds the
+ * handled clause positions to *estimatedclauses; returns true if any was added.
+ */
+static bool
+use_multicolumn_statistic(PlannerInfo *root, List *clauses, int varRelid,
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ Selectivity* restrict_selectivity, Selectivity *join_selectivity,
+ Bitmapset **estimatedclauses, CorrelationKind
+ *correlationKind)
+{
+ ListCell *l;
+ List* var_clause_map = NIL; /* ClauseVarPair list: Var -> clause position */
+ List* missed_vars = NIL;
+ int i;
+ int *permutation = NULL;
+ int n_estimated = 0;
+ int n_keys;
+ TypeCacheEntry *typentry;
+ /* per-relation working state; at most two relations are tracked */
+ struct {
+ Index varno;
+
+ List *restrictionColumns;
+ List *restrictionConsts;
+ List *in_clauses;
+ List *ineqRestrictionClauses;
+
+ List *joinColumns;
+
+ IndexOptInfo *index;
+ VariableStatData vardata;
+ } data[2], *cur;
+
+ if (list_length(clauses) < 1)
+ return false;
+
+ /*
+ * For simple queries default estimator is good enough, but multicolumn
+ * statistic could be too expensive because of search and decompress a lot
+ * of stat data (histogramm of multicolumn indexes).
+ */
+ if (root->join_rel_list == NIL &&
+ root->simple_rel_array_size <= 2 /* 0th is always empty */ &&
+ /* list_length(clauses) < 4 && */
+ hasSAOPRestriction(clauses, *estimatedclauses) == false)
+ return false;
+
+ *correlationKind = CKIndepend;
+ memset(data, 0, sizeof(data));
+ /* Pass 1: classify every not-yet-estimated clause into data[0]/data[1] */
+ i=-1;
+ foreach(l, clauses)
+ {
+ Node* clause = (Node *) lfirst(l);
+ RestrictInfo* rinfo = NULL;
+ OpExpr *opclause = NULL;
+
+ i++;
+
+ /* do not use already estimated clauses */
+ if (bms_is_member(i, *estimatedclauses))
+ continue;
+
+ if (IsA(clause, RestrictInfo))
+ {
+ rinfo = (RestrictInfo *) clause;
+ if (!rinfo->orclause)
+ clause = (Node*)rinfo->clause;
+ }
+ if (IsA(clause, OpExpr))
+ opclause = (OpExpr*)clause;
+
+ if (IsA(clause, Var)) /* boolean variable */
+ {
+ Var* var1 = (Var*)clause;
+
+ SET_VARNOS(var1->varno);
+ if (SET_CURDATA(var1->varno))
+ {
+ cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+ appendCVP(&var_clause_map, var1, i);
+ cur->restrictionConsts = lappend(cur->restrictionConsts,
+ makeBoolConst(true, false));
+ }
+ }
+ else if (IsA(clause, BoolExpr) && ((BoolExpr*)clause)->boolop == NOT_EXPR) /* (NOT bool_expr) */
+ {
+ Node* arg1 = (Node*) linitial( ((BoolExpr*)clause)->args);
+ Var* var1 = get_var(arg1);
+
+ if (var1 == NULL)
+ continue;
+
+ SET_VARNOS(var1->varno);
+ if (SET_CURDATA(var1->varno))
+ {
+ cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+ appendCVP(&var_clause_map, var1, i);
+ cur->restrictionConsts = lappend(cur->restrictionConsts,
+ makeBoolConst(false, false));
+ }
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr* in = (ScalarArrayOpExpr*)clause;
+ Var* var1;
+ Node* arg2;
+ InArrayClause* iac;
+
+ var1 = get_var((Node*)linitial(in->args));
+ arg2 = (Node*) lsecond(in->args);
+
+ if (!in->useOr
+ || list_length(in->args) != 2
+ || get_oprrest(in->opno) != F_EQSEL
+ || var1 == NULL
+ || !IsA(arg2, Const) || ((Const*)arg2)->constisnull)
+ {
+ continue; /* a NULL array constant has no elements to enumerate */
+ }
+
+ SET_VARNOS(var1->varno);
+ if (SET_CURDATA(var1->varno))
+ {
+ cur->restrictionColumns = lappend(cur->restrictionColumns, var1);
+ appendCVP(&var_clause_map, var1, i);
+ cur->restrictionConsts = lappend(cur->restrictionConsts, arg2);
+
+ iac = (InArrayClause*)palloc(sizeof(InArrayClause));
+ iac->array = DatumGetArrayTypeP(((Const*)arg2)->constvalue); /* detoasts if needed */
+ iac->index = list_length(cur->restrictionConsts) - 1;
+
+ cur->in_clauses = lappend(cur->in_clauses, iac);
+ }
+ }
+ else if (opclause
+ && list_length(opclause->args) == 2)
+ {
+ int oprrest = get_oprrest(opclause->opno);
+ Node* arg1 = (Node*) linitial(opclause->args);
+ Node* arg2 = (Node*) lsecond(opclause->args);
+ Var* var1 = get_var(arg1);
+ Var* var2 = get_var(arg2);
+
+ if (oprrest == F_EQSEL && treat_as_join_clause(root, (Node*)opclause, NULL, varRelid, sjinfo))
+ {
+ if (var1 == NULL || var2 == NULL || var1->vartype != var2->vartype)
+ continue;
+
+ SET_VARNOS(var1->varno);
+ SET_VARNOS(var2->varno);
+ /* keep joinColumns of data[0] and data[1] aligned pairwise */
+ if (var1->varno == data[0].varno && var2->varno == data[1].varno)
+ {
+ data[0].joinColumns = lappend(data[0].joinColumns, var1);
+ appendCVP(&var_clause_map, var1, i);
+ data[1].joinColumns = lappend(data[1].joinColumns, var2);
+ appendCVP(&var_clause_map, var2, i);
+ }
+ else if (var1->varno == data[1].varno && var2->varno == data[0].varno)
+ {
+ data[0].joinColumns = lappend(data[0].joinColumns, var2);
+ appendCVP(&var_clause_map, var2, i);
+ data[1].joinColumns = lappend(data[1].joinColumns, var1);
+ appendCVP(&var_clause_map, var1, i);
+ }
+ }
+ else /* Estimate selectivity for a restriction clause. */
+ {
+ /*
+ * Give up if it is not equality comparison of variable with
+ * constant or some other clause is treated as join condition
+ */
+ if (((var1 == NULL) == (var2 == NULL)))
+ continue;
+
+ if (var1 == NULL)
+ {
+ /* swap var1 and var2 */
+ var1 = var2;
+ arg2 = arg1;
+ }
+
+ SET_VARNOS(var1->varno);
+
+ if (SET_CURDATA(var1->varno))
+ {
+ if ((rinfo && is_pseudo_constant_clause_relids(arg2, rinfo->right_relids))
+ || (!rinfo && NumRelids(root, clause) == 1 && is_pseudo_constant_clause(arg2)))
+ {
+ /* Restriction clause with a pseudoconstant . */
+ Node* const_val = estimate_expression_value(root, arg2);
+
+ if (IsA(const_val, Const))
+ {
+ switch (oprrest)
+ {
+ case F_EQSEL:
+ cur->restrictionColumns =
+ lappend(cur->restrictionColumns, var1);
+ cur->restrictionConsts =
+ lappend(cur->restrictionConsts, const_val);
+ appendCVP(&var_clause_map, var1, i);
+ break;
+ case F_SCALARGTSEL:
+ case F_SCALARGESEL:
+ case F_SCALARLTSEL:
+ case F_SCALARLESEL:
+ /*
+ * We do not consider range predicates now,
+ * but we can mark them as estimated
+ * if their variables are covered by index.
+ */
+ appendCVP(&var_clause_map, var1, i);
+ cur->ineqRestrictionClauses =
+ lappend(cur->ineqRestrictionClauses, var1);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ }
+ }
+ }
+ /* else just skip clause to work with it later in caller */
+ }
+
+ *restrict_selectivity = 1.0;
+ *join_selectivity = 1.0;
+
+ /*
+ * First, try to estimate selectivity by restrictions
+ */
+ for(i=0; i<lengthof(data); i++)
+ {
+ cur = &data[i];
+
+ /* compute restriction clauses if applicable */
+ if (cur->varno == 0 || list_length(cur->restrictionColumns) < 1)
+ continue;
+
+ cur->index = locate_inner_multicolumn_index(
+ root, cur->varno, cur->restrictionColumns,
+ list_length(clauses), &permutation, &missed_vars, &n_keys);
+
+ if (cur->index && n_keys > 1 &&
+ initVarData(cur->index, &cur->vardata))
+ {
+ bool usedEqSel= false;
+ double s;
+ bool usedWeakSelectivity;
+
+ s = estimate_selectivity_by_index(
+ root, cur->index, &cur->vardata,
+ cur->restrictionConsts, &missed_vars, permutation,
+ cur->in_clauses, n_keys, &usedEqSel, &usedWeakSelectivity);
+
+ if (usedWeakSelectivity)
+ {
+ double simpleSelectivity = 1.0; /* product of per-clause estimates; -1 = unavailable */
+ foreach(l, cur->restrictionColumns)
+ {
+ Node* clause;
+ int clauseIndex;
+ double clauseSelectivity;
+
+ clauseIndex = findCVP(var_clause_map, (Var*)lfirst(l));
+ if (clauseIndex < 0)
+ {
+ simpleSelectivity = -1;
+ break;
+ }
+
+ clause = (Node*)list_nth(clauses, clauseIndex);
+
+ clauseSelectivity = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo, false);
+
+ simpleSelectivity *= clauseSelectivity;
+ }
+ /* prefer the per-clause product when it is the smaller estimate */
+ if (simpleSelectivity > 0 && simpleSelectivity < s)
+ {
+ s = simpleSelectivity;
+ }
+ }
+
+ *restrict_selectivity *= s;
+
+ ReleaseVariableStats(cur->vardata);
+
+ /*
+ * mark inequality clauses as used, see estimate_selectivity_by_index()
+ */
+ if (usedEqSel)
+ {
+ foreach(l, cur->ineqRestrictionClauses)
+ {
+ Var* var = (Var *) lfirst(l);
+
+ /*
+ * restrictionColumns gets extra columns here; k is private, the outer loop owns i
+ */
+ for (int k = 0; k < cur->index->nkeycolumns; k++)
+ if (cur->index->indexkeys[k] == var->varattno)
+ cur->restrictionColumns =
+ lappend(cur->restrictionColumns, var);
+ }
+ }
+
+ n_estimated +=
+ markEstimatedColumns(estimatedclauses, var_clause_map,
+ cur->restrictionColumns, missed_vars);
+ }
+
+ if (permutation)
+ {
+ pfree(permutation);
+ permutation = NULL;
+ }
+ }
+
+ /* Deal with join clauses, if possible */
+ if (list_length(data[0].joinColumns) < 1)
+ goto cleanup;
+
+ data[0].index = locate_inner_multicolumn_index(
+ root,
+ data[0].varno, data[0].joinColumns,
+ list_length(clauses), &permutation, &missed_vars, &n_keys);
+
+ if (!data[0].index || n_keys < 1)
+ goto cleanup;
+
+ Assert(permutation != NULL);
+ Assert(data[1].varno != 0);
+ Assert(list_length(data[0].joinColumns) == list_length(data[1].joinColumns));
+ /* the other side must have an index with the same column layout */
+ data[1].index = locate_outer_multicolumn_index(
+ root,
+ data[1].varno, data[1].joinColumns,
+ permutation);
+
+ if (!data[1].index)
+ goto cleanup;
+
+ if (!initVarData(data[0].index, &data[0].vardata))
+ goto cleanup;
+
+ if (!initVarData(data[1].index, &data[1].vardata))
+ {
+ ReleaseVariableStats(data[0].vardata);
+ goto cleanup;
+ }
+
+ typentry = lookup_type_cache(data[0].vardata.atttype, TYPECACHE_EQ_OPR);
+ *join_selectivity *= eqjoin_selectivity(root, typentry->eq_opr,
+ DEFAULT_COLLATION_OID,
+ &data[0].vardata, &data[1].vardata,
+ sjinfo, n_keys);
+
+ /* for self join */
+ if (data[0].index->indexoid == data[1].index->indexoid)
+ *correlationKind = CKSelf;
+ else
+ {
+ RangeTblEntry *lrte = planner_rt_fetch(data[0].index->rel->relid, root),
+ *rrte = planner_rt_fetch(data[1].index->rel->relid, root);
+
+ if (lrte->relid == rrte->relid)
+ *correlationKind = CKSelf;
+ }
+
+ for (i = 0; i < lengthof(data); i++)
+ ReleaseVariableStats(data[i].vardata);
+
+ n_estimated +=
+ markEstimatedColumns(estimatedclauses, var_clause_map,
+ data[0].joinColumns, missed_vars);
+
+cleanup:
+ if (permutation)
+ pfree(permutation);
+
+ return n_estimated != 0;
+}
+
/****************************************************************************
* ROUTINES TO COMPUTE SELECTIVITIES
****************************************************************************/
@@ -96,6 +1204,54 @@ static Selectivity clauselist_selectivity_or(PlannerInfo *root,
* Of course this is all very dependent on the behavior of the inequality
* selectivity functions; perhaps some day we can generalize the approach.
*/
+/* Fold selectivity sel of kind ck into the accumulators s[], indexed by CorrelationKind. */
+static void
+appendSelectivityRes(Selectivity s[5], Selectivity sel, CorrelationKind ck)
+{
+ switch(ck)
+ {
+ case CKRestrict:
+ s[ck] *= sel;
+ break;
+ case CKSelf:
+ case CKLikelySelf:
+ s[CKMul] *= sel; /* product of all correlated selectivities */
+ if (s[ck] > sel) /* s[ck] keeps the minimum seen for this kind */
+ s[ck] = sel;
+ /* FALLTHROUGH */
+ case CKIndepend:
+ s[CKIndepend] *= sel; /* also receives the correlated ones via fallthrough */
+ break;
+ default:
+ elog(ERROR, "unknown selectivity kind: %d", ck);
+ }
+}
+/* Combine the per-kind accumulators filled by appendSelectivityRes into one selectivity. */
+static Selectivity
+finalizeSelectivityRes(Selectivity s[5])
+{
+ Selectivity sel;
+
+ sel = s[CKRestrict] * s[CKIndepend];
+ /* NOTE(review): s[CKIndepend] already includes every correlated factor, so the fallback multiplies them in twice - confirm intent */
+ if (s[CKIndepend] != s[CKMul])
+ {
+ /* we have both independ and correlated - fallback */
+ sel *= s[CKMul];
+ }
+ else
+ {
+ /* we have only correlated join clauses */
+ if (s[CKLikelySelf] != 1.0 && sel < s[CKLikelySelf])
+ sel = sel + (s[CKLikelySelf] - sel) * 0.25; /* move a quarter of the way up */
+
+ if (s[CKSelf] != 1.0 && sel < s[CKSelf])
+ sel = sel + (s[CKSelf] - sel) * 1.0; /* i.e. raise sel to s[CKSelf] */
+ }
+
+ return sel;
+}
+
Selectivity
clauselist_selectivity(PlannerInfo *root,
List *clauses,
@@ -121,12 +1277,14 @@ clauselist_selectivity_ext(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
bool use_extended_stats)
{
- Selectivity s1 = 1.0;
+ Selectivity s[5 /* per CorrelationKind */] = {1.0, 1.0, 1.0, 1.0, 1.0};
+ Selectivity s2 = 1.0, s3 = 1.0;
RelOptInfo *rel;
Bitmapset *estimatedclauses = NULL;
RangeQueryClause *rqlist = NULL;
ListCell *l;
int listidx;
+ CorrelationKind ck;
/*
* If there's exactly one clause, just go directly to
@@ -150,9 +1308,23 @@ clauselist_selectivity_ext(PlannerInfo *root,
* 'estimatedclauses' is populated with the 0-based list position
* index of clauses estimated here, and that should be ignored below.
*/
- s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+ s2 = statext_clauselist_selectivity(root, clauses, varRelid,
jointype, sjinfo, rel,
&estimatedclauses, false);
+ appendSelectivityRes(s, s2, CKRestrict);
+ }
+
+ /*
+ * Check whether the join conjuncts correspond to compound indexes on the left
+ * and right joined relations, or whether indexed columns of one relation are
+ * compared with constant values. In these cases the selectivity can be
+ * calculated from the statistics of the compound index.
+ */
+ while(use_multicolumn_statistic(root, clauses, varRelid, jointype, sjinfo,
+ &s2, &s3, &estimatedclauses, &ck))
+ {
+ appendSelectivityRes(s, s2, CKRestrict);
+ appendSelectivityRes(s, s3, ck);
}
/*
@@ -168,7 +1340,6 @@ clauselist_selectivity_ext(PlannerInfo *root,
{
Node *clause = (Node *) lfirst(l);
RestrictInfo *rinfo;
- Selectivity s2;
listidx++;
@@ -194,7 +1365,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
rinfo = (RestrictInfo *) clause;
if (rinfo->pseudoconstant)
{
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, CKRestrict);
continue;
}
clause = (Node *) rinfo->clause;
@@ -208,12 +1379,17 @@ clauselist_selectivity_ext(PlannerInfo *root,
* the simple way we are expecting.) Most of the tests here can be
* done more efficiently with rinfo than without.
*/
+ ck = treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo) ?
+ CKIndepend : CKRestrict;
if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
{
OpExpr *expr = (OpExpr *) clause;
bool varonleft = true;
bool ok;
+ if (ck == CKIndepend)
+ ck = get_correlation_kind(root, varRelid, expr);
+
if (rinfo)
{
ok = (rinfo->num_base_rels == 1) &&
@@ -252,7 +1428,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
break;
default:
/* Just merge the selectivity in generically */
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, ck);
break;
}
continue; /* drop to loop bottom */
@@ -260,7 +1436,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
}
/* Not the right form, so treat it generically. */
- s1 = s1 * s2;
+ appendSelectivityRes(s, s2, ck);
}
/*
@@ -273,7 +1449,6 @@ clauselist_selectivity_ext(PlannerInfo *root,
if (rqlist->have_lobound && rqlist->have_hibound)
{
/* Successfully matched a pair of range clauses */
- Selectivity s2;
/*
* Exact equality to the default value probably means the
@@ -322,15 +1497,13 @@ clauselist_selectivity_ext(PlannerInfo *root,
}
}
/* Merge in the selectivity of the pair of clauses */
- s1 *= s2;
+ appendSelectivityRes(s, s2, CKRestrict);
}
else
{
/* Only found one of a pair, merge it in generically */
- if (rqlist->have_lobound)
- s1 *= rqlist->lobound;
- else
- s1 *= rqlist->hibound;
+ appendSelectivityRes(s, (rqlist->have_lobound) ? rqlist->lobound :
+ rqlist->hibound, CKRestrict);
}
/* release storage and advance */
rqnext = rqlist->next;
@@ -338,7 +1511,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
rqlist = rqnext;
}
- return s1;
+ return finalizeSelectivityRes(s);
}
/*
@@ -624,6 +1797,137 @@ treat_as_join_clause(PlannerInfo *root, Node *clause, RestrictInfo *rinfo,
}
}
+typedef struct RangeTblEntryContext {
+ RangeTblEntry *rte; /* first RTE_RELATION entry met by find_rte_walker, or NULL */
+ int count; /* bumped for that first relation and for every reason to give up */
+} RangeTblEntryContext;
+/* Walker: remembers the one relation found below a node; returns true (abort) when it cannot. */
+static bool
+find_rte_walker(Node *node, RangeTblEntryContext *context)
+{
+ if (node == NULL)
+ return false;
+
+ if (context->count > 1)
+ return true; /* already gave up, skip the rest */
+
+ if (IsA(node, RangeTblEntry)) {
+ RangeTblEntry *rte = (RangeTblEntry*)node;
+
+ if (rte->rtekind == RTE_RELATION)
+ {
+ if (context->count == 0)
+ {
+ context->count++;
+ context->rte=rte;
+ }
+ else if (rte->relid != context->rte->relid)
+ {
+ context->count++;
+ return true; /* more than one relation in subtree */
+ }
+ }
+ else if (!(rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_JOIN ||
+ rte->rtekind == RTE_CTE))
+ {
+ context->count++;
+ return true; /* RTE kind other than relation/subquery/join/CTE: give up */
+ }
+
+ return false; /* allow range_table_walker to continue */
+ }
+
+ if (IsA(node, Query))
+ return query_tree_walker((Query *) node, find_rte_walker,
+ (void *) context, QTW_EXAMINE_RTES_BEFORE);
+
+ return expression_tree_walker(node, find_rte_walker, (void *) context);
+}
+
+static RangeTblEntry*
+find_single_rte(RangeTblEntry *node)
+{
+	/* Walker state: nothing found yet. */
+	RangeTblEntryContext state = {.rte = NULL, .count = 0};
+
+	/* Walk a one-element range table; only the collected state matters. */
+	(void) range_table_walker(list_make1(node), find_rte_walker,
+							  (void *) &state, QTW_EXAMINE_RTES_BEFORE);
+
+	if (state.count != 1)
+		return NULL;		/* not exactly one relation underneath */
+	return state.rte;
+}
+/* True iff both RTEs are RTE_RELATION entries with equal relid; arguments are evaluated more than once. */
+#define IsSameRelationRTE(a, b) ( \
+ (a)->rtekind == (b)->rtekind && \
+ (a)->rtekind == RTE_RELATION && \
+ (a)->relid == (b)->relid \
+)
+
+
+/*
+ * get_correlation_kind - detect a self join: an equality clause between two
+ * range-table entries that are, or are built over, the same single relation.
+ */
+static CorrelationKind
+get_correlation_kind(PlannerInfo *root, int varRelid, OpExpr* expr)
+{
+ Node *left_arg, *right_arg;
+ Relids left_varnos, right_varnos;
+ int left_varno, right_varno;
+ RangeTblEntry *left_rte, *right_rte;
+
+ if (varRelid != 0)
+ /* We consider only the case of joins, not restriction mode */
+ return CKIndepend;
+
+ /* Check that it is an equality comparison */
+ if (get_oprrest(expr->opno) != F_EQSEL)
+ return CKIndepend;
+
+ left_arg = linitial(expr->args);
+ right_arg = lsecond(expr->args);
+
+ /*
+ * Each side must reference exactly one rel, and the two rels must differ
+ */
+ left_varnos = pull_varnos(root, left_arg);
+ right_varnos = pull_varnos(root, right_arg);
+ if (!bms_get_singleton_member(left_varnos, &left_varno) ||
+ !bms_get_singleton_member(right_varnos, &right_varno) ||
+ left_varno == right_varno)
+ return CKIndepend;
+
+ left_rte = planner_rt_fetch(left_varno, root);
+ right_rte = planner_rt_fetch(right_varno, root);
+
+ if (IsSameRelationRTE(left_rte, right_rte))
+ {
+ Var *lvar = get_var(left_arg),
+ *rvar = get_var(right_arg);
+
+ /* self join detected; CKSelf only if both sides are the same column */
+ if (lvar == NULL || rvar == NULL)
+ return CKLikelySelf;
+ return (lvar->varattno == rvar->varattno) ?
+ CKSelf : CKLikelySelf;
+ }
+
+ if ((left_rte = find_single_rte(left_rte)) == NULL)
+ return CKIndepend;
+ if ((right_rte = find_single_rte(right_rte)) == NULL)
+ return CKIndepend;
+
+ if (IsSameRelationRTE(left_rte, right_rte))
+ {
+ /* self join detected, but over some transformation which cannot be
+ * flattened */
+ return CKLikelySelf;
+ }
+
+ return CKIndepend;
+}
/*
* clause_selectivity -
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 52ebdd90fcb..3fa12800624 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -157,6 +157,7 @@ typedef struct
{
PlannerInfo *root;
QualCost total;
+ bool calccoalesce;
} cost_qual_eval_context;
static List *extract_nonindex_conditions(List *qual_clauses, List *indexclauses);
@@ -322,7 +323,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
cpu_run_cost = cpu_per_tuple * baserel->tuples;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;
- cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows;
+ cpu_run_cost += path->pathtarget->cost.per_tuple * ((!param_info && baserel->rowsUnclamped > 0 && baserel->rows <= 1) ? baserel->rowsUnclamped : path->rows);
/* Adjust costing for parallelism, if used. */
if (path->parallel_workers > 0)
@@ -795,7 +796,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
cost_qual_eval(&qpqual_cost, qpquals, root);
startup_cost += qpqual_cost.startup;
- cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + 2.0*qpqual_cost.per_tuple;
cpu_run_cost += cpu_per_tuple * tuples_fetched;
@@ -1026,6 +1027,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
double spc_seq_page_cost,
spc_random_page_cost;
double T;
+ double rows_est;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo));
@@ -1081,7 +1083,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost += qpqual_cost.startup;
- cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + 2.0*qpqual_cost.per_tuple;
cpu_run_cost = cpu_per_tuple * tuples_fetched;
/* Adjust costing for parallelism, if used. */
@@ -1095,12 +1097,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
path->rows = clamp_row_est(path->rows / parallel_divisor);
}
-
run_cost += cpu_run_cost;
/* tlist eval costs are paid per output row, not per tuple scanned */
startup_cost += path->pathtarget->cost.startup;
- run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ if (!param_info && baserel->rowsUnclamped > 0 && baserel->rows <= 1)
+ {
+ rows_est = baserel->rowsUnclamped;
+ if (path->parallel_workers > 0)
+ rows_est /= get_parallel_divisor(path);
+ }
+ else
+ rows_est = path->rows;
+
+ run_cost += path->pathtarget->cost.per_tuple * rows_est;
path->startup_cost = startup_cost;
path->total_cost = startup_cost + run_cost;
@@ -1844,6 +1855,327 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
rterm->pathtarget->width);
}
+/*
+ * is_fake_var
+ *		True if expr (looking through one RelabelType) is a Var with varno == 0.
+ *		generate_append_tlist() builds such fake Vars, which are said to make
+ *		the group-count estimation fail.
+ * XXX It is not established why the estimation would fail for them.
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+	Expr	   *inner = IsA(expr, RelabelType) ?
+		(Expr *) ((RelabelType *) expr)->arg : expr;
+
+	return IsA(inner, Var) ? (((Var *) inner)->varno == 0) : false;
+}
+
+/*
+ * get_width_cost_multiplier
+ * Returns the relative complexity of comparing two values, based on their
+ * width: the longer the value, the more expensive the comparison. The result
+ * is a multiplier (>= 1.0) in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+ double width = -1.0; /* negative means "not known yet" */
+
+ if (IsA(expr, RelabelType))
+ expr = (Expr *) ((RelabelType *) expr)->arg;
+
+ /* Try to find the cached attribute width in the corresponding relation */
+ if (IsA(expr, Var))
+ {
+ Var *var = (Var *) expr;
+
+ if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+ {
+ RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+ if (rel != NULL &&
+ var->varattno >= rel->min_attr &&
+ var->varattno <= rel->max_attr)
+ {
+ int ndx = var->varattno - rel->min_attr;
+
+ if (rel->attr_widths[ndx] > 0)
+ width = rel->attr_widths[ndx];
+ }
+ }
+ }
+
+ /* Didn't find a per-attribute width, try using the type width instead. */
+ if (width < 0.0)
+ {
+ Node *node = (Node *) expr;
+
+ width = get_typavgwidth(exprType(node), exprTypmod(node));
+ }
+
+ /*
+ * Values are passed as Datum type, so comparisons can't be cheaper than
+ * comparing a Datum value.
+ *
+ * FIXME I find this reasoning questionable. We may pass int2, and
+ * comparing it is probably a bit cheaper than comparing a bigint.
+ */
+ if (width <= sizeof(Datum))
+ return 1.0;
+
+ /*
+ * We consider the cost of a comparison not to be directly proportional to
+ * width of the argument, because widths of the arguments could be
+ * slightly different (we only know the average width for the whole
+ * column). So we use 1 + log2(width / sizeof(Datum)) / 8 as an estimate.
+ */
+ return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
+
+/*
+ * compute_cpu_sort_cost
+ *		compute CPU cost of sort (i.e. in-memory)
+ *
+ * 'pathkeys' are the sort keys. The first 'nPresortedKeys' of them add no
+ * term of their own, though their comparator cost still counts for later keys.
+ * 'comparison_cost' is an extra cost added once per input tuple, 'tuples' is
+ * the number of input tuples, and 'heapSort' says that a bounded heap sort
+ * keeping 'output_tuples' tuples is used. Returns the total CPU cost.
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which is data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number of
+ * identical tuples with value Ki; then the estimated number of comparisons is:
+ *
+ *	log(N! / (X1! * X2! * ..))  ~  sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same, because we only have an estimate of the number
+ * of groups (distinct values), not of their sizes. The formula then becomes:
+ *
+ *	N * log(NumberOfGroups)
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example with columns (c1, c2, ..., ck) we
+ * can estimate that number of comparisons on ck is roughly
+ *
+ *	ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the comparison cost for column k; the total cost is then
+ *
+ *	N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they
+ * can't conveniently supply the sort keys. In this case, it will fallback to
+ * simple comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+					  Cost comparison_cost, double tuples, double output_tuples,
+					  bool heapSort)
+{
+	Cost		per_tuple_cost = 0.0;
+	ListCell   *lc;
+	List	   *pathkeyExprs = NIL;
+	double		tuplesPerPrevGroup = tuples;
+	double		totalFuncCost = 1.0;
+	bool		has_fake_var = false;
+	int			i = 0;
+	Oid			prev_datatype = InvalidOid;
+	Cost		prevFuncCost = 1.0;	/* comparator cost found for prev_datatype */
+	List	   *cache_varinfos = NIL;
+
+	/* fallback if pathkeys is unknown */
+	if (list_length(pathkeys) == 0)
+	{
+		/*
+		 * A bounded heap-sort keeping just K tuples in memory needs about
+		 * N log2 K comparisons, but its constant factor is a bit higher than
+		 * quicksort's. Use 2K so the cost curve is continuous at the crossover.
+		 */
+		output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+		per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+		/* add cost provided by caller */
+		per_tuple_cost += comparison_cost;
+
+		return per_tuple_cost * tuples;
+	}
+
+	/*
+	 * Computing total cost of sorting takes into account the per-column
+	 * comparison function cost. We try to compute the needed number of
+	 * comparisons per column.
+	 */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(lc);
+		EquivalenceMember *em;
+		double		nGroups,
+					correctedNGroups;
+		Cost		funcCost = 1.0;
+
+		/*
+		 * We believe that equivalence members aren't very different, so, to
+		 * estimate cost we consider just the first member.
+		 */
+		em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+		if (em->em_datatype != InvalidOid)
+		{
+			/* look up the comparator cost only when the data type changes */
+			if (prev_datatype != em->em_datatype)
+			{
+				Oid			sortop;
+				QualCost	cost;
+
+				sortop = get_opfamily_member(pathkey->pk_opfamily,
+											 em->em_datatype, em->em_datatype,
+											 pathkey->pk_strategy);
+
+				cost.startup = 0;
+				cost.per_tuple = 0;
+				add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+				/*
+				 * add_function_cost returns cpu_operator_cost * procost; we
+				 * need just procost, so undo that (avoiding NaN from 0 / 0).
+				 */
+				prevFuncCost = (cpu_operator_cost > 0) ?
+					cost.per_tuple / cpu_operator_cost : 1.0;
+				prev_datatype = em->em_datatype;
+			}
+			funcCost = prevFuncCost;	/* reused while the type repeats */
+		}
+
+		/* factor in the width of the values in this column */
+		funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+		/* now we have per-key cost, so add to the running total */
+		totalFuncCost += funcCost;
+
+		/* remember if we have found a fake Var in pathkeys */
+		has_fake_var |= is_fake_var(em->em_expr);
+		pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+		/*
+		 * We need to calculate the number of comparisons for this column,
+		 * which requires knowing the group size. So we estimate the number of
+		 * groups by calling estimate_num_groups_incremental(), which
+		 * estimates the group size for "new" pathkeys.
+		 *
+		 * Note: estimate_num_groups_incremental does not handle fake Vars, so
+		 * use a default estimate in that case.
+		 */
+		if (!has_fake_var)
+			nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+													  tuplesPerPrevGroup, NULL, NULL,
+													  &cache_varinfos,
+													  list_length(pathkeyExprs) - 1);
+		else if (tuples > 4.0)
+			/*
+			 * Use geometric mean as estimation if there are no stats.
+			 *
+			 * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+			 * a single column, but here we're dealing with multiple columns.
+			 */
+			nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+		else
+			nGroups = tuples;
+
+		/*
+		 * Presorted keys are not considered in the cost above, but we still
+		 * do have to compare them in the qsort comparator. So make sure to
+		 * factor in the cost in that case.
+		 */
+		if (i >= nPresortedKeys)
+		{
+			if (heapSort)
+			{
+				/*
+				 * have to keep at least one group, and a multiple of group
+				 * size
+				 */
+				correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+			}
+			else
+				/* all groups in the input */
+				correctedNGroups = nGroups;
+
+			correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+			per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+		}
+
+		i++;
+
+		/*
+		 * Once we get single-row group, it means tuples in the group are
+		 * unique and we can skip all remaining columns.
+		 */
+		if (tuplesPerPrevGroup <= nGroups)
+			break;
+
+		/*
+		 * Real-world distributions tend not to be uniform, and we have no
+		 * reliable easy-to-use information about the skew. As a basic defense
+		 * we use a 1.5 factor to make the expected group a bit larger, taking
+		 * care not to make it larger than in the preceding step.
+		 */
+		tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+								 ceil(1.5 * tuplesPerPrevGroup / nGroups));
+	}
+
+	list_free(pathkeyExprs);
+
+	/* per_tuple_cost is in cpu_operator_cost units */
+	per_tuple_cost *= cpu_operator_cost;
+
+	/*
+	 * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles
+	 * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort
+	 * estimation formula has additional term proportional to number of tuples
+	 * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number
+	 * of tuples, approximately less than 1e4. It is not clear how to combine
+	 * such a multiplier with the number of groups used above, so estimate it
+	 * as 10 cpu_operator_cost units.
+	 */
+	per_tuple_cost += 10 * cpu_operator_cost;
+
+	per_tuple_cost += comparison_cost;
+
+	return tuples * per_tuple_cost;
+}
+
+/* cost_sort_estimate: CPU-only cost of a plain sort, with no extra comparison cost */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+				   double tuples)
+{
+	const Cost	no_extra_cost = 0;	/* caller adds nothing per tuple */
+	const bool	use_heap_sort = false;	/* all input tuples are sorted */
+	return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+								 no_extra_cost, tuples, tuples, use_heap_sort);
+}
+
/*
* cost_tuplesort
* Determines and returns the cost of sorting a relation using tuplesort,
@@ -1860,7 +2192,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- * cpu = comparison_cost * t * log2(t)
+ * and cpu cost (computed by compute_cpu_sort_cost()).
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1879,9 +2211,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* 'comparison_cost' is the extra cost per comparison, if any
* 'sort_mem' is the number of kilobytes of work memory allowed for the sort
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 at input. If there is "input cost" it should
+ * be added by caller later
*/
static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
double tuples, int width,
Cost comparison_cost, int sort_mem,
double limit_tuples)
@@ -1898,9 +2232,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
if (tuples < 2.0)
tuples = 2.0;
- /* Include the default cost-per-comparison */
- comparison_cost += 2.0 * cpu_operator_cost;
-
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
@@ -1924,12 +2255,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double log_runs;
double npageaccesses;
- /*
- * CPU costs
- *
- * Assume about N log2 N comparisons
- */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ /* CPU costs */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
/* Disk costs */
@@ -1945,18 +2274,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
}
else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
{
- /*
- * We'll use a bounded heap-sort keeping just K tuples in memory, for
- * a total number of tuple comparisons of N log2 K; but the constant
- * factor is a bit higher than for quicksort. Tweak it so that the
- * cost curve is continuous at the crossover point.
- */
- *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+ /* We'll use a bounded heap-sort keeping just K tuples in memory. */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ output_tuples, true);
}
else
{
/* We'll use plain quicksort on all the input tuples */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
}
/*
@@ -2070,7 +2398,7 @@ cost_incremental_sort(Path *path,
* Estimate the average cost of sorting of one group where presorted keys
* are equal.
*/
- cost_tuplesort(&group_startup_cost, &group_run_cost,
+ cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
group_tuples, width, comparison_cost, sort_mem,
limit_tuples);
@@ -2130,7 +2458,7 @@ cost_sort(Path *path, PlannerInfo *root,
Cost startup_cost;
Cost run_cost;
- cost_tuplesort(&startup_cost, &run_cost,
+ cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
tuples, width,
comparison_cost, sort_mem,
limit_tuples);
@@ -2228,7 +2556,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append_ext(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2296,7 +2624,7 @@ cost_append(AppendPath *apath)
* any child.
*/
cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
+ root,
pathkeys,
subpath->total_cost,
subpath->rows,
@@ -3703,8 +4031,9 @@ initial_cost_mergejoin(PlannerInfo *root, JoinCostWorkspace *workspace,
/* CPU costs left for later */
/* Public result fields */
- workspace->startup_cost = startup_cost;
- workspace->total_cost = startup_cost + run_cost + inner_run_cost;
+ workspace->startup_cost = startup_cost + outer_path->total_cost/outer_rows +
+ inner_path->total_cost/inner_rows;
+ workspace->total_cost = workspace->startup_cost + run_cost + inner_run_cost;
/* Save private data for final_cost_mergejoin */
workspace->run_cost = run_cost;
workspace->inner_run_cost = inner_run_cost;
@@ -4648,6 +4977,7 @@ cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root)
context.root = root;
context.total.startup = 0;
context.total.per_tuple = 0;
+ context.calccoalesce = true;
/* We don't charge any cost for the implicit ANDing at top level ... */
@@ -4673,6 +5003,22 @@ cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root)
context.root = root;
context.total.startup = 0;
context.total.per_tuple = 0;
+ context.calccoalesce = true;
+
+ cost_qual_eval_walker(qual, &context);
+
+ *cost = context.total;
+}
+
+void
+cost_qual_eval_node_index(QualCost *cost, Node *qual, PlannerInfo *root)
+{
+ cost_qual_eval_context context;
+
+ context.root = root;
+ context.total.startup = 0;
+ context.total.per_tuple = 0;
+ context.calccoalesce = false;
cost_qual_eval_walker(qual, &context);
@@ -4702,6 +5048,7 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
locContext.root = context->root;
locContext.total.startup = 0;
locContext.total.per_tuple = 0;
+ locContext.calccoalesce = context->calccoalesce;
/*
* For an OR clause, recurse into the marked-up tree so that we
@@ -4943,6 +5290,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
*/
return false;
}
+ else if (IsA(node, CoalesceExpr) && context->calccoalesce)
+ {
+ context->total.per_tuple += cpu_operator_cost *
+ list_length(((CoalesceExpr *) node)->args);
+ }
/* recurse into children */
return expression_tree_walker(node, cost_qual_eval_walker,
@@ -5255,6 +5607,13 @@ set_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel)
rel->rows = clamp_row_est(nrows);
+ if (isnan(nrows) || nrows > MAXIMUM_ROWCOUNT)
+ rel->rowsUnclamped = rel->rows;
+ else if (nrows < 0.001)
+ rel->rowsUnclamped = 0.001;
+ else
+ rel->rowsUnclamped = nrows;
+
cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
set_rel_width(root, rel);
@@ -5405,6 +5764,7 @@ calc_joinrel_size_estimate(PlannerInfo *root,
Selectivity jselec;
Selectivity pselec;
double nrows;
+ bool apply_righthand = false;
/*
* Compute joinclause selectivity. Note that we are only considering
@@ -5443,9 +5803,11 @@ calc_joinrel_size_estimate(PlannerInfo *root,
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
- if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
+ if (RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids)) {
pushedquals = lappend(pushedquals, rinfo);
- else
+ apply_righthand |= bms_overlap(rinfo->clause_relids,
+ sjinfo->min_righthand);
+ } else
joinquals = lappend(joinquals, rinfo);
}
@@ -5497,6 +5859,8 @@ calc_joinrel_size_estimate(PlannerInfo *root,
nrows = outer_rows * inner_rows * fkselec * jselec;
if (nrows < outer_rows)
nrows = outer_rows;
+ if (apply_righthand && inner_rows < outer_rows)
+ pselec *= inner_rows / outer_rows;
nrows *= pselec;
break;
case JOIN_FULL:
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 503eb0f3c94..7bf9e772a19 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -651,7 +651,18 @@ get_eclass_for_sort_expr(PlannerInfo *root,
if (opcintype == cur_em->em_datatype &&
equal(expr, cur_em->em_expr))
- return cur_ec; /* Match! */
+ {
+ /*
+ * Match!
+ *
+ * Copy the sortref if it wasn't set yet. That may happen if
+ * the ec was constructed from WHERE clause, i.e. it doesn't
+ * have a target reference at all.
+ */
+ if (cur_ec->ec_sortref == 0 && sortref > 0)
+ cur_ec->ec_sortref = sortref;
+ return cur_ec;
+ }
}
}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index c0fcc7d78df..e0a3c1be41e 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -109,8 +109,6 @@ static List *build_index_paths(PlannerInfo *root, RelOptInfo *rel,
bool *skip_nonnative_saop);
static List *build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *other_clauses);
-static List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
- List *clauses, List *other_clauses);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
List *paths);
static int path_usage_comparator(const void *a, const void *b);
@@ -1176,7 +1174,7 @@ build_paths_for_OR(PlannerInfo *root, RelOptInfo *rel,
* for the purpose of generating indexquals, but are not to be searched for
* ORs. (See build_paths_for_OR() for motivation.)
*/
-static List *
+List *
generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *other_clauses)
{
@@ -3214,7 +3212,6 @@ match_clause_to_ordering_op(IndexOptInfo *index,
return clause;
}
-
/****************************************************************************
* ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ----
****************************************************************************/
@@ -3440,6 +3437,22 @@ bool
relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist)
+{
+ return relation_has_unique_index_ext(root, rel, restrictlist,
+ exprlist, oprlist, NULL);
+}
+
+/*
+ * relation_has_unique_index_ext
+ * Same as relation_has_unique_index_for(), but supports extra_clauses
+ * parameter. If extra_clauses isn't NULL, return baserestrictinfo clauses
+ * which were used to derive uniqueness.
+ */
+bool
+relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel,
+ List *restrictlist,
+ List *exprlist, List *oprlist,
+ List **extra_clauses)
{
ListCell *ic;
@@ -3495,6 +3508,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
{
IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic);
int c;
+ List *exprs = NIL;
/*
* If the index is not unique, or not immediately enforced, or if it's
@@ -3546,6 +3560,24 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
if (match_index_to_operand(rexpr, c, ind))
{
matched = true; /* column is unique */
+
+ if (bms_membership(rinfo->clause_relids) == BMS_SINGLETON)
+ {
+ MemoryContext oldMemCtx =
+ MemoryContextSwitchTo(root->planner_cxt);
+
+ /*
+ * Collect the filter clause in a list, so that the caller
+ * can tell whether uniqueness was derived from more than
+ * just the join clauses.
+ */
+ Assert(bms_is_empty(rinfo->left_relids) ||
+ bms_is_empty(rinfo->right_relids));
+ if (extra_clauses)
+ exprs = lappend(exprs, rinfo);
+ MemoryContextSwitchTo(oldMemCtx);
+ }
+
break;
}
}
@@ -3588,7 +3620,11 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
/* Matched all key columns of this index? */
if (c == ind->nkeycolumns)
+ {
+ if (extra_clauses)
+ *extra_clauses = exprs;
return true;
+ }
}
return false;
diff --git a/src/backend/optimizer/path/meson.build b/src/backend/optimizer/path/meson.build
index b09fbe44669..1ca7de35c53 100644
--- a/src/backend/optimizer/path/meson.build
+++ b/src/backend/optimizer/path/meson.build
@@ -2,6 +2,7 @@
backend_sources += files(
'allpaths.c',
+ 'appendorpath.c',
'clausesel.c',
'costsize.c',
'equivclass.c',
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 416fc4e240b..cf7dd31a2e1 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -17,15 +17,20 @@
*/
#include "postgres.h"
+#include <float.h>
+
+#include "miscadmin.h"
#include "access/stratnum.h"
#include "catalog/pg_opfamily.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
+#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
/* Consider reordering of GROUP BY keys? */
bool enable_group_by_reordering = true;
@@ -364,7 +369,7 @@ pathkeys_contained_in(List *keys1, List *keys2)
*
* Returns the number of GROUP BY keys with a matching pathkey.
*/
-static int
+int
group_keys_reorder_by_pathkeys(List *pathkeys, List **group_pathkeys,
List **group_clauses,
int num_groupby_pathkeys)
@@ -2133,7 +2138,7 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
* ordering. Thus we return 0, if no valuable keys are found, or the number
* of leading keys shared by the list and the requested ordering..
*/
-static int
+int
pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
{
int n_common_pathkeys;
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index c3fd4a81f8a..d4bfa1abcd5 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -22,6 +22,7 @@
*/
#include "postgres.h"
+#include "catalog/pg_class.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/joininfo.h"
#include "optimizer/optimizer.h"
@@ -31,10 +32,31 @@
#include "optimizer/restrictinfo.h"
#include "utils/lsyscache.h"
+/*
+ * The context for replace_varno_walker() containing source and target relids.
+ */
+typedef struct
+{
+ int from; /* source relid: the one to be replaced */
+ int to; /* target relid: the one substituted for it */
+} ReplaceVarnoContext;
+
+/*
+ * The struct containing self-join candidate. Used to find duplicate reloids.
+ */
+typedef struct
+{
+ int relid; /* presumably the candidate's range-table index -- TODO confirm */
+ Oid reloid; /* the OID that is checked for duplicates among candidates */
+} SelfJoinCandidate;
+
+bool enable_self_join_removal;
+
/* local functions */
static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo);
-static void remove_rel_from_query(PlannerInfo *root, int relid,
- SpecialJoinInfo *sjinfo);
+
+static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+ SpecialJoinInfo *sjinfo);
static void remove_rel_from_restrictinfo(RestrictInfo *rinfo,
int relid, int ojrelid);
static void remove_rel_from_eclass(EquivalenceClass *ec,
@@ -42,14 +64,20 @@ static void remove_rel_from_eclass(EquivalenceClass *ec,
static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved);
static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel);
static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
- List *clause_list);
+ List *clause_list, List **extra_clauses);
static Oid distinct_col_search(int colno, List *colnos, List *opids);
static bool is_innerrel_unique_for(PlannerInfo *root,
Relids joinrelids,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist);
+ List *restrictlist,
+ List **extra_clauses);
+static Bitmapset *replace_relid(Relids relids, int oldId, int newId);
+static void replace_varno(Node *node, int from, int to);
+static bool replace_varno_walker(Node *node, ReplaceVarnoContext *ctx);
+static int self_join_candidates_cmp(const void *a, const void *b);
+
/*
@@ -87,7 +115,7 @@ restart:
*/
innerrelid = bms_singleton_member(sjinfo->min_righthand);
- remove_rel_from_query(root, innerrelid, sjinfo);
+ remove_leftjoinrel_from_query(root, innerrelid, sjinfo);
/* We verify that exactly one reference gets removed from joinlist */
nremoved = 0;
@@ -306,7 +334,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
* Now that we have the relevant equality join clauses, try to prove the
* innerrel distinct.
*/
- if (rel_is_distinct_for(root, innerrel, clause_list))
+ if (rel_is_distinct_for(root, innerrel, clause_list, NULL))
return true;
/*
@@ -318,29 +346,24 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo)
/*
- * Remove the target relid and references to the target join from the
+ * Remove the target rel->relid and references to the target join from the
* planner's data structures, having determined that there is no need
- * to include them in the query.
+ * to include them in the query. Optionally replace them with subst if subst
+ * is non-negative.
*
- * We are not terribly thorough here. We only bother to update parts of
- * the planner's data structures that will actually be consulted later.
+ * This function updates only parts needed for both left-join removal and
+ * self-join removal.
*/
static void
-remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
+remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
+ int subst, SpecialJoinInfo *sjinfo,
+ Relids joinrelids)
{
- RelOptInfo *rel = find_base_rel(root, relid);
- int ojrelid = sjinfo->ojrelid;
- Relids joinrelids;
- Relids join_plus_commute;
- List *joininfos;
+ int relid = rel->relid;
+ int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1;
Index rti;
ListCell *l;
- /* Compute the relid set for the join we are considering */
- joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
- Assert(ojrelid != 0);
- joinrelids = bms_add_member(joinrelids, ojrelid);
-
/*
* Remove references to the rel from other baserels' attr_needed arrays.
*/
@@ -364,19 +387,22 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
attroff--)
{
otherrel->attr_needed[attroff] =
- bms_del_member(otherrel->attr_needed[attroff], relid);
+ replace_relid(otherrel->attr_needed[attroff], relid, subst);
otherrel->attr_needed[attroff] =
- bms_del_member(otherrel->attr_needed[attroff], ojrelid);
+ replace_relid(otherrel->attr_needed[attroff], ojrelid, subst);
}
+
+ /* Update lateral references. */
+ replace_varno((Node *) otherrel->lateral_vars, relid, subst);
}
/*
* Update all_baserels and related relid sets.
*/
- root->all_baserels = bms_del_member(root->all_baserels, relid);
- root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid);
- root->all_query_rels = bms_del_member(root->all_query_rels, relid);
- root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid);
+ root->all_baserels = replace_relid(root->all_baserels, relid, subst);
+ root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst);
+ root->all_query_rels = replace_relid(root->all_query_rels, relid, subst);
+ root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst);
/*
* Likewise remove references from SpecialJoinInfo data structures.
@@ -401,19 +427,21 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand);
sjinf->syn_righthand = bms_copy(sjinf->syn_righthand);
/* Now remove relid and ojrelid bits from the sets: */
- sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid);
- sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid);
- sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid);
- sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid);
- sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid);
- sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid);
- sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid);
- sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid);
+ sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst);
+ sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst);
+ sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst);
+ sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst);
+ sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst);
+ sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst);
+ sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst);
+ sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst);
/* relid cannot appear in these fields, but ojrelid can: */
- sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid);
- sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid);
- sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid);
- sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid);
+ sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst);
+ sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst);
+ sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst);
+ sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst);
+
+ replace_varno((Node *) sjinf->semi_rhs_exprs, relid, subst);
}
/*
@@ -447,19 +475,50 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo)
{
PlaceHolderVar *phv = phinfo->ph_var;
- phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
- phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid);
+ phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst);
+ phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst);
Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */
- phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
- phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid);
+ phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst);
+ phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst);
/* ph_needed might or might not become empty */
- phv->phrels = bms_del_member(phv->phrels, relid);
- phv->phrels = bms_del_member(phv->phrels, ojrelid);
+ phv->phrels = replace_relid(phv->phrels, relid, subst);
+ phv->phrels = replace_relid(phv->phrels, ojrelid, subst);
Assert(!bms_is_empty(phv->phrels));
+ replace_varno((Node *) phv->phexpr, relid, subst);
Assert(phv->phnullingrels == NULL); /* no need to adjust */
}
}
+ if (root->parse->resultRelation == relid)
+ root->parse->resultRelation = subst;
+}
+
+/*
+ * Remove the target relid and references to the target join from the
+ * planner's data structures, having determined that there is no need
+ * to include them in the query.
+ *
+ * We are not terribly thorough here. We only bother to update parts of
+ * the planner's data structures that will actually be consulted later.
+ */
+static void
+remove_leftjoinrel_from_query(PlannerInfo *root, int relid,
+ SpecialJoinInfo *sjinfo)
+{
+ List *joininfos;
+ int ojrelid = sjinfo->ojrelid;
+ RelOptInfo *rel = find_base_rel(root, relid);
+ Relids join_plus_commute;
+ Relids joinrelids;
+ ListCell *l;
+
+ /* Compute the relid set for the join we are considering */
+ joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
+ Assert(ojrelid != 0);
+ joinrelids = bms_add_member(joinrelids, ojrelid);
+
+ remove_rel_from_query(root, rel, -1, sjinfo, joinrelids);
+
/*
* Remove any joinquals referencing the rel from the joininfo lists.
*
@@ -856,9 +915,14 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel)
* Note that the passed-in clause_list may be destructively modified! This
* is OK for current uses, because the clause_list is built by the caller for
* the sole purpose of passing to this function.
+ *
+ * extra_clauses is an output argument: it receives the baserestrictinfo
+ * clauses looking like x = const when distinctness is derived from such
+ * clauses rather than from joininfo clauses alone. Pass NULL if not needed.
*/
static bool
-rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
+rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list,
+ List **extra_clauses)
{
/*
* We could skip a couple of tests here if we assume all callers checked
@@ -871,10 +935,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list)
{
/*
* Examine the indexes to see if we have a matching unique index.
- * relation_has_unique_index_for automatically adds any usable
+ * relation_has_unique_index_ext automatically adds any usable
* restriction clauses for the rel, so we needn't do that here.
*/
- if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
+ if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL,
+ extra_clauses))
return true;
}
else if (rel->rtekind == RTE_SUBQUERY)
@@ -1188,9 +1253,30 @@ innerrel_is_unique(PlannerInfo *root,
JoinType jointype,
List *restrictlist,
bool force_cache)
+{
+ return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel,
+ jointype, restrictlist, force_cache, NULL);
+}
+
+/*
+ * innerrel_is_unique_ext
+ * Do the same as innerrel_is_unique(), but also return additional clauses
+ * from a baserestrictinfo list that were used to prove uniqueness.
+ */
+bool
+innerrel_is_unique_ext(PlannerInfo *root,
+ Relids joinrelids,
+ Relids outerrelids,
+ RelOptInfo *innerrel,
+ JoinType jointype,
+ List *restrictlist,
+ bool force_cache,
+ List **extra_clauses)
{
MemoryContext old_context;
ListCell *lc;
+ UniqueRelInfo *uniqueRelInfo;
+ List *outer_exprs = NIL;
/* Certainly can't prove uniqueness when there are no joinclauses */
if (restrictlist == NIL)
@@ -1212,10 +1298,14 @@ innerrel_is_unique(PlannerInfo *root,
*/
foreach(lc, innerrel->unique_for_rels)
{
- Relids unique_for_rels = (Relids) lfirst(lc);
+ uniqueRelInfo = (UniqueRelInfo *) lfirst(lc);
- if (bms_is_subset(unique_for_rels, outerrelids))
+ if (bms_is_subset(uniqueRelInfo->outerrelids, outerrelids))
+ {
+ if (extra_clauses)
+ *extra_clauses = uniqueRelInfo->extra_clauses;
return true; /* Success! */
+ }
}
/*
@@ -1232,7 +1322,7 @@ innerrel_is_unique(PlannerInfo *root,
/* No cached information, so try to make the proof. */
if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
- jointype, restrictlist))
+ jointype, restrictlist, &outer_exprs))
{
/*
* Cache the positive result for future probes, being sure to keep it
@@ -1245,10 +1335,15 @@ innerrel_is_unique(PlannerInfo *root,
* supersets of them anyway.
*/
old_context = MemoryContextSwitchTo(root->planner_cxt);
+ uniqueRelInfo = makeNode(UniqueRelInfo);
+ uniqueRelInfo->extra_clauses = outer_exprs;
+ uniqueRelInfo->outerrelids = bms_copy(outerrelids);
innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
- bms_copy(outerrelids));
+ uniqueRelInfo);
MemoryContextSwitchTo(old_context);
+ if (extra_clauses)
+ *extra_clauses = outer_exprs;
return true; /* Success! */
}
else
@@ -1294,7 +1389,8 @@ is_innerrel_unique_for(PlannerInfo *root,
Relids outerrelids,
RelOptInfo *innerrel,
JoinType jointype,
- List *restrictlist)
+ List *restrictlist,
+ List **extra_clauses)
{
List *clause_list = NIL;
ListCell *lc;
@@ -1324,17 +1420,1007 @@ is_innerrel_unique_for(PlannerInfo *root,
continue; /* not mergejoinable */
/*
- * Check if clause has the form "outer op inner" or "inner op outer",
- * and if so mark which side is inner.
+ * Check if the clause has the form "outer op inner" or "inner op
+ * outer", and if so mark which side is inner.
*/
if (!clause_sides_match_join(restrictinfo, outerrelids,
innerrel->relids))
continue; /* no good for these input relations */
- /* OK, add to list */
+ /* OK, add to the list */
clause_list = lappend(clause_list, restrictinfo);
}
/* Let rel_is_distinct_for() do the hard work */
- return rel_is_distinct_for(root, innerrel, clause_list);
+ return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses);
+}
+
+/*
+ * replace_varno
+ *		Retarget, in place, every reference to relid 'from' found under
+ *		'node' so that it refers to relid 'to' (i.e. replace the relation
+ *		being removed with the one being kept).
+ *
+ * A non-positive 'to' means there is nothing to substitute with, and the
+ * tree is left untouched.
+ */
+static void
+replace_varno(Node *node, int from, int to)
+{
+	if (to > 0)
+	{
+		ReplaceVarnoContext walker_ctx;
+
+		walker_ctx.from = from;
+		walker_ctx.to = to;
+		(void) replace_varno_walker(node, &walker_ctx);
+	}
+}
+
+/*
+ * Walker function for replace_varno()
+ *
+ * Rewrites the tree in place, retargeting references to ctx->from so that
+ * they point to ctx->to:
+ *
+ * - Var nodes get both varno and varnosyn overwritten.
+ * - RestrictInfo nodes are handled explicitly: clause and orclause are
+ * processed via replace_varno() and the relid sets are fixed up with
+ * replace_relid(). The walker does not descend into them any further.
+ * - Every other node type is traversed with expression_tree_walker().
+ *
+ * The walker returns false for every node it handles itself, so it never
+ * requests an early abort of the walk.
+ */
+static bool
+replace_varno_walker(Node *node, ReplaceVarnoContext *ctx)
+{
+ if (node == NULL)
+ return false;
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ if (var->varno == ctx->from)
+ {
+ var->varno = ctx->to;
+ var->varnosyn = ctx->to;
+ }
+ return false;
+ }
+ if (IsA(node, RestrictInfo))
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) node;
+ int relid = -1;
+
+ /*
+ * Remember whether required_relids and clause_relids are the very same
+ * Bitmapset (this is a pointer comparison, not bms_equal), so that the
+ * sharing can be re-established once clause_relids has been replaced.
+ */
+ bool is_req_equal =
+ (rinfo->required_relids == rinfo->clause_relids);
+
+ /* Only touch the clause itself if it actually mentions 'from'. */
+ if (bms_is_member(ctx->from, rinfo->clause_relids))
+ {
+ replace_varno((Node *) rinfo->clause, ctx->from, ctx->to);
+ replace_varno((Node *) rinfo->orclause, ctx->from, ctx->to);
+ rinfo->clause_relids = replace_relid(rinfo->clause_relids, ctx->from, ctx->to);
+ rinfo->left_relids = replace_relid(rinfo->left_relids, ctx->from, ctx->to);
+ rinfo->right_relids = replace_relid(rinfo->right_relids, ctx->from, ctx->to);
+ }
+
+ /* Keep required_relids pointing at clause_relids if it did before. */
+ if (is_req_equal)
+ rinfo->required_relids = rinfo->clause_relids;
+ else
+ rinfo->required_relids = replace_relid(rinfo->required_relids, ctx->from, ctx->to);
+
+ rinfo->outer_relids = replace_relid(rinfo->outer_relids, ctx->from, ctx->to);
+ rinfo->incompatible_relids = replace_relid(rinfo->incompatible_relids, ctx->from, ctx->to);
+
+ /*
+ * If this is a mergejoinable operator clause that now references only
+ * the kept relation, and its two operands have become equal() (i.e. it
+ * looks like "X = X"), replace the clause with "X IS NOT NULL" and clear
+ * mergeopfamilies, since the clause is no longer an equality operator.
+ */
+ if (rinfo->mergeopfamilies &&
+ bms_get_singleton_member(rinfo->clause_relids, &relid) &&
+ relid == ctx->to && IsA(rinfo->clause, OpExpr))
+ {
+ Expr *leftOp;
+ Expr *rightOp;
+
+ leftOp = (Expr *) get_leftop(rinfo->clause);
+ rightOp = (Expr *) get_rightop(rinfo->clause);
+
+ if (leftOp != NULL && equal(leftOp, rightOp))
+ {
+ NullTest *ntest = makeNode(NullTest);
+
+ ntest->arg = leftOp;
+ ntest->nulltesttype = IS_NOT_NULL;
+ ntest->argisrow = false;
+ ntest->location = -1;
+ rinfo->clause = (Expr *) ntest;
+ rinfo->mergeopfamilies = NIL;
+ }
+ Assert(rinfo->orclause == NULL);
+ }
+
+ return false;
+ }
+ return expression_tree_walker(node, replace_varno_walker, (void *) ctx);
+}
+
+/*
+ * replace_relid
+ *		Return 'relids' with member oldId replaced by newId.
+ *
+ * - If oldId is negative, or is not a member of 'relids', the input set is
+ *   returned unchanged.
+ * - If newId is negative, oldId is deleted without substitution.
+ * - Otherwise oldId is deleted and newId is added.
+ *
+ * The input set is never modified in place.  The same Bitmapset pointer can
+ * be shared between several planner structures, and bms_del_member() /
+ * bms_add_member() are allowed to scribble on, reallocate or free their
+ * argument, which would leave the other holders with a corrupted or dangling
+ * set.  Whenever a change is needed we therefore operate on a private copy;
+ * callers must always use the returned pointer.
+ */
+static Bitmapset *
+replace_relid(Relids relids, int oldId, int newId)
+{
+	Bitmapset  *result;
+
+	/* Nothing to do without a valid oldId, or if it is not in the set. */
+	if (oldId < 0 || !bms_is_member(oldId, relids))
+		return relids;
+
+	/* Leave the (possibly shared) input alone; edit a copy instead. */
+	result = bms_del_member(bms_copy(relids), oldId);
+
+	/* A negative newId requests plain deletion, without substitution. */
+	if (newId >= 0)
+		result = bms_add_member(result, newId);
+
+	return result;
+}
+
+/*
+ * Update EC members to point to the remaining relation instead of the removed
+ * one, removing duplicates.
+ *
+ * 'ec' is modified in place; 'from' is the relid of the relation being
+ * removed and 'to' is the relid of the relation that replaces it.
+ *
+ * Restriction clauses for base relations are already distributed to
+ * the respective baserestrictinfo lists (see
+ * generate_implied_equalities_for_column). The above code has already processed
+ * this list, and updated these clauses to reference the remaining
+ * relation, so we can skip them here based on their relids.
+ *
+ * Likewise, we have already processed the join clauses that join the
+ * removed relation to the remaining one.
+ *
+ * Finally, there are join clauses that join the removed relation to
+ * some third relation. We can't just delete the source clauses and
+ * regenerate them from the EC because the corresponding equality
+ * operators might be missing (see the handling of ec_broken).
+ * Therefore, we will update the references in the source clauses.
+ *
+ * Derived clauses can be generated again, so it is simpler to just
+ * delete them.
+ */
+static void
+update_eclasses(EquivalenceClass *ec, int from, int to)
+{
+ List *new_members = NIL;
+ List *new_sources = NIL;
+ ListCell *lc;
+ ListCell *lc1;
+
+ /* Rebuild the member list, retargeting members that mention 'from'. */
+ foreach(lc, ec->ec_members)
+ {
+ EquivalenceMember *em = lfirst_node(EquivalenceMember, lc);
+ bool is_redundant = false;
+
+ /* Members that do not mention 'from' are kept as they are. */
+ if (!bms_is_member(from, em->em_relids))
+ {
+ new_members = lappend(new_members, em);
+ continue;
+ }
+
+ em->em_relids = replace_relid(em->em_relids, from, to);
+ em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to);
+
+ /* We only process inner joins */
+ replace_varno((Node *) em->em_expr, from, to);
+
+ /*
+ * After retargeting, the member may duplicate one we have already kept:
+ * same relids and an equal() expression. Drop it in that case.
+ */
+ foreach(lc1, new_members)
+ {
+ EquivalenceMember *other = lfirst_node(EquivalenceMember, lc1);
+
+ if (!equal(em->em_relids, other->em_relids))
+ continue;
+
+ if (equal(em->em_expr, other->em_expr))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+
+ if (!is_redundant)
+ new_members = lappend(new_members, em);
+ }
+
+ list_free(ec->ec_members);
+ ec->ec_members = new_members;
+
+ /* Throw away the derived clauses; see the header comment. */
+ list_free(ec->ec_derives);
+ ec->ec_derives = NULL;
+
+ /* Update EC source expressions */
+ foreach(lc, ec->ec_sources)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+ bool is_redundant = false;
+
+ /* Sources that do not require 'from' are kept as they are. */
+ if (!bms_is_member(from, rinfo->required_relids))
+ {
+ new_sources = lappend(new_sources, rinfo);
+ continue;
+ }
+
+ replace_varno((Node *) rinfo, from, to);
+
+ /*
+ * After switching the clause to the remaining relation, check it for
+ * redundancy with existing ones. We don't have to check for
+ * redundancy with derived clauses, because we've just deleted them.
+ */
+ foreach(lc1, new_sources)
+ {
+ RestrictInfo *other = lfirst_node(RestrictInfo, lc1);
+
+ if (!equal(rinfo->clause_relids, other->clause_relids))
+ continue;
+
+ if (equal(rinfo->clause, other->clause))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+
+ if (!is_redundant)
+ new_sources = lappend(new_sources, rinfo);
+ }
+
+ list_free(ec->ec_sources);
+ ec->ec_sources = new_sources;
+
+ /* Finally, fix up the relid set of the class itself. */
+ ec->ec_relids = replace_relid(ec->ec_relids, from, to);
+}
+
+/*
+ * sje_walker
+ *		Walker that retargets Var nodes from ctx->from to ctx->to.
+ *
+ * For each Var whose varno equals ctx->from, both varno and varnosyn are
+ * overwritten with ctx->to.  All other nodes are descended into through
+ * expression_tree_walker().
+ */
+static bool
+sje_walker(Node *node, ReplaceVarnoContext *ctx)
+{
+	Var		   *v;
+
+	if (node == NULL)
+		return false;
+
+	/* Anything that is not a Var is simply traversed. */
+	if (!IsA(node, Var))
+		return expression_tree_walker(node, sje_walker, (void *) ctx);
+
+	v = (Var *) node;
+	if (v->varno == ctx->from)
+	{
+		v->varno = ctx->to;
+		v->varnosyn = ctx->to;
+	}
+
+	/* Nothing to recurse into below a Var. */
+	return false;
+}
+
+/*
+ * Remove a relation after we have proven that it participates only in an
+ * unneeded unique self join.
+ *
+ * Replace any links in planner info structures.
+ *
+ * Transfer join and restriction clauses from the removed relation to the
+ * remaining one. We change the Vars of the clause to point to the
+ * remaining relation instead of the removed one. The clauses that require
+ * a subset of joinrelids become restriction clauses of the remaining
+ * relation, and others remain join clauses. We append them to
+ * baserestrictinfo and joininfo respectively, trying not to introduce
+ * duplicates.
+ *
+ * We also have to process the 'restrictlist' list here, because it
+ * contains EC-derived join clauses which must become filter clauses. It
+ * is not enough to just correct the ECs because the EC-derived
+ * restrictions are generated before join removal (see
+ * generate_base_implied_equalities).
+ *
+ * Arguments:
+ * 'kmark' / 'rmark' are the row marks of the kept and the removed relation
+ * respectively; either may be NULL if the relation has none.
+ * 'toKeep' / 'toRemove' are the RelOptInfos of the kept and the removed
+ * relation. toRemove is pfree'd before returning and must not be used by
+ * the caller afterwards.
+ * 'restrictlist' holds the join clauses between the two relations; it is
+ * concatenated onto toRemove->baserestrictinfo.
+ */
+static void
+remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark,
+ RelOptInfo *toKeep, RelOptInfo *toRemove,
+ List *restrictlist)
+{
+ List *joininfos;
+ ListCell *lc;
+ int i;
+ List *jinfo_candidates = NIL;
+ List *binfo_candidates = NIL;
+ ReplaceVarnoContext ctx = {.from = toRemove->relid,.to = toKeep->relid};
+
+ Assert(toKeep->relid != -1);
+
+ /*
+ * Replace the index of the removing table with the keeping one. The
+ * technique of removing/distributing restrictinfo is used here to attach
+ * just appeared (for keeping relation) join clauses and avoid adding
+ * duplicates of those that already exist in the joininfo list.
+ *
+ * We iterate over a copy of the list because
+ * remove_join_clause_from_rels() is called on each clause while looping.
+ */
+ joininfos = list_copy(toRemove->joininfo);
+ foreach(lc, joininfos)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+ remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
+ replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
+
+ /* Classify by how many rels the retargeted clause still requires. */
+ if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+ jinfo_candidates = lappend(jinfo_candidates, rinfo);
+ else
+ binfo_candidates = lappend(binfo_candidates, rinfo);
+ }
+
+ /*
+ * Concatenate restrictlist to the list of base restrictions of the
+ * removing table just to simplify the replacement procedure: all of them
+ * weren't connected to any keeping relations and need to be added to some
+ * rels.
+ */
+ toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo,
+ restrictlist);
+ foreach(lc, toRemove->baserestrictinfo)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+
+ replace_varno((Node *) rinfo, toRemove->relid, toKeep->relid);
+
+ if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE)
+ jinfo_candidates = lappend(jinfo_candidates, rinfo);
+ else
+ binfo_candidates = lappend(binfo_candidates, rinfo);
+ }
+
+ /*
+ * Now, add all non-redundant clauses to the keeping relation.
+ * Contradictory operation. On the one side, we reduce the length of
+ * restrict lists that can impact planning or executing time.
+ * Additionally, we improve the accuracy of cardinality estimation. On the
+ * other side, it is one more place that can make planning time much
+ * longer in specific cases. It would have been better to avoid calling
+ * the equal() function here, but it's the only way to detect duplicated
+ * inequality expressions.
+ *
+ * A candidate is considered redundant with an existing clause that has the
+ * same clause_relids if it is the same RestrictInfo, comes from the same
+ * (non-NULL) parent EC, or has an equal() clause expression.
+ */
+ foreach(lc, binfo_candidates)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+ ListCell *olc = NULL;
+ bool is_redundant = false;
+
+ Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
+
+ foreach(olc, toKeep->baserestrictinfo)
+ {
+ RestrictInfo *src = lfirst_node(RestrictInfo, olc);
+
+ if (!bms_equal(src->clause_relids, rinfo->clause_relids))
+ /* Const and non-const expressions can't be equal */
+ continue;
+
+ if (src == rinfo ||
+ (rinfo->parent_ec != NULL
+ && src->parent_ec == rinfo->parent_ec)
+ || equal(rinfo->clause, src->clause))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+ if (!is_redundant)
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ /* Same procedure for the join-clause candidates, against joininfo. */
+ foreach(lc, jinfo_candidates)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
+ ListCell *olc = NULL;
+ bool is_redundant = false;
+
+ Assert(!bms_is_member(toRemove->relid, rinfo->required_relids));
+
+ foreach(olc, toKeep->joininfo)
+ {
+ RestrictInfo *src = lfirst_node(RestrictInfo, olc);
+
+ if (!bms_equal(src->clause_relids, rinfo->clause_relids))
+ /* Can't compare trivially different clauses */
+ continue;
+
+ if (src == rinfo ||
+ (rinfo->parent_ec != NULL
+ && src->parent_ec == rinfo->parent_ec)
+ || equal(rinfo->clause, src->clause))
+ {
+ is_redundant = true;
+ break;
+ }
+ }
+ if (!is_redundant)
+ distribute_restrictinfo_to_rels(root, rinfo);
+ }
+ list_free(binfo_candidates);
+ list_free(jinfo_candidates);
+
+ /*
+ * Update the equivalence classes that mention the removed relation so
+ * that they reference the kept one: the varno of the removed table is
+ * replaced in the members and sources lists. Also, remove duplicated
+ * elements if this replacement procedure created them.
+ */
+ i = -1;
+ while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0)
+ {
+ EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i);
+
+ update_eclasses(ec, toRemove->relid, toKeep->relid);
+ toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i);
+ }
+
+ /*
+ * Transfer the targetlist and attr_needed flags.
+ */
+
+ foreach(lc, toRemove->reltarget->exprs)
+ {
+ Node *node = lfirst(lc);
+
+ replace_varno(node, toRemove->relid, toKeep->relid);
+ if (!list_member(toKeep->reltarget->exprs, node))
+ toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node);
+ }
+
+ /*
+ * NOTE(review): toKeep's attribute range is used to index
+ * toRemove->attr_needed as well; presumably this is valid because both
+ * RelOptInfos describe the same underlying relation -- confirm.
+ */
+ for (i = toKeep->min_attr; i <= toKeep->max_attr; i++)
+ {
+ int attno = i - toKeep->min_attr;
+
+ toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno],
+ toRemove->relid, toKeep->relid);
+ toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno],
+ toRemove->attr_needed[attno]);
+ }
+
+ /*
+ * If the removed relation has a row mark, transfer it to the remaining
+ * one.
+ *
+ * If both rels have row marks, just keep the one corresponding to the
+ * remaining relation, because we verified earlier that they have the same
+ * strength.
+ */
+ if (rmark)
+ {
+ if (kmark)
+ {
+ Assert(kmark->markType == rmark->markType);
+
+ root->rowMarks = list_delete_ptr(root->rowMarks, rmark);
+ }
+ else
+ {
+ /* Shouldn't have inheritance children here. */
+ Assert(rmark->rti == rmark->prti);
+
+ rmark->rti = rmark->prti = toKeep->relid;
+ }
+ }
+
+ /* Replace varno in all the query structures */
+ query_tree_walker(root->parse, sje_walker, &ctx, QTW_EXAMINE_SORTGROUP);
+
+ /* Replace links in the planner info */
+ remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL);
+
+ /* At last, replace varno in root targetlist and HAVING clause */
+ replace_varno((Node *) root->processed_tlist,
+ toRemove->relid, toKeep->relid);
+ replace_varno((Node *) root->processed_groupClause,
+ toRemove->relid, toKeep->relid);
+
+ /*
+ * There may be references to the rel in root->fkey_list, but if so,
+ * match_foreign_keys_to_quals() will get rid of them.
+ */
+
+ /*
+ * Finally, remove the rel from the baserel array to prevent it from being
+ * referenced again. (We can't do this earlier because
+ * remove_join_clause_from_rels will touch it.)
+ */
+ root->simple_rel_array[toRemove->relid] = NULL;
+
+ /* And nuke the RelOptInfo, just in case there's another access path */
+ pfree(toRemove);
+}
+
+/*
+ * split_selfjoin_quals
+ *		Partition 'joinquals' into two output lists.
+ *
+ * A qual goes to *selfjoinquals when it is a mergejoinable binary operator
+ * clause over exactly two relations, one per side, whose operands are equal()
+ * once any top-level RelabelType is stripped and the right-hand side has been
+ * retargeted to the left-hand relation (e.g. "a.x = b.x").  Every other qual
+ * goes to *otherjoinquals.
+ *
+ * 'joinquals' must only contain quals for a RTE_RELATION being joined to
+ * itself.  The 'root', 'from' and 'to' arguments are not used by the current
+ * implementation.
+ */
+static void
+split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals,
+					 List **otherjoinquals, int from, int to)
+{
+	List	   *self_quals = NIL;
+	List	   *other_quals = NIL;
+	ListCell   *cell;
+
+	foreach(cell, joinquals)
+	{
+		RestrictInfo *rinfo = lfirst_node(RestrictInfo, cell);
+		bool		is_selfjoin_qual = false;
+
+		/* In general, clause looks like F(arg1) = G(arg2) */
+		if (rinfo->mergeopfamilies != NIL &&
+			bms_num_members(rinfo->clause_relids) == 2 &&
+			bms_membership(rinfo->left_relids) == BMS_SINGLETON &&
+			bms_membership(rinfo->right_relids) == BMS_SINGLETON &&
+			IsA(rinfo->clause, OpExpr) &&
+			list_length(((OpExpr *) rinfo->clause)->args) == 2)
+		{
+			Node	   *lhs = get_leftop(rinfo->clause);
+			Node	   *rhs = copyObject(get_rightop(rinfo->clause));
+
+			/* Look through a binary-compatible relabeling on either side. */
+			if (lhs && IsA(lhs, RelabelType))
+				lhs = (Node *) ((RelabelType *) lhs)->arg;
+			if (rhs && IsA(rhs, RelabelType))
+				rhs = (Node *) ((RelabelType *) rhs)->arg;
+
+			/*
+			 * Quite an expensive operation, narrowing the use case. For
+			 * example, when we have cast of the same var to different (but
+			 * compatible) types.  Only the private copy of the right-hand
+			 * side is modified here.
+			 */
+			replace_varno(rhs, bms_singleton_member(rinfo->right_relids),
+						  bms_singleton_member(rinfo->left_relids));
+
+			is_selfjoin_qual = equal(lhs, rhs);
+		}
+
+		if (is_selfjoin_qual)
+			self_quals = lappend(self_quals, rinfo);
+		else
+			other_quals = lappend(other_quals, rinfo);
+	}
+
+	*selfjoinquals = self_quals;
+	*otherjoinquals = other_quals;
+}
+
+/*
+ * match_unique_clauses
+ *		Check that every clause in 'uclauses' has a twin among the restriction
+ *		clauses of 'outer'.
+ *
+ * This covers the case when uniqueness is at least partly derived from a
+ * baserestrictinfo clause.  In that case the self join can return only one
+ * row (if such clauses on both sides of the join are equal) or nothing (if
+ * they differ), so the clauses must match on both sides.
+ *
+ * Each clause in 'uclauses' references the relation with index 'relid'.  A
+ * copy of it is retargeted to outer->relid and then compared, side by side,
+ * with the mergejoinable clauses in outer->baserestrictinfo.
+ *
+ * Returns true if all clauses were matched (trivially so for an empty list),
+ * and false as soon as one clause has no counterpart.
+ */
+static bool
+match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses,
+					 Index relid)
+{
+	ListCell   *ucell;
+
+	foreach(ucell, uclauses)
+	{
+		RestrictInfo *urinfo = lfirst_node(RestrictInfo, ucell);
+		Expr	   *uclause;
+		Node	   *u_relside;	/* operand that references the relation */
+		Node	   *u_exprside; /* the other operand */
+		ListCell   *bcell;
+		bool		found = false;
+
+		Assert(outer->relid > 0 && relid > 0);
+
+		/* Only filters like f(R.x1,...,R.xN) == expr we should consider. */
+		Assert(bms_is_empty(urinfo->left_relids) ^
+			   bms_is_empty(urinfo->right_relids));
+
+		/* Work on a copy so the original clause is left untouched. */
+		uclause = (Expr *) copyObject(urinfo->clause);
+		replace_varno((Node *) uclause, relid, outer->relid);
+
+		if (bms_is_empty(urinfo->left_relids))
+		{
+			u_relside = get_rightop(uclause);
+			u_exprside = get_leftop(uclause);
+		}
+		else
+		{
+			u_relside = get_leftop(uclause);
+			u_exprside = get_rightop(uclause);
+		}
+
+		/*
+		 * Compare these left and right sides with the corresponding sides of
+		 * the outer's filters. If no one is detected - return immediately.
+		 */
+		foreach(bcell, outer->baserestrictinfo)
+		{
+			RestrictInfo *brinfo = lfirst_node(RestrictInfo, bcell);
+			Node	   *b_relside;
+			Node	   *b_exprside;
+
+			/* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */
+			if (brinfo->mergeopfamilies == NIL)
+				continue;
+
+			Assert(is_opclause(brinfo->clause));
+
+			if (bms_is_empty(brinfo->left_relids))
+			{
+				b_relside = get_rightop(brinfo->clause);
+				b_exprside = get_leftop(brinfo->clause);
+			}
+			else
+			{
+				b_relside = get_leftop(brinfo->clause);
+				b_exprside = get_rightop(brinfo->clause);
+			}
+
+			if (equal(u_relside, b_relside) && equal(u_exprside, b_exprside))
+			{
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Find and remove unique self joins in a group of base relations that have
+ * the same Oid.
+ *
+ * 'relids' is the set of range-table indexes forming the group. Every pair
+ * (r, k) with r < k is examined; when the join between them is proven to be
+ * a removable self join, relation r is removed in favour of relation k.
+ *
+ * Returns a set of relids that were removed.
+ */
+static Relids
+remove_self_joins_one_group(PlannerInfo *root, Relids relids)
+{
+ Relids result = NULL;
+ int k; /* Index of kept relation */
+ int r = -1; /* Index of removed relation */
+
+ while ((r = bms_next_member(relids, r)) > 0)
+ {
+ RelOptInfo *inner = root->simple_rel_array[r];
+
+ k = r;
+
+ /* Try every later member of the group as the relation to keep. */
+ while ((k = bms_next_member(relids, k)) > 0)
+ {
+ Relids joinrelids = NULL;
+ RelOptInfo *outer = root->simple_rel_array[k];
+ List *restrictlist;
+ List *selfjoinquals;
+ List *otherjoinquals;
+ ListCell *lc;
+ bool jinfo_check = true;
+ PlanRowMark *omark = NULL;
+ PlanRowMark *imark = NULL;
+ List *uclauses = NIL;
+
+ /* A sanity check: the relations have the same Oid. */
+ Assert(root->simple_rte_array[k]->relid ==
+ root->simple_rte_array[r]->relid);
+
+ /*
+ * It is impossible to eliminate the join of two relations if they
+ * are subject to different join-order restrictions, i.e. if some
+ * special join has exactly one of them on a given syntactic side.
+ * Otherwise the planner would be unable to find any correct query
+ * plan.
+ */
+ foreach(lc, root->join_info_list)
+ {
+ SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc);
+
+ if ((bms_is_member(k, info->syn_lefthand) ^
+ bms_is_member(r, info->syn_lefthand)) ||
+ (bms_is_member(k, info->syn_righthand) ^
+ bms_is_member(r, info->syn_righthand)))
+ {
+ jinfo_check = false;
+ break;
+ }
+ }
+ if (!jinfo_check)
+ continue;
+
+ /*
+ * Check Row Marks equivalence. We can't remove the join if the
+ * relations have row marks of different strength (e.g. one is
+ * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for
+ * EvalPlanQual rechecking).
+ */
+ foreach(lc, root->rowMarks)
+ {
+ PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc);
+
+ if (rowMark->rti == r)
+ {
+ Assert(imark == NULL);
+ imark = rowMark;
+ }
+ else if (rowMark->rti == k)
+ {
+ Assert(omark == NULL);
+ omark = rowMark;
+ }
+
+ if (omark && imark)
+ break;
+ }
+ if (omark && imark && omark->markType != imark->markType)
+ continue;
+
+ /*
+ * We only deal with base rels here, so their relids bitset
+ * contains only one member -- their relid.
+ */
+ joinrelids = bms_add_member(joinrelids, r);
+ joinrelids = bms_add_member(joinrelids, k);
+
+ /*
+ * To be safe, do not remove tables that participate in complicated
+ * PlaceHolderVars. The loop below is left with 'break' when such a
+ * placeholder is found, in which case 'lc' is still non-NULL.
+ */
+ foreach(lc, root->placeholder_list)
+ {
+ PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
+
+ /* there isn't any other place to eval PHV */
+ if (bms_is_subset(phinfo->ph_eval_at, joinrelids) ||
+ bms_is_subset(phinfo->ph_needed, joinrelids) ||
+ bms_is_member(r, phinfo->ph_lateral))
+ break;
+ }
+ if (lc)
+ continue;
+
+ /*
+ * At this stage, joininfo lists of inner and outer can contain
+ * only clauses, required for a superior outer join that can't
+ * influence this optimization. So, we can avoid to call the
+ * build_joinrel_restrictlist() routine.
+ */
+ restrictlist = generate_join_implied_equalities(root, joinrelids,
+ inner->relids,
+ outer, NULL);
+
+ /*
+ * Process restrictlist to separate the self join quals out of the
+ * other quals. e.g x = x goes to selfjoinquals and a = b to
+ * otherjoinquals.
+ */
+ split_selfjoin_quals(root, restrictlist, &selfjoinquals,
+ &otherjoinquals, inner->relid, outer->relid);
+
+ /*
+ * To enable SJE for the only degenerate case without any self
+ * join clauses at all, add baserestrictinfo to this list. The
+ * degenerate case works only if both sides have the same clause.
+ * So doesn't matter which side to add.
+ */
+ selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo);
+
+ /*
+ * Determine if the inner table can duplicate outer rows. We must
+ * bypass the unique rel cache here since we're possibly using a
+ * subset of join quals. We can use 'force_cache' == true when all
+ * join quals are self-join quals. Otherwise, we could end up
+ * putting false negatives in the cache.
+ */
+ if (!innerrel_is_unique_ext(root, joinrelids, inner->relids,
+ outer, JOIN_INNER, selfjoinquals,
+ list_length(otherjoinquals) == 0,
+ &uclauses))
+ continue;
+
+ /*
+ * We have proven that for both relations, the same unique index
+ * guarantees that there is at most one row where columns equal
+ * given values. These values must be the same for both relations,
+ * or else we won't match the same row on each side of the join.
+ */
+ if (!match_unique_clauses(root, inner, uclauses, outer->relid))
+ continue;
+
+ /*
+ * We can remove either relation, so remove the inner one in order
+ * to simplify this loop.
+ */
+ remove_self_join_rel(root, omark, imark, outer, inner, restrictlist);
+
+ result = bms_add_member(result, r);
+
+ /* We have removed the inner relation (r), try the next one. */
+ break;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Gather indexes of base relations from the joinlist (recursing into
+ * sub-joinlists) and try to eliminate self joins among them. Returns
+ * 'toRemove' extended with the indexes of the removed relations.
+ */
+static Relids
+remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove)
+{
+ ListCell *jl;
+ Relids relids = NULL;
+ SelfJoinCandidate *candidates = NULL;
+ int i;
+ int j;
+ int numRels;
+
+ /* Collect indexes of base relations of the join tree */
+ foreach(jl, joinlist)
+ {
+ Node *jlnode = (Node *) lfirst(jl);
+
+ if (IsA(jlnode, RangeTblRef))
+ {
+ RangeTblRef *ref = (RangeTblRef *) jlnode;
+ RangeTblEntry *rte = root->simple_rte_array[ref->rtindex];
+
+ /*
+ * We only care about base relations from which we select
+ * something.
+ */
+ if (rte->rtekind == RTE_RELATION &&
+ rte->relkind == RELKIND_RELATION &&
+ root->simple_rel_array[ref->rtindex] != NULL)
+ {
+ Assert(!bms_is_member(ref->rtindex, relids));
+ relids = bms_add_member(relids, ref->rtindex);
+ }
+ }
+ else if (IsA(jlnode, List))
+ /* Recursively go inside the sub-joinlist */
+ toRemove = remove_self_joins_recurse(root, (List *) jlnode,
+ toRemove);
+ else
+ elog(ERROR, "unrecognized joinlist node type: %d",
+ (int) nodeTag(jlnode));
+ }
+
+ numRels = bms_num_members(relids);
+
+ /* Need at least two relations for the join */
+ if (numRels < 2)
+ return toRemove;
+
+ /*
+ * In order to find relations with the same oid we first build an array of
+ * candidates and then sort it by oid.
+ */
+ candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) *
+ numRels);
+ i = -1;
+ j = 0;
+ while ((i = bms_next_member(relids, i)) >= 0)
+ {
+ candidates[j].relid = i;
+ candidates[j].reloid = root->simple_rte_array[i]->relid;
+ j++;
+ }
+
+ pg_qsort(candidates, numRels, sizeof(SelfJoinCandidate),
+ self_join_candidates_cmp);
+
+ /*
+ * Walk the sorted array; candidates[i..j-1] is a run of entries sharing
+ * one oid. For each run of two or more, launch the routine that detects
+ * self-joins in the group and removes excessive range table entries.
+ *
+ * Every run is deleted from 'relids' once handled, so the set shrinks on
+ * each iteration and must be empty when the loop finishes.
+ */
+ i = 0;
+ for (j = 1; j < numRels + 1; j++)
+ {
+ if (j == numRels || candidates[j].reloid != candidates[i].reloid)
+ {
+ if (j - i >= 2)
+ {
+ /* Create a group of relation indexes with the same oid */
+ Relids group = NULL;
+ Relids removed;
+
+ while (i < j)
+ {
+ group = bms_add_member(group, candidates[i].relid);
+ i++;
+ }
+
+ relids = bms_del_members(relids, group);
+
+ /*
+ * Try to remove self-joins from a group of identical entries.
+ * Make the next attempt iteratively - if something is deleted
+ * from a group, changes in clauses and equivalence classes
+ * can give us a chance to find more candidates.
+ */
+ do
+ {
+ Assert(!bms_overlap(group, toRemove));
+ removed = remove_self_joins_one_group(root, group);
+ toRemove = bms_add_members(toRemove, removed);
+ group = bms_del_members(group, removed);
+ } while (!bms_is_empty(removed) &&
+ bms_membership(group) == BMS_MULTIPLE);
+ bms_free(removed);
+ bms_free(group);
+ }
+ else
+ {
+ /* Single relation, just remove it from the set */
+ relids = bms_del_member(relids, candidates[i].relid);
+ i = j;
+ }
+ }
+ }
+
+ Assert(bms_is_empty(relids));
+
+ return toRemove;
+}
+
+/*
+ * qsort comparator: order self-join candidates by relation oid, ascending.
+ */
+static int
+self_join_candidates_cmp(const void *a, const void *b)
+{
+	const SelfJoinCandidate *lhs = a;
+	const SelfJoinCandidate *rhs = b;
+
+	/* Compare explicitly; yields exactly -1, 0 or 1 */
+	if (lhs->reloid == rhs->reloid)
+		return 0;
+	return (lhs->reloid < rhs->reloid) ? -1 : 1;
+}
+
+/*
+ * remove_useless_self_joins
+ *		Find and remove useless self joins.
+ *
+ * A self join is a join of a relation to itself. When the join clauses prove
+ * that each tuple on one side can only match the same physical row (or
+ * nothing) on the other side, the join is redundant and is eliminated.
+ * Suitable join clauses are assumed to have the form X = X, and can be
+ * replaced with NOT NULL clauses.
+ *
+ * For simplicity, special joins are not handled. What could be done for them:
+ * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins,
+ *		and then removed normally.
+ * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND
+ *		(IS NULL on join columns OR NOT inner quals)'.
+ * 'a a1 left join a a2': could simplify to a scan like inner but without
+ *		NOT NULL conditions on join columns.
+ * 'a a1 left join (a a2 join b)': can't simplify this, because join to b
+ *		can both remove rows and introduce duplicates.
+ *
+ * Candidates are found by ordering the relations by Oid and examining each
+ * pair of relations within every set that shares one Oid.
+ *
+ * Removing a join marks one of its relations as dead and redirects every
+ * reference to it (RestrictInfos, EquivalenceClasses, EquivalenceMembers,
+ * row marks) to the surviving relation. Join clauses of the removed relation
+ * become restriction or join clauses, depending on whether they reference
+ * relations outside the removed join.
+ *
+ * 'joinlist' is the top-level joinlist of the query. Entries for removed
+ * relations are deleted from it and the resulting joinlist is returned.
+ */
+List *
+remove_useless_self_joins(PlannerInfo *root, List *joinlist)
+{
+	Relids		dead_rels;
+	int			nremoved = 0;
+	int			rti;
+
+	/* Feature switched off: leave the joinlist untouched */
+	if (!enable_self_join_removal)
+		return joinlist;
+
+	/* An empty list or a lone non-List item cannot hold a self-join */
+	if (joinlist == NIL)
+		return joinlist;
+	if (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List))
+		return joinlist;
+
+	/*
+	 * Merge pairs of relations participating in self-joins and collect the
+	 * indexes of the range table entries that became unnecessary.
+	 */
+	dead_rels = remove_self_joins_recurse(root, joinlist, NULL);
+
+	/* At the end, remove orphaned relation links */
+	for (rti = bms_next_member(dead_rels, -1); rti >= 0;
+		 rti = bms_next_member(dead_rels, rti))
+		joinlist = remove_rel_from_joinlist(joinlist, rti, &nremoved);
+
+	return joinlist;
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 25e126d1c39..1f9c26d1a8b 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -172,7 +172,6 @@ static List *fix_indexorderby_references(PlannerInfo *root, IndexPath *index_pat
static Node *fix_indexqual_clause(PlannerInfo *root,
IndexOptInfo *index, int indexcol,
Node *clause, List *indexcolnos);
-static Node *fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol);
static List *get_switched_clauses(List *clauses, Relids outerrelids);
static List *order_qual_clauses(PlannerInfo *root, List *clauses);
static void copy_generic_path_info(Plan *dest, Path *src);
@@ -1247,6 +1246,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
/* Generate a Result plan with constant-FALSE gating qual */
Plan *plan;
+ tlist = build_path_tlist(root, &best_path->path);
plan = (Plan *) make_result(tlist,
(Node *) list_make1(makeBoolConst(false,
false)),
@@ -1275,7 +1275,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
plan->plan.righttree = NULL;
plan->apprelids = rel->relids;
- if (pathkeys != NIL)
+ if (pathkeys != NIL && best_path->pull_tlist == false)
{
/*
* Compute sort column info, and adjust the Append's tlist as needed.
@@ -1309,11 +1309,17 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
/* Must insist that all children return the same tlist */
subplan = create_plan_recurse(root, subpath, CP_EXACT_TLIST);
+ if (tlist == NIL && best_path->pull_tlist)
+ plan->plan.targetlist = tlist = copyObject(subplan->targetlist);
+
/*
* For ordered Appends, we must insert a Sort node if subplan isn't
* sufficiently ordered.
+ * If best_path->pull_tlist is set, the plan came from
+ * keybased_rewrite_index_paths(), which guarantees correct sorting in
+ * the subplan, so no Sort is inserted here.
*/
- if (pathkeys != NIL)
+ if (pathkeys != NIL && best_path->pull_tlist == false)
{
int numsortkeys;
AttrNumber *sortColIdx;
@@ -5160,7 +5166,7 @@ fix_indexqual_clause(PlannerInfo *root, IndexOptInfo *index, int indexcol,
* Most of the code here is just for sanity cross-checking that the given
* expression actually matches the index column it's claimed to.
*/
-static Node *
+Node *
fix_indexqual_operand(Node *node, IndexOptInfo *index, int indexcol)
{
Var *result;
diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c
index e17d31a5c3e..075d36c7ecc 100644
--- a/src/backend/optimizer/plan/planmain.c
+++ b/src/backend/optimizer/plan/planmain.c
@@ -230,6 +230,11 @@ query_planner(PlannerInfo *root,
*/
reduce_unique_semijoins(root);
+ /*
+ * Remove self joins on a unique column.
+ */
+ joinlist = remove_useless_self_joins(root, joinlist);
+
/*
* Now distribute "placeholders" to base rels as needed. This has to be
* done after join removal because removal could change whether a
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 69ff384fce9..3a84f38f972 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -3062,6 +3062,10 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{
Var *var = (Var *) node;
+ /* join_references_mutator already checks this node */
+ if (var->varno == OUTER_VAR)
+ return (Node*)copyObject(var);
+
/* Look for the var in the input tlists, first in the outer */
if (context->outer_itlist)
{
@@ -3077,6 +3081,9 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
/* then in the inner. */
if (context->inner_itlist)
{
+ if (var->varno == INNER_VAR)
+ return (Node*)copyObject(var);
+
newvar = search_indexed_tlist_for_var(var,
context->inner_itlist,
INNER_VAR,
diff --git a/src/backend/optimizer/util/joininfo.c b/src/backend/optimizer/util/joininfo.c
index 65993bd6599..c305ca2814a 100644
--- a/src/backend/optimizer/util/joininfo.c
+++ b/src/backend/optimizer/util/joininfo.c
@@ -177,7 +177,6 @@ remove_join_clause_from_rels(PlannerInfo *root,
* Remove the restrictinfo from the list. Pointer comparison is
* sufficient.
*/
- Assert(list_member_ptr(rel->joininfo, restrictinfo));
rel->joininfo = list_delete_ptr(rel->joininfo, restrictinfo);
}
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c42742d2c7b..0b123b47533 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -205,6 +205,18 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
/* ... but path2 fuzzily worse on startup, so path1 wins */
return COSTS_BETTER1;
}
+
+ if (IsA(path1, IndexPath) && IsA(path2, IndexPath))
+ {
+ IndexPath *ipath1 = (IndexPath*)path1;
+ IndexPath *ipath2 = (IndexPath*)path2;
+
+ if (ipath1->indexselectivity < ipath2->indexselectivity)
+ return COSTS_BETTER1;
+ else if (ipath1->indexselectivity > ipath2->indexselectivity)
+ return COSTS_BETTER2;
+ }
+
/* fuzzily the same on both costs */
return COSTS_EQUAL;
@@ -1229,7 +1241,7 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
}
/*
- * create_append_path
+ * create_append_path_ext
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*
@@ -1241,12 +1253,12 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
* by totalling the row estimates from the 'subpaths' list.
*/
AppendPath *
-create_append_path(PlannerInfo *root,
+create_append_path_ext(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
- double rows)
+ double rows, bool pull_tlist)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
@@ -1256,6 +1268,7 @@ create_append_path(PlannerInfo *root,
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathtarget = rel->reltarget;
+ pathnode->pull_tlist = pull_tlist;
/*
* If this is for a baserel (not a join or non-leaf partition), we prefer
@@ -1348,12 +1361,12 @@ create_append_path(PlannerInfo *root,
pathnode->path.total_cost = child->total_cost;
}
else
- cost_append(pathnode);
+ cost_append_ext(pathnode, root);
/* Must do this last, else cost_append complains */
pathnode->path.pathkeys = child->pathkeys;
}
else
- cost_append(pathnode);
+ cost_append_ext(pathnode, root);
/* If the caller provided a row estimate, override the computed value. */
if (rows >= 0)
@@ -3918,9 +3931,13 @@ adjust_limit_rows_costs(double *rows, /* in/out parameter */
if (count_rows > *rows)
count_rows = *rows;
if (input_rows > 0)
+ {
+ *startup_cost = *startup_cost +
+ 2*(input_total_cost - input_startup_cost) / input_rows;
*total_cost = *startup_cost +
(input_total_cost - input_startup_cost)
* count_rows / input_rows;
+ }
*rows = count_rows;
if (*rows < 1)
*rows = 1;
@@ -4045,11 +4062,12 @@ reparameterize_path(PlannerInfo *root, Path *path,
i++;
}
return (Path *)
- create_append_path(root, rel, childpaths, partialpaths,
+ create_append_path_ext(root, rel, childpaths, partialpaths,
apath->path.pathkeys, required_outer,
apath->path.parallel_workers,
apath->path.parallel_aware,
- -1);
+ -1,
+ apath->pull_tlist);
}
case T_Material:
{
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index ac82a021e97..008482ab7ad 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -509,6 +509,9 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
index_close(indexRelation, NoLock);
+ info->sslots = palloc0(
+ (STATISTIC_NUM_SLOTS + 1) * sizeof(AttStatsSlot));
+
/*
* We've historically used lcons() here. It'd make more sense to
* use lappend(), but that causes the planner to change behavior
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 610093fe915..67e367bc743 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -1348,6 +1348,54 @@ build_joinrel_joinlist(RelOptInfo *joinrel,
joinrel->joininfo = result;
}
+typedef struct UniquePtrList { /* pointer list with duplicate suppression */
+ List *unique_list; /* the collected pointers, in insertion order */
+ HTAB *h; /* hash of the same pointers; NULL until the list grows large */
+} UniquePtrList;
+
+/*
+ * Append 'v' to upl->unique_list unless it is already there. Short lists
+ * are scanned linearly; past 32 entries a hash table does the lookups.
+ */
+static void
+addUniquePtrList(UniquePtrList *upl, void *v)
+{
+	bool		found;
+
+	if (upl->h == NULL && list_length(upl->unique_list) <= 32)
+	{
+		upl->unique_list = list_append_unique_ptr(upl->unique_list, v);
+		return;
+	}
+
+	/* First call past the threshold: load the hash from the list */
+	if (upl->h == NULL)
+	{
+		HASHCTL		hash_ctl;
+		ListCell   *l;
+
+		MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+		hash_ctl.keysize = sizeof(void *);
+		hash_ctl.entrysize = sizeof(void *);
+		/* not TopMemoryContext: an ERROR must not leak the table */
+		hash_ctl.hcxt = CurrentMemoryContext;
+		upl->h = hash_create("UniquePtrList storage", 64, &hash_ctl,
+							 HASH_BLOBS | HASH_ELEM | HASH_CONTEXT);
+
+		foreach(l, upl->unique_list)
+		{
+			void	   *k = lfirst(l);
+
+			hash_search(upl->h, &k, HASH_ENTER, &found);
+			Assert(found == false);
+		}
+	}
+
+	hash_search(upl->h, &v, HASH_ENTER, &found);
+	if (found == false)
+		upl->unique_list = lappend(upl->unique_list, v);
+}
+
static List *
subbuild_joinrel_restrictlist(PlannerInfo *root,
RelOptInfo *joinrel,
@@ -1356,6 +1404,10 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
List *new_restrictlist)
{
ListCell *l;
+ UniquePtrList upl;
+
+ memset(&upl, 0, sizeof(upl));
+ upl.unique_list = new_restrictlist;
foreach(l, input_rel->joininfo)
{
@@ -1400,7 +1452,7 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
* will have been multiply-linked rather than copied, pointer
* equality should be a sufficient test.)
*/
- new_restrictlist = list_append_unique_ptr(new_restrictlist, rinfo);
+ addUniquePtrList(&upl, rinfo);
}
else
{
@@ -1411,7 +1463,8 @@ subbuild_joinrel_restrictlist(PlannerInfo *root,
}
}
- return new_restrictlist;
+ hash_destroy(upl.h);
+ return upl.unique_list;
}
static List *
@@ -1420,6 +1473,10 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
List *new_joininfo)
{
ListCell *l;
+ UniquePtrList upl;
+
+ memset(&upl, 0, sizeof(upl));
+ upl.unique_list = new_joininfo;
/* Expected to be called only for join between parent relations. */
Assert(joinrel->reloptkind == RELOPT_JOINREL);
@@ -1445,11 +1502,12 @@ subbuild_joinrel_joinlist(RelOptInfo *joinrel,
* multiply-linked rather than copied, pointer equality should be
* a sufficient test.)
*/
- new_joininfo = list_append_unique_ptr(new_joininfo, rinfo);
+ addUniquePtrList(&upl, rinfo);
}
}
- return new_joininfo;
+ hash_destroy(upl.h);
+ return upl.unique_list;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index f230c5ff9e7..2441392a433 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -14862,7 +14862,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr LIKE a_expr ESCAPE a_expr %prec LIKE
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($3, $5),
COERCE_EXPLICIT_CALL,
@2);
@@ -14876,7 +14876,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA LIKE a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -14890,7 +14890,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr ILIKE a_expr ESCAPE a_expr %prec ILIKE
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($3, $5),
COERCE_EXPLICIT_CALL,
@2);
@@ -14904,7 +14904,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA ILIKE a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("like_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("like_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -14914,7 +14914,7 @@ a_expr: c_expr { $$ = $1; }
| a_expr SIMILAR TO a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make1($4),
COERCE_EXPLICIT_CALL,
@2);
@@ -14923,7 +14923,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr SIMILAR TO a_expr ESCAPE a_expr %prec SIMILAR
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make2($4, $6),
COERCE_EXPLICIT_CALL,
@2);
@@ -14932,7 +14932,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA SIMILAR TO a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make1($5),
COERCE_EXPLICIT_CALL,
@2);
@@ -14941,7 +14941,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr NOT_LA SIMILAR TO a_expr ESCAPE a_expr %prec NOT_LA
{
- FuncCall *n = makeFuncCall(SystemFuncName("similar_to_escape"),
+ FuncCall *n = makeFuncCall(list_make1(makeString("similar_to_escape")),
list_make2($5, $7),
COERCE_EXPLICIT_CALL,
@2);
diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index 8118036495b..aa4d99970f9 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -1204,6 +1204,7 @@ transformFromClauseItem(ParseState *pstate, Node *n,
&r_namespace);
/* Remove the left-side RTEs from the namespace list again */
+
pstate->p_namespace = list_truncate(pstate->p_namespace,
sv_namespace_length);
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 2f64eaf0e37..76dfa6c034d 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -2227,7 +2227,6 @@ addRangeTableEntryForJoin(ParseState *pstate,
{
RangeTblEntry *rte = makeNode(RangeTblEntry);
Alias *eref;
- int numaliases;
ParseNamespaceItem *nsitem;
Assert(pstate != NULL);
@@ -2253,19 +2252,37 @@ addRangeTableEntryForJoin(ParseState *pstate,
rte->join_using_alias = join_using_alias;
rte->alias = alias;
- eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL);
- numaliases = list_length(eref->colnames);
-
/* fill in any unspecified alias columns */
- if (numaliases < list_length(colnames))
- eref->colnames = list_concat(eref->colnames,
- list_copy_tail(colnames, numaliases));
+ if (alias)
+ {
+ int numaliases;
- if (numaliases > list_length(colnames))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
- errmsg("join expression \"%s\" has %d columns available but %d columns specified",
- eref->aliasname, list_length(colnames), numaliases)));
+ eref = copyObject(alias);
+
+ numaliases = list_length(eref->colnames);
+
+		/* Reject excess aliases first: colnames may be freed below */
+		if (numaliases > list_length(colnames))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+					 errmsg("join expression \"%s\" has %d columns available but %d columns specified",
+							eref->aliasname, list_length(colnames), numaliases)));
+
+		if (numaliases == 0)
+			eref->colnames = colnames;
+		else if (numaliases < list_length(colnames))
+		{
+			eref->colnames = list_concat(eref->colnames,
+										 list_copy_tail(colnames, numaliases));
+			/* the needed tail was copied; nothing reads colnames after this */
+			list_free(colnames);
+		}
+ }
+ else
+ {
+ eref = makeAlias("unnamed_join", NIL);
+ eref->colnames = colnames;
+ }
rte->eref = eref;
@@ -2901,8 +2918,11 @@ expandRTE(RangeTblEntry *rte, int rtindex, int sublevels_up,
{
char *label = strVal(lfirst(colname));
- *colnames = lappend(*colnames,
- makeString(pstrdup(label)));
+ /*
+ * Assume label is already pstrdup'ed somewhere, so
+ * don't duplicate it again
+ */
+ *colnames = lappend(*colnames, makeString(label));
}
if (colvars)
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 519c1a930b5..2fdd408833f 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -1374,7 +1374,7 @@ replace_rte_variables(Node *node, int target_varno, int sublevels_up,
result = query_or_expression_tree_mutator(node,
replace_rte_variables_mutator,
(void *) &context,
- 0);
+ QTW_DONT_COPY_DEFAULT);
if (context.inserted_sublink)
{
@@ -1444,14 +1444,14 @@ replace_rte_variables_mutator(Node *node,
newnode = query_tree_mutator((Query *) node,
replace_rte_variables_mutator,
(void *) context,
- 0);
+ QTW_DONT_COPY_DEFAULT);
newnode->hasSubLinks |= context->inserted_sublink;
context->inserted_sublink = save_inserted_sublink;
context->sublevels_up--;
return (Node *) newnode;
}
- return expression_tree_mutator(node, replace_rte_variables_mutator,
- (void *) context);
+ return expression_tree_mutator_ext(node, replace_rte_variables_mutator,
+ (void *) context, QTW_DONT_COPY_DEFAULT);
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 66008275bec..1eb0fc477a3 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -965,6 +965,7 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
int used = 0;
int highestfd = 0;
int j;
+	int			fdTest = 2;		/* fd to dup(): stderr, as before this patch */
#ifdef HAVE_GETRLIMIT
struct rlimit rlim;
@@ -980,6 +981,15 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
ereport(WARNING, (errmsg("getrlimit failed: %m")));
#endif /* HAVE_GETRLIMIT */
+#ifdef WIN32
+ /*
+ * On Windows 7, dup(0) on stdin fails when max_files_per_process > 1200,
+ * so probe with a descriptor opened on the postgresql.conf file instead.
+ */
+ fdTest = _open(ConfigFileName, _O_RDONLY);
+ if (fdTest < 0)
+ fdTest = 0; /* fallback to stdin */
+#endif
/* dup until failure or probe limit reached */
for (;;)
{
@@ -995,7 +1005,7 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
break;
#endif
- thisfd = dup(2);
+ thisfd = dup(fdTest);
if (thisfd < 0)
{
/* Expect EMFILE or ENFILE, else it's fishy */
@@ -1022,6 +1032,10 @@ count_usable_fds(int max_to_probe, int *usable_fds, int *already_open)
for (j = 0; j < used; j++)
close(fd[j]);
+#ifdef WIN32
+ if (fdTest>0)
+ _close(fdTest);
+#endif
pfree(fd);
/*
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c
index 4e69015fe4d..f247408a7fd 100644
--- a/src/backend/storage/ipc/sinval.c
+++ b/src/backend/storage/ipc/sinval.c
@@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/xact.h"
+#include "access/xlog.h"
#include "miscadmin.h"
#include "nodes/memnodes.h"
#include "storage/latch.h"
@@ -47,7 +48,26 @@ volatile sig_atomic_t catchupInterruptPending = false;
void
SendSharedInvalidMessages(const SharedInvalidationMessage *msgs, int n)
{
- SIInsertDataEntries(msgs, n);
+ int i;
+ int count = n;
+
+ /*
+ * Execute local invalidation messages right away without inserting
+ * into shared buffer.
+ * Note: conditions must be in sync with SIInsertDataEntries function.
+ */
+ if (MyDatabaseId != InvalidOid)
+ {
+ for (i=0; i<count; i++)
+ if (msgs[i].isLocal)
+ {
+ LocalExecuteInvalidationMessage((SharedInvalidationMessage*)&msgs[i]);
+ n--;
+ }
+ }
+
+ if (n)
+ SIInsertDataEntries(msgs, n);
}
/*
@@ -101,7 +121,6 @@ ReceiveSharedInvalidMessages(void (*invalFunction) (SharedInvalidationMessage *m
if (getResult < 0)
{
/* got a reset message */
- elog(DEBUG4, "cache state reset");
SharedInvalidMessageCounter++;
resetFunction();
break; /* nothing more to do */
diff --git a/src/backend/storage/ipc/sinvaladt.c b/src/backend/storage/ipc/sinvaladt.c
index 271212987e0..df0d9d46002 100644
--- a/src/backend/storage/ipc/sinvaladt.c
+++ b/src/backend/storage/ipc/sinvaladt.c
@@ -18,6 +18,7 @@
#include <unistd.h>
#include "access/transam.h"
+#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/proc.h"
@@ -127,8 +128,8 @@
* per iteration.
*/
-#define MAXNUMMESSAGES 4096
-#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 262144)
+#define MAXNUMMESSAGES 16384
+#define MSGNUMWRAPAROUND (MAXNUMMESSAGES * 65536)
#define CLEANUP_MIN (MAXNUMMESSAGES / 2)
#define CLEANUP_QUANTUM (MAXNUMMESSAGES / 16)
#define SIG_THRESHOLD (MAXNUMMESSAGES / 2)
@@ -172,8 +173,6 @@ typedef struct SISeg
int maxMsgNum; /* next message number to be assigned */
int nextThreshold; /* # of messages to call SICleanupQueue */
- slock_t msgnumLock; /* spinlock protecting maxMsgNum */
-
/*
* Circular buffer holding shared-inval messages
*/
@@ -246,7 +245,6 @@ CreateSharedInvalidationState(void)
shmInvalBuffer->minMsgNum = 0;
shmInvalBuffer->maxMsgNum = 0;
shmInvalBuffer->nextThreshold = CLEANUP_MIN;
- SpinLockInit(&shmInvalBuffer->msgnumLock);
/* The buffer[] array is initially all unused, so we need not fill it */
@@ -412,16 +410,21 @@ SIInsertDataEntries(const SharedInvalidationMessage *data, int n)
* Insert new message(s) into proper slot of circular buffer
*/
max = segP->maxMsgNum;
- while (nthistime-- > 0)
+ while (nthistime)
{
- segP->buffer[max % MAXNUMMESSAGES] = *data++;
- max++;
+ if ((MyDatabaseId == InvalidOid) || !data->isLocal)
+ {
+ segP->buffer[max % MAXNUMMESSAGES] = *data;
+ max++;
+ nthistime--;
+ }
+
+ data++;
}
/* Update current value of maxMsgNum using spinlock */
- SpinLockAcquire(&segP->msgnumLock);
+ pg_memory_barrier();
segP->maxMsgNum = max;
- SpinLockRelease(&segP->msgnumLock);
/*
* Now that the maxMsgNum change is globally visible, we give everyone
@@ -507,11 +510,6 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
*/
stateP->hasMessages = false;
- /* Fetch current value of maxMsgNum using spinlock */
- SpinLockAcquire(&segP->msgnumLock);
- max = segP->maxMsgNum;
- SpinLockRelease(&segP->msgnumLock);
-
if (stateP->resetState)
{
/*
@@ -519,13 +517,17 @@ SIGetDataEntries(SharedInvalidationMessage *data, int datasize)
* since the reset, as well; and that means we should clear the
* signaled flag, too.
*/
- stateP->nextMsgNum = max;
+ stateP->nextMsgNum = segP->maxMsgNum;
stateP->resetState = false;
stateP->signaled = false;
LWLockRelease(SInvalReadLock);
return -1;
}
+ /* Fetch current value of maxMsgNum; the barrier orders it before buffer reads */
+ max = segP->maxMsgNum;
+ pg_memory_barrier();
+
/*
* Retrieve messages and advance backend's counter, until data array is
* full or there are no more messages.
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index e5e7ab55716..2ca7b88cff9 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -32,6 +32,7 @@
#include <signal.h>
#include <unistd.h>
+#include "access/tempcat.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
@@ -44,10 +45,13 @@
#include "storage/sinvaladt.h"
#include "storage/spin.h"
#include "storage/standby.h"
+#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/resowner.h"
-
+#include "catalog/pg_class.h"
+#include "utils/syscache.h"
+#include "access/htup_details.h"
/* This configuration variable is used to set the lock table size */
int max_locks_per_xact; /* set by guc.c */
@@ -375,6 +379,68 @@ static void LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
static void GetSingleProcBlockerStatusData(PGPROC *blocked_proc,
BlockedProcsData *data);
+/*
+ * Report whether acquiring/releasing the lock named by 'locktag' can be
+ * skipped. Currently only some locks on temporary tables are skipped.
+ */
+static bool IsLockCanBeSkipped(const LOCKTAG *locktag, bool sessionLock)
+{
+ Form_pg_class form;
+ HeapTuple tuple;
+ bool isTemp;
+ static bool inRecursion = false; /* true while the lookup below runs */
+
+ /*
+ * Only relation locks can be skipped
+ */
+ if (locktag->locktag_type != LOCKTAG_RELATION)
+ return false;
+
+ /*
+ * Skip locks only inside a transaction; otherwise the cache search fails
+ */
+ if (!IsTransactionState())
+ return false;
+
+ /*
+ * Never skip session-level relation locks: at unlock time we cannot
+ * tell here whether the lock was skipped or not
+ */
+ if (sessionLock)
+ return false;
+
+ /*
+ * The SysCache search below results in a recursive call to this function.
+ * No SysCache lock can be skipped, so while the search is in progress
+ * answer "no" without checking whether the table is temporary.
+ */
+ if (inRecursion)
+ return false;
+
+ if(enable_temp_memory_catalog && IsTempTableScope())
+ return true;
+
+ /*
+ * Try to get the relation's pg_class tuple, if possible
+ */
+ inRecursion = true; /* NOTE(review): stays set if the lookup ERRORs - confirm */
+ tuple = TryGetSysCacheRelationClassTuple(locktag->locktag_field2);
+ inRecursion = false;
+
+ /*
+ * Treat a failed lookup as if the relation is not temporary
+ */
+ if (!tuple)
+ return false;
+
+ form = (Form_pg_class) GETSTRUCT(tuple);
+ isTemp = form->relpersistence == RELPERSISTENCE_TEMP;
+
+ ReleaseSysCache(tuple);
+
+ return isTemp;
+}
+
/*
* InitLocks -- Initialize the lock manager's data structures.
@@ -604,7 +670,7 @@ LockHeldByMe(const LOCKTAG *locktag,
&localtag,
HASH_FIND, NULL);
- if (locallock && locallock->nLocks > 0)
+ if ((locallock && locallock->nLocks > 0) || IsLockCanBeSkipped(locktag, false))
return true;
if (orstronger)
@@ -813,6 +879,10 @@ LockAcquireExtended(const LOCKTAG *locktag,
lockMethodTable->lockModeNames[lockmode]),
errhint("Only RowExclusiveLock or less can be acquired on database objects during recovery.")));
+ /* Don't lock if it's not required. Treat as already locked. */
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return LOCKACQUIRE_ALREADY_CLEAR;
+
#ifdef LOCK_DEBUG
if (LOCK_DEBUG_ENABLED(locktag))
elog(LOG, "LockAcquire: lock [%u,%u] %s",
@@ -2001,6 +2071,9 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
*/
if (!locallock || locallock->nLocks <= 0)
{
+ /* Treat skipped locks (they aren't actually locked) as unlocked */
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return true;
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
return false;
@@ -2039,6 +2112,8 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
}
if (i < 0)
{
+ if (IsLockCanBeSkipped(locktag, sessionLock))
+ return true;
/* don't release a lock belonging to another owner */
elog(WARNING, "you don't own a lock of type %s",
lockMethodTable->lockModeNames[lockmode]);
diff --git a/src/backend/storage/smgr/Makefile b/src/backend/storage/smgr/Makefile
index 1d0b98764f9..428e67336d7 100644
--- a/src/backend/storage/smgr/Makefile
+++ b/src/backend/storage/smgr/Makefile
@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
OBJS = \
bulk_write.o \
md.o \
+ rd.o \
smgr.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/storage/smgr/meson.build b/src/backend/storage/smgr/meson.build
index 6d91b18fe67..a2d9318d9e8 100644
--- a/src/backend/storage/smgr/meson.build
+++ b/src/backend/storage/smgr/meson.build
@@ -3,5 +3,6 @@
backend_sources += files(
'bulk_write.c',
'md.c',
+ 'rd.c',
'smgr.c',
)
diff --git a/src/backend/storage/smgr/rd.c b/src/backend/storage/smgr/rd.c
new file mode 100644
index 00000000000..6aa8dd05d5b
--- /dev/null
+++ b/src/backend/storage/smgr/rd.c
@@ -0,0 +1,347 @@
+#include "postgres.h"
+
+#include "storage/md.h"
+#include "storage/rd.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+
+typedef struct _RdBuffer
+{
+ dlist_node node; /* link in the file-level 'buffers' list */
+ RelFileLocatorBackend rlocator; /* relation this buffer belongs to */
+ ForkNumber forknum; /* fork this buffer belongs to */
+ BlockNumber ballocated; /* capacity of 'data', in blocks */
+ BlockNumber bsize; /* blocks currently in use (logical fork size) */
+ char *data; /* ballocated * BLCKSZ bytes of page data */
+} _RdBuffer;
+
+/*
+ * Size of a buffer in blocks. Once the buffer is exhausted, the storage
+ * switches to 'md' and flushes all the data to disk.
+ */
+int temp_rd_buffers = 4;
+
+bool enable_temp_rd_buffers = false;
+
+static MemoryContext mctx;
+static dlist_head buffers;
+
+/* Linear search of the buffer list for (rlocator, forknum); NULL when not found. */
+static _RdBuffer *
+_find_buffer(RelFileLocatorBackend *rlocator, ForkNumber forknum)
+{
+	dlist_iter	cursor;
+	dlist_foreach(cursor, &buffers)
+	{
+		_RdBuffer  *cand = dlist_container(_RdBuffer, node, cursor.cur);
+		if (cand->forknum == forknum && RelFileLocatorBackendEquals(cand->rlocator, *rlocator))
+			return cand;
+	}
+	return NULL;
+}
+
+/* Return the buffer for forknum: reln's cached pointer if set, else a list lookup; ERROR if there is none. */
+static _RdBuffer*
+_open_buffer(SMgrRelation reln, ForkNumber forknum)
+{
+ _RdBuffer* tbuf = reln->rd_bufs[forknum] ;
+ if (tbuf)
+ return tbuf;
+
+ tbuf = _find_buffer(&reln->smgr_rlocator, forknum);
+ if (tbuf)
+ return tbuf;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("temporary page doesn't exists")));
+
+ return NULL;
+}
+
+/* Hand reln over to manager 0 (md): replay every buffered fork through smgrcreate/smgrextend, then free the buffers. */
+static void
+switch_to_md(SMgrRelation reln)
+{
+ dlist_mutable_iter iter;
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ reln->rd_bufs[forknum] = 0;
+
+ reln->smgr_which = 0;
+
+ mdopen(reln);
+
+ dlist_foreach_modify(iter, &buffers)
+ {
+ _RdBuffer* buffer = dlist_container(_RdBuffer, node, iter.cur);
+ if (RelFileLocatorBackendEquals(buffer->rlocator, reln->smgr_rlocator))
+ {
+ smgrcreate(reln, buffer->forknum, false);
+ for(BlockNumber bn=0; bn < buffer->bsize; bn++)
+ smgrextend(reln, buffer->forknum, bn, buffer->data + bn*BLCKSZ, true);
+
+ dlist_delete(&buffer->node);
+ pfree(buffer->data);
+ pfree(buffer);
+ }
+ }
+}
+
+/* Move reln into rd buffers (manager 1): only if every existing fork has at most temp_rd_buffers blocks; then copy the pages and unlink the old storage. */
+void
+rd_reset(SMgrRelation reln)
+{
+ BlockNumber nblocks[MAX_FORKNUM+1];
+ char* buf;
+
+ if (reln->smgr_which == 1)
+ return;
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ {
+ if (smgrexists(reln, forknum))
+ {
+ nblocks[forknum] = smgrnblocks(reln, forknum);
+
+ if (nblocks[forknum] > temp_rd_buffers)
+ return;
+ }
+ else
+ nblocks[forknum] = InvalidBlockNumber;
+ }
+
+
+ buf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, 0);
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ {
+ if (nblocks[forknum] == InvalidBlockNumber)
+ continue;
+
+ rd_create(reln, forknum, false);
+ for (BlockNumber bn=0; bn < nblocks[forknum]; bn++)
+ {
+ smgrread(reln, forknum, bn, buf);
+ rd_extend(reln, forknum, bn, buf, true);
+ }
+ }
+
+ pfree(buf);
+
+ smgrdounlinkall(&reln, 1, false);
+
+ reln->smgr_which = 1;
+}
+
+/* Create the "RdSmgr" memory context under TopMemoryContext and initialise the empty buffer list. */
+void
+rd_init(void)
+{
+ mctx = AllocSetContextCreate(TopMemoryContext, "RdSmgr", ALLOCSET_DEFAULT_SIZES);
+ dlist_init(&buffers);
+}
+
+/* Shutdown callback: intentionally does nothing. */
+void
+rd_shutdown(void)
+{
+}
+
+/* Call mdopen() on reln and clear its cached per-fork buffer pointers. */
+void
+rd_open(SMgrRelation reln)
+{
+ mdopen(reln);
+
+ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
+ reln->rd_bufs[forknum] = 0;
+}
+
+/* Close callback: a no-op, both arguments are ignored. */
+void
+rd_close(SMgrRelation reln, ForkNumber forknum)
+{
+ (void) reln;
+ (void) forknum;
+}
+
+/* Create an empty buffer for forknum (ERROR if one exists; isRedo is unused). The byte size is computed in Size so a large temp_rd_buffers cannot wrap 32 bits and under-allocate. */
+void
+rd_create(SMgrRelation reln, ForkNumber forknum, bool isRedo)
+{
+	_RdBuffer  *tbuf = _find_buffer(&reln->smgr_rlocator, forknum);
+	if (tbuf)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("temporary page already exists")));
+
+	tbuf = MemoryContextAlloc(mctx, sizeof(_RdBuffer));
+	tbuf->rlocator = reln->smgr_rlocator;
+	tbuf->forknum = forknum;
+	tbuf->ballocated = temp_rd_buffers;
+	tbuf->bsize = 0;
+	tbuf->data = MemoryContextAllocAligned(mctx, (Size) tbuf->ballocated * BLCKSZ, PG_IO_ALIGN_SIZE, 0);
+
+	dlist_push_tail(&buffers, &tbuf->node);
+
+	reln->rd_bufs[forknum] = tbuf;
+}
+
+/* Report whether an in-memory buffer exists for this relation fork. */
+bool
+rd_exists(SMgrRelation reln, ForkNumber forknum)
+{
+	return _find_buffer(&reln->smgr_rlocator, forknum) != NULL;
+}
+
+/* Free the buffer for (rlocator, forknum), if any. NOTE(review): an SMgrRelation's rd_bufs[] entry that still points at it is not cleared here - confirm callers; isRedo is unused. */
+void
+rd_unlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
+{
+ _RdBuffer* tbuf = _find_buffer(&rlocator, forknum);
+ if (tbuf)
+ {
+ dlist_delete(&tbuf->node);
+ pfree(tbuf->data);
+ pfree(tbuf);
+ }
+}
+
+/* Copy nblocks pages from 'buffers' into the fork's buffer starting at blocknum; ERROR on a block at or beyond the current size. */
+void
+rd_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync)
+{
+ _RdBuffer* tbuf = _open_buffer(reln, forknum);
+
+ while (nblocks)
+ {
+ if (blocknum >= tbuf->bsize)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("temporary page write beyond size")));
+
+ if (blocknum >= tbuf->ballocated)
+ {
+ switch_to_md(reln);
+ smgrwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync);
+ return;
+ }
+
+ memcpy(tbuf->data + blocknum*BLCKSZ, *buffers, BLCKSZ);
+
+ buffers++;
+ nblocks--;
+ blocknum++;
+ }
+}
+
+/* Store one page at blocknum and grow the logical size; a block at or beyond the allocation moves reln to md and extends there instead. */
+void
+rd_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync)
+{
+ _RdBuffer* tbuf = _open_buffer(reln, forknum);
+
+ if (blocknum >= tbuf->ballocated)
+ {
+ switch_to_md(reln);
+ smgrextend(reln, forknum, blocknum, buffer, skipFsync);
+ return;
+ }
+
+ memcpy(tbuf->data + blocknum*BLCKSZ, buffer, BLCKSZ);
+
+ tbuf->bsize = Max(tbuf->bsize, blocknum+1);
+}
+
+/* Zero-fill nblocks pages from blocknum and grow the logical size; if the range exceeds the allocation, move reln to md and extend there instead. */
+void
+rd_zeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync)
+{
+ _RdBuffer* tbuf = _open_buffer(reln, forknum);
+
+ if (blocknum + nblocks > tbuf->ballocated)
+ {
+ switch_to_md(reln);
+ smgrzeroextend(reln, forknum, blocknum, nblocks, skipFsync);
+ return;
+ }
+
+ memset(tbuf->data + blocknum*BLCKSZ, 0, BLCKSZ*nblocks);
+
+ tbuf->bsize = Max(tbuf->bsize, blocknum+nblocks);
+}
+
+/* Copy nblocks pages starting at blocknum out of the fork's buffer into 'buffers'; ERROR on a block at or beyond the current size. */
+void
+rd_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks)
+{
+ _RdBuffer* tbuf = _open_buffer(reln, forknum);
+
+ while (nblocks)
+ {
+ if (blocknum >= tbuf->bsize)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read block %u in rd of size %u", blocknum, tbuf->bsize)));
+
+ memcpy(*buffers, tbuf->data + blocknum*BLCKSZ, BLCKSZ);
+
+ buffers++;
+ nblocks--;
+ blocknum++;
+ }
+}
+
+/* Report the fork's logical size in blocks. */
+BlockNumber
+rd_nblocks(SMgrRelation reln, ForkNumber forknum)
+{
+	/* _open_buffer() raises ERROR when the fork has no buffer */
+	return _open_buffer(reln, forknum)->bsize;
+}
+
+/* Set the fork's logical size to nblocks; the page data is left untouched and old_blocks is unused. */
+void
+rd_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks)
+{
+ _RdBuffer* tbuf = _open_buffer(reln, forknum);
+ tbuf->bsize = nblocks;
+ (void) old_blocks;
+}
+
+/* Prefetch callback: ignores all arguments and always returns true. */
+bool
+rd_prefetch(SMgrRelation reln,
+ ForkNumber forknum,
+ BlockNumber blocknum,
+ int nblocks)
+{
+ (void) reln;
+ (void) forknum;
+ (void) blocknum;
+ (void) nblocks;
+ return true;
+}
+
+/* Writeback callback: a no-op, all arguments are ignored. */
+void
+rd_writeback(SMgrRelation reln,
+ ForkNumber forknum,
+ BlockNumber blocknum,
+ BlockNumber nblocks)
+{
+ (void) reln;
+ (void) forknum;
+ (void) blocknum;
+ (void) nblocks;
+}
+
+/* Immediate-sync callback: a no-op, both arguments are ignored. */
+void
+rd_immedsync(SMgrRelation reln,
+ ForkNumber forknum)
+{
+ (void) reln;
+ (void) forknum;
+}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index bfdaac4721d..2672fdc6802 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -57,6 +57,7 @@
#include "storage/ipc.h"
#include "storage/md.h"
#include "storage/smgr.h"
+#include "storage/rd.h"
#include "utils/hsearch.h"
#include "utils/inval.h"
@@ -124,7 +125,28 @@ static const f_smgr smgrsw[] = {
.smgr_truncate = mdtruncate,
.smgr_immedsync = mdimmedsync,
.smgr_registersync = mdregistersync,
- }
+ },
+
+ /* ram disk */
+ {
+ .smgr_init = rd_init,
+ .smgr_shutdown = rd_shutdown,
+ .smgr_open = rd_open,
+ .smgr_close = rd_close,
+ .smgr_create = rd_create,
+ .smgr_exists = rd_exists,
+ .smgr_unlink = rd_unlink,
+ .smgr_extend = rd_extend,
+ .smgr_zeroextend = rd_zeroextend,
+ .smgr_prefetch = rd_prefetch,
+ .smgr_readv = rd_readv,
+ .smgr_writev = rd_writev,
+ .smgr_writeback = rd_writeback,
+ .smgr_nblocks = rd_nblocks,
+ .smgr_truncate = rd_truncate,
+ .smgr_immedsync = rd_immedsync,
+ .smgr_registersync = mdregistersync,
+ },
};
static const int NSmgr = lengthof(smgrsw);
@@ -228,8 +250,19 @@ smgropen(RelFileLocator rlocator, ProcNumber backend)
/* hash_search already filled in the lookup key */
reln->smgr_targblock = InvalidBlockNumber;
for (int i = 0; i <= MAX_FORKNUM; ++i)
+ {
reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
- reln->smgr_which = 0; /* we only have md.c at present */
+ reln->rd_bufs[i] = NULL;
+ reln->md_num_open_segs[i] = 0;
+ }
+
+ if (RelFileLocatorBackendIsTemp(reln->smgr_rlocator)
+ && !smgrsw[0].smgr_exists(reln, MAIN_FORKNUM)
+ && enable_temp_rd_buffers)
+ /* use rd structure until we switch to md after threshold */
+ reln->smgr_which = 1;
+ else
+ reln->smgr_which = 0;
/* it is not pinned yet */
reln->pincount = 0;
@@ -500,7 +533,8 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
* closed our own smgr rel.
*/
for (i = 0; i < nrels; i++)
- CacheInvalidateSmgr(rlocators[i]);
+ if (!SmgrIsTemp(rels[i]))
+ CacheInvalidateSmgr(rlocators[i]);
/*
* Delete the physical file(s).
@@ -745,7 +779,8 @@ smgrtruncate2(SMgrRelation reln, ForkNumber *forknum, int nforks,
* is a performance-critical path.) As in the unlink code, we want to be
* sure the message is sent before we start changing things on-disk.
*/
- CacheInvalidateSmgr(reln->smgr_rlocator);
+ if (!SmgrIsTemp(reln))
+ CacheInvalidateSmgr(reln->smgr_rlocator);
/* Do the truncation */
for (i = 0; i < nforks; i++)
diff --git a/src/backend/tcop/cmdtag.c b/src/backend/tcop/cmdtag.c
index 0870064fdd8..96e202e3aeb 100644
--- a/src/backend/tcop/cmdtag.c
+++ b/src/backend/tcop/cmdtag.c
@@ -145,7 +145,7 @@ BuildQueryCompletionString(char *buff, const QueryCompletion *qc,
*/
if (command_tag_display_rowcount(tag) && !nameonly)
{
- if (tag == CMDTAG_INSERT)
+ if (tag == CMDTAG_INSERT || tag == CMDTAG_EXPLAIN_INSERT)
{
*bufp++ = ' ';
*bufp++ = '0';
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 9cd1d0abe35..38415cdfb1c 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -71,6 +71,7 @@
#include "tcop/pquery.h"
#include "tcop/tcopprot.h"
#include "tcop/utility.h"
+#include "utils/builtins.h"
#include "utils/guc_hooks.h"
#include "utils/injection_point.h"
#include "utils/lsyscache.h"
@@ -3269,6 +3270,15 @@ ProcessRecoveryConflictInterrupts(void)
*/
void
ProcessInterrupts(void)
+{
+ if (likely(!ProcessInterrupts_hook))
+ standard_ProcessInterrupts();
+ else
+ ProcessInterrupts_hook();
+}
+
+void
+standard_ProcessInterrupts(void)
{
/* OK to accept any interrupts now? */
if (InterruptHoldoffCount != 0 || CritSectionCount != 0)
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 5f86fa98080..198b7178197 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -773,7 +773,13 @@ PortalRun(Portal portal, long count, bool isTopLevel, bool run_once,
if (qc && portal->qc.commandTag != CMDTAG_UNKNOWN)
{
CopyQueryCompletion(qc, &portal->qc);
- qc->nprocessed = nprocessed;
+ if (portal->qc.commandTag == CMDTAG_EXPLAIN ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_INSERT ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_UPDATE ||
+ portal->qc.commandTag == CMDTAG_EXPLAIN_DELETE)
+ qc->nprocessed = portal->qc.nprocessed;
+ else
+ qc->nprocessed = nprocessed;
}
/* Mark portal not active */
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index fa66b8017ed..f222492b0d6 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -860,7 +860,32 @@ standard_ProcessUtility(PlannedStmt *pstmt,
break;
case T_ExplainStmt:
- ExplainQuery(pstate, (ExplainStmt *) parsetree, params, dest);
+ {
+ Query *query;
+ uint64 processed;
+ int explainTag;
+
+ ExplainQuery(pstate, (ExplainStmt *) parsetree, params, dest, &processed);
+
+ query = castNode(Query, ((ExplainStmt *) parsetree)->query);
+ switch (query->commandType)
+ {
+ case CMD_INSERT:
+ explainTag = CMDTAG_EXPLAIN_INSERT;
+ break;
+ case CMD_UPDATE:
+ explainTag = CMDTAG_EXPLAIN_UPDATE;
+ break;
+ case CMD_DELETE:
+ explainTag = CMDTAG_EXPLAIN_DELETE;
+ break;
+ default:
+ explainTag = CMDTAG_EXPLAIN;
+ break;
+ }
+ if (qc)
+ SetQueryCompletion(qc, explainTag, processed);
+ }
break;
case T_AlterSystemStmt:
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 6cd21ba8fed..a007999b50e 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -54,20 +54,9 @@
#include "utils/selfuncs.h"
#include "utils/varlena.h"
-
-typedef enum
-{
- Pattern_Type_Like,
- Pattern_Type_Like_IC,
- Pattern_Type_Regex,
- Pattern_Type_Regex_IC,
- Pattern_Type_Prefix,
-} Pattern_Type;
-
-typedef enum
-{
- Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact,
-} Pattern_Prefix_Status;
+#include "catalog/pg_proc.h"
+#include "utils/catcache.h"
+#include "utils/syscache.h"
static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
static List *match_pattern_prefix(Node *leftop,
@@ -107,6 +96,119 @@ static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);
+/****************************************************************************
+ * ---- ROUTINES FOR "SPECIAL" INDEXABLE OPERATORS FOR
+ * SPECIAL USER_DEFINED TYPES ----
+ * -- teodor
+ ****************************************************************************/
+
+static Oid mmPFPOid = InvalidOid;
+static Oid mmGTOid = InvalidOid;
+static Oid mcharOid = InvalidOid;
+static Oid mvarcharOid = InvalidOid;
+
+#define HeapTupleGetOid(type, tuple) (((type)GETSTRUCT(tuple))->oid)
+/* Look up a type by name alone (one-key TYPENAMENSP list search): its OID, InvalidOid if not found, ERROR if several match. */
+static Oid
+findTypeOid(char *typname)
+{
+ CatCList *catlist;
+ HeapTuple tup;
+ int n_members;
+ Oid typoid;
+
+ catlist = SearchSysCacheList(TYPENAMENSP, 1,
+ CStringGetDatum(typname), 0, 0);
+
+ n_members = catlist->n_members;
+
+ if (n_members != 1)
+ {
+ ReleaseSysCacheList(catlist);
+ if (n_members > 1)
+ elog(ERROR,"There are %d candidates for '%s' type",
+ n_members, typname);
+ return InvalidOid;
+ }
+
+ tup = &catlist->members[0]->tuple;
+
+ typoid = HeapTupleGetOid(Form_pg_type, tup);
+
+ ReleaseSysCacheList(catlist);
+
+ return typoid;
+}
+/* Refresh the cached mchar/mvarchar type OIDs and support-function OIDs; returns false when any of them cannot be found, ERROR when a function name is ambiguous. */
+static bool
+fillMCharOIDS(void) {
+	CatCList *catlist;
+	HeapTuple tup;
+	char *funcname = "mchar_pattern_fixed_prefix";
+	int n_members;
+
+	catlist = SearchSysCacheList(PROCNAMEARGSNSP, 1,
+								 CStringGetDatum(funcname), 0, 0);
+	n_members = catlist->n_members;
+
+	if (n_members != 1) {
+		ReleaseSysCacheList(catlist);
+		if (n_members > 1)
+			elog(ERROR,"There are %d candidates for '%s' function'", n_members, funcname);
+		return false;
+	}
+
+	tup = &catlist->members[0]->tuple;
+	/* Re-resolve everything only when the function's OID differs from the cached one */
+	if ( HeapTupleGetOid(Form_pg_proc, tup) != mmPFPOid ) {
+		char *quals_funcname = "mchar_greaterstring";
+		Oid tmp_mmPFPOid = HeapTupleGetOid(Form_pg_proc, tup);
+
+		ReleaseSysCacheList(catlist);
+
+		mcharOid = findTypeOid("mchar");
+		mvarcharOid = findTypeOid("mvarchar");
+
+		if ( mcharOid == InvalidOid || mvarcharOid == InvalidOid ) {
+			elog(LOG,"Can't find mchar/mvarchar types: mchar=%u mvarchar=%u",
+				 mcharOid, mvarcharOid);
+			return false;
+		}
+
+		catlist = SearchSysCacheList(PROCNAMEARGSNSP, 1,
+									 CStringGetDatum(quals_funcname), 0, 0);
+		n_members = catlist->n_members;
+
+		if ( n_members != 1 ) {
+			ReleaseSysCacheList(catlist);
+			if ( n_members > 1 )
+				elog(ERROR,"There are %d candidates for '%s' function'", n_members, quals_funcname);
+			return false;
+		}
+		/* Publish mmPFPOid last, so a failed refresh is retried on the next call */
+		tup = &catlist->members[0]->tuple;
+		mmGTOid = HeapTupleGetOid(Form_pg_proc, tup);
+		mmPFPOid = tmp_mmPFPOid;
+	}
+
+	ReleaseSysCacheList(catlist);
+
+	return true;
+}
+/* Delegate prefix extraction to the function whose OID is cached in mmPFPOid; Pattern_Prefix_None when the OIDs cannot be resolved. */
+static Pattern_Prefix_Status
+mchar_pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Const **prefix)
+{
+ if (!fillMCharOIDS())
+ return Pattern_Prefix_None;
+
+ return (Pattern_Prefix_Status)DatumGetInt32( OidFunctionCall3(
+ mmPFPOid,
+ PointerGetDatum(patt),
+ Int32GetDatum(ptype),
+ PointerGetDatum(prefix)
+ ) );
+}
/*
* Planner support functions for LIKE, regex, and related operators
@@ -259,6 +361,7 @@ match_pattern_prefix(Node *leftop,
Expr *expr;
FmgrInfo ltproc;
Const *greaterstr;
+ bool isMchar = false;
/*
* Can't do anything with a non-constant or NULL pattern argument.
@@ -291,8 +394,16 @@ match_pattern_prefix(Node *leftop,
/*
* Try to extract a fixed prefix from the pattern.
*/
- pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
- &prefix, NULL);
+ ldatatype = exprType(leftop);
+ if (fillMCharOIDS() && (ldatatype == mcharOid ||
+ ldatatype == mvarcharOid))
+ {
+ pstatus = mchar_pattern_fixed_prefix(patt, ptype, &prefix);
+ isMchar = true;
+ }
+ else
+ pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
+ &prefix, NULL);
/* fail if no fixed prefix */
if (pstatus == Pattern_Prefix_None)
@@ -307,7 +418,6 @@ match_pattern_prefix(Node *leftop,
* selected operators also determine the needed type of the prefix
* constant.
*/
- ldatatype = exprType(leftop);
switch (ldatatype)
{
case TEXTOID:
@@ -374,7 +484,16 @@ match_pattern_prefix(Node *leftop,
break;
default:
/* Can't get here unless we're attached to the wrong operator */
- return NIL;
+ if (!isMchar)
+ return NIL;
+ collation_aware = false;
+ rdatatype = mvarcharOid;
+ ltopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTLessStrategyNumber);
+ eqopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTEqualStrategyNumber);
+ geopr = get_opfamily_member(opfamily, ldatatype, rdatatype,
+ BTGreaterEqualStrategyNumber);
}
/*
@@ -386,9 +505,10 @@ match_pattern_prefix(Node *leftop,
*/
if (prefix->consttype != rdatatype)
{
- Assert(prefix->consttype == TEXTOID &&
- rdatatype == BPCHAROID);
- prefix->consttype = rdatatype;
+ Assert(isMchar || (prefix->consttype == TEXTOID &&
+ rdatatype == BPCHAROID));
+ if (!isMchar)
+ prefix->consttype = rdatatype;
}
/*
@@ -457,7 +577,12 @@ match_pattern_prefix(Node *leftop,
if (!op_in_opfamily(ltopr, opfamily))
return result;
fmgr_info(get_opcode(ltopr), &ltproc);
- greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
+ if (isMchar)
+ greaterstr = (Const*)DatumGetPointer(OidFunctionCall1(
+ mmGTOid,
+ PointerGetDatum(prefix)));
+ else
+ greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
if (greaterstr)
{
expr = make_opclause(ltopr, BOOLOID, false,
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
index 0214c23a1d4..a1acab62fca 100644
--- a/src/backend/utils/adt/rowtypes.c
+++ b/src/backend/utils/adt/rowtypes.c
@@ -27,7 +27,6 @@
#include "utils/lsyscache.h"
#include "utils/typcache.h"
-
/*
* structure to cache metadata needed for record I/O
*/
@@ -824,6 +823,9 @@ record_cmp(FunctionCallInfo fcinfo)
{
HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int record_cmp_prefix =
+ (PG_NARGS() == 3 && PG_GETARG_INT32(2) > 0) ?
+ PG_GETARG_INT32(2) : INT_MAX;
int result = 0;
Oid tupType1;
Oid tupType2;
@@ -908,6 +910,9 @@ record_cmp(FunctionCallInfo fcinfo)
nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+ ncolumns1 = Min(ncolumns1, record_cmp_prefix);
+ ncolumns2 = Min(ncolumns2, record_cmp_prefix);
+
/*
* Scan corresponding columns, allowing for dropped columns in different
* places in the two rows. i1 and i2 are physical column indexes, j is
@@ -1068,6 +1073,9 @@ record_eq(PG_FUNCTION_ARGS)
{
HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int record_cmp_prefix =
+ (PG_NARGS() == 3 && PG_GETARG_INT32(2) > 0) ?
+ PG_GETARG_INT32(2) : INT_MAX;
bool result = true;
Oid tupType1;
Oid tupType2;
@@ -1152,6 +1160,9 @@ record_eq(PG_FUNCTION_ARGS)
nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+ ncolumns1 = Min(ncolumns1, record_cmp_prefix);
+ ncolumns2 = Min(ncolumns2, record_cmp_prefix);
+
/*
* Scan corresponding columns, allowing for dropped columns in different
* places in the two rows. i1 and i2 are physical column indexes, j is
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index d1139a268f3..863826b188d 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -39,6 +39,7 @@
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "commands/tablespace.h"
+#include "common/hashfn.h"
#include "common/keywords.h"
#include "executor/spi.h"
#include "funcapi.h"
@@ -291,6 +292,8 @@ typedef struct
int *leftattnos; /* left-child varattnos of join cols, or 0 */
int *rightattnos; /* right-child varattnos of join cols, or 0 */
List *usingNames; /* names assigned to merged columns */
+
+ HTAB *all_names; /* hash to store all names colname_is_unique() */
} deparse_columns;
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
@@ -371,6 +374,7 @@ static void set_relation_column_names(deparse_namespace *dpns,
deparse_columns *colinfo);
static void set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
deparse_columns *colinfo);
+static void add_colname(deparse_columns *colinfo, char *colname);
static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
deparse_columns *colinfo);
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
@@ -4683,7 +4687,10 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
changed_any = true;
}
else
+ {
colinfo->new_colnames[j] = child_colname;
+ add_colname(colinfo, child_colname);
+ }
}
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
@@ -4732,7 +4739,10 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
changed_any = true;
}
else
+ {
colinfo->new_colnames[j] = child_colname;
+ add_colname(colinfo, child_colname);
+ }
}
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
@@ -4757,6 +4767,29 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colinfo->printaliases = false;
}
+static uint32
+pstring_hash(const void *key, Size keysize)
+{ /* key points at a char *: hash the string it refers to; keysize is ignored */
+ return string_hash(*(const void **)key, NAMEDATALEN);
+}
+/* Hash match callback: each key is a char *; compare the pointed-to strings in full. keysize is the hash's keysize (sizeof(char *)), not a string length, so it must not bound the comparison. */
+static int
+pstring_compare(const void *key1, const void *key2, Size keysize)
+{
+	return strcmp(*(const char *const *) key1, *(const char *const *) key2);
+}
+/* Record colname in colinfo's name hash, if that hash has been built. */
+static void
+add_colname(deparse_columns *colinfo, char *colname)
+{
+	bool		found;
+
+	if (colinfo->all_names == NULL)
+		return;
+	/* keys are char * values, so pass the address of the pointer */
+	hash_search(colinfo->all_names, &colname, HASH_ENTER, &found);
+}
+
/*
* colname_is_unique: is colname distinct from already-chosen column names?
*
@@ -4769,6 +4802,75 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
int i;
ListCell *lc;
+ if (colinfo->all_names != NULL ||
+ (colinfo->num_cols + colinfo->num_new_cols +
+ list_length(dpns->using_names) +
+ list_length(colinfo->parentUsing)) > 64)
+ {
+ bool found;
+
+ if (colinfo->all_names == NULL)
+ {
+ HASHCTL hash_ctl;
+
+ MemSet(&hash_ctl, 0, sizeof(hash_ctl));
+
+ hash_ctl.keysize = sizeof(char*);
+ hash_ctl.entrysize = sizeof(char*);
+ hash_ctl.hash = pstring_hash;
+ hash_ctl.match = pstring_compare;
+
+ colinfo->all_names = hash_create("colname_is_unique storage",
+ 512, &hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
+
+
+ for (i = 0; i < colinfo->num_cols; i++)
+ {
+ if (colinfo->colnames[i] == NULL)
+ continue;
+
+ hash_search(colinfo->all_names, &colinfo->colnames[i],
+ HASH_ENTER, &found);
+ }
+
+ for (i = 0; i < colinfo->num_new_cols; i++)
+ {
+ if (colinfo->new_colnames[i] == NULL)
+ continue;
+
+ hash_search(colinfo->all_names, &colinfo->new_colnames[i],
+ HASH_ENTER, &found);
+ }
+
+ foreach(lc, dpns->using_names)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ hash_search(colinfo->all_names, &oldname,
+ HASH_ENTER, &found);
+ }
+
+ foreach(lc, colinfo->parentUsing)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ hash_search(colinfo->all_names, &oldname,
+ HASH_ENTER, &found);
+ }
+ }
+
+ hash_search(colinfo->all_names, &colname, HASH_FIND, &found);
+
+ if (found)
+ return false;
+
+ hash_search(colinfo->all_names, &colname, HASH_ENTER, &found);
+ Assert(found == false);
+
+ return true;
+ }
+
/* Check against already-assigned column aliases within RTE */
for (i = 0; i < colinfo->num_cols; i++)
{
@@ -4820,6 +4922,8 @@ static char *
make_colname_unique(char *colname, deparse_namespace *dpns,
deparse_columns *colinfo)
{
+ CHECK_FOR_INTERRUPTS();
+
/*
* If the selected name isn't unique, append digits to make it so. For a
* very long input name, we might have to truncate to stay within
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 4670a3d648d..c45bcc2a8a5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -149,13 +149,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;
static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
-static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
+static double eqjoinsel_inner(Oid operator, Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2);
+ bool have_mcvs1, bool have_mcvs2,
+ int record_cmp_prefix);
static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
@@ -163,7 +164,8 @@ static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
- RelOptInfo *inner_rel);
+ RelOptInfo *inner_rel,
+ int record_cmp_prefix);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
@@ -215,6 +217,20 @@ static bool get_actual_variable_endpoint(Relation heapRel,
Datum *endpointDatum);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+static bool
+join_is_reversed_variables(SpecialJoinInfo *sjinfo,
+						   VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	/* var1 is on RHS */
+	if (vardata1->rel &&
+		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+		return true;
+
+	/* otherwise the inputs are reversed only if var2 is on LHS */
+	return vardata2->rel != NULL &&
+		bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand);
+}
+
/*
* eqsel - Selectivity of "=" for any data types.
@@ -279,14 +295,64 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate)
((Const *) other)->constisnull,
varonleft, negate);
else
+ {
selec = var_eq_non_const(&vardata, operator, collation, other,
varonleft, negate);
+ if (IsA(other, Var) || (IsA(other, RelabelType) &&
+ IsA(((RelabelType *) other)->arg, Var)))
+ {
+ VariableStatData rightvardata;
+ double varselec;
+ examine_variable(root, other, 0, &rightvardata);
+ varselec = eqjoin_selectivity(root, operator, collation, &vardata,
+ &rightvardata, NULL, -1);
+ ReleaseVariableStats(rightvardata);
+
+ /*
+ * If 'other' is a variable, also estimate the selectivity of joining
+ * against it and keep the larger (less selective) of the two values.
+ * This avoids the worst cases where the row estimate is far too low,
+ * which typically produces badly misestimated nested loop joins.
+ */
+ if (varselec > selec)
+ selec = varselec;
+ }
+ }
+
ReleaseVariableStats(vardata);
return selec;
}
+static bool
+get_cached_attstatsslot(AttStatsSlot *sslot, VariableStatData *vardata,
+ int reqkind, Oid reqop, int flags)
+{
+ if (vardata->sslots)
+ {
+ /* vardata carries a slot cache: try it first.
+ * NOTE(review): *sslot becomes a shallow copy of the cached slot;
+ * confirm that a caller's later free_attstatsslot() on it is safe. */
+ AttStatsSlot *sslotp;
+
+ sslotp = fill_attstatsslot(vardata->sslots,
+ vardata->statsTuple,
+ reqkind, reqop, flags);
+
+ if (sslotp)
+ {
+ *sslot = *sslotp;
+ return true;
+ }
+ }
+ /* no cache, or fill_attstatsslot() returned NULL: fall back to get_attstatsslot() */
+ return get_attstatsslot(sslot, vardata->statsTuple,
+ reqkind, reqop,
+ flags);
+}
+
+
/*
* var_eq_const --- eqsel for var = const case
*
@@ -296,6 +362,15 @@ double
var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
Datum constval, bool constisnull,
bool varonleft, bool negate)
+{
+ return eqconst_selectivity(oproid, collation, vardata, constval,
+ constisnull, varonleft, negate, -1);
+}
+
+Selectivity
+eqconst_selectivity(Oid oproid, Oid collation,
+ VariableStatData *vardata, Datum constval, bool constisnull,
+ bool varonleft, bool negate, int record_cmp_prefix)
{
double selec;
double nullfrac = 0.0;
@@ -328,7 +403,8 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* different from ours, but it's much more likely to be right than
* ignoring the information.)
*/
- if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0 &&
+ record_cmp_prefix <= 0)
{
selec = 1.0 / vardata->rel->tuples;
}
@@ -347,11 +423,11 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* don't like this, maybe you shouldn't be using eqsel for your
* operator...)
*/
- if (get_attstatsslot(&sslot, vardata->statsTuple,
- STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ if (get_cached_attstatsslot(&sslot, vardata,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
fmgr_info(opfuncoid, &eqproc);
@@ -362,15 +438,17 @@ var_eq_const(VariableStatData *vardata, Oid oproid, Oid collation,
* eqproc returns NULL, though really equality functions should
* never do that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
/* be careful to apply operator right way 'round */
if (varonleft)
fcinfo->args[1].value = constval;
else
fcinfo->args[0].value = constval;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
for (i = 0; i < sslot.nvalues; i++)
{
@@ -514,16 +592,32 @@ var_eq_non_const(VariableStatData *vardata, Oid oproid, Oid collation,
if (ndistinct > 1)
selec /= ndistinct;
- /*
- * Cross-check: selectivity should never be estimated as more than the
- * most common value's.
- */
if (get_attstatsslot(&sslot, vardata->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
- ATTSTATSSLOT_NUMBERS))
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
- if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
- selec = sslot.numbers[0];
+ int i;
+ double sum_selec = 0.0;
+
+ /*
+ * Compute the quadratic mean, walking the array in reverse direction
+ * so as not to lose accuracy. We don't special-case sslot.nnumbers
+ * being zero, because the formula then simply returns selec
+ * unchanged; a zero-length array is unlikely anyway.
+ */
+ for(i=sslot.nnumbers - 1; i>=0; i--)
+ sum_selec += sslot.numbers[i] * sslot.numbers[i];
+
+ selec = sqrt((selec * selec + sum_selec) /
+ ((double)sslot.nnumbers + 1.0));
+
+ /*
+ * Cross-check: selectivity should never be estimated as
+ * more than the most common value's.
+ */
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+ selec = sslot.numbers[0];
+
free_attstatsslot(&sslot);
}
}
@@ -1020,6 +1114,138 @@ generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
return selec;
}
+/*
+ * Binary-search sslot->values[start .. nvalues) for the bound constval using
+ * opproc(value, constval, record_cmp_prefix), negated when isgt is set.
+ * Returns the index at which that test stops holding (the partition point).
+ */
+static int
+prefix_record_histogram_search(AttStatsSlot *sslot, int start,
+							   Datum constval, int record_cmp_prefix,
+							   FmgrInfo *opproc, bool isgt)
+{
+	Datum		prefix = Int32GetDatum(record_cmp_prefix);
+	int			lo = start;		/* first candidate slot */
+	int			hi = sslot->nvalues;	/* one past the last candidate */
+
+	while (lo < hi)
+	{
+		int			mid = (lo + hi) / 2;
+		bool		holds;
+
+		holds = DatumGetBool(FunctionCall3Coll(opproc, DEFAULT_COLLATION_OID,
+											   sslot->values[mid], constval,
+											   prefix));
+		/* keep moving right while the (possibly negated) test holds */
+		if (isgt ? !holds : holds)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	return lo;
+}
+
+/*
+ * Estimate the selectivity of a record-prefix range by counting the histogram
+ * bins lying between constvalLeft and constvalRight - similar to
+ * histogram_selectivity(), but relying on the sort order of records.  The bin
+ * count is stored in *n_bins.  Returns -1.0, leaving *n_bins untouched, when
+ * there is no stats tuple or no histogram slot.
+ */
+double
+prefix_record_histogram_selectivity(VariableStatData *vardata,
+									Datum constvalLeft, Datum constvalRight,
+									int record_cmp_prefix,
+									double ndistinct, int *n_bins)
+{
+	double		result = -1.0;
+	AttStatsSlot sslot;
+
+	if (HeapTupleIsValid(vardata->statsTuple) &&
+		get_cached_attstatsslot(&sslot, vardata, STATISTIC_KIND_HISTOGRAM,
+								InvalidOid, ATTSTATSSLOT_VALUES))
+	{
+		FmgrInfo	opprocLT, opprocGT;
+		int			start = -1,
+					end = -1;
+
+		if (sslot.nvalues > 2)
+		{
+			fmgr_info(F_RECORD_GE, &opprocGT);
+			fmgr_info(F_RECORD_LE, &opprocLT);
+
+			start = prefix_record_histogram_search(&sslot, 0, constvalLeft,
+												   record_cmp_prefix,
+												   &opprocGT, true);
+			if (start < 0)
+				start = 0;
+			end = prefix_record_histogram_search(&sslot, start, constvalRight,
+												 -1,	/* presumably "whole record" - confirm */
+												 &opprocLT, false);
+			if (end >= sslot.nvalues)
+				end = sslot.nvalues - 1;
+		}
+		else
+		{
+			fmgr_info(F_RECORD_GT, &opprocGT);
+			fmgr_info(F_RECORD_LE, &opprocLT);
+
+			/*
+			 * Scan down to the last bin whose start border is below constvalLeft
+			 */
+			for (start = sslot.nvalues - 1; start >= 0; start--)
+			{
+				if (DatumGetBool(FunctionCall3Coll(&opprocGT,
+												   DEFAULT_COLLATION_OID,
+												   constvalLeft,
+												   sslot.values[start],
+												   Int32GetDatum(record_cmp_prefix))))
+					break;
+			}
+
+			if (start < 0)
+				start = 0;
+
+			/*
+			 * Scan up to the first bin whose end border is not below constvalRight
+			 */
+			for (end = start; end <= sslot.nvalues - 2; end++)
+			{
+				if (DatumGetBool(FunctionCall3Coll(&opprocLT,
+												   DEFAULT_COLLATION_OID,
+												   constvalRight,
+												   sslot.values[end + 1],
+												   Int32GetDatum(-1))))
+					break;
+			}
+		}
+
+		if (opprocGT.fn_extra)
+			pfree(opprocGT.fn_extra);
+		if (opprocLT.fn_extra)
+			pfree(opprocLT.fn_extra);
+
+		*n_bins = (start >= end) ? 0 : end - start;
+		result = (start >= end) ? 0.5 : end - start;
+		result /= ((double) (sslot.nvalues));
+
+		/* no whole bin matched: scale the half-bin guess; needs a known rel */
+		if (*n_bins == 0 && ndistinct > 1 && vardata->rel != NULL)
+		{
+			double		ntuples = vardata->rel->tuples;
+			double		ntuplesbin = ntuples / sslot.nvalues;
+
+			result *= (1 - pow((ntuples - ntuplesbin) / ntuples,
+							   ntuples / ndistinct));
+		}
+
+		free_attstatsslot(&sslot);	/* last: sslot.nvalues is read above */
+	}
+
+	return result;
+}
+
/*
* ineq_histogram_selectivity - Examine the histogram for scalarineqsel
*
@@ -2280,11 +2506,32 @@ eqjoinsel(PG_FUNCTION_ARGS)
JoinType jointype = (JoinType) PG_GETARG_INT16(3);
#endif
SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
- Oid collation = PG_GET_COLLATION();
+ VariableStatData vardata1;
+ VariableStatData vardata2;
+ Selectivity s;
+ Oid collation = PG_GET_COLLATION();
+
+ get_join_variables(root, args, sjinfo,
+ &vardata1, &vardata2, NULL);
+
+
+ s = eqjoin_selectivity(root, operator, collation, &vardata1, &vardata2,
+ sjinfo, -1);
+
+ ReleaseVariableStats(vardata1);
+ ReleaseVariableStats(vardata2);
+
+ PG_RETURN_FLOAT8((float8)s);
+}
+
+Selectivity
+eqjoin_selectivity(PlannerInfo *root, Oid operator, Oid collation,
+ VariableStatData* vardata1,
+ VariableStatData* vardata2, SpecialJoinInfo *sjinfo,
+ int record_cmp_prefix)
+{
double selec;
double selec_inner;
- VariableStatData vardata1;
- VariableStatData vardata2;
double nd1;
double nd2;
bool isdefault1;
@@ -2300,11 +2547,14 @@ eqjoinsel(PG_FUNCTION_ARGS)
bool join_is_reversed;
RelOptInfo *inner_rel;
- get_join_variables(root, args, sjinfo,
- &vardata1, &vardata2, &join_is_reversed);
+ join_is_reversed = sjinfo && join_is_reversed_variables(sjinfo, vardata1, vardata2);
- nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
- nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
+ nd1 = get_variable_numdistinct(vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(vardata2, &isdefault2);
+
+ if ((isdefault1 && vardata1->rel && vardata1->rel->tuples <= 0.0) ||
+ (isdefault2 && vardata2->rel && vardata2->rel->tuples <= 0.0))
+ return 0.0;
opfuncoid = get_opcode(operator);
@@ -2315,47 +2565,48 @@ eqjoinsel(PG_FUNCTION_ARGS)
* There is no use in fetching one side's MCVs if we lack MCVs for the
* other side, so do a quick check to verify that both stats exist.
*/
- get_mcv_stats = (HeapTupleIsValid(vardata1.statsTuple) &&
- HeapTupleIsValid(vardata2.statsTuple) &&
- get_attstatsslot(&sslot1, vardata1.statsTuple,
+ get_mcv_stats = (HeapTupleIsValid(vardata1->statsTuple) &&
+ HeapTupleIsValid(vardata2->statsTuple) &&
+ get_attstatsslot(&sslot1, vardata1->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
0) &&
- get_attstatsslot(&sslot2, vardata2.statsTuple,
+ get_attstatsslot(&sslot2, vardata2->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
0));
- if (HeapTupleIsValid(vardata1.statsTuple))
+ if (HeapTupleIsValid(vardata1->statsTuple))
{
/* note we allow use of nullfrac regardless of security check */
- stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+ stats1 = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
if (get_mcv_stats &&
- statistic_proc_security_check(&vardata1, opfuncoid))
- have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
+ statistic_proc_security_check(vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
- if (HeapTupleIsValid(vardata2.statsTuple))
+ if (HeapTupleIsValid(vardata2->statsTuple))
{
/* note we allow use of nullfrac regardless of security check */
- stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+ stats2 = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
if (get_mcv_stats &&
- statistic_proc_security_check(&vardata2, opfuncoid))
- have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
+ statistic_proc_security_check(vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2->statsTuple,
STATISTIC_KIND_MCV, InvalidOid,
ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
}
/* We need to compute the inner-join selectivity in all cases */
- selec_inner = eqjoinsel_inner(opfuncoid, collation,
- &vardata1, &vardata2,
+ selec_inner = eqjoinsel_inner(operator, opfuncoid, collation,
+ vardata1, vardata2,
nd1, nd2,
isdefault1, isdefault2,
&sslot1, &sslot2,
stats1, stats2,
- have_mcvs1, have_mcvs2);
+ have_mcvs1, have_mcvs2,
+ record_cmp_prefix);
- switch (sjinfo->jointype)
+ switch (sjinfo ? sjinfo->jointype : JOIN_INNER)
{
case JOIN_INNER:
case JOIN_LEFT:
@@ -2375,26 +2626,28 @@ eqjoinsel(PG_FUNCTION_ARGS)
if (!join_is_reversed)
selec = eqjoinsel_semi(opfuncoid, collation,
- &vardata1, &vardata2,
+ vardata1, vardata2,
nd1, nd2,
isdefault1, isdefault2,
&sslot1, &sslot2,
stats1, stats2,
have_mcvs1, have_mcvs2,
- inner_rel);
+ inner_rel,
+ record_cmp_prefix);
else
{
Oid commop = get_commutator(operator);
Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
selec = eqjoinsel_semi(commopfuncoid, collation,
- &vardata2, &vardata1,
+ vardata2, vardata1,
nd2, nd1,
isdefault2, isdefault1,
&sslot2, &sslot1,
stats2, stats1,
have_mcvs2, have_mcvs1,
- inner_rel);
+ inner_rel,
+ record_cmp_prefix);
}
/*
@@ -2420,12 +2673,132 @@ eqjoinsel(PG_FUNCTION_ARGS)
free_attstatsslot(&sslot1);
free_attstatsslot(&sslot2);
- ReleaseVariableStats(vardata1);
- ReleaseVariableStats(vardata2);
-
CLAMP_PROBABILITY(selec);
- PG_RETURN_FLOAT8((float8) selec);
+ return selec;
+}
+
+/*
+ * Three-way comparison built from a "<" proc and an "=" proc, both called
+ * with record_cmp_prefix: -1 if v1 < v2, 0 if they are equal, 1 otherwise.
+ */
+static int
+cmp_vardata(FmgrInfo *eqproc, FmgrInfo *ltproc,
+			Datum v1, Datum v2, int record_cmp_prefix)
+{
+	Datum		prefix = Int32GetDatum(record_cmp_prefix);
+
+	if (DatumGetBool(FunctionCall3Coll(ltproc, DEFAULT_COLLATION_OID,
+									   v1, v2, prefix)))
+		return -1;
+
+	if (DatumGetBool(FunctionCall3Coll(eqproc, DEFAULT_COLLATION_OID,
+									   v1, v2, prefix)))
+		return 0;
+
+	/* neither less nor equal */
+	return 1;
+}
+/*
+ * Estimate equijoin selectivity by walking both histograms in parallel, in
+ * the manner of a merge join.  Returns -1.0 when either side lacks a stats
+ * tuple or histogram, or when no btree "<" operator is found for eqop.
+ */
+static double
+eqjoinsel_histogram(Oid eqop,
+					VariableStatData *vardata1, VariableStatData *vardata2,
+					int record_cmp_prefix, double nd1, double nd2)
+{
+	bool		have_hist1 = false;
+	bool		have_hist2 = false;
+	AttStatsSlot sslot1;
+	AttStatsSlot sslot2;
+	int			i1 = 0, i2 = 0;
+	double		n1 = 0.0, n2 = 0.0;
+	double		result = -1.0;
+	FmgrInfo	eqproc, ltproc;
+	Oid			orderop = InvalidOid;
+	List	   *opfamilies;
+	ListCell   *lc;
+
+	if (!(HeapTupleIsValid(vardata1->statsTuple) &&
+		  HeapTupleIsValid(vardata2->statsTuple)))
+		return result;
+
+	memset(&sslot1, 0, sizeof(sslot1));
+	memset(&sslot2, 0, sizeof(sslot2));
+
+	have_hist1 = get_cached_attstatsslot(&sslot1, vardata1,
+										 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+										 ATTSTATSSLOT_VALUES);
+	have_hist2 = get_cached_attstatsslot(&sslot2, vardata2,
+										 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+										 ATTSTATSSLOT_VALUES);
+
+	if (!(have_hist1 && have_hist2))
+		goto out;
+
+	opfamilies = get_mergejoin_opfamilies(eqop);
+	foreach(lc, opfamilies)
+	{
+		orderop = get_opfamily_member(lfirst_oid(lc), vardata1->vartype,
+									  vardata2->vartype, BTLessStrategyNumber);
+		if (OidIsValid(orderop))
+			break;
+	}
+
+	/* no "<" found in any mergejoin opfamily, e.g. "==" from fulleq */
+	if (!OidIsValid(orderop))
+		goto out;
+
+	fmgr_info(get_opcode(eqop), &eqproc);
+	fmgr_info(get_opcode(orderop), &ltproc);
+
+	result = 0.0;
+	while (i1 < sslot1.nvalues && i2 < sslot2.nvalues)
+	{
+		int			cmp = cmp_vardata(&eqproc, &ltproc, sslot1.values[i1],
+									  sslot2.values[i2], record_cmp_prefix);
+
+		if (cmp < 0)
+		{
+			i1++;
+			n1++;
+			if (n2 > 0)
+			{
+				result += 0.5 / (sslot1.nvalues * sslot2.nvalues);
+				n2 = 0.0;
+			}
+		}
+		else if (cmp > 0)
+		{
+			i2++;
+			n2++;
+			if (n1 > 0)
+			{
+				result += 0.5 / (sslot1.nvalues * sslot2.nvalues);
+				n1 = 0.0;
+			}
+		}
+		else
+		{
+			i1++; i2++;
+			n1++; n2++;
+			result += (n1 / sslot1.nvalues) * (n2 / sslot2.nvalues);
+			n1 = 0.0; n2 = 0.0;
+		}
+	}
+
+	nd1 /= sslot1.nvalues;
+	nd2 /= sslot2.nvalues;
+
+	result /= (nd1 > nd2) ? nd1 : nd2;
+
+out:
+	free_attstatsslot(&sslot1);
+	free_attstatsslot(&sslot2);
+
+	return result;
}
/*
@@ -2435,13 +2808,14 @@ eqjoinsel(PG_FUNCTION_ARGS)
* that it's worth trying to distinguish them here.
*/
static double
-eqjoinsel_inner(Oid opfuncoid, Oid collation,
+eqjoinsel_inner(Oid operator, Oid opfuncoid, Oid collation,
VariableStatData *vardata1, VariableStatData *vardata2,
double nd1, double nd2,
bool isdefault1, bool isdefault2,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
- bool have_mcvs1, bool have_mcvs2)
+ bool have_mcvs1, bool have_mcvs2,
+ int record_cmp_prefix)
{
double selec;
@@ -2459,7 +2833,7 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* results", Technical Report 1018, Computer Science Dept., University
* of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
*/
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
@@ -2485,10 +2859,12 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
@@ -2614,11 +2990,34 @@ eqjoinsel_inner(Oid opfuncoid, Oid collation,
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
- selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
- if (nd1 > nd2)
- selec /= nd1;
- else
- selec /= nd2;
+ if (isdefault1 && vardata1->rel && nd1 > vardata1->rel->rows)
+ {
+ nd1 = vardata1->rel->rows;
+ if (nd1 == 0.0)
+ nd1 = 1.0;
+ }
+
+ if (isdefault2 && vardata2->rel && nd2 > vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ if (nd2 == 0.0)
+ nd2 = 1.0;
+ }
+
+ selec = eqjoinsel_histogram(operator, vardata1, vardata2,
+ record_cmp_prefix, nd1, nd2);
+
+ if (selec < 0)
+ {
+ selec = 1.0;
+
+ if (nd1 > nd2)
+ selec /= nd1;
+ else
+ selec /= nd2;
+ }
+
+ selec *= (1.0 - nullfrac1) * (1.0 - nullfrac2);
}
return selec;
@@ -2639,7 +3038,8 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
AttStatsSlot *sslot1, AttStatsSlot *sslot2,
Form_pg_statistic stats1, Form_pg_statistic stats2,
bool have_mcvs1, bool have_mcvs2,
- RelOptInfo *inner_rel)
+ RelOptInfo *inner_rel,
+ int record_cmp_prefix)
{
double selec;
@@ -2686,7 +3086,7 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* lists. We still have to estimate for the remaining population, but
* in a skewed distribution this gives us a big leg up in accuracy.
*/
- LOCAL_FCINFO(fcinfo, 2);
+ LOCAL_FCINFO(fcinfo, 3);
FmgrInfo eqproc;
bool *hasmatch1;
bool *hasmatch2;
@@ -2715,10 +3115,12 @@ eqjoinsel_semi(Oid opfuncoid, Oid collation,
* returns NULL, though really equality functions should never do
* that.
*/
- InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 3, collation,
NULL, NULL);
fcinfo->args[0].isnull = false;
fcinfo->args[1].isnull = false;
+ fcinfo->args[2].isnull = false;
+ fcinfo->args[2].value = Int32GetDatum(record_cmp_prefix);
hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
@@ -3429,11 +3831,29 @@ double
estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List **pgset, EstimationInfo *estinfo)
{
- List *varinfos = NIL;
+ return estimate_num_groups_incremental(root, groupExprs,
+ input_rows, pgset, estinfo,
+ NULL, 0);
+}
+
+/*
+ * estimate_num_groups_incremental
+ * An estimate_num_groups variant, optimized for cases that are adding the
+ * expressions incrementally (e.g. one by one).
+ */
+double
+estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+ double input_rows,
+ List **pgset, EstimationInfo *estinfo,
+ List **cache_varinfos, int prevNExprs)
+{
+ List *varinfos = (cache_varinfos) ? *cache_varinfos : NIL;
double srf_multiplier = 1.0;
double numdistinct;
ListCell *l;
- int i;
+ int i,
+ j,
+ k;
/* Zero the estinfo output parameter, if non-NULL */
if (estinfo != NULL)
@@ -3464,7 +3884,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
*/
numdistinct = 1.0;
- i = 0;
+ i = j = 0;
foreach(l, groupExprs)
{
Node *groupexpr = (Node *) lfirst(l);
@@ -3473,6 +3893,14 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
List *varshere;
ListCell *l2;
+ /* was done on previous call */
+ if (cache_varinfos && j++ < prevNExprs)
+ {
+ if (pgset)
+ i++; /* to keep in sync with lines below */
+ continue;
+ }
+
/* is expression in this grouping set? */
if (pgset && !list_member_int(*pgset, i++))
continue;
@@ -3522,6 +3950,9 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
ReleaseVariableStats(vardata);
+ if (list_length(varinfos) > 2*list_length(groupExprs))
+ continue;
+
/*
* Else pull out the component Vars. Handle PlaceHolderVars by
* recursing into their arguments (effectively assuming that the
@@ -3542,13 +3973,21 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
if (varshere == NIL)
{
if (contain_volatile_functions(groupexpr))
+ {
+ if (cache_varinfos)
+ *cache_varinfos = varinfos;
return input_rows;
+ }
continue;
}
+ if (list_length(varshere) >= 8)
+ continue;
+
/*
* Else add variables to varinfos list
*/
+ k = 0;
foreach(l2, varshere)
{
Node *var = (Node *) lfirst(l2);
@@ -3556,9 +3995,15 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
examine_variable(root, var, 0, &vardata);
varinfos = add_unique_group_var(root, varinfos, var, &vardata);
ReleaseVariableStats(vardata);
+
+ if (++k > 4)
+ break;
}
}
+ if (cache_varinfos)
+ *cache_varinfos = varinfos;
+
/*
* If now no Vars, we must have an all-constant or all-boolean GROUP BY
* list.
@@ -4969,14 +5414,8 @@ get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
examine_variable(root, left, 0, vardata1);
examine_variable(root, right, 0, vardata2);
- if (vardata1->rel &&
- bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
- *join_is_reversed = true; /* var1 is on RHS */
- else if (vardata2->rel &&
- bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
- *join_is_reversed = true; /* var2 is on LHS */
- else
- *join_is_reversed = false;
+ if (join_is_reversed)
+ *join_is_reversed = join_is_reversed_variables(sjinfo, vardata1, vardata2);
}
/* statext_expressions_load copies the tuple, so just pfree it. */
@@ -6600,7 +7039,7 @@ index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
other_operand = NULL; /* keep compiler quiet */
}
- cost_qual_eval_node(&index_qual_cost, other_operand, root);
+ cost_qual_eval_node_index(&index_qual_cost, other_operand, root);
qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
}
return qual_arg_cost;
diff --git a/src/backend/utils/cache/attoptcache.c b/src/backend/utils/cache/attoptcache.c
index af978ccd4b1..28980620662 100644
--- a/src/backend/utils/cache/attoptcache.c
+++ b/src/backend/utils/cache/attoptcache.c
@@ -44,12 +44,10 @@ typedef struct
/*
* InvalidateAttoptCacheCallback
- * Flush all cache entries when pg_attribute is updated.
+ * Flush cache entry (or entries) when pg_attribute is updated.
*
* When pg_attribute is updated, we must flush the cache entry at least
- * for that attribute. Currently, we just flush them all. Since attribute
- * options are not currently used in performance-critical paths (such as
- * query execution), this seems OK.
+ * for that attribute.
*/
static void
InvalidateAttoptCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
@@ -57,7 +55,16 @@ InvalidateAttoptCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
HASH_SEQ_STATUS status;
AttoptCacheEntry *attopt;
- hash_seq_init(&status, AttoptCacheHash);
+ /*
+ * By convention, a zero hash value is passed to the callback as a sign
+ * that it's time to invalidate the whole cache. See sinval.c, inval.c and
+ * InvalidateSystemCachesExtended().
+ */
+ if (hashvalue == 0)
+ hash_seq_init(&status, AttoptCacheHash);
+ else
+ hash_seq_init_with_hash_value(&status, AttoptCacheHash, hashvalue);
+
while ((attopt = (AttoptCacheEntry *) hash_seq_search(&status)) != NULL)
{
if (attopt->opts)
@@ -70,6 +77,17 @@ InvalidateAttoptCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
}
}
+/*
+ * Hash an AttoptCacheKey with the ATTNUM syscache's two-key hash function.
+ */
+static uint32
+relatt_cache_syshash(const void *key, Size keysize)
+{
+	const AttoptCacheKey *attopt_key = (const AttoptCacheKey *) key;
+
+	return GetSysCacheHashValue2(ATTNUM, attopt_key->attrelid, attopt_key->attnum);
+}
+
/*
* InitializeAttoptCache
* Initialize the attribute options cache.
@@ -82,9 +100,17 @@ InitializeAttoptCache(void)
/* Initialize the hash table. */
ctl.keysize = sizeof(AttoptCacheKey);
ctl.entrysize = sizeof(AttoptCacheEntry);
+
+ /*
+ * AttoptCacheEntry takes hash value from the system cache. For
+ * AttoptCacheHash we use the same hash in order to speedup search by hash
+ * value. This is used by hash_seq_init_with_hash_value().
+ */
+ ctl.hash = relatt_cache_syshash;
+
AttoptCacheHash =
hash_create("Attopt cache", 256, &ctl,
- HASH_ELEM | HASH_BLOBS);
+ HASH_ELEM | HASH_FUNCTION);
/* Make sure we've initialized CacheMemoryContext. */
if (!CacheMemoryContext)
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 59d625b244c..a5a78293a04 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -20,6 +20,7 @@
#include "access/table.h"
#include "access/xact.h"
#include "catalog/catalog.h"
+#include "catalog/namespace.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
@@ -2353,26 +2354,24 @@ CatCacheCopyKeys(TupleDesc tupdesc, int nkeys, int *attnos,
* system relation.
*/
void
-PrepareToInvalidateCacheTuple(Relation relation,
+PrepareToInvalidateCacheTuple(Oid reloid,
HeapTuple tuple,
HeapTuple newtuple,
void (*function) (int, uint32, Oid))
{
slist_iter iter;
- Oid reloid;
+ char prevTempScope = temp_table_scope;
CACHE_elog(DEBUG2, "PrepareToInvalidateCacheTuple: called");
/*
* sanity checks
*/
- Assert(RelationIsValid(relation));
+ Assert(OidIsValid(reloid));
Assert(HeapTupleIsValid(tuple));
Assert(PointerIsValid(function));
Assert(CacheHdr != NULL);
- reloid = RelationGetRelid(relation);
-
/* ----------------
* for each cache
* if the cache contains tuples from the specified relation
@@ -2386,6 +2385,9 @@ PrepareToInvalidateCacheTuple(Relation relation,
CatCache *ccp = slist_container(CatCache, cc_next, iter.cur);
uint32 hashvalue;
Oid dbid;
+ bool isLocal = false;
+ Oid relationId = InvalidOid;
+ bool checkTemp = false;
if (ccp->cc_reloid != reloid)
continue;
@@ -2397,6 +2399,47 @@ PrepareToInvalidateCacheTuple(Relation relation,
hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys, tuple);
dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
+ if (reloid == RelationRelationId)
+ {
+ Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
+
+ isLocal = (classtup->relpersistence == RELPERSISTENCE_TEMP) ?
+ true : false;
+ }
+ else if (reloid == AttributeRelationId)
+ {
+ Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
+
+ relationId = atttup->attrelid;
+ checkTemp = true;
+ }
+ else if (reloid == IndexRelationId)
+ {
+ Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple);
+
+ relationId = indextup->indexrelid;
+ checkTemp = true;
+ }
+
+ if (checkTemp)
+ {
+ HeapTuple htup = SearchSysCache1(RELOID,
+ ObjectIdGetDatum(relationId));
+
+ if (HeapTupleIsValid(htup))
+ {
+ Form_pg_class c = (Form_pg_class)GETSTRUCT(htup);
+
+ isLocal = (c->relisshared == false &&
+ c->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(c->relnamespace)) ?
+ true : false;
+ ReleaseSysCache(htup);
+ }
+ }
+
+ temp_table_scope = isLocal ? TEMP_TABLE_SCOPE_LOCAL : prevTempScope;
+
(*function) (ccp->id, hashvalue, dbid);
if (newtuple)
@@ -2409,6 +2452,8 @@ PrepareToInvalidateCacheTuple(Relation relation,
(*function) (ccp->id, newhashvalue, dbid);
}
}
+
+ temp_table_scope = prevTempScope;
}
/* ResourceOwner callbacks */
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 66e04f973f6..43ce6933c75 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -114,6 +114,7 @@
#include "access/xact.h"
#include "access/xloginsert.h"
#include "catalog/catalog.h"
+#include "catalog/namespace.h"
#include "catalog/pg_constraint.h"
#include "miscadmin.h"
#include "storage/sinval.h"
@@ -273,6 +274,11 @@ static struct RELCACHECALLBACK
static int relcache_callback_count = 0;
+
+
+char temp_table_scope = TEMP_TABLE_SCOPE_NOTEMP;
+
+
/* ----------------------------------------------------------------
* Invalidation subgroup support functions
* ----------------------------------------------------------------
@@ -320,6 +326,14 @@ AddInvalidationMessage(InvalidationMsgsGroup *group, int subgroup,
}
/* Okay, add message to current group */
ima->msgs[nextindex] = *msg;
+
+ /* Mark the message as local-only when it's related to temporary tables.
+ Don't mark snapshot invalidations, nor any message when the isolation
+ level is not read committed, because that causes trouble. */
+ ima->msgs[nextindex].isLocal = IsLocalTempTableScope() &&
+ msg->id != SHAREDINVALSNAPSHOT_ID &&
+ XactIsoLevel == XACT_READ_COMMITTED;
+
group->nextmsg[subgroup]++;
}
@@ -1211,6 +1225,8 @@ CacheInvalidateHeapTuple(Relation relation,
Oid tupleRelId;
Oid databaseId;
Oid relationId;
+ bool tempRel = false;
+ bool checkTemp = false;
/* Do nothing during bootstrap */
if (IsBootstrapProcessingMode())
@@ -1247,7 +1263,7 @@ CacheInvalidateHeapTuple(Relation relation,
RegisterSnapshotInvalidation(databaseId, tupleRelId);
}
else
- PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
+ PrepareToInvalidateCacheTuple(tupleRelId, tuple, newtuple,
RegisterCatcacheInvalidation);
/*
@@ -1266,6 +1282,9 @@ CacheInvalidateHeapTuple(Relation relation,
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
+
+ tempRel = (classtup->relpersistence == RELPERSISTENCE_TEMP) ?
+ true : false;
}
else if (tupleRelId == AttributeRelationId)
{
@@ -1284,6 +1303,7 @@ CacheInvalidateHeapTuple(Relation relation,
* never come here for a shared rel anyway.)
*/
databaseId = MyDatabaseId;
+ checkTemp = true;
}
else if (tupleRelId == IndexRelationId)
{
@@ -1297,6 +1317,7 @@ CacheInvalidateHeapTuple(Relation relation,
*/
relationId = indextup->indexrelid;
databaseId = MyDatabaseId;
+ checkTemp = true;
}
else if (tupleRelId == ConstraintRelationId)
{
@@ -1318,10 +1339,29 @@ CacheInvalidateHeapTuple(Relation relation,
else
return;
+ if (checkTemp)
+ {
+ HeapTuple htup = SearchSysCache1(RELOID, ObjectIdGetDatum(relationId));
+
+ if (HeapTupleIsValid(htup))
+ {
+ Form_pg_class c = (Form_pg_class)GETSTRUCT(htup);
+
+ tempRel = (c->relisshared == false &&
+ c->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(c->relnamespace)) ?
+ true : false;
+
+ ReleaseSysCache(htup);
+ }
+ }
+
/*
* Yes. We need to register a relcache invalidation event.
*/
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(tempRel);
RegisterRelcacheInvalidation(databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1373,7 +1413,11 @@ CacheInvalidateRelcache(Relation relation)
else
databaseId = MyDatabaseId;
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(relation->rd_rel->relisshared == false &&
+ RELATION_IS_LOCAL(relation) &&
+ !RELATION_IS_OTHER_TEMP(relation));
RegisterRelcacheInvalidation(databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1409,7 +1453,12 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
databaseId = InvalidOid;
else
databaseId = MyDatabaseId;
+
+ BEGIN_TEMP_TABLE_SCOPE_LOCAL(classtup->relisshared == false &&
+ classtup->relpersistence == RELPERSISTENCE_TEMP &&
+ isTempOrTempToastNamespace(classtup->relnamespace));
RegisterRelcacheInvalidation(databaseId, relationId);
+ END_TEMP_TABLE_SCOPE();
}
/*
@@ -1467,6 +1516,8 @@ CacheInvalidateSmgr(RelFileLocatorBackend rlocator)
msg.sm.backend_hi = rlocator.backend >> 16;
msg.sm.backend_lo = rlocator.backend & 0xffff;
msg.sm.rlocator = rlocator.locator;
+ msg.isLocal = false;
+
/* check AddCatcacheInvalidationMessage() for an explanation */
VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
@@ -1495,6 +1546,8 @@ CacheInvalidateRelmap(Oid databaseId)
msg.rm.id = SHAREDINVALRELMAP_ID;
msg.rm.dbId = databaseId;
+ msg.isLocal = false;
+
/* check AddCatcacheInvalidationMessage() for an explanation */
VALGRIND_MAKE_MEM_DEFINED(&msg, sizeof(msg));
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 48a280d089b..a0b35686c8a 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -46,6 +46,7 @@
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/typcache.h"
@@ -3336,6 +3337,52 @@ get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
return true;
}
+/*
+ * Return the sslots[] cache entry for reqkind, (re)loading it from
+ * statstuple when it lacks requested data or reqop differs.  NULL if
+ * reqkind has no entry or get_attstatsslot() fails.  Data is loaded in the
+ * memory context owning sslots[]; the entry is then flagged incache so
+ * that free_attstatsslot() leaves it alone.
+ */
+AttStatsSlot *
+fill_attstatsslot(AttStatsSlot *sslots, HeapTuple statstuple,
+				  int reqkind, Oid reqop, int flags)
+{
+	const int	cacheable = ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS;
+	AttStatsSlot *entry;
+	MemoryContext caller_ctx;
+	int			have = 0;
+	int			missing;
+	bool		loaded;
+
+	if (reqkind >= STATISTIC_NUM_SLOTS)
+		return NULL;			/* not there */
+	entry = &sslots[reqkind];
+
+	/* which arrays does the entry already hold, and which are still wanted? */
+	if (entry->values != NULL)
+		have |= ATTSTATSSLOT_VALUES;
+	if (entry->numbers != NULL)
+		have |= ATTSTATSSLOT_NUMBERS;
+	missing = flags & cacheable & ~have;
+
+	if (missing == 0 && (reqop == InvalidOid || entry->staop == reqop))
+		return entry;
+
+	/* drop the old contents; incache must be cleared first for the free to act */
+	entry->incache = false;
+	free_attstatsslot(entry);
+
+	caller_ctx = MemoryContextSwitchTo(GetMemoryChunkContext(sslots));
+	loaded = get_attstatsslot(entry, statstuple, reqkind, reqop, missing | have);
+	MemoryContextSwitchTo(caller_ctx);
+	if (!loaded)
+		return NULL;
+
+	entry->incache = true;
+	return entry;
+}
+
/*
* free_attstatsslot
* Free data allocated by get_attstatsslot
@@ -3343,6 +3390,10 @@ get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
void
free_attstatsslot(AttStatsSlot *sslot)
{
+ /* do not free cached slot */
+ if (sslot->incache)
+ return;
+
/* The values[] array was separately palloc'd by deconstruct_array */
if (sslot->values)
pfree(sslot->values);
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 88c47a99965..b5c490b22be 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -21,6 +21,7 @@
#include "postgres.h"
#include "access/htup_details.h"
+#include "access/tempcat.h"
#include "catalog/pg_db_role_setting_d.h"
#include "catalog/pg_depend_d.h"
#include "catalog/pg_description_d.h"
@@ -162,6 +163,8 @@ InitCatalogCache(void)
sizeof(Oid), oid_compare);
CacheInitialized = true;
+
+ temp_catalog_init();
}
/*
@@ -800,3 +803,20 @@ oid_compare(const void *a, const void *b)
return pg_cmp_u32(oa, ob);
}
+
+/*
+ * Fetch relid's pg_class tuple from the RELOID syscache if it is usable.
+ */
+HeapTuple
+TryGetSysCacheRelationClassTuple(Oid relid)
+{
+	/*
+	 * Give up rather than search when the RELOID cache is not initialized
+	 * yet or has not populated its tuple descriptor.
+	 */
+	if (SysCache[RELOID] == NULL ||
+		SysCache[RELOID]->cc_tupdesc == NULL)
+		return NULL;
+
+	return SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+}
\ No newline at end of file
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index aa4720cb598..b2b6a897961 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -77,6 +77,13 @@
/* The main type cache hashtable searched by lookup_type_cache */
static HTAB *TypeCacheHash = NULL;
+typedef struct mapRelTypeEntry /* entry of the mapRelType hash table */
+{
+ Oid typrelid; /* hash key: the type's pg_type.typrelid */
+ Oid type_id; /* OID of the type cache entry's type */
+} mapRelTypeEntry;
+static HTAB *mapRelType = NULL; /* typrelid -> type_id, created in lookup_type_cache() */
+
/* List of type cache entries for domain types */
static TypeCacheEntry *firstDomainTypeEntry = NULL;
@@ -330,6 +337,15 @@ static TupleDesc find_or_make_matching_shared_tupledesc(TupleDesc tupdesc);
static dsa_pointer share_tupledesc(dsa_area *area, TupleDesc tupdesc,
uint32 typmod);
+/*
+ * The hash function must be compatible with the syscache hash function so
+ * that TypeCacheHash can be scanned with hash_seq_init_with_hash_value().
+ */
+static uint32
+type_cache_hash(const void *key, Size keysize) /* keysize is not used: the key is read as a single Oid */
+{
+ return GetSysCacheHashValue1(TYPEOID, ObjectIdGetDatum(*(const Oid*)key));
+}
/*
* lookup_type_cache
@@ -355,8 +371,14 @@ lookup_type_cache(Oid type_id, int flags)
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TypeCacheEntry);
+ ctl.hash = type_cache_hash;
TypeCacheHash = hash_create("Type information cache", 64,
- &ctl, HASH_ELEM | HASH_BLOBS);
+ &ctl, HASH_ELEM | HASH_FUNCTION);
+
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(mapRelTypeEntry);
+ mapRelType = hash_create("Map reloid to typeoid", 64,
+ &ctl, HASH_ELEM | HASH_BLOBS);
/* Also set up callbacks for SI invalidations */
CacheRegisterRelcacheCallback(TypeCacheRelCallback, (Datum) 0);
@@ -407,8 +429,7 @@ lookup_type_cache(Oid type_id, int flags)
/* These fields can never change, by definition */
typentry->type_id = type_id;
- typentry->type_id_hash = GetSysCacheHashValue1(TYPEOID,
- ObjectIdGetDatum(type_id));
+ typentry->type_id_hash = get_hash_value(TypeCacheHash, &type_id);
/* Keep this part in sync with the code below */
typentry->typlen = typtup->typlen;
@@ -429,6 +450,18 @@ lookup_type_cache(Oid type_id, int flags)
firstDomainTypeEntry = typentry;
}
+ if (OidIsValid(typtup->typrelid))
+ {
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry*) hash_search(mapRelType,
+ &typentry->typrelid,
+ HASH_ENTER, NULL);
+
+ relentry->typrelid = typentry->typrelid;
+ relentry->type_id = typentry->type_id;
+ }
+
ReleaseSysCache(tp);
}
else if (!(typentry->flags & TCFLAGS_HAVE_PG_TYPE_DATA))
@@ -467,6 +500,18 @@ lookup_type_cache(Oid type_id, int flags)
typentry->typcollation = typtup->typcollation;
typentry->flags |= TCFLAGS_HAVE_PG_TYPE_DATA;
+ if (OidIsValid(typtup->typrelid))
+ {
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry*) hash_search(mapRelType,
+ &typentry->typrelid,
+ HASH_ENTER, NULL);
+
+ relentry->typrelid = typentry->typrelid;
+ relentry->type_id = typentry->type_id;
+ }
+
ReleaseSysCache(tp);
}
@@ -2289,58 +2334,103 @@ SharedRecordTypmodRegistryAttach(SharedRecordTypmodRegistry *registry)
static void
TypeCacheRelCallback(Datum arg, Oid relid)
{
- HASH_SEQ_STATUS status;
TypeCacheEntry *typentry;
/* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
- while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+
+ /*
+ * A valid relid names a single relation: find its composite type through
+ * mapRelType instead of scanning the whole type cache. InvalidOid means
+ * "zap all composite types" and is handled by the else branch below.
+ */
+ if (OidIsValid(relid))
{
- if (typentry->typtype == TYPTYPE_COMPOSITE)
+ mapRelTypeEntry *relentry;
+
+ relentry = (mapRelTypeEntry *) hash_search(mapRelType,
+ &relid,
+ HASH_FIND, NULL);
+
+ if (relentry != NULL)
{
- /* Skip if no match, unless we're zapping all composite types */
- if (relid != typentry->typrelid && relid != InvalidOid)
- continue;
+ typentry = (TypeCacheEntry *) hash_search(TypeCacheHash,
+ &relentry->type_id,
+ HASH_FIND, NULL);
- /* Delete tupdesc if we have it */
- if (typentry->tupDesc != NULL)
+ if (typentry != NULL)
{
- /*
- * Release our refcount, and free the tupdesc if none remain.
- * (Can't use DecrTupleDescRefCount because this reference is
- * not logged in current resource owner.)
- */
- Assert(typentry->tupDesc->tdrefcount > 0);
- if (--typentry->tupDesc->tdrefcount == 0)
- FreeTupleDesc(typentry->tupDesc);
- typentry->tupDesc = NULL;
-
- /*
- * Also clear tupDesc_identifier, so that anything watching
- * that will realize that the tupdesc has possibly changed.
- * (Alternatively, we could specify that to detect possible
- * tupdesc change, one must check for tupDesc != NULL as well
- * as tupDesc_identifier being the same as what was previously
- * seen. That seems error-prone.)
- */
- typentry->tupDesc_identifier = 0;
- }
+ Assert(typentry->typtype == TYPTYPE_COMPOSITE);
+ Assert(relid == typentry->typrelid);
- /* Reset equality/comparison/hashing validity information */
- typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
- }
- else if (typentry->typtype == TYPTYPE_DOMAIN)
- {
- /*
- * If it's domain over composite, reset flags. (We don't bother
- * trying to determine whether the specific base type needs a
- * reset.) Note that if we haven't determined whether the base
- * type is composite, we don't need to reset anything.
- */
- if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
+ /* Delete tupdesc if we have it */
+ if (typentry->tupDesc != NULL)
+ {
+ /*
+ * Release our refcount, and free the tupdesc if none remain.
+ * (Can't use DecrTupleDescRefCount because this reference is
+ * not logged in current resource owner.)
+ */
+ Assert(typentry->tupDesc->tdrefcount > 0);
+ if (--typentry->tupDesc->tdrefcount == 0)
+ FreeTupleDesc(typentry->tupDesc);
+ typentry->tupDesc = NULL;
+
+ /*
+ * Also clear tupDesc_identifier, so that anything watching
+ * that will realize that the tupdesc has possibly changed.
+ * (Alternatively, we could specify that to detect possible
+ * tupdesc change, one must check for tupDesc != NULL as well
+ * as tupDesc_identifier being the same as what was previously
+ * seen. That seems error-prone.)
+ */
+ typentry->tupDesc_identifier = 0;
+ }
+
+ /* Reset equality/comparison/hashing validity information */
typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+ }
}
}
+ else
+ {
+ HASH_SEQ_STATUS status;
+
+ /*
+ * relid is InvalidOid: by convention all composite types must be
+ * reset, so scan the whole type cache.
+ */
+ hash_seq_init(&status, TypeCacheHash);
+ while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
+ {
+ if (typentry->typtype != TYPTYPE_COMPOSITE)
+ continue;
+
+ /* Same cleanup as for the single-entry case above */
+ if (typentry->tupDesc != NULL)
+ {
+ Assert(typentry->tupDesc->tdrefcount > 0);
+ if (--typentry->tupDesc->tdrefcount == 0)
+ FreeTupleDesc(typentry->tupDesc);
+ typentry->tupDesc = NULL;
+ typentry->tupDesc_identifier = 0;
+ }
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+ }
+ }
+
+ for (typentry = firstDomainTypeEntry;
+ typentry != NULL;
+ typentry = typentry->nextDomain)
+ {
+ /*
+ * If it's domain over composite, reset flags. (We don't bother
+ * trying to determine whether the specific base type needs a
+ * reset.) Note that if we haven't determined whether the base
+ * type is composite, we don't need to reset anything.
+ */
+ if (typentry->flags & TCFLAGS_DOMAIN_BASE_IS_COMPOSITE)
+ typentry->flags &= ~TCFLAGS_OPERATOR_FLAGS;
+ }
}
/*
@@ -2358,20 +2417,20 @@ TypeCacheTypCallback(Datum arg, int cacheid, uint32 hashvalue)
TypeCacheEntry *typentry;
/* TypeCacheHash must exist, else this callback wouldn't be registered */
- hash_seq_init(&status, TypeCacheHash);
+ if (hashvalue == 0)
+ hash_seq_init(&status, TypeCacheHash);
+ else
+ hash_seq_init_with_hash_value(&status, TypeCacheHash, hashvalue);
+
while ((typentry = (TypeCacheEntry *) hash_seq_search(&status)) != NULL)
{
- /* Is this the targeted type row (or it's a total cache flush)? */
- if (hashvalue == 0 || typentry->type_id_hash == hashvalue)
- {
- /*
- * Mark the data obtained directly from pg_type as invalid. Also,
- * if it's a domain, typnotnull might've changed, so we'll need to
- * recalculate its constraints.
- */
- typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
- TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
- }
+ /*
+ * Mark the data obtained directly from pg_type as invalid. Also,
+ * if it's a domain, typnotnull might've changed, so we'll need to
+ * recalculate its constraints.
+ */
+ typentry->flags &= ~(TCFLAGS_HAVE_PG_TYPE_DATA |
+ TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS);
}
}
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 45b8e8e919e..fcd624b8b60 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1388,10 +1388,37 @@ hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
status->hashp = hashp;
status->curBucket = 0;
status->curEntry = NULL;
+ status->hasHashVal = false;
if (!hashp->frozen)
register_seq_scan(hashp);
}
+void
+hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue)
+{
+ HASHHDR *hctl = hashp->hctl;
+ long segment_num;
+ long segment_ndx;
+ HASHSEGMENT segp;
+ /* Start a scan that hash_seq_search() restricts to entries whose stored hash equals hashvalue */
+ hash_seq_init(status, hashp); /* also registers the scan unless the table is frozen */
+ status->hasHashVal = true;
+ status->hashvalue = hashvalue;
+ /* Only the one bucket that hashvalue maps to is walked */
+ status->curBucket = calc_bucket(hctl, hashvalue);
+ /* Split the bucket number into a directory segment and an index within that segment */
+ segment_num = status->curBucket >> hashp->sshift;
+ segment_ndx = MOD(status->curBucket, hashp->ssize);
+
+ segp = hashp->dir[segment_num];
+
+ if (segp == NULL)
+ hash_corrupted(hashp);
+ /* Position on the head of the bucket chain; NULL makes the first hash_seq_search() end the scan */
+ status->curEntry = segp[segment_ndx];
+}
+
void *
hash_seq_search(HASH_SEQ_STATUS *status)
{
@@ -1405,6 +1432,20 @@ hash_seq_search(HASH_SEQ_STATUS *status)
uint32 curBucket;
HASHELEMENT *curElem;
+ if (status->hasHashVal)
+ {
+ while ((curElem = status->curEntry) != NULL)
+ {
+ status->curEntry = curElem->link;
+ if (status->hashvalue != curElem->hashvalue)
+ continue;
+ return (void *) ELEMENTKEY(curElem);
+ }
+
+ hash_seq_term(status);
+ return NULL;
+ }
+
if ((curElem = status->curEntry) != NULL)
{
/* Continuing scan of curBucket... */
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index cc61937eef7..5feff544e66 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -42,6 +42,8 @@ volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
+ProcessInterrupts_hook_type ProcessInterrupts_hook = NULL;
+
int MyProcPid;
pg_time_t MyStartTime;
TimestampTz MyStartTimestamp;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index a997dcb7dbc..31cb54e1877 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -74,6 +74,7 @@
#include "storage/large_object.h"
#include "storage/pg_shmem.h"
#include "storage/predicate.h"
+#include "storage/rd.h"
#include "storage/standby.h"
#include "tcop/tcopprot.h"
#include "tsearch/ts_cache.h"
@@ -88,6 +89,7 @@
#include "utils/plancache.h"
#include "utils/ps_status.h"
#include "utils/xml.h"
+#include "access/tempcat.h"
/* This value is normally passed in from the Makefile */
#ifndef PG_KRB_SRVTAB
@@ -997,6 +999,36 @@ struct config_bool ConfigureNamesBool[] =
true,
NULL, NULL, NULL
},
+ {
+ {"enable_self_join_removal", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enable removal of unique self-joins."),
+ NULL,
+ GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+ },
+ &enable_self_join_removal,
+ true,
+ NULL, NULL, NULL
+ },
+ {
+ {"enable_temp_memory_catalog", PGC_USERSET, QUERY_TUNING_METHOD,
+ gettext_noop("Enable in-memory system catalog for temporary tables."),
+ NULL,
+ GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+ },
+ &enable_temp_memory_catalog,
+ false,
+ NULL, NULL, NULL
+ },
+ {
+ {"enable_temp_rd_buffers", PGC_USERSET, RESOURCES_MEM,
+ gettext_noop("Enable in-memory page buffers for temporary tables."),
+ NULL,
+ GUC_EXPLAIN | GUC_NOT_IN_SAMPLE
+ },
+ &enable_temp_rd_buffers,
+ false,
+ NULL, NULL, NULL
+ },
{
{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
gettext_noop("Enables genetic query optimization."),
@@ -3653,6 +3685,17 @@ struct config_int ConfigureNamesInt[] =
NULL, NULL, NULL
},
+ // {
+ // {"temp_rd_buffers", PGC_USERSET, RESOURCES_MEM,
+ // gettext_noop("Sets the default number of buffers for each temporary table."),
+ // NULL,
+ // GUC_UNIT_BLOCKS | GUC_EXPLAIN
+ // },
+ // &temp_rd_buffers,
+ // 4, 1, 128<<10,
+ // NULL, NULL, NULL
+ // },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 667e0dc40a2..910ca12d709 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -137,6 +137,7 @@
# (change requires restart)
# Caution: it is not advisable to set max_prepared_transactions nonzero unless
# you actively intend to use prepared transactions.
+#enable_temp_rd_buffers = off
#work_mem = 4MB # min 64kB
#hash_mem_multiplier = 2.0 # 1-1000.0 multiplier on hash table work_mem
#maintenance_work_mem = 64MB # min 64kB
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 947a868e569..81bbb65b661 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -545,7 +545,7 @@ tuplestore_select_read_pointer(Tuplestorestate *state, int ptr)
int64
tuplestore_tuple_count(Tuplestorestate *state)
{
- return state->tuples;
+ return (state) ? state->tuples : 0; /* a NULL tuplestore is reported as holding 0 tuples */
}
/*
diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c
index 555f0175f0e..25b5b32dc28 100644
--- a/src/bin/pg_basebackup/pg_receivewal.c
+++ b/src/bin/pg_basebackup/pg_receivewal.c
@@ -97,6 +97,7 @@ usage(void)
printf(_(" -d, --dbname=CONNSTR connection string\n"));
printf(_(" -h, --host=HOSTNAME database server host or socket directory\n"));
printf(_(" -p, --port=PORT database server port number\n"));
+ printf(_(" -u, --umask set files mode according to umask (might break security!)\n"));
printf(_(" -U, --username=NAME connect as specified database user\n"));
printf(_(" -w, --no-password never prompt for password\n"));
printf(_(" -W, --password force password prompt (should happen automatically)\n"));
@@ -631,6 +632,7 @@ main(int argc, char **argv)
{"endpos", required_argument, NULL, 'E'},
{"host", required_argument, NULL, 'h'},
{"port", required_argument, NULL, 'p'},
+ {"umask", no_argument, NULL, 'u'},
{"username", required_argument, NULL, 'U'},
{"no-loop", no_argument, NULL, 'n'},
{"no-password", no_argument, NULL, 'w'},
@@ -677,7 +679,7 @@ main(int argc, char **argv)
}
}
- while ((c = getopt_long(argc, argv, "d:D:E:h:np:s:S:U:vwWZ:",
+ while ((c = getopt_long(argc, argv, "d:D:E:h:np:s:S:U:vuwWZ:",
long_options, &option_index)) != -1)
{
switch (c)
@@ -712,6 +714,9 @@ main(int argc, char **argv)
case 'S':
replication_slot = pg_strdup(optarg);
break;
+ case 'u':
+ useumask = 1;
+ break;
case 'U':
dbuser = pg_strdup(optarg);
break;
diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c
index 3db520ed38b..a939135b850 100644
--- a/src/bin/pg_basebackup/pg_recvlogical.c
+++ b/src/bin/pg_basebackup/pg_recvlogical.c
@@ -336,11 +336,14 @@ StreamLogicalLog(void)
{
struct stat statbuf;
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
+
if (strcmp(outfile, "-") == 0)
outfd = fileno(stdout);
else
outfd = open(outfile, O_CREAT | O_APPEND | O_WRONLY | PG_BINARY,
- S_IRUSR | S_IWUSR);
+ mode);
if (outfd == -1)
{
pg_log_error("could not open log file \"%s\": %m", outfile);
diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c
index dc604b15380..890854db4b7 100644
--- a/src/bin/pg_basebackup/streamutil.c
+++ b/src/bin/pg_basebackup/streamutil.c
@@ -50,6 +50,7 @@ char *dbhost = NULL;
char *dbuser = NULL;
char *dbport = NULL;
char *dbname = NULL;
+int useumask = 0; /* set to 1 by -u/--umask: created files also get group/other read bits */
int dbgetpassword = 0; /* 0=auto, -1=never, 1=always */
static char *password = NULL;
PGconn *conn = NULL;
diff --git a/src/bin/pg_basebackup/streamutil.h b/src/bin/pg_basebackup/streamutil.h
index 9b38e8c0f38..04b5c25d25f 100644
--- a/src/bin/pg_basebackup/streamutil.h
+++ b/src/bin/pg_basebackup/streamutil.h
@@ -23,6 +23,7 @@ extern char *dbhost;
extern char *dbuser;
extern char *dbport;
extern char *dbname;
+extern int useumask;
extern int dbgetpassword;
extern int WalSegSz;
diff --git a/src/bin/pg_basebackup/walmethods.c b/src/bin/pg_basebackup/walmethods.c
index 935429ea1e8..81e722b91a0 100644
--- a/src/bin/pg_basebackup/walmethods.c
+++ b/src/bin/pg_basebackup/walmethods.c
@@ -130,6 +130,8 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
size_t lz4bufsize = 0;
void *lz4buf = NULL;
#endif
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
clear_error(wwmethod);
@@ -144,7 +146,7 @@ dir_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
* does not do any system calls to fsync() to make changes permanent on
* disk.
*/
- fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode);
+ fd = open(tmppath, O_WRONLY | O_CREAT | PG_BINARY, pg_file_create_mode | mode);
if (fd < 0)
{
wwmethod->lasterrno = errno;
@@ -838,6 +840,8 @@ tar_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
const char *temp_suffix, size_t pad_to_size)
{
TarMethodData *tar_data = (TarMethodData *) wwmethod;
+ mode_t mode = (useumask == 1) ?
+ (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) : (S_IRUSR | S_IWUSR);
char *tmppath;
clear_error(wwmethod);
@@ -849,7 +853,7 @@ tar_open_for_write(WalWriteMethod *wwmethod, const char *pathname,
*/
tar_data->fd = open(tar_data->tarfilename,
O_WRONLY | O_CREAT | PG_BINARY,
- pg_file_create_mode);
+ pg_file_create_mode | mode);
if (tar_data->fd < 0)
{
wwmethod->lasterrno = errno;
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index 74bbea79663..7ac1afbfa85 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -88,7 +88,6 @@ static void flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
static void flagInhAttrs(Archive *fout, TableInfo *tblinfo, int numTables);
static int strInArray(const char *pattern, char **arr, int arr_size);
-static IndxInfo *findIndexByOid(Oid oid);
/*
@@ -866,7 +865,7 @@ findTableByOid(Oid oid)
* finds the DumpableObject for the index with the given oid
* returns NULL if not found
*/
-static IndxInfo *
+IndxInfo *
findIndexByOid(Oid oid)
{
CatalogId catId;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 13139c9f078..669e7d66b8b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -1912,11 +1912,22 @@ selectDumpableType(TypeInfo *tyinfo, Archive *fout)
if (OidIsValid(tyinfo->typrelid) &&
tyinfo->typrelkind != RELKIND_COMPOSITE_TYPE)
{
- TableInfo *tytable = findTableByOid(tyinfo->typrelid);
+ DumpableObject *parentRel;
tyinfo->dobj.objType = DO_DUMMY_TYPE;
- if (tytable != NULL)
- tyinfo->dobj.dump = tytable->dobj.dump;
+
+ /* Get associated relation */
+ if (tyinfo->typrelkind == RELKIND_INDEX)
+ parentRel = (DumpableObject *) findIndexByOid(tyinfo->typrelid);
+ else
+ parentRel = (DumpableObject *) findTableByOid(tyinfo->typrelid);
+
+ /*
+ * If associated relation found, dump based on if the
+ * contents of the associated relation are being dumped.
+ */
+ if (parentRel != NULL)
+ tyinfo->dobj.dump = parentRel->dump;
else
tyinfo->dobj.dump = DUMP_COMPONENT_NONE;
return;
@@ -5385,6 +5396,9 @@ binary_upgrade_set_type_oids_by_type_oid(Archive *fout,
Oid pg_type_multirange_oid;
Oid pg_type_multirange_array_oid;
+ if (pg_type_oid == InvalidOid)
+ return;
+
appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_type oid\n");
appendPQExpBuffer(upgrade_buffer,
"SELECT pg_catalog.binary_upgrade_set_next_pg_type_oid('%u'::pg_catalog.oid);\n\n",
@@ -5469,6 +5483,17 @@ binary_upgrade_set_type_oids_by_rel(Archive *fout,
pg_type_oid, false, false);
}
+static void
+binary_upgrade_set_type_oids_by_rel_oid(Archive *fout,
+ PQExpBuffer upgrade_buffer,
+ Oid pg_type_oid /* pg_type OID of the relation's row type; may be InvalidOid */
+ )
+{
+ if (OidIsValid(pg_type_oid)) /* nothing to preserve when the relation has no type */
+ binary_upgrade_set_type_oids_by_type_oid(fout, upgrade_buffer,
+ pg_type_oid, false, false);
+}
+
static void
binary_upgrade_set_pg_class_oids(Archive *fout,
PQExpBuffer upgrade_buffer, Oid pg_class_oid,
@@ -7447,6 +7472,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_indnkeyatts,
i_indnatts,
i_indkey,
+ i_indtype,
i_indisclustered,
i_indisreplident,
i_indnullsnotdistinct,
@@ -7495,7 +7521,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
"SELECT t.tableoid, t.oid, i.indrelid, "
"t.relname AS indexname, "
"pg_catalog.pg_get_indexdef(i.indexrelid) AS indexdef, "
- "i.indkey, i.indisclustered, "
+ "i.indkey, t.reltype AS indtype, i.indisclustered, "
"c.contype, c.conname, "
"c.condeferrable, c.condeferred, "
"c.tableoid AS contableoid, "
@@ -7599,6 +7625,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
i_indnkeyatts = PQfnumber(res, "indnkeyatts");
i_indnatts = PQfnumber(res, "indnatts");
i_indkey = PQfnumber(res, "indkey");
+ i_indtype = PQfnumber(res, "indtype");
i_indisclustered = PQfnumber(res, "indisclustered");
i_indisreplident = PQfnumber(res, "indisreplident");
i_indnullsnotdistinct = PQfnumber(res, "indnullsnotdistinct");
@@ -7676,6 +7703,7 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
indxinfo[j].indkeys = (Oid *) pg_malloc(indxinfo[j].indnattrs * sizeof(Oid));
parseOidArray(PQgetvalue(res, j, i_indkey),
indxinfo[j].indkeys, indxinfo[j].indnattrs);
+ indxinfo[j].indtype = atooid(PQgetvalue(res, j, i_indtype));
indxinfo[j].indisclustered = (PQgetvalue(res, j, i_indisclustered)[0] == 't');
indxinfo[j].indisreplident = (PQgetvalue(res, j, i_indisreplident)[0] == 't');
indxinfo[j].indnullsnotdistinct = (PQgetvalue(res, j, i_indnullsnotdistinct)[0] == 't');
@@ -17000,8 +17028,13 @@ dumpIndex(Archive *fout, const IndxInfo *indxinfo)
int nstatvals = 0;
if (dopt->binary_upgrade)
+ {
binary_upgrade_set_pg_class_oids(fout, q,
indxinfo->dobj.catId.oid, true);
+ if (indxinfo->indnkeyattrs > 1)
+ binary_upgrade_set_type_oids_by_rel_oid(fout, q,
+ indxinfo->indtype);
+ }
/* Plain secondary index */
appendPQExpBuffer(q, "%s;\n", indxinfo->indexdef);
@@ -17268,8 +17301,14 @@ dumpConstraint(Archive *fout, const ConstraintInfo *coninfo)
coninfo->dobj.name);
if (dopt->binary_upgrade)
+ {
+ if (indxinfo->indnkeyattrs > 1)
+ binary_upgrade_set_type_oids_by_rel_oid(fout, q,
+ indxinfo->indtype);
+
binary_upgrade_set_pg_class_oids(fout, q,
indxinfo->dobj.catId.oid, true);
+ }
appendPQExpBuffer(q, "ALTER %sTABLE ONLY %s\n", foreign,
fmtQualifiedDumpable(tbinfo));
@@ -18736,6 +18775,28 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
*/
switch (dobj->objType)
{
+ case DO_DUMMY_TYPE:
+ {
+ /*
+ * In Vanilla, dummy types were only created for tables.
+ * In Postgres Pro for improving join selectivity estimation
+ * we also create two types for each composite index:
+ * 1) a type for attributes of the index
+ * 2) a type which is an array containing elements of type (1)
+ * These types depend on indexes, so adding preDataBound -> type
+ * dependency would create a loop; don't do that.
+ */
+ TypeInfo *tyinfo = (TypeInfo *) dobj;
+ if (tyinfo->isArray)
+ /* If it's an array, take its element type */
+ tyinfo = findTypeByOid(tyinfo->typelem);
+
+ if (OidIsValid(tyinfo->typrelid) &&
+ (tyinfo->typrelkind == RELKIND_INDEX ||
+ tyinfo->typrelkind == RELKIND_PARTITIONED_INDEX))
+ break;
+ }
+ /* FALLTHROUGH */
case DO_NAMESPACE:
case DO_EXTENSION:
case DO_TYPE:
@@ -18753,7 +18814,6 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
case DO_ATTRDEF:
case DO_PROCLANG:
case DO_CAST:
- case DO_DUMMY_TYPE:
case DO_TSPARSER:
case DO_TSDICT:
case DO_TSTEMPLATE:
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 2de5afdacdb..46ad02d205b 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -414,6 +414,7 @@ typedef struct _indxInfo
int indnattrs; /* total number of index attributes */
Oid *indkeys; /* In spite of the name 'indkeys' this field
* contains both key and nonkey attributes */
+ Oid indtype; /* OID of index's composite type, if any */
bool indisclustered;
bool indisreplident;
bool indnullsnotdistinct;
@@ -732,6 +733,7 @@ extern AccessMethodInfo *findAccessMethodByOid(Oid oid);
extern CollInfo *findCollationByOid(Oid oid);
extern NamespaceInfo *findNamespaceByOid(Oid oid);
extern ExtensionInfo *findExtensionByOid(Oid oid);
+extern IndxInfo *findIndexByOid(Oid oid);
extern PublicationInfo *findPublicationByOid(Oid oid);
extern SubscriptionInfo *findSubscriptionByOid(Oid oid);
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 65999dd64e1..005cf5ac414 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -105,6 +105,8 @@ typedef struct HeapScanDescData
int rs_cindex; /* current tuple's index in vistuples */
int rs_ntuples; /* number of visible tuples on page */
OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */
+ struct TempCatScanData *tempscan;
+ HeapTuple temptup;
} HeapScanDescData;
typedef struct HeapScanDescData *HeapScanDesc;
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 521043304ab..be34e1089a4 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -176,6 +176,7 @@ typedef struct ParallelIndexScanDescData
} ParallelIndexScanDescData;
struct TupleTableSlot;
+struct TempCatScanData;
/* Struct for storage-or-index scans of system tables */
typedef struct SysScanDescData
@@ -186,6 +187,8 @@ typedef struct SysScanDescData
struct IndexScanDescData *iscan; /* only valid in index-scan case */
struct SnapshotData *snapshot; /* snapshot to unregister at end of scan */
struct TupleTableSlot *slot;
+ struct TempCatScanData *tempscan;
+
} SysScanDescData;
#endif /* RELSCAN_H */
diff --git a/src/include/access/tempcat.h b/src/include/access/tempcat.h
new file mode 100644
index 00000000000..d1216bb439f
--- /dev/null
+++ b/src/include/access/tempcat.h
@@ -0,0 +1,28 @@
+#ifndef TEMPCAT_H
+#define TEMPCAT_H
+#include "postgres.h" /* NOTE(review): PostgreSQL headers normally leave postgres.h to the .c file -- confirm this is intended */
+/* tempcat.h: interface of the in-memory system catalog for temporary tables */
+#include "access/genam.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "executor/tuptable.h"
+#include "utils/rel.h"
+/* Backing variable of the enable_temp_memory_catalog GUC (registered in guc_tables.c) */
+extern bool enable_temp_memory_catalog;
+/* Opaque scan state: only forward-declared in this header */
+typedef struct TempCatScanData TempCatScanData;
+/* Initialization (called from InitCatalogCache), tuple modification and scan entry points */
+extern void temp_catalog_init(void);
+extern void temp_catalog_insert(Relation relation, HeapTuple htup);
+extern void temp_catalog_delete(Relation relation, ItemPointer ptr);
+extern void temp_catalog_update(Relation relation, ItemPointer ptr, HeapTuple htup);
+extern void temp_catalog_update_inplace(Relation relation, HeapTuple htup);
+extern TempCatScanData* temp_catalog_beginscan(Relation rel, int nkeys, ScanKey key);
+extern void temp_catalog_endscan(TempCatScanData* scan);
+extern HeapTuple temp_catalog_getnext(TempCatScanData* scan, BufferHeapTupleTableSlot* bslot);
+extern bool temp_catalog_is_fetched(TempCatScanData* scan);
+/* Tuple-map helpers keyed by ItemPointer (semantics are defined by the implementation file) */
+extern ItemPointerData temp_catalog_tupmap_assign (ItemPointer ptr, void* data);
+extern bool temp_catalog_tupmap_unassign(ItemPointer ptr, void* data);
+extern void* temp_catalog_tupmap_get (ItemPointer ptr);
+#endif /* TEMPCAT_H */
\ No newline at end of file
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 6d4439f0524..6ec7def8264 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -133,6 +133,8 @@ typedef enum
XACT_EVENT_PRE_COMMIT,
XACT_EVENT_PARALLEL_PRE_COMMIT,
XACT_EVENT_PRE_PREPARE,
+ XACT_EVENT_PRE_ABORT,
+ XACT_EVENT_PARALLEL_PRE_ABORT,
} XactEvent;
typedef void (*XactCallback) (XactEvent event, void *arg);
@@ -143,6 +145,7 @@ typedef enum
SUBXACT_EVENT_COMMIT_SUB,
SUBXACT_EVENT_ABORT_SUB,
SUBXACT_EVENT_PRE_COMMIT_SUB,
+ SUBXACT_EVENT_PRE_ABORT_SUB,
} SubXactEvent;
typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 7d434f8e653..d24a559a18b 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -65,6 +65,7 @@ extern void index_check_primary_key(Relation heapRel,
#define INDEX_CREATE_IF_NOT_EXISTS (1 << 4)
#define INDEX_CREATE_PARTITIONED (1 << 5)
#define INDEX_CREATE_INVALID (1 << 6)
+#define INDEX_CREATE_WITHOUT_TYPE (1 << 7)
extern Oid index_create(Relation heapRelation,
const char *indexRelationName,
diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat
index ceff66ccde1..49c4a0457ce 100644
--- a/src/include/catalog/pg_type.dat
+++ b/src/include/catalog/pg_type.dat
@@ -695,4 +695,12 @@
typreceive => 'brin_minmax_multi_summary_recv',
typsend => 'brin_minmax_multi_summary_send', typalign => 'i',
typstorage => 'x', typcollation => 'default' },
+{ oid => '14756', descr => 'pseudo-type representing removed abstime',
+ typname => 'abstime', typlen => '-1', typbyval => 'f', typtype => 'p',
+ typcategory => 'P', typinput => 'timestamp_in', typoutput => 'timestamp_out',
+ typreceive => 'timestamp_recv', typsend => 'timestamp_send', typalign => 'c' },
+{ oid => '14757', descr => 'pseudo-type representing removed reltime',
+ typname => 'reltime', typlen => '-1', typbyval => 'f', typtype => 'p',
+ typcategory => 'P', typinput => 'timestamp_in', typoutput => 'timestamp_out',
+ typreceive => 'timestamp_recv', typsend => 'timestamp_send', typalign => 'c' },
]
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 9b8b351d9a2..36f0e8ea8bb 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -69,6 +69,7 @@ typedef struct ExplainState
bool hide_workers; /* set if we find an invisible Gather */
/* state related to the current plan node */
ExplainWorkersState *workers_state; /* needed if parallel plan */
+ uint64 es_processed; /* sum of queryDesc->estate->es_processed */
} ExplainState;
/* Hook for plugins to get control in ExplainOneQuery() */
@@ -87,7 +88,7 @@ extern PGDLLIMPORT explain_get_index_name_hook_type explain_get_index_name_hook;
extern void ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
- ParamListInfo params, DestReceiver *dest);
+ ParamListInfo params, DestReceiver *dest, uint64 *processed);
extern void standard_ExplainOneQuery(Query *query, int cursorOptions,
IntoClause *into, ExplainState *es,
const char *queryString, ParamListInfo params,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 759f9a87d38..d5ad97faa85 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -170,6 +170,9 @@ typedef struct VacAttrStats
*/
int tupattnum; /* attribute number within tuples */
HeapTuple *rows; /* access info for std fetch function */
+ int rowsAttrPitch; /* access info for rows data */
+ Datum* rowsAttrValues;
+ bool* rowsAttrNulls;
TupleDesc tupDesc;
Datum *exprvals; /* access info for index fetch function */
bool *exprnulls;
diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h
index ef1fa37716d..8a6c07b782c 100644
--- a/src/include/executor/execExpr.h
+++ b/src/include/executor/execExpr.h
@@ -363,12 +363,17 @@ typedef struct ExprEvalStep
struct
{
bool *anynull; /* track if any input was NULL */
+ bool *guaranteed_empty;
+ bool is_last;
+ int *count_guaranteed_empty;
+ int nargs;
int jumpdone; /* jump here if result determined */
} boolexpr;
/* for EEOP_QUAL */
struct
{
+ bool *guaranteed_empty;
int jumpdone; /* jump here on false or null */
} qualexpr;
@@ -636,6 +641,7 @@ typedef struct ExprEvalStep
{
/* out-of-line state, created by nodeSubplan.c */
SubPlanState *sstate;
+ bool *guaranteed_empty;
} subplan;
/* for EEOP_AGG_*DESERIALIZE */
diff --git a/src/include/lib/rbtree.h b/src/include/lib/rbtree.h
index 16de4c012c8..87fa1c8533d 100644
--- a/src/include/lib/rbtree.h
+++ b/src/include/lib/rbtree.h
@@ -58,12 +58,14 @@ typedef int (*rbt_comparator) (const RBTNode *a, const RBTNode *b, void *arg);
typedef void (*rbt_combiner) (RBTNode *existing, const RBTNode *newdata, void *arg);
typedef RBTNode *(*rbt_allocfunc) (void *arg);
typedef void (*rbt_freefunc) (RBTNode *x, void *arg);
+typedef void (*rbt_fixfunc) (RBTNode *x, void *arg);
extern RBTree *rbt_create(Size node_size,
rbt_comparator comparator,
rbt_combiner combiner,
rbt_allocfunc allocfunc,
rbt_freefunc freefunc,
+ rbt_fixfunc fixfunc,
void *arg);
extern RBTNode *rbt_find(RBTree *rbt, const RBTNode *data);
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 0d563465935..9bd4f140410 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -105,8 +105,13 @@ extern PGDLLIMPORT volatile uint32 InterruptHoldoffCount;
extern PGDLLIMPORT volatile uint32 QueryCancelHoldoffCount;
extern PGDLLIMPORT volatile uint32 CritSectionCount;
+/* to allow extensions to handle custom interrupts */
+typedef void (*ProcessInterrupts_hook_type) (void);
+extern PGDLLIMPORT ProcessInterrupts_hook_type ProcessInterrupts_hook;
+
/* in tcop/postgres.c */
extern void ProcessInterrupts(void);
+extern void standard_ProcessInterrupts(void);
/* Test whether an interrupt is pending */
#ifndef WIN32
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0696ec05b16..d1311ebea1c 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -138,6 +138,8 @@ typedef struct ExprState
* ExecInitExprRec().
*/
ErrorSaveContext *escontext;
+
+ bool guaranteed_empty;
} ExprState;
@@ -991,6 +993,7 @@ typedef struct SubPlanState
FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */
ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */
+ bool guaranteed_empty;
} SubPlanState;
/*
@@ -1169,6 +1172,8 @@ typedef struct PlanState
*/
TupleDesc scandesc;
+ bool guaranteed_empty;
+
/*
* Define the slot types for inner, outer and scanslots for expression
* contexts with this state as a parent. If *opsset is set, then
diff --git a/src/include/nodes/nodeFuncs.h b/src/include/nodes/nodeFuncs.h
index eaba59bed83..c8c308cf6dd 100644
--- a/src/include/nodes/nodeFuncs.h
+++ b/src/include/nodes/nodeFuncs.h
@@ -30,6 +30,7 @@ struct PlanState; /* avoid including execnodes.h too */
* contents */
#define QTW_DONT_COPY_QUERY 0x40 /* do not copy top Query */
#define QTW_EXAMINE_SORTGROUP 0x80 /* include SortGroupClause lists */
+#define QTW_DONT_COPY_DEFAULT 0x00 /* only custom mutator will copy */
/* callback function for check_functions_in_node */
typedef bool (*check_function_callback) (Oid func_id, void *context);
@@ -153,7 +154,10 @@ extern bool check_functions_in_node(Node *node, check_function_callback checker,
#define expression_tree_walker(n, w, c) \
expression_tree_walker_impl(n, (tree_walker_callback) (w), c)
#define expression_tree_mutator(n, m, c) \
- expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c)
+ expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c, 0)
+#define expression_tree_mutator_ext(n, m, c, f) \
+ expression_tree_mutator_impl(n, (tree_mutator_callback) (m), c, f)
+
#define query_tree_walker(q, w, c, f) \
query_tree_walker_impl(q, (tree_walker_callback) (w), c, f)
@@ -184,7 +188,7 @@ extern bool expression_tree_walker_impl(Node *node,
void *context);
extern Node *expression_tree_mutator_impl(Node *node,
tree_mutator_callback mutator,
- void *context);
+ void *context, int flags);
extern bool query_tree_walker_impl(Query *query,
tree_walker_callback walker,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 576f1c7e9b9..3a44cde1474 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -17,10 +17,12 @@
#define PATHNODES_H
#include "access/sdir.h"
+#include "catalog/pg_statistic.h"
#include "lib/stringinfo.h"
#include "nodes/params.h"
#include "nodes/parsenodes.h"
#include "storage/block.h"
+#include "utils/lsyscache.h"
/*
@@ -728,7 +730,7 @@ typedef struct PartitionSchemeData *PartitionScheme;
* populate these fields, for base rels; but someday they might be used for
* join rels too:
*
- * unique_for_rels - list of Relid sets, each one being a set of other
+ * unique_for_rels - list of UniqueRelInfo, each one being a set of other
* rels for which this one has been proven unique
* non_unique_for_rels - list of Relid sets, each one being a set of
* other rels for which we have tried and failed to prove
@@ -869,6 +871,7 @@ typedef struct RelOptInfo
*/
/* estimated number of result tuples */
Cardinality rows;
+ Cardinality rowsUnclamped;
/*
* per-relation planner control flags
@@ -967,7 +970,7 @@ typedef struct RelOptInfo
/*
* cache space for remembering if we have proven this relation unique
*/
- /* known unique for these other relid set(s) */
+ /* known unique for these other relid set(s) given in UniqueRelInfo(s) */
List *unique_for_rels;
/* known not unique for these set(s) */
List *non_unique_for_rels;
@@ -1203,6 +1206,10 @@ struct IndexOptInfo
/* AM's cost estimator */
/* Rather than include amapi.h here, we declare amcostestimate like this */
void (*amcostestimate) (struct PlannerInfo *, struct IndexPath *, double, Cost *, Cost *, Selectivity *, double *, double *) pg_node_attr(read_write_ignore);
+
+ /* Cache for per-tuple index statistics. These stats can be large, and it
+ * would be expensive to decompress them every time. */
+ AttStatsSlot *sslots pg_node_attr(equal_ignore, query_jumble_ignore, read_write_ignore, read_as(0));
};
/*
@@ -1473,6 +1480,16 @@ typedef struct PathKey
bool pk_nulls_first; /* do NULLs come before normal values? */
} PathKey;
+/*
+ * Combines information about pathkeys and the associated clauses.
+ */
+typedef struct PathKeyInfo
+{
+ NodeTag type;
+ List *pathkeys;
+ List *clauses;
+} PathKeyInfo;
+
/*
* Contains an order of group-by clauses and the corresponding list of
* pathkeys.
@@ -1935,6 +1952,11 @@ typedef struct AppendPath
/* Index of first partial path in subpaths; list_length(subpaths) if none */
int first_partial_path;
Cardinality limit_tuples; /* hard limit on output tuples, or -1 */
+ bool pull_tlist; /* if true, create_append_plan()
+ * should take the targetlist from any
+ * subpath: they are all the same, since
+ * this is only set for the append
+ * index scan for range OR */
} AppendPath;
#define IS_DUMMY_APPEND(p) \
@@ -3434,4 +3456,29 @@ typedef struct AggTransInfo
bool initValueIsNull;
} AggTransInfo;
+/*
+ * UniqueRelInfo caches a fact that a relation is unique when being joined
+ * to other relation(s).
+ */
+typedef struct UniqueRelInfo
+{
+ pg_node_attr(no_copy_equal, no_read, no_query_jumble)
+
+ NodeTag type;
+
+ /*
+ * The relation in consideration is unique when being joined with this set
+ * of other relation(s).
+ */
+ Relids outerrelids;
+
+ /*
+ * Additional clauses from a baserestrictinfo list that were used to prove
+ * the uniqueness. We cache it for the self-join checking procedure: a
+ * self-join can be removed if the outer relation contains strictly the
+ * same set of clauses.
+ */
+ List *extra_clauses;
+} UniqueRelInfo;
+
#endif /* PATHNODES_H */
diff --git a/src/include/nodes/supportnodes.h b/src/include/nodes/supportnodes.h
index 5f7bcde891c..b81b76db7c7 100644
--- a/src/include/nodes/supportnodes.h
+++ b/src/include/nodes/supportnodes.h
@@ -35,6 +35,22 @@
#include "nodes/plannodes.h"
+typedef enum
+{
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
+} Pattern_Type;
+
+typedef enum
+{
+ Pattern_Prefix_None,
+ Pattern_Prefix_Partial,
+ Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
+
struct PlannerInfo; /* avoid including pathnodes.h here */
struct IndexOptInfo;
struct SpecialJoinInfo;
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index b1c51a4e70f..48c284ac03d 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -116,7 +116,15 @@ extern void cost_incremental_sort(Path *path,
Cost input_startup_cost, Cost input_total_cost,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples);
-extern void cost_append(AppendPath *apath);
+extern void cost_append_ext(AppendPath *path, PlannerInfo *root);
+
+static inline void cost_append(AppendPath *apath)
+{
+ cost_append_ext(apath, NULL); /* keeps the old one-argument API; root is passed as NULL */
+}
+
+extern Cost cost_sort_estimate(PlannerInfo *root, List *pathkeys,
+ int nPresortedKeys, double tuples);
extern void cost_merge_append(Path *path, PlannerInfo *root,
List *pathkeys, int n_streams,
Cost input_startup_cost, Cost input_total_cost,
@@ -176,6 +184,7 @@ extern void cost_gather_merge(GatherMergePath *path, PlannerInfo *root,
extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
+extern void cost_qual_eval_node_index(QualCost *cost, Node *qual, PlannerInfo *root);
extern void compute_semi_anti_join_factors(PlannerInfo *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 112e7c23d4e..69acf7713aa 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -67,11 +67,24 @@ extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
RelOptInfo *rel,
List *tidrangequals,
Relids required_outer);
-extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
+
+extern AppendPath *create_append_path_ext(PlannerInfo *root, RelOptInfo *rel,
+ List *subpaths, List *partial_subpaths,
+ List *pathkeys, Relids required_outer,
+ int parallel_workers, bool parallel_aware,
+ double rows, bool pull_tlist);
+
+static inline AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
List *subpaths, List *partial_subpaths,
List *pathkeys, Relids required_outer,
int parallel_workers, bool parallel_aware,
- double rows);
+ double rows)
+{
+ return create_append_path_ext(root, rel, subpaths, partial_subpaths, pathkeys,
+ required_outer, parallel_workers, parallel_aware,
+ rows, false);
+}
+
extern MergeAppendPath *create_merge_append_path(PlannerInfo *root,
RelOptInfo *rel,
List *subpaths,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 5e88c0224a4..aaaac7bf4b2 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -69,9 +69,14 @@ extern void generate_partitionwise_join_paths(PlannerInfo *root,
* routines to generate index paths
*/
extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel);
+extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
+ List *clauses, List *other_clauses);
extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel,
List *restrictlist,
List *exprlist, List *oprlist);
+extern bool relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel,
+ List *restrictlist, List *exprlist,
+ List *oprlist, List **extra_clauses);
extern bool indexcol_is_bool_constant_for_query(PlannerInfo *root,
IndexOptInfo *index,
int indexcol);
@@ -208,6 +213,10 @@ typedef enum
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
extern bool pathkeys_count_contained_in(List *keys1, List *keys2, int *n_common);
+extern int group_keys_reorder_by_pathkeys(List *pathkeys,
+ List **group_pathkeys,
+ List **group_clauses,
+ int num_groupby_pathkeys);
extern List *get_useful_group_keys_orderings(PlannerInfo *root, Path *path);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
@@ -254,6 +263,7 @@ extern List *select_outer_pathkeys_for_merge(PlannerInfo *root,
extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
List *mergeclauses,
List *outer_pathkeys);
+extern int pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys);
extern List *trim_mergeclauses_for_inner_pathkeys(PlannerInfo *root,
List *mergeclauses,
List *pathkeys);
@@ -262,6 +272,7 @@ extern List *truncate_useless_pathkeys(PlannerInfo *root,
List *pathkeys);
extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
extern List *append_pathkeys(List *target, List *source);
+extern void keybased_rewrite_index_paths(PlannerInfo *root, RelOptInfo *rel);
extern PathKey *make_canonical_pathkey(PlannerInfo *root,
EquivalenceClass *eclass, Oid opfamily,
int strategy, bool nulls_first);
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index aafc1737921..29e143e767d 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -20,6 +20,7 @@
/* GUC parameters */
#define DEFAULT_CURSOR_TUPLE_FRACTION 0.1
extern PGDLLIMPORT double cursor_tuple_fraction;
+extern PGDLLIMPORT bool enable_self_join_removal;
/* query_planner callback to compute query_pathkeys */
typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra);
@@ -49,6 +50,9 @@ extern Plan *materialize_finished_plan(Plan *subplan);
extern bool is_projection_capable_path(Path *path);
extern bool is_projection_capable_plan(Plan *plan);
+extern Node * fix_indexqual_operand(Node *node, IndexOptInfo *index, int
+ indexcol);
+
/* External use of these functions is deprecated: */
extern Sort *make_sort_from_sortclauses(List *sortcls, Plan *lefttree);
extern Agg *make_agg(List *tlist, List *qual,
@@ -108,6 +112,11 @@ extern bool query_is_distinct_for(Query *query, List *colnos, List *opids);
extern bool innerrel_is_unique(PlannerInfo *root,
Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel,
JoinType jointype, List *restrictlist, bool force_cache);
+extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids,
+ Relids outerrelids, RelOptInfo *innerrel,
+ JoinType jointype, List *restrictlist,
+ bool force_cache, List **uclauses);
+extern List *remove_useless_self_joins(PlannerInfo *root, List *jointree);
/*
* prototypes for plan/setrefs.c
diff --git a/src/include/port.h b/src/include/port.h
index 85cf26c7521..344f29a0232 100644
--- a/src/include/port.h
+++ b/src/include/port.h
@@ -107,6 +107,53 @@ extern void pgfnames_cleanup(char **filenames);
#define is_absolute_path(filename) is_windows_absolute_path(filename)
#endif
+
+/*
+ * Constants for handling socket error codes.
+ * All socket-related routines must use SOCK_ERRNO instead of errno!
+ *
+ * On Windows, socket errors are obtained with WSAGetLastError rather than
+ * errno: errno may be 0 while WSAGetLastError returns the actual socket error.
+ * WSAGetLastError uses its own error codes, which differ from errno values.
+ */
+#ifdef WIN32
+#define SOCK_EWOULDBLOCK WSAEWOULDBLOCK
+#define SOCK_EINTR WSAEINTR
+#define SOCK_EINVAL WSAEINVAL
+#define SOCK_EIO WSAEFAULT /* used only for setting error code */
+#define SOCK_EINPROGRESS WSAEINPROGRESS
+#define SOCK_ECONNRESET WSAECONNRESET
+#define SOCK_ECONNABORTED WSAECONNABORTED
+#define SOCK_EHOSTDOWN WSAEHOSTDOWN
+#define SOCK_EHOSTUNREACH WSAEHOSTUNREACH
+#define SOCK_ENETDOWN WSAENETDOWN
+#define SOCK_ENETRESET WSAENETRESET
+#define SOCK_ENETUNREACH WSAENETUNREACH
+#define SOCK_ETIMEDOUT WSAETIMEDOUT
+#else
+#ifdef EAGAIN
+#define SOCK_EAGAIN EAGAIN
+#endif
+#ifdef EWOULDBLOCK
+#define SOCK_EWOULDBLOCK EWOULDBLOCK
+#endif
+#define SOCK_EINTR EINTR
+#define SOCK_EINVAL EINVAL
+#define SOCK_EIO EIO
+#define SOCK_EINPROGRESS EINPROGRESS
+#define SOCK_ECONNRESET ECONNRESET
+#ifdef EPIPE
+#define SOCK_EPIPE EPIPE
+#endif
+#define SOCK_ECONNABORTED ECONNABORTED
+#define SOCK_EHOSTDOWN EHOSTDOWN
+#define SOCK_EHOSTUNREACH EHOSTUNREACH
+#define SOCK_ENETDOWN ENETDOWN
+#define SOCK_ENETRESET ENETRESET
+#define SOCK_ENETUNREACH ENETUNREACH
+#define SOCK_ETIMEDOUT ETIMEDOUT
+#endif
+
/*
* This macro provides a centralized list of all errnos that identify
* hard failure of a previously-established network connection.
@@ -119,16 +166,22 @@ extern void pgfnames_cleanup(char **filenames);
* are actually reporting errors typically single out EPIPE and ECONNRESET,
* while allowing the network failures to be reported generically.
*/
-#define ALL_CONNECTION_FAILURE_ERRNOS \
- EPIPE: \
- case ECONNRESET: \
- case ECONNABORTED: \
- case EHOSTDOWN: \
- case EHOSTUNREACH: \
- case ENETDOWN: \
- case ENETRESET: \
- case ENETUNREACH: \
- case ETIMEDOUT
+
+#define ALL_CONNECTION_FAILURE_ERRNOS_COMMON \
+ SOCK_ECONNRESET: \
+ case SOCK_ECONNABORTED: \
+ case SOCK_EHOSTDOWN: \
+ case SOCK_EHOSTUNREACH: \
+ case SOCK_ENETDOWN: \
+ case SOCK_ENETRESET: \
+ case SOCK_ENETUNREACH: \
+ case SOCK_ETIMEDOUT
+
+#ifdef SOCK_EPIPE
+#define ALL_CONNECTION_FAILURE_ERRNOS SOCK_EPIPE: case ALL_CONNECTION_FAILURE_ERRNOS_COMMON
+#else
+#define ALL_CONNECTION_FAILURE_ERRNOS ALL_CONNECTION_FAILURE_ERRNOS_COMMON
+#endif
/* Portable locale initialization (in exec.c) */
extern void set_pglocale_pgservice(const char *argv0, const char *app);
diff --git a/src/include/storage/rd.h b/src/include/storage/rd.h
new file mode 100644
index 00000000000..69baaa0cfb5
--- /dev/null
+++ b/src/include/storage/rd.h
@@ -0,0 +1,33 @@
+#ifndef RD_H
+#define RD_H
+
+#include "postgres.h"
+
+#include "storage/block.h"
+#include "storage/relfilelocator.h"
+#include "storage/smgr.h"
+#include "storage/sync.h"
+
+extern void rd_init(void);
+extern void rd_shutdown(void);
+extern void rd_open(SMgrRelation reln);
+extern void rd_close(SMgrRelation reln, ForkNumber forknum);
+extern void rd_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
+extern bool rd_exists(SMgrRelation reln, ForkNumber forknum);
+extern void rd_unlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
+extern void rd_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync);
+extern void rd_zeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync);
+extern bool rd_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks);
+extern void rd_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks);
+extern void rd_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync);
+extern void rd_writeback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks);
+extern BlockNumber rd_nblocks(SMgrRelation reln, ForkNumber forknum);
+extern void rd_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber old_blocks, BlockNumber nblocks);
+extern void rd_immedsync(SMgrRelation reln, ForkNumber forknum);
+extern void rd_registersync(SMgrRelation reln, ForkNumber forknum);
+extern void rd_reset(SMgrRelation reln);
+
+extern int temp_rd_buffers;
+extern bool enable_temp_rd_buffers;
+
+#endif /* RD_H */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index 29ac6cdcd92..7884b6b94a6 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -393,6 +393,17 @@ do \
#endif /* __sparc__ */
+/* Elbrus */
+#ifdef __e2k__
+#define HAS_TEST_AND_SET
+typedef int slock_t;
+/* There is no need to check for sync_lock availability. */
+#define TAS(lock) __sync_lock_test_and_set(lock, 1)
+#define S_UNLOCK(lock) __sync_lock_release(lock)
+#define SPIN_DELAY() do { __asm__ __volatile__ ("nop" : : ); } while(0)
+#endif
+
+
/* PowerPC */
#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
#define HAS_TEST_AND_SET
diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h
index 8f5744b21bc..23c996194c8 100644
--- a/src/include/storage/sinval.h
+++ b/src/include/storage/sinval.h
@@ -110,15 +110,20 @@ typedef struct
Oid relId; /* relation ID */
} SharedInvalSnapshotMsg;
-typedef union
+typedef struct
{
- int8 id; /* type field --- must be first */
- SharedInvalCatcacheMsg cc;
- SharedInvalCatalogMsg cat;
- SharedInvalRelcacheMsg rc;
- SharedInvalSmgrMsg sm;
- SharedInvalRelmapMsg rm;
- SharedInvalSnapshotMsg sn;
+ union
+ {
+ int8 id; /* type field --- must be first */
+ SharedInvalCatcacheMsg cc;
+ SharedInvalCatalogMsg cat;
+ SharedInvalRelcacheMsg rc;
+ SharedInvalSmgrMsg sm;
+ SharedInvalRelmapMsg rm;
+ SharedInvalSnapshotMsg sn;
+ };
+
+ bool isLocal;
} SharedInvalidationMessage;
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 3856d1d4f8b..a79c6d59c6e 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -59,6 +59,7 @@ typedef struct SMgrRelationData
*/
int md_num_open_segs[MAX_FORKNUM + 1];
struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
+ struct _RdBuffer *rd_bufs[MAX_FORKNUM + 1];
/*
* Pinning support. If unpinned (ie. pincount == 0), 'node' is a list
diff --git a/src/include/tcop/cmdtag.h b/src/include/tcop/cmdtag.h
index 23c99d7eca6..c8e6742aa90 100644
--- a/src/include/tcop/cmdtag.h
+++ b/src/include/tcop/cmdtag.h
@@ -29,6 +29,7 @@ typedef enum CommandTag
typedef struct QueryCompletion
{
CommandTag commandTag;
+ CommandTag explainCommandTag;
uint64 nprocessed;
} QueryCompletion;
diff --git a/src/include/tcop/cmdtaglist.h b/src/include/tcop/cmdtaglist.h
index 7fdcec6dd93..7144760c936 100644
--- a/src/include/tcop/cmdtaglist.h
+++ b/src/include/tcop/cmdtaglist.h
@@ -178,6 +178,9 @@ PG_CMDTAG(CMDTAG_DROP_USER_MAPPING, "DROP USER MAPPING", true, false, false)
PG_CMDTAG(CMDTAG_DROP_VIEW, "DROP VIEW", true, false, false)
PG_CMDTAG(CMDTAG_EXECUTE, "EXECUTE", false, false, false)
PG_CMDTAG(CMDTAG_EXPLAIN, "EXPLAIN", false, false, false)
+PG_CMDTAG(CMDTAG_EXPLAIN_INSERT, "INSERT", false, false, true)
+PG_CMDTAG(CMDTAG_EXPLAIN_UPDATE, "UPDATE", false, false, true)
+PG_CMDTAG(CMDTAG_EXPLAIN_DELETE, "DELETE", false, false, true)
PG_CMDTAG(CMDTAG_FETCH, "FETCH", false, false, true)
PG_CMDTAG(CMDTAG_GRANT, "GRANT", true, false, false)
PG_CMDTAG(CMDTAG_GRANT_ROLE, "GRANT ROLE", false, false, false)
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 99169a93d91..3038a53294c 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -223,7 +223,7 @@ extern void ResetCatalogCaches(void);
extern void ResetCatalogCachesExt(bool debug_discard);
extern void CatalogCacheFlushCatalog(Oid catId);
extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
-extern void PrepareToInvalidateCacheTuple(Relation relation,
+extern void PrepareToInvalidateCacheTuple(Oid relid,
HeapTuple tuple,
HeapTuple newtuple,
void (*function) (int, uint32, Oid));
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index da26941f6db..bf652f35f3d 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -122,6 +122,8 @@ typedef struct
HTAB *hashp;
uint32 curBucket; /* index of current bucket */
HASHELEMENT *curEntry; /* current entry in bucket */
+ bool hasHashVal;
+ uint32 hashvalue;
} HASH_SEQ_STATUS;
/*
@@ -141,6 +143,8 @@ extern bool hash_update_hash_key(HTAB *hashp, void *existingEntry,
const void *newKeyPtr);
extern long hash_get_num_entries(HTAB *hashp);
extern void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp);
+extern void hash_seq_init_with_hash_value(HASH_SEQ_STATUS *status, HTAB *hashp,
+ uint32 hashvalue);
extern void *hash_seq_search(HASH_SEQ_STATUS *status);
extern void hash_seq_term(HASH_SEQ_STATUS *status);
extern void hash_freeze(HTAB *hashp);
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 24695facf22..e855b04c12b 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -65,4 +65,74 @@ extern void InvalidateSystemCaches(void);
extern void InvalidateSystemCachesExtended(bool debug_discard);
extern void LogLogicalInvalidations(void);
-#endif /* INVAL_H */
+
+/*
+ * Hints that operation being performed is related to temporary tables.
+ */
+extern char temp_table_scope;
+
+#define TEMP_TABLE_SCOPE_NOTEMP 0
+#define TEMP_TABLE_SCOPE_SHARED 1
+#define TEMP_TABLE_SCOPE_LOCAL 2
+
+/*
+ * This is a modified PG_TRY/PG_FINALLY/PG_END_TRY block that conditionally
+ * sets `temp_table_scope` and restores it, including on error. The try/catch
+ * machinery is skipped when `level` already equals the current scope (e.g.
+ * `isTemp` is false outside any temp scope). On entering a scope with
+ * `BEGIN_TEMP_TABLE_SCOPE`, the previous value of `temp_table_scope` is saved
+ * and the new value is set from `level`. Upon reaching `END_TEMP_TABLE_SCOPE`,
+ * or on an exception, it is restored to the saved value. Thus, scopes nest.
+ *
+ * When `level` is `TEMP_TABLE_SCOPE_LOCAL` (or `BEGIN_TEMP_TABLE_SCOPE_LOCAL`
+ * is used with a non-zero argument), some shared invalidation messages are
+ * not sent to other sessions.
+ *
+ * When `level` is `TEMP_TABLE_SCOPE_LOCAL` or `TEMP_TABLE_SCOPE_SHARED`
+ * (or `BEGIN_TEMP_TABLE_SCOPE_*` is used with a non-zero argument), WAL
+ * records created in the scope do not cause an fsync on commit.
+ */
+#define BEGIN_TEMP_TABLE_SCOPE(level) \
+ do { \
+ const char _temp_scope_level = (level); /* evaluate `level` exactly once */ \
+ const bool _temp_scope_do = (_temp_scope_level != temp_table_scope); /* nothing to do if the scope is unchanged */ \
+ bool _temp_scope_throw = false; \
+ char _temp_scope_save_state; \
+ sigjmp_buf* _temp_scope_save_exception_stack = PG_exception_stack; \
+ ErrorContextCallback* _temp_scope_save_error_stack; \
+ sigjmp_buf _temp_scope_save_sigjmp_buf; \
+ if (_temp_scope_do) \
+ { \
+ _temp_scope_save_state = temp_table_scope; \
+ _temp_scope_save_error_stack = error_context_stack; \
+ if (sigsetjmp(_temp_scope_save_sigjmp_buf, 0) == 0) \
+ { \
+ PG_exception_stack = &_temp_scope_save_sigjmp_buf; \
+ temp_table_scope = _temp_scope_level; /* cached value, not a second expansion of `level` */ \
+ } \
+ else \
+ _temp_scope_throw = true; /* jumped back here on error; END_TEMP_TABLE_SCOPE re-throws */ \
+ } \
+ if (!_temp_scope_throw) \
+ {
+
+#define BEGIN_TEMP_TABLE_SCOPE_LOCAL(isTemp) BEGIN_TEMP_TABLE_SCOPE( (isTemp) ? TEMP_TABLE_SCOPE_LOCAL : TEMP_TABLE_SCOPE_NOTEMP )
+#define BEGIN_TEMP_TABLE_SCOPE_SHARED(isTemp) BEGIN_TEMP_TABLE_SCOPE( (isTemp) ? TEMP_TABLE_SCOPE_SHARED : TEMP_TABLE_SCOPE_NOTEMP )
+
+#define END_TEMP_TABLE_SCOPE() \
+ } /* closes the guarded body opened by BEGIN_TEMP_TABLE_SCOPE */ \
+ PG_exception_stack = _temp_scope_save_exception_stack; /* always restore the saved handler */ \
+ if (_temp_scope_do) \
+ { \
+ error_context_stack = _temp_scope_save_error_stack; \
+ temp_table_scope = _temp_scope_save_state; /* back to the saved scope */ \
+ if (_temp_scope_throw) \
+ PG_RE_THROW(); /* propagate the error only after state is restored */ \
+ } \
+ } while (0)
+
+#define IsTempTableScope() (temp_table_scope != TEMP_TABLE_SCOPE_NOTEMP) /* anything other than NOTEMP */
+#define IsLocalTempTableScope() (temp_table_scope == TEMP_TABLE_SCOPE_LOCAL)
+#define IsSharedTempTableScope() (temp_table_scope == TEMP_TABLE_SCOPE_SHARED)
+
+#endif /* INVAL_H */
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index 20446f6f836..7eaa0c9b6fa 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -59,6 +59,8 @@ typedef struct AttStatsSlot
/* Remaining fields are private to get_attstatsslot/free_attstatsslot */
void *values_arr; /* palloc'd values array, if any */
void *numbers_arr; /* palloc'd numbers array, if any */
+
+ bool incache; /* do not free because struct is cached */
} AttStatsSlot;
/* Hook for plugins to get control in get_attavgwidth() */
@@ -190,6 +192,8 @@ extern int32 get_typavgwidth(Oid typid, int32 typmod);
extern int32 get_attavgwidth(Oid relid, AttrNumber attnum);
extern bool get_attstatsslot(AttStatsSlot *sslot, HeapTuple statstuple,
int reqkind, Oid reqop, int flags);
+extern AttStatsSlot* fill_attstatsslot(AttStatsSlot *sslots, HeapTuple statstuple,
+ int reqkind, Oid reqop, int flags);
extern void free_attstatsslot(AttStatsSlot *sslot);
extern char *get_namespace_name(Oid nspid);
extern char *get_namespace_name_or_temp(Oid nspid);
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 0de77aeaeab..4e112aab146 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -97,6 +97,7 @@ typedef struct VariableStatData
bool isunique; /* matches unique index or DISTINCT clause */
bool acl_ok; /* true if user has SELECT privilege on all
* rows from the table or column */
+ AttStatsSlot *sslots;
} VariableStatData;
#define ReleaseVariableStats(vardata) \
@@ -179,6 +180,9 @@ extern double generic_restriction_selectivity(PlannerInfo *root,
Oid oproid, Oid collation,
List *args, int varRelid,
double default_selectivity);
+extern double prefix_record_histogram_selectivity(VariableStatData *vardata,
+ Datum constvalLeft, Datum constvalRight, int record_cmp_prefix,
+ double ndistinct, int *n_bins);
extern double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
Oid opoid, FmgrInfo *opproc,
@@ -219,6 +223,11 @@ extern double estimate_num_groups(PlannerInfo *root, List *groupExprs,
double input_rows, List **pgset,
EstimationInfo *estinfo);
+extern double estimate_num_groups_incremental(PlannerInfo *root, List *groupExprs,
+ double input_rows, List **pgset,
+ EstimationInfo *estinfo,
+ List **cache_varinfos, int prevNExprs);
+
extern void estimate_hash_bucket_stats(PlannerInfo *root,
Node *hashkey, double nbuckets,
Selectivity *mcv_freq,
@@ -242,5 +251,13 @@ extern Selectivity scalararraysel_containment(PlannerInfo *root,
Node *leftop, Node *rightop,
Oid elemtype, bool isEquality, bool useOr,
int varRelid);
-
+extern Selectivity eqjoin_selectivity(PlannerInfo *root, Oid operator, Oid
+ collation,
+ VariableStatData* vardata1,
+ VariableStatData* vardata2,
+ SpecialJoinInfo *sjinfo,
+ int record_cmp_prefix);
+extern Selectivity eqconst_selectivity(Oid operator, Oid collation,
+ VariableStatData *vardata, Datum constval, bool constisnull,
+ bool varonleft, bool negate, int record_cmp_prefix);
#endif /* SELFUNCS_H */
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index b541911c8fc..e1791c91b9e 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -83,6 +83,13 @@ extern bool RelationInvalidatesSnapshotsOnly(Oid relid);
extern bool RelationHasSysCache(Oid relid);
extern bool RelationSupportsSysCache(Oid relid);
+
+/* Retrieve relation description data, if possible.
+ * Use only in situations where it is acceptable for this function to
+ * fail (return false) for no reason.
+ */
+extern HeapTuple TryGetSysCacheRelationClassTuple(Oid relid);
+
/*
* The use of the macros below rather than direct calls to the corresponding
* functions is encouraged, as it insulates the caller from changes in the
diff --git a/src/interfaces/libpq/fe-cancel.c b/src/interfaces/libpq/fe-cancel.c
index 8c7aff42b5a..07df5d7cdca 100644
--- a/src/interfaces/libpq/fe-cancel.c
+++ b/src/interfaces/libpq/fe-cancel.c
@@ -565,7 +565,7 @@ retry3:
if (connect(tmpsock, (struct sockaddr *) &cancel->raddr.addr,
cancel->raddr.salen) < 0)
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry3;
strlcpy(errbuf, "PQcancel() -- connect() failed: ", errbufsize);
@@ -582,7 +582,7 @@ retry3:
retry4:
if (send(tmpsock, (char *) &crp, sizeof(crp), 0) != (int) sizeof(crp))
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry4;
strlcpy(errbuf, "PQcancel() -- send() failed: ", errbufsize);
@@ -599,7 +599,7 @@ retry4:
retry5:
if (recv(tmpsock, (char *) &crp, 1, 0) < 0)
{
- if (SOCK_ERRNO == EINTR)
+ if (SOCK_ERRNO == SOCK_EINTR)
/* Interrupted system call - we'll just try again */
goto retry5;
/* we ignore other error conditions */
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 454d2ea3fb7..0556901e26a 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -3168,11 +3168,11 @@ keep_going: /* We will come back to here until there is
if (connect(conn->sock, (struct sockaddr *) &addr_cur->addr.addr,
addr_cur->addr.salen) < 0)
{
- if (SOCK_ERRNO == EINPROGRESS ||
+ if (SOCK_ERRNO == SOCK_EINPROGRESS ||
#ifdef WIN32
- SOCK_ERRNO == EWOULDBLOCK ||
+ SOCK_ERRNO == SOCK_EWOULDBLOCK ||
#endif
- SOCK_ERRNO == EINTR)
+ SOCK_ERRNO == SOCK_EINTR)
{
/*
* This is fine - we're in non-blocking mode, and
diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c
index 91063b159e6..b545c130eac 100644
--- a/src/interfaces/libpq/fe-exec.c
+++ b/src/interfaces/libpq/fe-exec.c
@@ -3827,7 +3827,9 @@ PQcmdTuples(PGresult *res)
if (!res)
return "";
- if (strncmp(res->cmdStatus, "INSERT ", 7) == 0)
+ if (strncmp(res->cmdStatus, "EXPLAIN ", 8) == 0)
+ p = res->cmdStatus + 8;
+ else if (strncmp(res->cmdStatus, "INSERT ", 7) == 0)
{
p = res->cmdStatus + 7;
/* INSERT: skip oid and space */
diff --git a/src/interfaces/libpq/fe-misc.c b/src/interfaces/libpq/fe-misc.c
index 3ed89450ff5..f4eeb8f4ee4 100644
--- a/src/interfaces/libpq/fe-misc.c
+++ b/src/interfaces/libpq/fe-misc.c
@@ -645,16 +645,16 @@ retry3:
{
switch (SOCK_ERRNO)
{
- case EINTR:
+ case SOCK_EINTR:
goto retry3;
/* Some systems return EAGAIN/EWOULDBLOCK for no data */
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
return someread;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
return someread;
#endif
@@ -740,16 +740,16 @@ retry4:
{
switch (SOCK_ERRNO)
{
- case EINTR:
+ case SOCK_EINTR:
goto retry4;
/* Some systems return EAGAIN/EWOULDBLOCK for no data */
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
return 0;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
return 0;
#endif
@@ -868,15 +868,15 @@ pqSendSome(PGconn *conn, int len)
/* Anything except EAGAIN/EWOULDBLOCK/EINTR is trouble */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
break;
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
break;
#endif
- case EINTR:
+ case SOCK_EINTR:
continue;
default:
@@ -1088,7 +1088,7 @@ pqSocketCheck(PGconn *conn, int forRead, int forWrite, pg_usec_time_t end_time)
/* We will retry as long as we get EINTR */
do
result = PQsocketPoll(conn->sock, forRead, forWrite, end_time);
- while (result < 0 && SOCK_ERRNO == EINTR);
+ while (result < 0 && SOCK_ERRNO == SOCK_EINTR);
if (result < 0)
{
diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c
index d093be9ba5a..9011dcabeed 100644
--- a/src/interfaces/libpq/fe-secure-gssapi.c
+++ b/src/interfaces/libpq/fe-secure-gssapi.c
@@ -84,8 +84,8 @@
* transport negotiation is complete).
*
* On success, returns the number of data bytes consumed (possibly less than
- * len). On failure, returns -1 with errno set appropriately. If the errno
- * indicates a non-retryable error, a message is added to conn->errorMessage.
+ * len). On failure, returns -1 with SOCK_ERRNO set appropriately (SOCK_ERRNO is used because
+ * it differs from errno on Windows). If SOCK_ERRNO indicates a non-retryable error, a message is added to conn->errorMessage.
* For retryable errors, caller should call again (passing the same or more
* data) once the socket is ready.
*/
@@ -121,7 +121,7 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
{
appendPQExpBufferStr(&conn->errorMessage,
"GSSAPI caller failed to retransmit all data needing to be retried\n");
- errno = EINVAL;
+ SOCK_ERRNO_SET(SOCK_EINVAL);
return -1;
}
@@ -199,14 +199,14 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
if (major != GSS_S_COMPLETE)
{
pg_GSS_error(libpq_gettext("GSSAPI wrap error"), conn, major, minor);
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
if (conf_state == 0)
{
libpq_append_conn_error(conn, "outgoing GSSAPI message would not use confidentiality");
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -215,7 +215,7 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len)
libpq_append_conn_error(conn, "client tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32));
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -258,8 +258,8 @@ cleanup:
* transport negotiation is complete).
*
* Returns the number of data bytes read, or on failure, returns -1
- * with errno set appropriately. If the errno indicates a non-retryable
- * error, a message is added to conn->errorMessage. For retryable errors,
+ * with SOCK_ERRNO set appropriately (SOCK_ERRNO is used because it differs from errno on Windows).
+ * If SOCK_ERRNO indicates a non-retryable error, a message is added to conn->errorMessage. For retryable errors,
* caller should call again once the socket is ready.
*/
ssize_t
@@ -341,7 +341,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
/* If we still haven't got the length, return to the caller */
if (PqGSSRecvLength < sizeof(uint32))
{
- errno = EWOULDBLOCK;
+ SOCK_ERRNO_SET(SOCK_EWOULDBLOCK);
return -1;
}
}
@@ -354,7 +354,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
libpq_append_conn_error(conn, "oversize GSSAPI packet sent by the server (%zu > %zu)",
(size_t) input.length,
PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32));
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
return -1;
}
@@ -373,7 +373,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
/* If we don't yet have the whole packet, return to the caller */
if (PqGSSRecvLength - sizeof(uint32) < input.length)
{
- errno = EWOULDBLOCK;
+ SOCK_ERRNO_SET(SOCK_EWOULDBLOCK);
return -1;
}
@@ -393,7 +393,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
pg_GSS_error(libpq_gettext("GSSAPI unwrap error"), conn,
major, minor);
ret = -1;
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -401,7 +401,7 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len)
{
libpq_append_conn_error(conn, "incoming GSSAPI message did not use confidentiality");
ret = -1;
- errno = EIO; /* for lack of a better idea */
+ SOCK_ERRNO_SET(SOCK_EIO); /* for lack of a better idea */
goto cleanup;
}
@@ -437,7 +437,15 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
*ret = pqsecure_raw_read(conn, recv_buffer, length);
if (*ret < 0)
{
- if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
+ int err = SOCK_ERRNO;
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_READING;
else
return PGRES_POLLING_FAILED;
@@ -457,7 +465,16 @@ gss_read(PGconn *conn, void *recv_buffer, size_t length, ssize_t *ret)
*ret = pqsecure_raw_read(conn, recv_buffer, length);
if (*ret < 0)
{
- if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
+ int err = SOCK_ERRNO;
+
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_READING;
else
return PGRES_POLLING_FAILED;
@@ -520,7 +537,15 @@ pqsecure_open_gss(PGconn *conn)
ret = pqsecure_raw_write(conn, PqGSSSendBuffer + PqGSSSendNext, amount);
if (ret < 0)
{
- if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
+ int err = SOCK_ERRNO;
+ if (
+#ifdef SOCK_EAGAIN
+ err == SOCK_EAGAIN ||
+#endif
+#ifdef SOCK_EWOULDBLOCK
+ err == SOCK_EWOULDBLOCK ||
+#endif
+ err == SOCK_EINTR)
return PGRES_POLLING_WRITING;
else
return PGRES_POLLING_FAILED;
diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c
index b6fffd7b9b0..578f6f1f134 100644
--- a/src/interfaces/libpq/fe-secure-openssl.c
+++ b/src/interfaces/libpq/fe-secure-openssl.c
@@ -179,7 +179,7 @@ rloop:
appendPQExpBufferStr(&conn->errorMessage,
"SSL_read failed but did not provide error information\n");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
}
break;
case SSL_ERROR_WANT_READ:
@@ -198,8 +198,11 @@ rloop:
if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
- if (result_errno == EPIPE ||
- result_errno == ECONNRESET)
+ if (
+#ifdef SOCK_EPIPE
+ result_errno == SOCK_EPIPE ||
+#endif
+ result_errno == SOCK_ECONNRESET)
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -212,7 +215,7 @@ rloop:
{
libpq_append_conn_error(conn, "SSL SYSCALL error: EOF detected");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
}
break;
@@ -223,7 +226,7 @@ rloop:
libpq_append_conn_error(conn, "SSL error: %s", errm);
SSLerrfree(errm);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -235,13 +238,13 @@ rloop:
* server crash.
*/
libpq_append_conn_error(conn, "SSL connection has been closed unexpectedly");
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
default:
libpq_append_conn_error(conn, "unrecognized SSL error code: %d", err);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -281,7 +284,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
appendPQExpBufferStr(&conn->errorMessage,
"SSL_write failed but did not provide error information\n");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
}
break;
case SSL_ERROR_WANT_READ:
@@ -305,7 +308,11 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
if (n < 0 && SOCK_ERRNO != 0)
{
result_errno = SOCK_ERRNO;
- if (result_errno == EPIPE || result_errno == ECONNRESET)
+ if (
+#ifdef SOCK_EPIPE
+ result_errno == SOCK_EPIPE ||
+#endif
+ result_errno == SOCK_ECONNRESET)
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -318,7 +325,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
{
libpq_append_conn_error(conn, "SSL SYSCALL error: EOF detected");
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
}
break;
@@ -329,7 +336,7 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
libpq_append_conn_error(conn, "SSL error: %s", errm);
SSLerrfree(errm);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -341,13 +348,13 @@ pgtls_write(PGconn *conn, const void *ptr, size_t len)
* server crash.
*/
libpq_append_conn_error(conn, "SSL connection has been closed unexpectedly");
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
default:
libpq_append_conn_error(conn, "unrecognized SSL error code: %d", err);
/* assume the connection is broken */
- result_errno = ECONNRESET;
+ result_errno = SOCK_ECONNRESET;
n = -1;
break;
}
@@ -1917,13 +1924,13 @@ my_sock_read(BIO *h, char *buf, int size)
/* If we were interrupted, tell caller to retry */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
BIO_set_retry_read(h);
break;
@@ -1950,13 +1957,13 @@ my_sock_write(BIO *h, const char *buf, int size)
/* If we were interrupted, tell caller to retry */
switch (SOCK_ERRNO)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
BIO_set_retry_write(h);
break;
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index f628082337e..1ca6bac6283 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -222,18 +222,19 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
/* Set error message if appropriate */
switch (result_errno)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
/* no error message, caller is expected to retry */
break;
-
- case EPIPE:
- case ECONNRESET:
+#ifdef SOCK_EPIPE
+ case SOCK_EPIPE:
+#endif
+ case SOCK_ECONNRESET:
libpq_append_conn_error(conn, "server closed the connection unexpectedly\n"
"\tThis probably means the server terminated abnormally\n"
"\tbefore or while processing the request.");
@@ -380,23 +381,24 @@ retry_masked:
/* Set error message if appropriate */
switch (result_errno)
{
-#ifdef EAGAIN
- case EAGAIN:
+#ifdef SOCK_EAGAIN
+ case SOCK_EAGAIN:
#endif
-#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
- case EWOULDBLOCK:
+#if defined(SOCK_EWOULDBLOCK) && (!defined(SOCK_EAGAIN) || (SOCK_EWOULDBLOCK != SOCK_EAGAIN))
+ case SOCK_EWOULDBLOCK:
#endif
- case EINTR:
+ case SOCK_EINTR:
/* no error message, caller is expected to retry */
break;
-
- case EPIPE:
+#ifdef SOCK_EPIPE
+ case SOCK_EPIPE:
/* Set flag for EPIPE */
REMEMBER_EPIPE(spinfo, true);
/* FALL THRU */
+#endif
- case ECONNRESET:
+ case SOCK_ECONNRESET:
conn->write_failed = true;
/* Store error message in conn->write_err_msg, if possible */
/* (strdup failure is OK, we'll cope later) */
diff --git a/src/pl/plperl/expected/plperl.out b/src/pl/plperl/expected/plperl.out
index e3d7c8896a2..311aca3e2cc 100644
--- a/src/pl/plperl/expected/plperl.out
+++ b/src/pl/plperl/expected/plperl.out
@@ -620,7 +620,7 @@ CREATE OR REPLACE FUNCTION perl_spi_prepared_set(INTEGER, INTEGER) RETURNS SETOF
spi_freeplan($x);
return;
$$ LANGUAGE plperl;
-SELECT * from perl_spi_prepared_set(1,2);
+SELECT * from perl_spi_prepared_set(1,2) ORDER BY 1;
perl_spi_prepared_set
-----------------------
2
diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql
index bb0b8ce4cb6..b01a3c919ca 100644
--- a/src/pl/plperl/sql/plperl.sql
+++ b/src/pl/plperl/sql/plperl.sql
@@ -392,7 +392,7 @@ CREATE OR REPLACE FUNCTION perl_spi_prepared_set(INTEGER, INTEGER) RETURNS SETOF
spi_freeplan($x);
return;
$$ LANGUAGE plperl;
-SELECT * from perl_spi_prepared_set(1,2);
+SELECT * from perl_spi_prepared_set(1,2) ORDER BY 1;
--
-- Test prepare with a type with spaces
diff --git a/src/port/win32fdatasync.c b/src/port/win32fdatasync.c
index 1cf9c159551..1d6df122f65 100644
--- a/src/port/win32fdatasync.c
+++ b/src/port/win32fdatasync.c
@@ -22,6 +22,15 @@
int
fdatasync(int fd)
{
+#if WINVER < _WIN32_WINNT_WIN8
+#ifndef FRONTEND
+ ereport(LOG,
+ (errmsg_internal("fdatasync is not supported on this Windows version")));
+#else
+ fprintf(stderr, "fdatasync is not supported on this Windows version\n");
+#endif
+ return -1;
+#else
IO_STATUS_BLOCK iosb;
NTSTATUS status;
HANDLE handle;
@@ -48,4 +57,5 @@ fdatasync(int fd)
_dosmaperr(pg_RtlNtStatusToDosError(status));
return -1;
+#endif
}
diff --git a/src/port/win32gettimeofday.c b/src/port/win32gettimeofday.c
index 1e00f7ee149..d53979e423b 100644
--- a/src/port/win32gettimeofday.c
+++ b/src/port/win32gettimeofday.c
@@ -63,7 +63,11 @@ gettimeofday(struct timeval *tp, void *tzp)
*/
Assert(tzp == NULL);
+#if WINVER >= _WIN32_WINNT_WIN8
GetSystemTimePreciseAsFileTime(&file_time);
+#else
+ GetSystemTimeAsFileTime(&file_time);
+#endif
ularge.LowPart = file_time.dwLowDateTime;
ularge.HighPart = file_time.dwHighDateTime;
diff --git a/src/port/win32ntdll.c b/src/port/win32ntdll.c
index bf8bce7f1a1..c14c2075c03 100644
--- a/src/port/win32ntdll.c
+++ b/src/port/win32ntdll.c
@@ -19,7 +19,10 @@
RtlGetLastNtStatus_t pg_RtlGetLastNtStatus;
RtlNtStatusToDosError_t pg_RtlNtStatusToDosError;
+
+#if WINVER >= _WIN32_WINNT_WIN8
NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx;
+#endif
typedef struct NtDllRoutine
{
@@ -30,7 +33,9 @@ typedef struct NtDllRoutine
static const NtDllRoutine routines[] = {
{"RtlGetLastNtStatus", (pg_funcptr_t *) &pg_RtlGetLastNtStatus},
{"RtlNtStatusToDosError", (pg_funcptr_t *) &pg_RtlNtStatusToDosError},
+#if WINVER >= _WIN32_WINNT_WIN8
{"NtFlushBuffersFileEx", (pg_funcptr_t *) &pg_NtFlushBuffersFileEx}
+#endif
};
static bool initialized;
diff --git a/src/test/modules/test_rbtree/test_rbtree.c b/src/test/modules/test_rbtree/test_rbtree.c
index 3e76d27bf10..d3b3dd74d07 100644
--- a/src/test/modules/test_rbtree/test_rbtree.c
+++ b/src/test/modules/test_rbtree/test_rbtree.c
@@ -84,6 +84,7 @@ create_int_rbtree(void)
irbt_combine,
irbt_alloc,
irbt_free,
+ NULL,
NULL);
}
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index 9b97fcf40b5..cb1428bb5ec 100644
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1234,7 +1234,8 @@ explain (costs off)
select distinct min(f1), max(f1) from minmaxtest;
QUERY PLAN
---------------------------------------------------------------------------------------------
- Unique
+ HashAggregate
+ Group Key: (InitPlan 1).col1, (InitPlan 2).col1
InitPlan 1
-> Limit
-> Merge Append
@@ -1257,10 +1258,8 @@ explain (costs off)
-> Index Only Scan using minmaxtest2i on minmaxtest2 minmaxtest_8
Index Cond: (f1 IS NOT NULL)
-> Index Only Scan Backward using minmaxtest3i on minmaxtest3 minmaxtest_9
- -> Sort
- Sort Key: ((InitPlan 1).col1), ((InitPlan 2).col1)
- -> Result
-(26 rows)
+ -> Result
+(25 rows)
select distinct min(f1), max(f1) from minmaxtest;
min | max
@@ -2872,19 +2871,18 @@ SELECT count(*)
QUERY PLAN
-------------------------------------------------------------------------------
GroupAggregate
- Group Key: t1.z, t1.w, t1.x, t1.y
- -> Incremental Sort
- Sort Key: t1.z, t1.w, t1.x, t1.y
- Presorted Key: t1.z, t1.w, t1.x
+ Group Key: t1.x, t1.y, t1.z, t1.w
+ -> Sort
+ Sort Key: t1.x, t1.y, t1.z, t1.w
-> Merge Join
- Merge Cond: ((t1.z = t2.z) AND (t1.w = t2.w) AND (t1.x = t2.x))
+ Merge Cond: ((t1.w = t2.w) AND (t1.z = t2.z) AND (t1.x = t2.x))
-> Sort
- Sort Key: t1.z, t1.w, t1.x
+ Sort Key: t1.w, t1.z, t1.x
-> Index Scan using btg_x_y_idx on btg t1
-> Sort
- Sort Key: t2.z, t2.w, t2.x
+ Sort Key: t2.w, t2.z, t2.x
-> Index Scan using btg_x_y_idx on btg t2
-(13 rows)
+(12 rows)
RESET enable_nestloop;
RESET enable_hashjoin;
@@ -2983,10 +2981,9 @@ GROUP BY c1.y,c1.x,c2.x;
QUERY PLAN
-----------------------------------------------------
Group
- Group Key: c1.x, c1.y
- -> Incremental Sort
- Sort Key: c1.x, c1.y
- Presorted Key: c1.x
+ Group Key: c1.y, c1.x
+ -> Sort
+ Sort Key: c1.y, c1.x
-> Merge Join
Merge Cond: (c1.x = c2.x)
-> Sort
@@ -2995,7 +2992,7 @@ GROUP BY c1.y,c1.x,c2.x;
-> Sort
Sort Key: c2.x
-> Seq Scan on group_agg_pk c2
-(13 rows)
+(12 rows)
EXPLAIN (COSTS OFF)
SELECT c1.y,c1.x FROM group_agg_pk c1
@@ -3005,10 +3002,9 @@ GROUP BY c1.y,c2.x,c1.x;
QUERY PLAN
-----------------------------------------------------
Group
- Group Key: c2.x, c1.y
- -> Incremental Sort
- Sort Key: c2.x, c1.y
- Presorted Key: c2.x
+ Group Key: c1.y, c2.x
+ -> Sort
+ Sort Key: c1.y, c2.x
-> Merge Join
Merge Cond: (c1.x = c2.x)
-> Sort
@@ -3017,7 +3013,7 @@ GROUP BY c1.y,c2.x,c1.x;
-> Sort
Sort Key: c2.x
-> Seq Scan on group_agg_pk c2
-(13 rows)
+(12 rows)
RESET enable_nestloop;
RESET enable_hashjoin;
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index b1df05593c4..e336031259c 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1844,18 +1844,12 @@ DROP TABLE onek_with_null;
EXPLAIN (COSTS OFF)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------
- Bitmap Heap Scan on tenk1
- Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42)))
- -> BitmapOr
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 1))
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 3))
- -> Bitmap Index Scan on tenk1_thous_tenthous
- Index Cond: ((thousand = 42) AND (tenthous = 42))
-(9 rows)
+ QUERY PLAN
+-----------------------------------------------------------------
+ Index Scan using tenk1_thous_tenthous on tenk1
+ Index Cond: ((thousand = 42) AND (thousand = 42))
+ Filter: ((tenthous = 1) OR (tenthous = 3) OR (tenthous = 42))
+(3 rows)
SELECT * FROM tenk1
WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out
index 126f7047fed..3d5de283544 100644
--- a/src/test/regress/expected/equivclass.out
+++ b/src/test/regress/expected/equivclass.out
@@ -430,6 +430,36 @@ explain (costs off)
Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL))
(2 rows)
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: ((n.ff + n.ff) = p.f1)
+ -> Seq Scan on ec0 n
+ -> Materialize
+ -> Seq Scan on ec1 p
+(5 rows)
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1)
+ -> Seq Scan on ec0 n
+ -> Materialize
+ -> Seq Scan on ec1 p
+(5 rows)
+
+reset enable_mergejoin;
-- this could be converted, but isn't at present
explain (costs off)
select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/expected/guc.out b/src/test/regress/expected/guc.out
index 455b6d6c0ce..8ac7a9d5355 100644
--- a/src/test/regress/expected/guc.out
+++ b/src/test/regress/expected/guc.out
@@ -823,7 +823,7 @@ reset check_function_bodies;
set default_with_oids to f;
-- Should not allow to set it to true.
set default_with_oids to t;
-ERROR: tables declared WITH OIDS are not supported
+WARNING: tables declared WITH OIDS are not supported, ignored
-- Test GUC categories and flag patterns
SELECT pg_settings_get_flags(NULL);
pg_settings_get_flags
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 5fd54a10b1a..9f35a65cd06 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1426,7 +1426,7 @@ set parallel_setup_cost = 0;
set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
create index on t (a);
analyze t;
set enable_incremental_sort = off;
diff --git a/src/test/regress/expected/index_including.out b/src/test/regress/expected/index_including.out
index ea8b2454bf8..346bf8f2094 100644
--- a/src/test/regress/expected/index_including.out
+++ b/src/test/regress/expected/index_including.out
@@ -129,13 +129,11 @@ DETAIL: Failing row contains (1, null, 3, (4,4),(4,4)).
INSERT INTO tbl SELECT x, 2*x, NULL, NULL FROM generate_series(1,300) AS x;
explain (costs off)
select * from tbl where (c1,c2,c3) < (2,5,1);
- QUERY PLAN
-------------------------------------------------
- Bitmap Heap Scan on tbl
+ QUERY PLAN
+--------------------------------------------
+ Seq Scan on tbl
Filter: (ROW(c1, c2, c3) < ROW(2, 5, 1))
- -> Bitmap Index Scan on covering
- Index Cond: (ROW(c1, c2) <= ROW(2, 5))
-(4 rows)
+(2 rows)
select * from tbl where (c1,c2,c3) < (2,5,1);
c1 | c2 | c3 | c4
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2eaadceed0d..f7b42d8da7f 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -1984,8 +1984,8 @@ USING (name);
------+----+----
bb | 12 | 13
cc | 22 | 23
- dd | | 33
ee | 42 |
+ dd | | 33
(4 rows)
-- Cases with non-nullable expressions in subquery results;
@@ -2019,8 +2019,8 @@ NATURAL FULL JOIN
------+------+------+------+------
bb | 12 | 2 | 13 | 3
cc | 22 | 2 | 23 | 3
- dd | | | 33 | 3
ee | 42 | 2 | |
+ dd | | | 33 | 3
(4 rows)
SELECT * FROM
@@ -2517,11 +2517,10 @@ where t1.f1 = coalesce(t2.f1, 1);
-> Materialize
-> Seq Scan on int4_tbl t2
Filter: (f1 > 1)
- -> Materialize
- -> Seq Scan on int4_tbl t3
+ -> Seq Scan on int4_tbl t3
-> Materialize
-> Seq Scan on int4_tbl t4
-(14 rows)
+(13 rows)
explain (costs off)
select * from int4_tbl t1
@@ -3755,8 +3754,8 @@ select * from
where thousand = (q1 + q2);
QUERY PLAN
--------------------------------------------------------------
- Hash Join
- Hash Cond: (tenk1.twothousand = int4_tbl.f1)
+ Nested Loop
+ Join Filter: (tenk1.twothousand = int4_tbl.f1)
-> Nested Loop
-> Nested Loop
-> Seq Scan on q1
@@ -3765,9 +3764,8 @@ where thousand = (q1 + q2);
Recheck Cond: (thousand = (q1.q1 + q2.q2))
-> Bitmap Index Scan on tenk1_thous_tenthous
Index Cond: (thousand = (q1.q1 + q2.q2))
- -> Hash
- -> Seq Scan on int4_tbl
-(12 rows)
+ -> Seq Scan on int4_tbl
+(11 rows)
--
-- test ability to generate a suitable plan for a star-schema query
@@ -4470,7 +4468,7 @@ select q1, unique2, thousand, hundred
QUERY PLAN
----------------------------------------------------------------------------------------------------------
Nested Loop Left Join
- Filter: ((COALESCE(b.thousand, 123) = COALESCE(b.hundred, 123)) AND (a.q1 = COALESCE(b.hundred, 123)))
+ Filter: ((a.q1 = COALESCE(b.hundred, 123)) AND (COALESCE(b.thousand, 123) = COALESCE(b.hundred, 123)))
-> Seq Scan on int8_tbl a
-> Index Scan using tenk1_unique2 on tenk1 b
Index Cond: (unique2 = a.q1)
@@ -4960,9 +4958,8 @@ select 1 from
-> Seq Scan on int8_tbl i8
-> Result
One-Time Filter: false
- -> Materialize
- -> Seq Scan on int4_tbl i42
-(16 rows)
+ -> Seq Scan on int4_tbl i42
+(15 rows)
--
-- test for appropriate join order in the presence of lateral references
@@ -5726,18 +5723,20 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s
explain (costs off)
select d.* from d left join (select distinct * from b) s
on d.a = s.id;
- QUERY PLAN
---------------------------------------
- Merge Right Join
- Merge Cond: (b.id = d.a)
- -> Unique
- -> Sort
- Sort Key: b.id, b.c_id
- -> Seq Scan on b
+ QUERY PLAN
+---------------------------------------------
+ Merge Left Join
+ Merge Cond: (d.a = s.id)
-> Sort
Sort Key: d.a
-> Seq Scan on d
-(9 rows)
+ -> Sort
+ Sort Key: s.id
+ -> Subquery Scan on s
+ -> HashAggregate
+ Group Key: b.id, b.c_id
+ -> Seq Scan on b
+(11 rows)
-- join removal is not possible here
explain (costs off)
@@ -5972,18 +5971,17 @@ FROM int4_tbl
JOIN ((SELECT 42 AS x FROM int8_tbl LEFT JOIN innertab ON q1 = id) AS ss1
RIGHT JOIN tenk1 ON NULL)
ON tenk1.unique1 = ss1.x OR tenk1.unique2 = ss1.x;
- QUERY PLAN
---------------------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------------------
Nested Loop
+ -> Nested Loop Left Join
+ Join Filter: NULL::boolean
+ Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
+ -> Seq Scan on tenk1
+ -> Result
+ One-Time Filter: false
-> Seq Scan on int4_tbl
- -> Materialize
- -> Nested Loop Left Join
- Join Filter: NULL::boolean
- Filter: ((tenk1.unique1 = (42)) OR (tenk1.unique2 = (42)))
- -> Seq Scan on tenk1
- -> Result
- One-Time Filter: false
-(9 rows)
+(8 rows)
rollback;
-- another join removal bug: we must clean up correctly when removing a PHV
@@ -6213,6 +6211,847 @@ select * from
----+----+----+----
(0 rows)
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ QUERY PLAN
+-----------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = (a - 1)))
+(2 rows)
+
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+ a | b | c
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b < 10))
+(2 rows)
+
+select * from sj p where exists (select * from sj q where q.a = p.a and q.b < 10);
+ a | b | c
+---+---+---
+ 2 | 1 | 1
+(1 row)
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.c)
+ -> Seq Scan on sj t2
+ -> Materialize
+ -> Seq Scan on sj t1
+ Filter: (b IS NOT NULL)
+(6 rows)
+
+-- Degenerated case.
+explain (costs off)
+select * from
+ (select a as x from sj where false) as q1,
+ (select a as y from sj where false) as q2
+where q1.x = q2.y;
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.b)
+ -> Seq Scan on sj t1
+ Filter: (a = b)
+ -> Seq Scan on sj t2
+ Filter: (b = a)
+(6 rows)
+
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a
+ and t1.b = t3.b and t3.b = t3.a;
+ QUERY PLAN
+------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t3.b)
+ -> Nested Loop
+ Join Filter: (t1.a = t2.b)
+ -> Seq Scan on sj t1
+ Filter: (a = b)
+ -> Seq Scan on sj t2
+ Filter: (b = a)
+ -> Seq Scan on sj t3
+ Filter: (b = a)
+(10 rows)
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Seq Scan on sj t3
+ Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL))
+(2 rows)
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on sj t2
+ Filter: (a IS NOT NULL)
+ SubPlan 1
+ -> Result
+ One-Time Filter: (t2.a = t2.a)
+ -> Seq Scan on sj
+ Filter: (a = t2.a)
+(7 rows)
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: (y.a = z.q1)
+ -> Seq Scan on sj y
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl z
+(6 rows)
+
+explain (costs off)
+SELECT * FROM (
+ SELECT t1.*, t2.a AS ax FROM sj t1 JOIN sj t2
+ ON (t1.a = t2.a AND t1.c*t1.c = t2.c+2 AND t2.b IS NULL)
+) AS q1
+LEFT JOIN
+ (SELECT t3.* FROM sj t3, sj t4 WHERE t3.c = t4.c) AS q2
+ON q1.ax = q2.a;
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Nested Loop Left Join
+ Join Filter: (t2.a = t4.a)
+ -> Seq Scan on sj t2
+ Filter: ((b IS NULL) AND (a IS NOT NULL) AND ((c * c) = (c + 2)))
+ -> Seq Scan on sj t4
+ Filter: (c IS NOT NULL)
+(6 rows)
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+ QUERY PLAN
+------------------------------------------
+ Nested Loop Left Join
+ -> Result
+ -> Nested Loop Left Join
+ -> Seq Scan on sj j2
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on int8_tbl y
+(7 rows)
+
+-- Check updating of Lateral links from top-level query to the removing relation
+explain (COSTS OFF)
+SELECT * FROM pg_am am WHERE am.amname IN (
+ SELECT c1.relname AS relname
+ FROM pg_class c1
+ JOIN pg_class c2
+ ON c1.oid=c2.oid AND c1.oid < 10
+);
+ QUERY PLAN
+----------------------------------------------------------------
+ Nested Loop Semi Join
+ Join Filter: (am.amname = c2.relname)
+ -> Seq Scan on pg_am am
+ -> Materialize
+ -> Index Scan using pg_class_oid_index on pg_class c2
+ Index Cond: (oid < '10'::oid)
+(6 rows)
+
+--
+-- SJR corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJR for these corner cases?
+--
+INSERT INTO sj VALUES (3, 1, 3);
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: (a = 2)
+ -> Seq Scan on sj j2
+ Filter: (a = 3)
+(6 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; -- Return one row
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 2 | 1 | 1 | 3 | 1 | 3
+(1 row)
+
+explain (costs off) -- Remove SJ, define uniqueness by a constant
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 2))
+(2 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; -- Return one row
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 2 | 1 | 1 | 2 | 1 | 1
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a
+; -- Remove SJ, define uniqueness by a constant expression
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a
+; -- Return one row
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; -- Return no rows
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+explain (costs off) -- Shuffle a clause. Remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+ QUERY PLAN
+-----------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 1))
+(2 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; -- Return no rows
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+ a | b | c
+---+---+---
+(0 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42
+; -- SJs must be removed.
+ QUERY PLAN
+---------------------------------
+ Nested Loop
+ Join Filter: (t1.b = t2.b)
+ -> Seq Scan on sj t2
+ Filter: (a = 42)
+ -> Seq Scan on sj t1
+ Filter: (a IS NOT NULL)
+(6 rows)
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+ QUERY PLAN
+-----------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND ((a * a) = 1))
+(2 rows)
+
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: ((a * a) = 1)
+ -> Seq Scan on sj j2
+ Filter: ((a * a) = 2)
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a)
+; -- Restriction contains expressions in both sides, Remove SJ.
+ QUERY PLAN
+----------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND ((a * a) = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer))
+(2 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a)
+; -- Empty set of rows should be returned
+ a | b | c | a | b | c
+---+---+---+---+---+---
+(0 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.a)
+; -- Restriction contains volatile function - disable SJR feature.
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: ((a * a) = (((random() / '3'::double precision) + '3'::double precision))::integer)
+ -> Seq Scan on sj j2
+ Filter: ((((random() / '3'::double precision) + '3'::double precision))::integer = (a * a))
+(6 rows)
+
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3)
+; -- Return one row
+ a | b | c | a | b | c
+---+---+---+---+---+---
+ 3 | 1 | 3 | 3 | 1 | 3
+(1 row)
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on sj j2
+ Filter: ((b IS NOT NULL) AND (a = 2) AND (c = 3))
+(2 rows)
+
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+ QUERY PLAN
+---------------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: ((2 = a) AND (c = 3))
+ -> Seq Scan on sj j2
+ Filter: ((c = 3) AND (a = 1))
+(6 rows)
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j1
+ Filter: (a = 2)
+ -> Seq Scan on sj j2
+(5 rows)
+
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (j1.b = j2.b)
+ -> Seq Scan on sj j2
+ Filter: (2 = a)
+ -> Seq Scan on sj j1
+(5 rows)
+
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+ QUERY PLAN
+---------------------------------------------------------------
+ Nested Loop
+ Join Filter: ((j1.b = j2.b) AND ((j1.a = 1) OR (j2.a = 1)))
+ -> Seq Scan on sj j1
+ -> Materialize
+ -> Seq Scan on sj j2
+(5 rows)
+
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+explain (costs off)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+ (is_flag IS NULL OR is_flag = 0)
+ AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+ QUERY PLAN
+-----------------------------------------------------------
+ Aggregate
+ -> Bitmap Heap Scan on tab_with_flag
+ Recheck Cond: (id = ANY ('{2,3}'::integer[]))
+ Filter: ((is_flag IS NULL) OR (is_flag = 0))
+ -> Bitmap Index Scan on tab_with_flag_pkey
+ Index Cond: (id = ANY ('{2,3}'::integer[]))
+(6 rows)
+
+DROP TABLE tab_with_flag;
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+ QUERY PLAN
+---------------------------------
+ HashAggregate
+ Group Key: q.b
+ Filter: (sum(q.a) = 1)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+(5 rows)
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b, y.c
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+ QUERY PLAN
+------------------------------------------------------
+ Nested Loop
+ Output: 1
+ -> Seq Scan on public.sj y
+ Output: y.a, y.b, y.c
+ Filter: (y.a IS NOT NULL)
+ -> Function Scan on pg_catalog.generate_series gs
+ Output: gs.i
+ Function Call: generate_series(1, y.a)
+(8 rows)
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+ QUERY PLAN
+------------------------------------
+ Nested Loop Left Join
+ Join Filter: ((q.a + q.a) = r.a)
+ -> Seq Scan on sj q
+ Filter: (a IS NOT NULL)
+ -> Materialize
+ -> Seq Scan on sj r
+(6 rows)
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+ QUERY PLAN
+-----------------------------------------------------
+ Seq Scan on sj q
+ Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1))
+(2 rows)
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+ QUERY PLAN
+-----------------------------------------------------
+ Nested Loop
+ Join Filter: (k1.b = j2.b)
+ -> Nested Loop
+ -> Index Scan using sk_a_idx on sk k1
+ -> Index Only Scan using sk_a_idx on sk k2
+ Index Cond: (a = k1.a)
+ -> Materialize
+ -> Index Scan using sj_a_key on sj j2
+ Index Cond: (a IS NOT NULL)
+(9 rows)
+
+reset join_collapse_limit;
+reset enable_seqscan;
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
+explain (verbose, costs off)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+ QUERY PLAN
+------------------------------------------
+ Seq Scan on public.emp1 e2
+ Output: e2.id, e2.code, e2.id, e2.code
+ Filter: (e2.code <> e2.code)
+(3 rows)
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+ QUERY PLAN
+-----------------------------------------
+ Aggregate
+ -> Seq Scan on emp1 c3
+ Filter: ((id * id) IS NOT NULL)
+(3 rows)
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+ (select coalesce(t3.code, 1) from emp1 t2
+ left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+ on true)
+on true;
+ QUERY PLAN
+---------------------------------------------
+ Nested Loop Left Join
+ -> Seq Scan on emp1 t1
+ -> Materialize
+ -> Nested Loop Left Join
+ -> Seq Scan on emp1 t2
+ -> Materialize
+ -> Seq Scan on emp1 t4
+(7 rows)
+
+-- Check that SJE does not remove self joins if a PHV references the removed
+-- rel laterally.
+explain (costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+ lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+ QUERY PLAN
+---------------------------------------------------
+ Nested Loop Left Join
+ -> Nested Loop
+ -> Seq Scan on emp1 t1
+ -> Index Scan using emp1_pkey on emp1 t2
+ Index Cond: (id = t1.id)
+ -> Function Scan on generate_series t3
+(6 rows)
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different. In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+ QUERY PLAN
+------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.a)
+ -> Seq Scan on sl t1
+ Filter: (b = 1)
+ -> Seq Scan on sl t2
+ Filter: (b = 2)
+(6 rows)
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (t1.a = t2.a)
+ -> Seq Scan on sl t1
+ Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 1))
+ -> Seq Scan on sl t2
+ Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 2))
+(6 rows)
+
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Seq Scan on public.sl t2
+ Output: t2.a, t2.b, t2.c, t2.a, t2.b, t2.c
+ Filter: ((t2.c IS NOT NULL) AND (t2.b IS NOT NULL) AND (t2.a IS NOT NULL) AND (t2.a = 3))
+(3 rows)
+
+-- Join qual isn't mergejoinable, but inner is unique.
+explain (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (n1.a <> n2.a)
+ -> Seq Scan on sj n2
+ Filter: (a = 1)
+ -> Seq Scan on sj n1
+(5 rows)
+
+explain (COSTS OFF)
+SELECT * FROM
+ (SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+ QUERY PLAN
+-------------------------------
+ Nested Loop
+ Join Filter: (n1.a <> n2.a)
+ -> Nested Loop
+ -> Seq Scan on sl
+ -> Seq Scan on sj n2
+ Filter: (a = 1)
+ -> Seq Scan on sj n1
+(7 rows)
+
+--
+---- Only one side is unqiue
+--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+--
+---- Several uniques indexes match, and we select a different one
+---- for each side, so the join is not removed
+--create table sm(a int unique, b int unique, c int unique);
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.b and m.c = n.c;
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.c and m.b = n.b;
+--explain (costs off)
+--select * from sm m, sm n where m.c = n.b and m.a = n.a;
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop
+ Join Filter: (sj_t3.id = sj_t1.id)
+ -> Nested Loop
+ Join Filter: (sj_t2.id = sj_t3.id)
+ -> Nested Loop Semi Join
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: sj_t3.id
+ -> Nested Loop
+ -> Seq Scan on sj_t4
+ -> Materialize
+ -> Bitmap Heap Scan on sj_t3
+ Recheck Cond: (a = 1)
+ -> Bitmap Index Scan on sj_t3_a_id_idx
+ Index Cond: (a = 1)
+ -> Index Only Scan using sj_t2_id_idx on sj_t2 sj_t2_1
+ Index Cond: (id = sj_t3.id)
+ -> Nested Loop
+ -> Index Only Scan using sj_t3_a_id_idx on sj_t3 sj_t3_1
+ Index Cond: ((a = 1) AND (id = sj_t3.id))
+ -> Seq Scan on sj_t4 sj_t4_1
+ -> Index Only Scan using sj_t2_id_idx on sj_t2
+ Index Cond: (id = sj_t2_1.id)
+ -> Seq Scan on sj_t1
+(24 rows)
+
+--
+-- Test RowMarks-related code
+--
+-- TODO: Why this select returns two copies of ctid field? Should we fix it?
+EXPLAIN (COSTS OFF) -- Both sides have explicit LockRows marks
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+ QUERY PLAN
+---------------------------------
+ LockRows
+ -> Seq Scan on sj a2
+ Filter: (a IS NOT NULL)
+(3 rows)
+
+EXPLAIN (COSTS OFF) -- A RowMark exists for the table being kept
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+ QUERY PLAN
+---------------------------------
+ Update on sj sq
+ -> Seq Scan on sj sz
+ Filter: (a IS NOT NULL)
+(3 rows)
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+ DO INSTEAD
+ UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj; -- A RowMark exists for the table being dropped
+ QUERY PLAN
+---------------------------------
+ Update on sj
+ -> Seq Scan on sj
+ Filter: (a IS NOT NULL)
+(3 rows)
+
+DROP RULE sj_del_rule ON sj CASCADE;
+reset enable_hashjoin;
+reset enable_mergejoin;
--
-- Test hints given on incorrect column references are useful
--
@@ -6651,15 +7490,15 @@ select * from
lateral (values(x.q1,y.q1,y.q2)) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
- 123 | 456 | | | 123 | |
- 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
- 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
- 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
- 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
+ 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
+ 123 | 456 | | | 123 | |
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
@@ -6668,15 +7507,15 @@ select * from
lateral (select x.q1,y.q1,y.q2) v(xq1,yq1,yq2);
q1 | q2 | q1 | q2 | xq1 | yq1 | yq2
------------------+-------------------+------------------+-------------------+------------------+------------------+-------------------
- 123 | 456 | | | 123 | |
- 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
- 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
- 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 123 | 123 | 456 | 4567890123456789 | 123 | 456
- 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
- 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 123 | 123 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789
4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789 | 123
+ 123 | 4567890123456789 | 4567890123456789 | 123 | 123 | 4567890123456789 | 123
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | 4567890123456789 | 123 | 4567890123456789 | 4567890123456789
+ 4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 4567890123456789 | 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789 | 4567890123456789 | -4567890123456789 | 123 | 4567890123456789 | -4567890123456789
+ 123 | 456 | | | 123 | |
4567890123456789 | -4567890123456789 | | | 4567890123456789 | |
(10 rows)
@@ -6703,24 +7542,24 @@ select v.* from
lateral (select x.q1,y.q1 union all select x.q2,y.q2) v(vx,vy);
vx | vy
-------------------+-------------------
- 123 |
- 456 |
- 123 | 4567890123456789
- 4567890123456789 | -4567890123456789
+ 4567890123456789 | 123
+ 123 | 456
+ 4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 4567890123456789
- 123 | 4567890123456789
- 4567890123456789 | 123
4567890123456789 | 123
123 | 4567890123456789
4567890123456789 | 123
- 123 | 456
4567890123456789 | 4567890123456789
- 4567890123456789 | -4567890123456789
4567890123456789 | 4567890123456789
+ 123 | 4567890123456789
4567890123456789 | 4567890123456789
4567890123456789 | 4567890123456789
- 4567890123456789 | 123
+ 4567890123456789 | -4567890123456789
+ 123 | 4567890123456789
+ 4567890123456789 | -4567890123456789
+ 123 |
+ 456 |
4567890123456789 |
-4567890123456789 |
(20 rows)
@@ -7047,15 +7886,15 @@ select * from
Hash Cond: (d.q1 = c.q2)
-> Nested Loop
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint)), d.q1, (COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2))
- -> Hash Left Join
+ -> Hash Right Join
Output: a.q1, a.q2, b.q1, (COALESCE(b.q2, '42'::bigint))
- Hash Cond: (a.q2 = b.q1)
- -> Seq Scan on public.int8_tbl a
- Output: a.q1, a.q2
+ Hash Cond: (b.q1 = a.q2)
+ -> Seq Scan on public.int8_tbl b
+ Output: b.q1, COALESCE(b.q2, '42'::bigint)
-> Hash
- Output: b.q1, (COALESCE(b.q2, '42'::bigint))
- -> Seq Scan on public.int8_tbl b
- Output: b.q1, COALESCE(b.q2, '42'::bigint)
+ Output: a.q1, a.q2
+ -> Seq Scan on public.int8_tbl a
+ Output: a.q1, a.q2
-> Seq Scan on public.int8_tbl d
Output: d.q1, COALESCE((COALESCE(b.q2, '42'::bigint)), d.q2)
-> Hash
@@ -7703,44 +8542,39 @@ select * from j1 natural join j2;
explain (verbose, costs off)
select * from j1
inner join (select distinct id from j3) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------------
+ QUERY PLAN
+-----------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> Unique
+ -> HashAggregate
Output: j3.id
- -> Sort
+ Group Key: j3.id
+ -> Seq Scan on public.j3
Output: j3.id
- Sort Key: j3.id
- -> Seq Scan on public.j3
- Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(13 rows)
+(11 rows)
-- ensure group by clause allows the inner to become unique
explain (verbose, costs off)
select * from j1
inner join (select id from j3 group by id) j3 on j1.id = j3.id;
- QUERY PLAN
------------------------------------------
+ QUERY PLAN
+-----------------------------------
Nested Loop
Output: j1.id, j3.id
Inner Unique: true
Join Filter: (j1.id = j3.id)
- -> Group
+ -> HashAggregate
Output: j3.id
Group Key: j3.id
- -> Sort
+ -> Seq Scan on public.j3
Output: j3.id
- Sort Key: j3.id
- -> Seq Scan on public.j3
- Output: j3.id
-> Seq Scan on public.j1
Output: j1.id
-(14 rows)
+(11 rows)
drop table j1;
drop table j2;
@@ -8024,15 +8858,13 @@ EXPLAIN (COSTS OFF)
SELECT 1 FROM group_tbl t1
LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE
GROUP BY s.c1, s.c2;
- QUERY PLAN
---------------------------------------------
- Group
+ QUERY PLAN
+--------------------------------------
+ HashAggregate
Group Key: t2.a, (COALESCE(t2.a))
- -> Sort
- Sort Key: t2.a, (COALESCE(t2.a))
- -> Nested Loop Left Join
- -> Seq Scan on group_tbl t1
- -> Seq Scan on group_tbl t2
-(7 rows)
+ -> Nested Loop Left Join
+ -> Seq Scan on group_tbl t1
+ -> Seq Scan on group_tbl t2
+(5 rows)
DROP TABLE group_tbl;
diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
index f9ad47f978f..461c371ae9f 100644
--- a/src/test/regress/expected/merge.out
+++ b/src/test/regress/expected/merge.out
@@ -1792,18 +1792,15 @@ WHEN MATCHED AND t.a < 10 THEN
explain_merge
--------------------------------------------------------------------
Merge on ex_mtarget t (actual rows=0 loops=1)
- -> Merge Join (actual rows=0 loops=1)
- Merge Cond: (t.a = s.a)
- -> Sort (actual rows=0 loops=1)
- Sort Key: t.a
- Sort Method: quicksort Memory: xxx
+ -> Hash Join (actual rows=0 loops=1)
+ Hash Cond: (s.a = t.a)
+ -> Seq Scan on ex_msource s (actual rows=1 loops=1)
+ -> Hash (actual rows=0 loops=1)
+ Buckets: xxx Batches: xxx Memory Usage: xxx
-> Seq Scan on ex_mtarget t (actual rows=0 loops=1)
Filter: (a < '-1000'::integer)
Rows Removed by Filter: 54
- -> Sort (never executed)
- Sort Key: s.a
- -> Seq Scan on ex_msource s (never executed)
-(12 rows)
+(9 rows)
DROP TABLE ex_msource, ex_mtarget;
DROP FUNCTION explain_merge(text);
@@ -2345,13 +2342,13 @@ MERGE INTO pa_target t
merge_action | logts | tid | balance | val
--------------+--------------------------+-----+---------+--------------------------
UPDATE | Tue Jan 31 00:00:00 2017 | 1 | 110 | initial updated by merge
- UPDATE | Tue Feb 28 00:00:00 2017 | 2 | 220 | initial updated by merge
- INSERT | Sun Jan 15 00:00:00 2017 | 3 | 30 | inserted by merge
+ UPDATE | Tue Jan 31 00:00:00 2017 | 7 | 770 | initial updated by merge
UPDATE | Tue Jan 31 00:00:00 2017 | 4 | 440 | initial updated by merge
UPDATE | Tue Feb 28 00:00:00 2017 | 5 | 550 | initial updated by merge
- INSERT | Sun Jan 15 00:00:00 2017 | 6 | 60 | inserted by merge
- UPDATE | Tue Jan 31 00:00:00 2017 | 7 | 770 | initial updated by merge
+ UPDATE | Tue Feb 28 00:00:00 2017 | 2 | 220 | initial updated by merge
UPDATE | Tue Feb 28 00:00:00 2017 | 8 | 880 | initial updated by merge
+ INSERT | Sun Jan 15 00:00:00 2017 | 6 | 60 | inserted by merge
+ INSERT | Sun Jan 15 00:00:00 2017 | 3 | 30 | inserted by merge
INSERT | Sun Jan 15 00:00:00 2017 | 9 | 90 | inserted by merge
(9 rows)
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index 5f2c0cf5786..4e3ac9615df 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -728,10 +728,10 @@ EXPLAIN (COSTS OFF)
SELECT a.x, b.y, count(*) FROM (SELECT * FROM pagg_tab1 WHERE x < 20) a LEFT JOIN (SELECT * FROM pagg_tab2 WHERE y > 10) b ON a.x = b.y WHERE a.x > 5 or b.y < 20 GROUP BY a.x, b.y ORDER BY 1, 2;
QUERY PLAN
--------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab1.x, pagg_tab2.y
- -> HashAggregate
- Group Key: pagg_tab1.x, pagg_tab2.y
+ GroupAggregate
+ Group Key: pagg_tab1.x, pagg_tab2.y
+ -> Sort
+ Sort Key: pagg_tab1.x, pagg_tab2.y
-> Hash Left Join
Hash Cond: (pagg_tab1.x = pagg_tab2.y)
Filter: ((pagg_tab1.x > 5) OR (pagg_tab2.y < 20))
@@ -948,12 +948,12 @@ SET parallel_setup_cost = 0;
-- is not partial agg safe.
EXPLAIN (COSTS OFF)
SELECT a, sum(b), array_agg(distinct c), count(*) FROM pagg_tab_ml GROUP BY a HAVING avg(b) < 3 ORDER BY 1, 2, 3;
- QUERY PLAN
---------------------------------------------------------------------------------------
- Sort
- Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
- -> Gather
- Workers Planned: 2
+ QUERY PLAN
+--------------------------------------------------------------------------------------------
+ Gather Merge
+ Workers Planned: 2
+ -> Sort
+ Sort Key: pagg_tab_ml.a, (sum(pagg_tab_ml.b)), (array_agg(DISTINCT pagg_tab_ml.c))
-> Parallel Append
-> GroupAggregate
Group Key: pagg_tab_ml.a
@@ -1380,28 +1380,26 @@ SELECT x, sum(y), avg(y), count(*) FROM pagg_tab_para GROUP BY x HAVING avg(y) <
-- When GROUP BY clause does not match; partial aggregation is performed for each partition.
EXPLAIN (COSTS OFF)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
- QUERY PLAN
--------------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------------
Sort
Sort Key: pagg_tab_para.y, (sum(pagg_tab_para.x)), (avg(pagg_tab_para.x))
- -> Finalize GroupAggregate
+ -> Finalize HashAggregate
Group Key: pagg_tab_para.y
Filter: (avg(pagg_tab_para.x) < '12'::numeric)
- -> Gather Merge
+ -> Gather
Workers Planned: 2
- -> Sort
- Sort Key: pagg_tab_para.y
- -> Parallel Append
- -> Partial HashAggregate
- Group Key: pagg_tab_para.y
- -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
- -> Partial HashAggregate
- Group Key: pagg_tab_para_1.y
- -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
- -> Partial HashAggregate
- Group Key: pagg_tab_para_2.y
- -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
-(19 rows)
+ -> Parallel Append
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para.y
+ -> Parallel Seq Scan on pagg_tab_para_p1 pagg_tab_para
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_1.y
+ -> Parallel Seq Scan on pagg_tab_para_p2 pagg_tab_para_1
+ -> Partial HashAggregate
+ Group Key: pagg_tab_para_2.y
+ -> Parallel Seq Scan on pagg_tab_para_p3 pagg_tab_para_2
+(17 rows)
SELECT y, sum(x), avg(x), count(*) FROM pagg_tab_para GROUP BY y HAVING avg(x) < 12 ORDER BY 1, 2, 3;
y | sum | avg | count
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index b9b41340663..a94e9f2fb03 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -456,34 +456,34 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
(SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.b) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
- QUERY PLAN
---------------------------------------------------------------
+ QUERY PLAN
+--------------------------------------------------------
Sort
Sort Key: t1.a
- -> Hash Left Join
- Hash Cond: ((t1.c)::text = (t2.c)::text)
+ -> Hash Right Join
+ Hash Cond: ((t2.c)::text = (t1.c)::text)
Filter: ((t1.b + COALESCE(t2.b, 0)) = 0)
-> Append
- -> Seq Scan on prt1_p1 t1_1
- -> Seq Scan on prt1_p2 t1_2
- -> Seq Scan on prt1_p3 t1_3
+ -> Hash Join
+ Hash Cond: (t2_1.a = t3_1.b)
+ -> Seq Scan on prt1_p1 t2_1
+ -> Hash
+ -> Seq Scan on prt2_p1 t3_1
+ -> Hash Join
+ Hash Cond: (t2_2.a = t3_2.b)
+ -> Seq Scan on prt1_p2 t2_2
+ -> Hash
+ -> Seq Scan on prt2_p2 t3_2
+ -> Hash Join
+ Hash Cond: (t2_3.a = t3_3.b)
+ -> Seq Scan on prt1_p3 t2_3
+ -> Hash
+ -> Seq Scan on prt2_p3 t3_3
-> Hash
-> Append
- -> Hash Join
- Hash Cond: (t2_1.a = t3_1.b)
- -> Seq Scan on prt1_p1 t2_1
- -> Hash
- -> Seq Scan on prt2_p1 t3_1
- -> Hash Join
- Hash Cond: (t2_2.a = t3_2.b)
- -> Seq Scan on prt1_p2 t2_2
- -> Hash
- -> Seq Scan on prt2_p2 t3_2
- -> Hash Join
- Hash Cond: (t2_3.a = t3_3.b)
- -> Seq Scan on prt1_p3 t2_3
- -> Hash
- -> Seq Scan on prt2_p3 t3_3
+ -> Seq Scan on prt1_p1 t1_1
+ -> Seq Scan on prt1_p2 t1_2
+ -> Seq Scan on prt1_p3 t1_3
(26 rows)
SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
@@ -604,52 +604,41 @@ EXPLAIN (COSTS OFF)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
GROUP BY 1, 2 ORDER BY 1, 2;
- QUERY PLAN
------------------------------------------------------------------------------------------------------------------
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------
Group
Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Merge Append
+ -> Sort
Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Group
- Group Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Sort
- Sort Key: (COALESCE(prt1.a, p2.a)), (COALESCE(prt1.b, p2.b))
- -> Merge Full Join
- Merge Cond: ((prt1.a = p2.a) AND (prt1.b = p2.b))
- Filter: ((COALESCE(prt1.a, p2.a) >= 490) AND (COALESCE(prt1.a, p2.a) <= 510))
- -> Sort
- Sort Key: prt1.a, prt1.b
- -> Seq Scan on prt1_p1 prt1
- -> Sort
- Sort Key: p2.a, p2.b
- -> Seq Scan on prt2_p1 p2
- -> Group
- Group Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
- -> Sort
- Sort Key: (COALESCE(prt1_1.a, p2_1.a)), (COALESCE(prt1_1.b, p2_1.b))
- -> Merge Full Join
- Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
- Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
- -> Sort
- Sort Key: prt1_1.a, prt1_1.b
- -> Seq Scan on prt1_p2 prt1_1
- -> Sort
- Sort Key: p2_1.a, p2_1.b
- -> Seq Scan on prt2_p2 p2_1
- -> Group
- Group Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
- -> Sort
- Sort Key: (COALESCE(prt1_2.a, p2_2.a)), (COALESCE(prt1_2.b, p2_2.b))
- -> Merge Full Join
- Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
- Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
- -> Sort
- Sort Key: prt1_2.a, prt1_2.b
- -> Seq Scan on prt1_p3 prt1_2
- -> Sort
- Sort Key: p2_2.a, p2_2.b
- -> Seq Scan on prt2_p3 p2_2
-(43 rows)
+ -> Append
+ -> Merge Full Join
+ Merge Cond: ((prt1_1.a = p2_1.a) AND (prt1_1.b = p2_1.b))
+ Filter: ((COALESCE(prt1_1.a, p2_1.a) >= 490) AND (COALESCE(prt1_1.a, p2_1.a) <= 510))
+ -> Sort
+ Sort Key: prt1_1.a, prt1_1.b
+ -> Seq Scan on prt1_p1 prt1_1
+ -> Sort
+ Sort Key: p2_1.a, p2_1.b
+ -> Seq Scan on prt2_p1 p2_1
+ -> Merge Full Join
+ Merge Cond: ((prt1_2.a = p2_2.a) AND (prt1_2.b = p2_2.b))
+ Filter: ((COALESCE(prt1_2.a, p2_2.a) >= 490) AND (COALESCE(prt1_2.a, p2_2.a) <= 510))
+ -> Sort
+ Sort Key: prt1_2.a, prt1_2.b
+ -> Seq Scan on prt1_p2 prt1_2
+ -> Sort
+ Sort Key: p2_2.a, p2_2.b
+ -> Seq Scan on prt2_p2 p2_2
+ -> Merge Full Join
+ Merge Cond: ((prt1_3.a = p2_3.a) AND (prt1_3.b = p2_3.b))
+ Filter: ((COALESCE(prt1_3.a, p2_3.a) >= 490) AND (COALESCE(prt1_3.a, p2_3.a) <= 510))
+ -> Sort
+ Sort Key: prt1_3.a, prt1_3.b
+ -> Seq Scan on prt1_p3 prt1_3
+ -> Sort
+ Sort Key: p2_3.a, p2_3.b
+ -> Seq Scan on prt2_p3 p2_3
+(32 rows)
SELECT a, b FROM prt1 FULL JOIN prt2 p2(b,a,c) USING(a,b)
WHERE a BETWEEN 490 AND 510
@@ -1149,8 +1138,8 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1, prt1_e t2 WHER
EXPLAIN (COSTS OFF)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
- QUERY PLAN
----------------------------------------------------------------------------
+ QUERY PLAN
+---------------------------------------------------------------------------------
Sort
Sort Key: t1.a
-> Append
@@ -1179,18 +1168,19 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (
Index Cond: (a = t1_7.b)
Filter: (b = 0)
-> Nested Loop
- -> HashAggregate
- Group Key: t1_8.b
- -> Hash Semi Join
- Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
- -> Seq Scan on prt2_p3 t1_8
- -> Hash
- -> Seq Scan on prt1_e_p3 t1_11
- Filter: (c = 0)
+ -> Unique
+ -> Sort
+ Sort Key: t1_8.b
+ -> Hash Semi Join
+ Hash Cond: (t1_8.b = ((t1_11.a + t1_11.b) / 2))
+ -> Seq Scan on prt2_p3 t1_8
+ -> Hash
+ -> Seq Scan on prt1_e_p3 t1_11
+ Filter: (c = 0)
-> Index Scan using iprt1_p3_a on prt1_p3 t1_5
Index Cond: (a = t1_8.b)
Filter: (b = 0)
-(39 rows)
+(40 rows)
SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (SELECT (t1.a + t1.b)/2 FROM prt1_e t1 WHERE t1.c = 0)) AND t1.b = 0 ORDER BY t1.a;
a | b | c
@@ -1268,60 +1258,54 @@ SELECT t1.* FROM prt1 t1 WHERE t1.a IN (SELECT t1.b FROM prt2 t1 WHERE t1.b IN (
EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
- QUERY PLAN
-----------------------------------------------------------------------------
+ QUERY PLAN
+----------------------------------------------------------------
Sort
Sort Key: t1.a, t2.b, ((t3.a + t3.b))
-> Append
- -> Merge Left Join
- Merge Cond: (t1_1.a = t2_1.b)
- -> Sort
- Sort Key: t1_1.a
- -> Merge Left Join
- Merge Cond: ((((t3_1.a + t3_1.b) / 2)) = t1_1.a)
- -> Sort
- Sort Key: (((t3_1.a + t3_1.b) / 2))
- -> Seq Scan on prt1_e_p1 t3_1
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_1.a
- -> Seq Scan on prt1_p1 t1_1
- -> Sort
- Sort Key: t2_1.b
- -> Seq Scan on prt2_p1 t2_1
- -> Merge Left Join
- Merge Cond: (t1_2.a = t2_2.b)
- -> Sort
- Sort Key: t1_2.a
- -> Merge Left Join
- Merge Cond: ((((t3_2.a + t3_2.b) / 2)) = t1_2.a)
- -> Sort
- Sort Key: (((t3_2.a + t3_2.b) / 2))
- -> Seq Scan on prt1_e_p2 t3_2
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_2.a
- -> Seq Scan on prt1_p2 t1_2
+ -> Merge Right Join
+ Merge Cond: (t1_1.a = (((t3_1.a + t3_1.b) / 2)))
+ -> Merge Left Join
+ Merge Cond: (t1_1.a = t2_1.b)
+ -> Sort
+ Sort Key: t1_1.a
+ -> Seq Scan on prt1_p1 t1_1
+ -> Sort
+ Sort Key: t2_1.b
+ -> Seq Scan on prt2_p1 t2_1
-> Sort
- Sort Key: t2_2.b
- -> Seq Scan on prt2_p2 t2_2
- -> Merge Left Join
- Merge Cond: (t1_3.a = t2_3.b)
+ Sort Key: (((t3_1.a + t3_1.b) / 2))
+ -> Seq Scan on prt1_e_p1 t3_1
+ Filter: (c = 0)
+ -> Merge Right Join
+ Merge Cond: (t1_2.a = (((t3_2.a + t3_2.b) / 2)))
+ -> Merge Left Join
+ Merge Cond: (t1_2.a = t2_2.b)
+ -> Sort
+ Sort Key: t1_2.a
+ -> Seq Scan on prt1_p2 t1_2
+ -> Sort
+ Sort Key: t2_2.b
+ -> Seq Scan on prt2_p2 t2_2
-> Sort
- Sort Key: t1_3.a
- -> Merge Left Join
- Merge Cond: ((((t3_3.a + t3_3.b) / 2)) = t1_3.a)
- -> Sort
- Sort Key: (((t3_3.a + t3_3.b) / 2))
- -> Seq Scan on prt1_e_p3 t3_3
- Filter: (c = 0)
- -> Sort
- Sort Key: t1_3.a
- -> Seq Scan on prt1_p3 t1_3
+ Sort Key: (((t3_2.a + t3_2.b) / 2))
+ -> Seq Scan on prt1_e_p2 t3_2
+ Filter: (c = 0)
+ -> Merge Right Join
+ Merge Cond: (t1_3.a = (((t3_3.a + t3_3.b) / 2)))
+ -> Merge Left Join
+ Merge Cond: (t1_3.a = t2_3.b)
+ -> Sort
+ Sort Key: t1_3.a
+ -> Seq Scan on prt1_p3 t1_3
+ -> Sort
+ Sort Key: t2_3.b
+ -> Seq Scan on prt2_p3 t2_3
-> Sort
- Sort Key: t2_3.b
- -> Seq Scan on prt2_p3 t2_3
-(51 rows)
+ Sort Key: (((t3_3.a + t3_3.b) / 2))
+ -> Seq Scan on prt1_e_p3 t3_3
+ Filter: (c = 0)
+(45 rows)
SELECT t1.a, t1.c, t2.b, t2.c, t3.a + t3.b, t3.c FROM (prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b) RIGHT JOIN prt1_e t3 ON (t1.a = (t3.a + t3.b)/2) WHERE t3.c = 0 ORDER BY t1.a, t2.b, t3.a + t3.b;
a | c | b | c | ?column? | c
@@ -2246,29 +2230,24 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt4_n t2, prt2 t3 WHERE t1.a = t2.a
QUERY PLAN
--------------------------------------------------------
Hash Join
- Hash Cond: (t2.a = t1.a)
+ Hash Cond: (t1.a = t2.a)
-> Append
- -> Seq Scan on prt4_n_p1 t2_1
- -> Seq Scan on prt4_n_p2 t2_2
- -> Seq Scan on prt4_n_p3 t2_3
+ -> Seq Scan on prt1_p1 t1_1
+ -> Seq Scan on prt1_p2 t1_2
+ -> Seq Scan on prt1_p3 t1_3
-> Hash
- -> Append
- -> Hash Join
- Hash Cond: (t1_1.a = t3_1.b)
- -> Seq Scan on prt1_p1 t1_1
- -> Hash
+ -> Hash Join
+ Hash Cond: (t2.a = t3.b)
+ -> Append
+ -> Seq Scan on prt4_n_p1 t2_1
+ -> Seq Scan on prt4_n_p2 t2_2
+ -> Seq Scan on prt4_n_p3 t2_3
+ -> Hash
+ -> Append
-> Seq Scan on prt2_p1 t3_1
- -> Hash Join
- Hash Cond: (t1_2.a = t3_2.b)
- -> Seq Scan on prt1_p2 t1_2
- -> Hash
-> Seq Scan on prt2_p2 t3_2
- -> Hash Join
- Hash Cond: (t1_3.a = t3_3.b)
- -> Seq Scan on prt1_p3 t1_3
- -> Hash
-> Seq Scan on prt2_p3 t3_3
-(23 rows)
+(18 rows)
-- partitionwise join can not be applied if there are no equi-join conditions
-- between partition keys
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index 33a6dceb0e3..e39d4594787 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -521,6 +521,124 @@ SELECT * FROM nocols n, LATERAL (VALUES(n.*)) v;
--
(1 row)
+--
+-- test order by NULLS (FIRST|LAST)
+--
+select unique1, unique2 into onek_with_null from onek;
+insert into onek_with_null (unique1,unique2) values (NULL, -1), (NULL, NULL);
+select * from onek_with_null order by unique1 nulls first , unique2 limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls first , unique2 nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls first , unique2 nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 0 | 998
+(3 rows)
+
+select * from onek_with_null order by unique1 nulls last , unique2 nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ 0 | 998
+ 1 | 214
+ 2 | 326
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ |
+ | -1
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls first limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ | -1
+ |
+ 999 | 152
+(3 rows)
+
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls last limit 3;
+ unique1 | unique2
+---------+---------
+ 999 | 152
+ 998 | 549
+ 997 | 21
+(3 rows)
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 nulls first , u2 nulls first limit 3;
+ u1 | u2
+----+-----
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 asc nulls first , u2 desc nulls first limit 3;
+ u1 | u2
+----+-----
+ |
+ | -1
+ 0 | 998
+(3 rows)
+
+drop table onek_with_null;
--
-- Test ORDER BY options
--
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index 82b8e54f5f1..a43031640ae 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -355,15 +355,14 @@ SET max_parallel_workers_per_gather=2;
-- distinct
EXPLAIN (COSTS OFF)
SELECT DISTINCT four FROM tenk1 WHERE four = 10;
- QUERY PLAN
-----------------------------------------------
- Limit
+ QUERY PLAN
+----------------------------------------
+ HashAggregate
-> Gather
Workers Planned: 2
- -> Limit
- -> Parallel Seq Scan on tenk1
- Filter: (four = 10)
-(6 rows)
+ -> Parallel Seq Scan on tenk1
+ Filter: (four = 10)
+(5 rows)
RESET max_parallel_workers_per_gather;
RESET min_parallel_table_scan_size;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 43539dfe27f..76d43d0a1c6 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -4,6 +4,7 @@
-- with autovacuum_enabled = off, so that we don't have unstable results
-- from auto-analyze happening when we didn't expect it.
--
+set default_statistics_target=10000; --prevent random subset for joinsel
-- check the number of estimated/actual rows in the top node
create function check_estimated_rows(text) returns table (estimated int, actual int)
language plpgsql as
@@ -1113,43 +1114,43 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b = ''1''');
estimated | actual
-----------+--------
- 2 | 100
+ 99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 51) AND b IN (''1'', ''2'')');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b IN (''1'', ''2'')');
estimated | actual
-----------+--------
- 8 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 51, 52) AND b = ''1''');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c = 1');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 26, 51, 76) AND b IN (''1'', ''26'') AND c IN (1)');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)');
estimated | actual
-----------+--------
- 3 | 400
+ 386 | 400
(1 row)
-- OR clauses referencing the same attribute
@@ -1182,37 +1183,37 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');
estimated | actual
-----------+--------
- 2 | 100
+ 99 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
- 4 | 100
+ 100 | 100
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 51, 52]) AND b = ANY (ARRAY[''1'', ''2''])');
estimated | actual
-----------+--------
- 8 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = 1');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 26, 51, 76]) AND b = ANY (ARRAY[''1'', ''26'']) AND c = ANY (ARRAY[1])');
estimated | actual
-----------+--------
- 1 | 200
+ 197 | 200
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 2, 26, 27, 51, 52, 76, 77]) AND b = ANY (ARRAY[''1'', ''2'', ''26'', ''27'']) AND c = ANY (ARRAY[1, 2])');
estimated | actual
-----------+--------
- 3 | 400
+ 386 | 400
(1 row)
-- ANY with inequalities should not benefit from functional dependencies
@@ -1878,7 +1879,7 @@ ANALYZE mcv_lists;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1''');
estimated | actual
-----------+--------
- 3 | 4
+ 4 | 4
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 AND b = ''1'' AND c = 1');
@@ -2815,7 +2816,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
estimated | actual
-----------+--------
- 96 | 102
+ 102 | 102
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
@@ -2839,7 +2840,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
estimated | actual
-----------+--------
- 102 | 104
+ 104 | 104
(1 row)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
@@ -2851,7 +2852,7 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
estimated | actual
-----------+--------
- 108 | 102
+ 102 | 102
(1 row)
DROP TABLE mcv_lists_partial;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 2176a54bca1..b8da8d8f4f0 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -153,10 +153,12 @@ select name, setting from pg_settings where name like 'enable%';
enable_partitionwise_aggregate | off
enable_partitionwise_join | off
enable_presorted_aggregate | on
+ enable_self_join_removal | on
enable_seqscan | on
enable_sort | on
+ enable_temp_memory_catalog | off
enable_tidscan | on
-(22 rows)
+(24 rows)
-- There are always wait event descriptions for various types. InjectionPoint
-- may be present or absent, depending on history since last postmaster start.
diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out
index 88d8f6c32d6..2bdf7d4bc69 100644
--- a/src/test/regress/expected/type_sanity.out
+++ b/src/test/regress/expected/type_sanity.out
@@ -140,10 +140,12 @@ WHERE t1.typinput = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(t1.typelem != 0 AND t1.typlen < 0) AND NOT
(p1.prorettype = t1.oid AND NOT p1.proretset)
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+-----+---------
- 1790 | refcursor | 46 | textin
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+--------------
+ 1790 | refcursor | 46 | textin
+ 14756 | abstime | 1312 | timestamp_in
+ 14757 | reltime | 1312 | timestamp_in
+(3 rows)
-- Varlena array types will point to array_in
-- Exception as of 8.1: int2vector and oidvector have their own I/O routines
@@ -192,10 +194,12 @@ WHERE t1.typoutput = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(p1.oid = 'array_out'::regproc AND
t1.typelem != 0 AND t1.typlen = -1)))
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+-----+---------
- 1790 | refcursor | 47 | textout
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+---------------
+ 1790 | refcursor | 47 | textout
+ 14756 | abstime | 1313 | timestamp_out
+ 14757 | reltime | 1313 | timestamp_out
+(3 rows)
SELECT t1.oid, t1.typname, p1.oid, p1.proname
FROM pg_type AS t1, pg_proc AS p1
@@ -256,10 +260,12 @@ WHERE t1.typreceive = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(t1.typelem != 0 AND t1.typlen < 0) AND NOT
(p1.prorettype = t1.oid AND NOT p1.proretset)
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+------+----------
- 1790 | refcursor | 2414 | textrecv
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+----------------
+ 1790 | refcursor | 2414 | textrecv
+ 14756 | abstime | 2474 | timestamp_recv
+ 14757 | reltime | 2474 | timestamp_recv
+(3 rows)
-- Varlena array types will point to array_recv
-- Exception as of 8.1: int2vector and oidvector have their own I/O routines
@@ -317,10 +323,12 @@ WHERE t1.typsend = p1.oid AND t1.typtype in ('b', 'p') AND NOT
(p1.oid = 'array_send'::regproc AND
t1.typelem != 0 AND t1.typlen = -1)))
ORDER BY 1;
- oid | typname | oid | proname
-------+-----------+------+----------
- 1790 | refcursor | 2415 | textsend
-(1 row)
+ oid | typname | oid | proname
+-------+-----------+------+----------------
+ 1790 | refcursor | 2415 | textsend
+ 14756 | abstime | 2475 | timestamp_send
+ 14757 | reltime | 2475 | timestamp_send
+(3 rows)
SELECT t1.oid, t1.typname, p1.oid, p1.proname
FROM pg_type AS t1, pg_proc AS p1
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 0fd0e1c38b3..73d8a3d03ad 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -1224,18 +1224,17 @@ SELECT * FROM
SELECT 2 AS t, 4 AS x) ss
WHERE x < 4
ORDER BY x;
- QUERY PLAN
---------------------------------------------------
+ QUERY PLAN
+--------------------------------------------
Sort
Sort Key: (2)
- -> Unique
- -> Sort
- Sort Key: (1), (2)
- -> Append
- -> Result
- -> Result
- One-Time Filter: false
-(9 rows)
+ -> HashAggregate
+ Group Key: (1), (2)
+ -> Append
+ -> Result
+ -> Result
+ One-Time Filter: false
+(8 rows)
SELECT * FROM
(SELECT 1 AS t, 2 AS x
@@ -1289,19 +1288,18 @@ SELECT * FROM
SELECT 2 AS t, 4 AS x) ss
WHERE x > 3
ORDER BY x;
- QUERY PLAN
-------------------------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Sort
Sort Key: ss.x
-> Subquery Scan on ss
Filter: (ss.x > 3)
- -> Unique
- -> Sort
- Sort Key: (1), (((random() * '3'::double precision))::integer)
- -> Append
- -> Result
- -> Result
-(10 rows)
+ -> HashAggregate
+ Group Key: (1), (((random() * '3'::double precision))::integer)
+ -> Append
+ -> Result
+ -> Result
+(9 rows)
SELECT * FROM
(SELECT 1 AS t, (random()*3)::int AS x
@@ -1322,24 +1320,22 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where q2 = q2;
- QUERY PLAN
-----------------------------------------------------------
- Unique
- -> Merge Append
- Sort Key: "*SELECT* 1".q1
+ QUERY PLAN
+----------------------------------------------------
+ HashAggregate
+ Group Key: "*SELECT* 1".q1
+ -> Append
-> Subquery Scan on "*SELECT* 1"
- -> Unique
- -> Sort
- Sort Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: (q2 IS NOT NULL)
+ -> HashAggregate
+ Group Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: (q2 IS NOT NULL)
-> Subquery Scan on "*SELECT* 2"
- -> Unique
- -> Sort
- Sort Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: (q2 IS NOT NULL)
-(15 rows)
+ -> HashAggregate
+ Group Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: (q2 IS NOT NULL)
+(13 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
@@ -1358,24 +1354,22 @@ select distinct q1 from
union all
select distinct * from int8_tbl i82) ss
where -q1 = q2;
- QUERY PLAN
---------------------------------------------------------
- Unique
- -> Merge Append
- Sort Key: "*SELECT* 1".q1
+ QUERY PLAN
+--------------------------------------------------
+ HashAggregate
+ Group Key: "*SELECT* 1".q1
+ -> Append
-> Subquery Scan on "*SELECT* 1"
- -> Unique
- -> Sort
- Sort Key: i81.q1, i81.q2
- -> Seq Scan on int8_tbl i81
- Filter: ((- q1) = q2)
+ -> HashAggregate
+ Group Key: i81.q1, i81.q2
+ -> Seq Scan on int8_tbl i81
+ Filter: ((- q1) = q2)
-> Subquery Scan on "*SELECT* 2"
- -> Unique
- -> Sort
- Sort Key: i82.q1, i82.q2
- -> Seq Scan on int8_tbl i82
- Filter: ((- q1) = q2)
-(15 rows)
+ -> HashAggregate
+ Group Key: i82.q1, i82.q2
+ -> Seq Scan on int8_tbl i82
+ Filter: ((- q1) = q2)
+(13 rows)
select distinct q1 from
(select distinct * from int8_tbl i81
@@ -1460,14 +1454,14 @@ select t1.unique1 from tenk1 t1
inner join tenk2 t2 on t1.tenthous = t2.tenthous and t2.thousand = 0
union all
(values(1)) limit 1;
- QUERY PLAN
---------------------------------------------------------
+ QUERY PLAN
+------------------------------------------------------
Limit
-> Append
- -> Nested Loop
- Join Filter: (t1.tenthous = t2.tenthous)
+ -> Hash Join
+ Hash Cond: (t1.tenthous = t2.tenthous)
-> Seq Scan on tenk1 t1
- -> Materialize
+ -> Hash
-> Seq Scan on tenk2 t2
Filter: (thousand = 0)
-> Result
diff --git a/src/test/regress/expected/updatable_views.out b/src/test/regress/expected/updatable_views.out
index 442b55120c8..1d0e3d1b2d1 100644
--- a/src/test/regress/expected/updatable_views.out
+++ b/src/test/regress/expected/updatable_views.out
@@ -489,8 +489,8 @@ MERGE INTO rw_view1 t
merge_action | a | b | a | b
--------------+---+-------+---+-------------
UPDATE | 1 | ROW 1 | 1 | ROW 1
- DELETE | 3 | ROW 3 | 3 | Row 3
INSERT | 2 | ROW 2 | 2 | Unspecified
+ DELETE | 3 | ROW 3 | 3 | Row 3
(3 rows)
SELECT * FROM base_tbl ORDER BY a;
@@ -560,18 +560,14 @@ EXPLAIN (costs off)
MERGE INTO rw_view1 t
USING (SELECT * FROM generate_series(1,5)) AS s(a) ON t.a = s.a
WHEN MATCHED THEN UPDATE SET b = 'Updated';
- QUERY PLAN
--------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Merge on base_tbl
- -> Hash Join
- Hash Cond: (base_tbl.a = generate_series.generate_series)
- -> Bitmap Heap Scan on base_tbl
- Recheck Cond: (a > 0)
- -> Bitmap Index Scan on base_tbl_pkey
- Index Cond: (a > 0)
- -> Hash
- -> Function Scan on generate_series
-(9 rows)
+ -> Nested Loop
+ -> Function Scan on generate_series
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: ((a = generate_series.generate_series) AND (a > 0))
+(5 rows)
EXPLAIN (costs off)
MERGE INTO rw_view1 t
@@ -594,18 +590,14 @@ EXPLAIN (costs off)
MERGE INTO rw_view1 t
USING (SELECT * FROM generate_series(1,5)) AS s(a) ON t.a = s.a
WHEN NOT MATCHED THEN INSERT (a) VALUES (s.a);
- QUERY PLAN
--------------------------------------------------------------------
+ QUERY PLAN
+-------------------------------------------------------------------------------
Merge on base_tbl
- -> Hash Right Join
- Hash Cond: (base_tbl.a = generate_series.generate_series)
- -> Bitmap Heap Scan on base_tbl
- Recheck Cond: (a > 0)
- -> Bitmap Index Scan on base_tbl_pkey
- Index Cond: (a > 0)
- -> Hash
- -> Function Scan on generate_series
-(9 rows)
+ -> Nested Loop Left Join
+ -> Function Scan on generate_series
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: ((a = generate_series.generate_series) AND (a > 0))
+(5 rows)
-- it's still updatable if we add a DO ALSO rule
CREATE TABLE base_tbl_hist(ts timestamptz default now(), a int, b text);
@@ -3119,16 +3111,13 @@ SELECT * FROM rw_view1;
(1 row)
EXPLAIN (costs off) DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
- QUERY PLAN
--------------------------------------------------------------------
- Update on base_tbl base_tbl_1
- -> Nested Loop
- -> Index Scan using base_tbl_pkey on base_tbl base_tbl_1
- Index Cond: (id = 1)
- -> Index Scan using base_tbl_pkey on base_tbl
- Index Cond: (id = 1)
- Filter: ((NOT deleted) AND snoop(data))
-(7 rows)
+ QUERY PLAN
+--------------------------------------------------
+ Update on base_tbl
+ -> Index Scan using base_tbl_pkey on base_tbl
+ Index Cond: (id = 1)
+ Filter: ((NOT deleted) AND snoop(data))
+(4 rows)
DELETE FROM rw_view1 WHERE id = 1 AND snoop(data);
NOTICE: snooped value: Row 1
diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out
index 23d1463df22..d8eac6ea97d 100644
--- a/src/test/regress/expected/window.out
+++ b/src/test/regress/expected/window.out
@@ -4207,15 +4207,12 @@ WHERE s.c = 1;
Run Condition: (ntile(e2.salary) OVER (?) <= 1)
-> Sort
Sort Key: e1.depname
- -> Merge Join
- Merge Cond: (e1.empno = e2.empno)
- -> Sort
- Sort Key: e1.empno
- -> Seq Scan on empsalary e1
- -> Sort
- Sort Key: e2.empno
+ -> Hash Join
+ Hash Cond: (e1.empno = e2.empno)
+ -> Seq Scan on empsalary e1
+ -> Hash
-> Seq Scan on empsalary e2
-(14 rows)
+(11 rows)
-- Ensure the run condition optimization is used in cases where the WindowFunc
-- has a Var from another query level
diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql
index 247b0a31055..77dd964ebf2 100644
--- a/src/test/regress/sql/equivclass.sql
+++ b/src/test/regress/sql/equivclass.sql
@@ -259,6 +259,22 @@ drop user regress_user_ectest;
explain (costs off)
select * from tenk1 where unique1 = unique1 and unique2 = unique2;
+-- Test that broken ECs are processed correctly during self join removal.
+-- Disable merge joins so that we don't get an error about missing commutator.
+-- Test both orientations of the join clause, because only one of them breaks
+-- the EC.
+set enable_mergejoin to off;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on m.ff + n.ff = p.f1;
+
+explain (costs off)
+ select * from ec0 m join ec0 n on m.ff = n.ff
+ join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1;
+
+reset enable_mergejoin;
+
-- this could be converted, but isn't at present
explain (costs off)
select * from tenk1 where unique1 = unique1 or unique2 = unique2;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index ab471bdfffc..70f311a6f23 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -208,7 +208,7 @@ set parallel_tuple_cost = 0;
set max_parallel_workers_per_gather = 2;
create table t (a int, b int, c int);
-insert into t select mod(i,10),mod(i,10),i from generate_series(1,10000) s(i);
+insert into t select mod(i,10),mod(i,10),i from generate_series(1,60000) s(i);
create index on t (a);
analyze t;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index dcc94c0715d..49225143b1a 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2345,6 +2345,382 @@ select * from
select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
+--
+-- test that semi- or inner self-joins on a unique column are removed
+--
+
+-- enable only nestloop to get more predictable plans
+set enable_hashjoin to off;
+set enable_mergejoin to off;
+
+create table sj (a int unique, b int, c int unique);
+insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1);
+analyze sj;
+
+-- Trivial self-join case.
+explain (costs off)
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1;
+
+-- Self-join removal performs after a subquery pull-up process and could remove
+-- such kind of self-join too. Check this option.
+explain (costs off)
+select * from sj p
+where exists (select * from sj q
+ where q.a = p.a and q.b < 10);
+select * from sj p where exists (select * from sj q where q.a = p.a and q.b < 10);
+
+-- Don't remove self-join for the case of equality of two different unique columns.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null;
+
+-- Degenerated case.
+explain (costs off)
+select * from
+ (select a as x from sj where false) as q1,
+ (select a as y from sj where false) as q2
+where q1.x = q2.y;
+
+-- We can't use a cross-EC generated self join qual because of current logic of
+-- the generate_join_implied_equalities routine.
+explain (costs off)
+select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a;
+explain (costs off)
+select * from sj t1, sj t2, sj t3
+where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a
+ and t1.b = t3.b and t3.b = t3.a;
+
+-- Double self-join removal.
+-- Use a condition on "b + 1", not on "b", for the second join, so that
+-- the equivalence class is different from the first one, and we can
+-- test the non-ec code path.
+explain (costs off)
+select * from sj t1 join sj t2 on t1.a = t2.a and t1.b = t2.b
+ join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1;
+
+-- subselect that references the removed relation
+explain (costs off)
+select t1.a, (select a from sj where a = t2.a and a = t1.a)
+from sj t1, sj t2
+where t1.a = t2.a;
+
+-- self-join under outer join
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on x.a = z.q1;
+
+explain (costs off)
+select * from sj x join sj y on x.a = y.a
+left join int8_tbl z on y.a = z.q1;
+
+explain (costs off)
+SELECT * FROM (
+ SELECT t1.*, t2.a AS ax FROM sj t1 JOIN sj t2
+ ON (t1.a = t2.a AND t1.c*t1.c = t2.c+2 AND t2.b IS NULL)
+) AS q1
+LEFT JOIN
+ (SELECT t3.* FROM sj t3, sj t4 WHERE t3.c = t4.c) AS q2
+ON q1.ax = q2.a;
+
+-- Test that placeholders are updated correctly after join removal
+explain (costs off)
+select * from (values (1)) x
+left join (select coalesce(y.q1, 1) from int8_tbl y
+ right join sj j1 inner join sj j2 on j1.a = j2.a
+ on true) z
+on true;
+
+-- Check updating of Lateral links from top-level query to the removing relation
+explain (COSTS OFF)
+SELECT * FROM pg_am am WHERE am.amname IN (
+ SELECT c1.relname AS relname
+ FROM pg_class c1
+ JOIN pg_class c2
+ ON c1.oid=c2.oid AND c1.oid < 10
+);
+
+--
+-- SJR corner case: uniqueness of an inner is [partially] derived from
+-- baserestrictinfo clauses.
+-- XXX: We really should allow SJR for these corner cases?
+--
+
+INSERT INTO sj VALUES (3, 1, 3);
+
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3;
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; -- Return one row
+
+explain (costs off) -- Remove SJ, define uniqueness by a constant
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2;
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; -- Return one row
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a
+; -- Remove SJ, define uniqueness by a constant expression
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a
+; -- Return one row
+
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1;
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; -- Return no rows
+
+explain (costs off) -- Shuffle a clause. Remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1;
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; -- Return no rows
+
+-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL'
+-- after SJ elimination it shouldn't be a mergejoinable clause.
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42;
+EXPLAIN (COSTS OFF)
+SELECT t4.*
+FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3
+JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42
+; -- SJs must be removed.
+
+-- Functional index
+CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a));
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1;
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2;
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a)
+; -- Restriction contains expressions in both sides, Remove SJ.
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int
+ AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a)
+; -- Empty set of rows should be returned
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.a) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.a)
+; -- Restriction contains volatile function - disable SJR feature.
+SELECT * FROM sj j1, sj j2
+WHERE j1.b = j2.b
+ AND (j1.a*j1.c/3) = (random()/3 + 3)::int
+ AND (random()/3 + 3)::int = (j2.a*j2.c/3)
+; -- Return one row
+
+-- Multiple filters
+CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c);
+explain (costs off) -- Remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c;
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2
+ WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c;
+
+CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b);
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2;
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a;
+explain (costs off) -- Don't remove SJ
+ SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1);
+DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx;
+
+-- Test that OR predicated are updated correctly after join removal
+CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT);
+CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag);
+explain (costs off)
+SELECT COUNT(*) FROM tab_with_flag
+WHERE
+ (is_flag IS NULL OR is_flag = 0)
+ AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3));
+DROP TABLE tab_with_flag;
+
+-- HAVING clause
+explain (costs off)
+select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1;
+
+-- update lateral references and range table entry reference
+explain (verbose, costs off)
+select 1 from (select x.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+explain (verbose, costs off)
+select 1 from (select y.* from sj x, sj y where x.a = y.a) q,
+ lateral generate_series(1, q.a) gs(i);
+
+-- Test that a non-EC-derived join clause is processed correctly. Use an
+-- outer join so that we can't form an EC.
+explain (costs off) select * from sj p join sj q on p.a = q.a
+ left join sj r on p.a + q.a = r.a;
+
+-- FIXME this constant false filter doesn't look good. Should we merge
+-- equivalence classes?
+explain (costs off)
+select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2;
+
+-- Check that attr_needed is updated correctly after self-join removal. In this
+-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2.
+-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b.
+-- Use index scan for k1 so that we don't get 'b' from physical tlist used for
+-- seqscan. Also disable reordering of joins because this test depends on a
+-- particular join tree.
+create table sk (a int, b int);
+create index on sk(a);
+set join_collapse_limit to 1;
+set enable_seqscan to off;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b;
+explain (costs off) select 1 from
+ (sk k1 join sk k2 on k1.a = k2.a)
+ join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b;
+reset join_collapse_limit;
+reset enable_seqscan;
+
+-- Check that clauses from the join filter list is not lost on the self-join removal
+CREATE TABLE emp1 ( id SERIAL PRIMARY KEY NOT NULL, code int);
+explain (verbose, costs off)
+SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code;
+
+-- Shuffle self-joined relations. Only in the case of iterative deletion
+-- attempts explains of these queries will be identical.
+CREATE UNIQUE INDEX ON emp1((id*id));
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id;
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id;
+explain (costs off)
+SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3
+WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id;
+
+-- Check that SJE removes references from PHVs correctly
+explain (costs off)
+select * from emp1 t1 left join
+ (select coalesce(t3.code, 1) from emp1 t2
+ left join (emp1 t3 join emp1 t4 on t3.id = t4.id)
+ on true)
+on true;
+
+-- Check that SJE does not remove self joins if a PHV references the removed
+-- rel laterally.
+explain (costs off)
+select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join
+ lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true;
+
+-- We can remove the join even if we find the join can't duplicate rows and
+-- the base quals of each side are different. In the following case we end up
+-- moving quals over to s1 to make it so it can't match any rows.
+create table sl(a int, b int, c int);
+create unique index on sl(a, b);
+vacuum analyze sl;
+
+-- Both sides are unique, but base quals are different
+explain (costs off)
+select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2;
+
+-- Check NullTest in baserestrictinfo list
+explain (costs off)
+select * from sl t1, sl t2
+where t1.a = t2.a and t1.b = 1 and t2.b = 2
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+explain (verbose, costs off)
+select * from sl t1, sl t2
+where t1.b = t2.b and t2.a = 3 and t1.a = 3
+ and t1.c IS NOT NULL and t2.c IS NOT NULL
+ and t2.b IS NOT NULL and t1.b IS NOT NULL
+ and t1.a IS NOT NULL and t2.a IS NOT NULL;
+
+-- Join qual isn't mergejoinable, but inner is unique.
+explain (COSTS OFF)
+SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1;
+explain (COSTS OFF)
+SELECT * FROM
+ (SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl
+WHERE q0.a = 1;
+
+--
+---- Only one side is unqiue
+--select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1;
+--select * from sl t1, sl t2 where t1.a = t2.a and t2.b = 1;
+--
+---- Several uniques indexes match, and we select a different one
+---- for each side, so the join is not removed
+--create table sm(a int unique, b int unique, c int unique);
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.b and m.c = n.c;
+--explain (costs off)
+--select * from sm m, sm n where m.a = n.c and m.b = n.b;
+--explain (costs off)
+--select * from sm m, sm n where m.c = n.b and m.a = n.a;
+
+-- Check optimization disabling if it will violate special join conditions.
+-- Two identical joined relations satisfies self join removal conditions but
+-- stay in different special join infos.
+CREATE TABLE sj_t1 (id serial, a int);
+CREATE TABLE sj_t2 (id serial, a int);
+CREATE TABLE sj_t3 (id serial, a int);
+CREATE TABLE sj_t4 (id serial, a int);
+
+CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id);
+CREATE UNIQUE INDEX ON sj_t2 USING btree (id);
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM sj_t1
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) t2t3t4
+ON sj_t1.id = t2t3t4.id
+JOIN (
+ SELECT sj_t2.id AS id FROM sj_t2
+ WHERE EXISTS
+ (
+ SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id
+ )
+ ) _t2t3t4
+ON sj_t1.id = _t2t3t4.id;
+
+--
+-- Test RowMarks-related code
+--
+
+-- TODO: Why this select returns two copies of ctid field? Should we fix it?
+EXPLAIN (COSTS OFF) -- Both sides have explicit LockRows marks
+SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE;
+
+EXPLAIN (COSTS OFF) -- A RowMark exists for the table being kept
+UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a;
+
+CREATE RULE sj_del_rule AS ON DELETE TO sj
+ DO INSTEAD
+ UPDATE sj SET a = 1 WHERE a = old.a;
+EXPLAIN (COSTS OFF) DELETE FROM sj; -- A RowMark exists for the table being dropped
+DROP RULE sj_del_rule ON sj CASCADE;
+
+reset enable_hashjoin;
+reset enable_mergejoin;
+
--
-- Test hints given on incorrect column references are useful
--
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index 019f1e76739..3dd3a606e2a 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -148,6 +148,33 @@ CREATE TEMP TABLE nocols();
INSERT INTO nocols DEFAULT VALUES;
SELECT * FROM nocols n, LATERAL (VALUES(n.*)) v;
+--
+-- test order by NULLS (FIRST|LAST)
+--
+
+select unique1, unique2 into onek_with_null from onek;
+insert into onek_with_null (unique1,unique2) values (NULL, -1), (NULL, NULL);
+
+
+select * from onek_with_null order by unique1 nulls first , unique2 limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 limit 3;
+select * from onek_with_null order by unique1 nulls first , unique2 nulls first limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 nulls first limit 3;
+select * from onek_with_null order by unique1 nulls first , unique2 nulls last limit 3;
+select * from onek_with_null order by unique1 nulls last , unique2 nulls last limit 3;
+
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc limit 3;
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls first limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls first limit 3;
+select * from onek_with_null order by unique1 desc nulls first , unique2 desc nulls last limit 3;
+select * from onek_with_null order by unique1 desc nulls last , unique2 desc nulls last limit 3;
+
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 nulls first , u2 nulls first limit 3;
+select unique1 as u1, unique2 as u2 from onek_with_null order by u1 asc nulls first , u2 desc nulls first limit 3;
+
+drop table onek_with_null;
+
--
-- Test ORDER BY options
--
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 8f54c363b1f..5baf3f5fc93 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -6,6 +6,8 @@
-- from auto-analyze happening when we didn't expect it.
--
+set default_statistics_target=10000; --prevent random subset for joinsel
+
-- check the number of estimated/actual rows in the top node
create function check_estimated_rows(text) returns table (estimated int, actual int)
language plpgsql as
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 72e6d3a9865..3b153d9b83b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -389,6 +389,7 @@ CatalogId
CatalogIdMapEntry
CatalogIndexState
ChangeVarNodes_context
+ReplaceVarnoContext
CheckPoint
CheckPointStmt
CheckpointStatsData
@@ -2565,6 +2566,7 @@ SeenRelsEntry
SelectLimit
SelectStmt
Selectivity
+SelfJoinCandidate
SemTPadded
SemiAntiJoinFactors
SeqScan
@@ -4002,6 +4004,7 @@ unicodeStyleColumnFormat
unicodeStyleFormat
unicodeStyleRowFormat
unicode_linestyle
+UniqueRelInfo
unit_conversion
unlogged_relation_entry
utf_local_conversion_func