From 977cd26ee8b489772b99de46330c9492a1839b6d Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Mon, 11 Oct 2021 13:43:50 -0400
Subject: [PATCH 2/2] updates

---
 src/backend/postmaster/checkpointer.c       |   2 +-
 src/backend/postmaster/pgstat.c             | 104 +++++++++++---------
 src/backend/storage/buffer/bufmgr.c         |  25 +++--
 src/backend/storage/buffer/freelist.c       |   6 +-
 src/backend/storage/buffer/localbuf.c       |   3 +
 src/backend/storage/sync/sync.c             |   1 +
 src/backend/utils/activity/backend_status.c |   5 +-
 src/backend/utils/adt/pgstatfuncs.c         |  61 ++++++------
 src/include/pgstat.h                        |   3 +-
 src/include/utils/backend_status.h          |  12 +--
 10 files changed, 119 insertions(+), 103 deletions(-)

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 8f2ef63ee5..dec325e40e 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1083,12 +1083,12 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type)
 		(CheckpointerShmem->num_requests >= CheckpointerShmem->max_requests &&
 		 !CompactCheckpointerRequestQueue()))
 	{
+		LWLockRelease(CheckpointerCommLock);
 		/*
 		 * Count the subset of writes where backends have to do their own
 		 * fsync
 		 */
 		pgstat_inc_ioop(IOOP_FSYNC, IOPATH_SHARED);
-		LWLockRelease(CheckpointerCommLock);
 		return false;
 	}
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index fbec722a1f..e0762444af 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -1435,6 +1435,28 @@ pgstat_reset_counters(void)
 	pgstat_send(&msg, sizeof(msg));
 }
 
+/*
+ * Helper function to collect and send live backends' current IO operations
+ * stats counters when a stats reset is initiated so that they may be deducted
+ * from future totals.
+ */
+static void
+pgstat_send_buffers_reset(PgStat_MsgResetsharedcounter *msg)
+{
+		int			backend_type;
+		PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+
+		memset(ops, 0, sizeof(ops));
+		pgstat_report_live_backend_io_path_ops(ops);
+
+		for (backend_type = 1; backend_type < BACKEND_NUM_TYPES; backend_type++)
+		{
+			msg->m_backend_resets.backend_type = backend_type;
+			memcpy(&msg->m_backend_resets.iop, &ops[backend_type], sizeof(msg->m_backend_resets.iop));
+			pgstat_send(msg, sizeof(PgStat_MsgResetsharedcounter));
+		}
+}
+
 /* ----------
  * pgstat_reset_shared_counters() -
  *
@@ -1452,12 +1474,17 @@ pgstat_reset_shared_counters(const char *target)
 	if (pgStatSock == PGINVALID_SOCKET)
 		return;
 
-	if (strcmp(target, "archiver") == 0)
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
+	if (strcmp(target, "buffers") == 0)
+	{
+		msg.m_resettarget = RESET_BUFFERS;
+		pgstat_send_buffers_reset(&msg);
+		return;
+	}
+	else if (strcmp(target, "archiver") == 0)
 		msg.m_resettarget = RESET_ARCHIVER;
 	else if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
-	else if (strcmp(target, "buffers") == 0)
-		msg.m_resettarget = RESET_BUFFERS;
 	else if (strcmp(target, "wal") == 0)
 		msg.m_resettarget = RESET_WAL;
 	else
@@ -1466,25 +1493,8 @@ pgstat_reset_shared_counters(const char *target)
 				 errmsg("unrecognized reset target: \"%s\"", target),
 				 errhint("Target must be \"archiver\", \"bgwriter\", or \"wal\".")));
 
-	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
-
-	if (msg.m_resettarget == RESET_BUFFERS)
-	{
-		int			backend_type;
-		PgStatIOPathOps ops[BACKEND_NUM_TYPES];
 
-		memset(ops, 0, sizeof(ops));
-		pgstat_report_live_backend_io_path_ops(ops);
-
-		for (backend_type = 1; backend_type < BACKEND_NUM_TYPES; backend_type++)
-		{
-			msg.m_backend_resets.backend_type = backend_type;
-			memcpy(&msg.m_backend_resets.iop, &ops[backend_type], sizeof(msg.m_backend_resets.iop));
-			pgstat_send(&msg, sizeof(msg));
-		}
-	}
-	else
-		pgstat_send(&msg, sizeof(msg));
+	pgstat_send(&msg, sizeof(msg));
 
 }
 
@@ -3137,30 +3147,6 @@ pgstat_send(void *msg, int len)
 #endif
 }
 
-/*
- * Add live IO Op stats for all IO Paths (e.g. shared, local) to those in the
- * equivalent stats structure for exited backends. Note that this adds and
- * doesn't set, so the destination stats structure should be zeroed out by the
- * caller initially. This would commonly be used to transfer all IO Op stats
- * for all IO Paths for a particular backend type to the pgstats structure.
- */
-void
-pgstat_add_io_path_ops(PgStatIOOps *dest, IOOps *src, int io_path_num_types)
-{
-	int			io_path;
-
-	for (io_path = 0; io_path < io_path_num_types; io_path++)
-	{
-		dest->allocs += pg_atomic_read_u64(&src->allocs);
-		dest->extends += pg_atomic_read_u64(&src->extends);
-		dest->fsyncs += pg_atomic_read_u64(&src->fsyncs);
-		dest->writes += pg_atomic_read_u64(&src->writes);
-		dest++;
-		src++;
-	}
-
-}
-
 /* ----------
  * pgstat_send_archiver() -
  *
@@ -3234,9 +3220,8 @@ pgstat_send_buffers(void)
 	memset(&msg, 0, sizeof(msg));
 	msg.backend_type = beentry->st_backendType;
 
-	pgstat_add_io_path_ops(msg.iop.io_path_ops,
-						   (IOOps *) &beentry->io_path_stats,
-						   IOPATH_NUM_TYPES);
+	pgstat_sum_io_path_ops(msg.iop.io_path_ops,
+						   (IOOps *) &beentry->io_path_stats);
 
 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_IO_PATH_OPS);
 	pgstat_send(&msg, sizeof(msg));
@@ -3407,6 +3392,29 @@ pgstat_send_slru(void)
 	}
 }
 
+/*
+ * Helper function to sum all live IO Op stats for all IO Paths (e.g. shared,
+ * local) to those in the equivalent stats structure for exited backends. Note
+ * that this adds and doesn't set, so the destination stats structure should be
+ * zeroed out by the caller initially. This would commonly be used to transfer
+ * all IO Op stats for all IO Paths for a particular backend type to the
+ * pgstats structure.
+ */
+void
+pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src)
+{
+	int			io_path;
+	for (io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+	{
+		dest->allocs += pg_atomic_read_u64(&src->allocs);
+		dest->extends += pg_atomic_read_u64(&src->extends);
+		dest->fsyncs += pg_atomic_read_u64(&src->fsyncs);
+		dest->writes += pg_atomic_read_u64(&src->writes);
+		dest++;
+		src++;
+	}
+
+}
 
 /* ----------
  * PgstatCollectorMain() -
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index b911dd9ce5..537c8dcadc 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -480,7 +480,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOPath iopath);
 static void FindAndDropRelFileNodeBuffers(RelFileNode rnode,
 										  ForkNumber forkNum,
 										  BlockNumber nForkBlock,
@@ -1262,7 +1262,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 				 */
 
 				iopath = from_ring ? IOPATH_STRATEGY : IOPATH_SHARED;
-				pgstat_inc_ioop(IOOP_WRITE, iopath);
 
 				/* OK, do the I/O */
 				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum,
@@ -1270,7 +1269,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rnode.node.dbNode,
 														  smgr->smgr_rnode.node.relNode);
 
-				FlushBuffer(buf, NULL);
+				FlushBuffer(buf, NULL, iopath);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -2566,11 +2565,10 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 	 * buffer is clean by the time we've locked it.)
 	 */
 
-	pgstat_inc_ioop(IOOP_WRITE, IOPATH_SHARED);
 	PinBuffer_Locked(bufHdr);
 	LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, IOPATH_SHARED);
 
 	LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 
@@ -2818,9 +2816,12 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
  *
  * If the caller has an smgr reference for the buffer's relation, pass it
  * as the second parameter.  If not, pass NULL.
+ *
+ * IOPath will always be IOPATH_SHARED except when a buffer access strategy is
+ * used and the buffer being flushed is a buffer from the strategy ring.
  */
 static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOPath iopath)
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcallback;
@@ -2912,6 +2913,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
 			  bufToWrite,
 			  false);
 
+	pgstat_inc_ioop(IOOP_WRITE, iopath);
+
 	if (track_io_timing)
 	{
 		INSTR_TIME_SET_CURRENT(io_time);
@@ -3559,6 +3562,8 @@ FlushRelationBuffers(Relation rel)
 						  localpage,
 						  false);
 
+				pgstat_inc_ioop(IOOP_WRITE, IOPATH_LOCAL);
+
 				buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
@@ -3594,7 +3599,7 @@ FlushRelationBuffers(Relation rel)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, RelationGetSmgr(rel));
+			FlushBuffer(bufHdr, RelationGetSmgr(rel), IOPATH_SHARED);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3690,7 +3695,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, srelent->srel);
+			FlushBuffer(bufHdr, srelent->srel, IOPATH_SHARED);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3746,7 +3751,7 @@ FlushDatabaseBuffers(Oid dbid)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
-			FlushBuffer(bufHdr, NULL);
+			FlushBuffer(bufHdr, NULL, IOPATH_SHARED);
 			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
@@ -3773,7 +3778,7 @@ FlushOneBuffer(Buffer buffer)
 
 	Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, IOPATH_SHARED);
 }
 
 /*
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index e2e1c3bf56..c7ca8d75aa 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -689,8 +689,8 @@ bool
 StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool *from_ring)
 {
 	/*
-	 * If we decide to use the dirty buffer selected by StrategyGetBuffer(),
-	 * then ensure that we count it as such in pg_stat_buffers view.
+	 * Start by assuming that we will use the dirty buffer selected by
+	 * StrategyGetBuffer().
 	 */
 	*from_ring = true;
 
@@ -712,7 +712,7 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool *from_
 	/*
 	 * Since we will not be writing out a dirty buffer from the ring, set
 	 * from_ring to false so that the caller does not count this write as a
-	 * "strategy write" and can do proper bookkeeping for pg_stat_buffers.
+	 * "strategy write" and can do proper bookkeeping.
 	 */
 	*from_ring = false;
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 04b3558ea3..f396a2b68d 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -20,6 +20,7 @@
 #include "executor/instrument.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
+#include "utils/backend_status.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner_private.h"
@@ -226,6 +227,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 				  localpage,
 				  false);
 
+		pgstat_inc_ioop(IOOP_WRITE, IOPATH_LOCAL);
+
 		/* Mark not-dirty now in case we error out below */
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c
index 4a2ed414b0..8e5be66998 100644
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -396,6 +396,7 @@ ProcessSyncRequests(void)
 					total_elapsed += elapsed;
 					processed++;
 
+					pgstat_inc_ioop(IOOP_FSYNC, IOPATH_SHARED);
 					if (log_checkpoints)
 						elog(DEBUG1, "checkpoint sync: number=%d file=%s time=%.3f ms",
 							 processed,
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index f326297517..f853ee6c1c 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -670,9 +670,8 @@ pgstat_report_live_backend_io_path_ops(PgStatIOPathOps *backend_io_path_ops)
 		if (beentry->st_procpid == 0)
 			continue;
 
-		pgstat_add_io_path_ops(backend_io_path_ops[beentry->st_backendType].io_path_ops,
-							   (IOOps *) beentry->io_path_stats,
-							   IOPATH_NUM_TYPES);
+		pgstat_sum_io_path_ops(backend_io_path_ops[beentry->st_backendType].io_path_ops,
+							   (IOOps *) beentry->io_path_stats);
 
 	}
 }
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 557b2673c0..d6ac325d63 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1751,10 +1751,40 @@ pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
 	PG_RETURN_TIMESTAMPTZ(pgstat_fetch_stat_bgwriter()->stat_reset_timestamp);
 }
 
+/*
+* When adding a new column to the pg_stat_buffers view, add a new enum
+* value here above COLUMN_LENGTH.
+*/
+enum
+{
+	COLUMN_BACKEND_TYPE,
+	COLUMN_IO_PATH,
+	COLUMN_ALLOCS,
+	COLUMN_EXTENDS,
+	COLUMN_FSYNCS,
+	COLUMN_WRITES,
+	COLUMN_RESET_TIME,
+	COLUMN_LENGTH,
+};
+
+#define NROWS ((BACKEND_NUM_TYPES - 1) * IOPATH_NUM_TYPES)
+/*
+ * Helper function to get the correct row in the pg_stat_buffers view.
+ */
+static inline Datum *
+get_pg_stat_buffers_row(Datum all_values[NROWS][COLUMN_LENGTH], BackendType backend_type, IOPath io_path)
+{
+
+	/*
+	 * Subtract 1 from backend_type to avoid having rows for B_INVALID
+	 * BackendType
+	 */
+	return all_values[(backend_type - 1) * IOPATH_NUM_TYPES + io_path];
+}
+
 Datum
 pg_stat_get_buffers(PG_FUNCTION_ARGS)
 {
-#define NROWS ((BACKEND_NUM_TYPES - 1) * IOPATH_NUM_TYPES)
 	PgStat_BackendIOPathOps *backend_io_path_ops;
 	int			i;
 	int			io_path,
@@ -1765,22 +1795,6 @@ pg_stat_get_buffers(PG_FUNCTION_ARGS)
 
 	Tuplestorestate *tupstore = pg_stat_make_tuplestore(fcinfo, &tupdesc);
 
-	/*
-	 * When adding a new column to the pg_stat_buffers view, add a new enum
-	 * value here above COLUMN_LENGTH.
-	 */
-	enum
-	{
-		COLUMN_BACKEND_TYPE,
-		COLUMN_IO_PATH,
-		COLUMN_ALLOCS,
-		COLUMN_EXTENDS,
-		COLUMN_FSYNCS,
-		COLUMN_WRITES,
-		COLUMN_RESET_TIME,
-		COLUMN_LENGTH,
-	};
-
 	Datum		all_values[NROWS][COLUMN_LENGTH];
 	bool		all_nulls[NROWS][COLUMN_LENGTH];
 
@@ -1805,12 +1819,7 @@ pg_stat_get_buffers(PG_FUNCTION_ARGS)
 
 		for (io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
 		{
-			/*
-			 * Subtract 1 from backend_type to avoid having rows for B_INVALID
-			 * BackendType
-			 */
-			int			rownum = (beentry->st_backendType - 1) * IOPATH_NUM_TYPES + io_path;
-			Datum	   *values = all_values[rownum];
+			Datum *values = get_pg_stat_buffers_row(all_values, beentry->st_backendType, io_path);
 
 			/*
 			 * COLUMN_RESET_TIME, COLUMN_BACKEND_TYPE, and COLUMN_IO_PATH will
@@ -1839,11 +1848,7 @@ pg_stat_get_buffers(PG_FUNCTION_ARGS)
 
 		for (io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
 		{
-			/*
-			 * Subtract 1 from backend_type to avoid having rows for B_INVALID
-			 * BackendType
-			 */
-			Datum	   *values = all_values[(backend_type - 1) * IOPATH_NUM_TYPES + io_path];
+			Datum *values = get_pg_stat_buffers_row(all_values, backend_type, io_path);
 
 			values[COLUMN_BACKEND_TYPE] = backend_type_desc;
 			values[COLUMN_IO_PATH] = CStringGetTextDatum(GetIOPathDesc(io_path));
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 8ff87a3f54..2d72933e90 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1156,13 +1156,12 @@ extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
 extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 									  void *recdata, uint32 len);
 
-extern void pgstat_add_io_path_ops(PgStatIOOps *dest,
-								   IOOps *src, int io_path_num_types);
 extern void pgstat_send_archiver(const char *xlog, bool failed);
 extern void pgstat_send_bgwriter(void);
 extern void pgstat_send_buffers(void);
 extern void pgstat_send_checkpointer(void);
 extern void pgstat_send_wal(bool force);
+extern void pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index c0149ce0de..f0392a07dc 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -371,20 +371,16 @@ pgstat_inc_ioop(IOOp io_op, IOPath io_path)
 	switch (io_op)
 	{
 		case IOOP_ALLOC:
-			pg_atomic_write_u64(&io_ops->allocs,
-								pg_atomic_read_u64(&io_ops->allocs) + 1);
+			inc_counter(&io_ops->allocs);
 			break;
 		case IOOP_EXTEND:
-			pg_atomic_write_u64(&io_ops->extends,
-								pg_atomic_read_u64(&io_ops->extends) + 1);
+			inc_counter(&io_ops->extends);
 			break;
 		case IOOP_FSYNC:
-			pg_atomic_write_u64(&io_ops->fsyncs,
-								pg_atomic_read_u64(&io_ops->fsyncs) + 1);
+			inc_counter(&io_ops->fsyncs);
 			break;
 		case IOOP_WRITE:
-			pg_atomic_write_u64(&io_ops->writes,
-								pg_atomic_read_u64(&io_ops->writes) + 1);
+			inc_counter(&io_ops->writes);
 			break;
 	}
 }
-- 
2.27.0

