LogoopenSUSE Build Service > Projects
Sign Up | Log In

View File 1010-drm-i915-Unify-active-context-tracking-between-legac.patch of Package drm (Project home:tiwai:bnc1050256)

From e8a9c58fcd9a5081f71f57f370af1347ed6a310b Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Sun, 18 Dec 2016 15:37:20 +0000
Subject: [PATCH] drm/i915: Unify active context tracking between
 legacy/execlists/guc

The requests conversion introduced a nasty bug where we could generate a
new request in the middle of constructing a request if we needed to idle
the system in order to evict space for a context. The request to idle
would be executed (and waited upon) before the current one, creating a
minor havoc in the seqno accounting, as we will consider the current
request to already be completed (prior to deferred seqno assignment) but
ring->last_retired_head would have been updated and still could allow
us to overwrite the current request before execution.

We also employed two different mechanisms to track the active context
until it was switched out. The legacy method allowed for waiting upon an
active context (it could forcibly evict any vma, including context's),
but the execlists method took a step backwards by pinning the vma for
the entire active lifespan of the context (the only way to evict was to
idle the entire GPU, not individual contexts). However, to circumvent
the tricky issue of locking (i.e. we cannot take struct_mutex at the
time of i915_gem_request_submit(), where we would want to move the
previous context onto the active tracker and unpin it), we take the
execlists approach and keep the contexts pinned until retirement.
The benefit of the execlists approach, more important for execlists than
legacy, was the reduction in work in pinning the context for each
request - as the context was kept pinned until idle, it could short
circuit the pinning for all active contexts.

We introduce new engine vfuncs to pin and unpin the context
respectively. The context is pinned at the start of the request, and
only unpinned when the following request is retired (this ensures that
the context is idle and coherent in main memory before we unpin it). We
move the engine->last_context tracking into the retirement itself
(rather than during request submission) in order to allow the submission
to be reordered or unwound without undue difficultly.

And finally an ulterior motive for unifying context handling was to
prepare for mock requests.

v2: Rename to last_retired_context, split out legacy_context tracking
for MI_SET_CONTEXT.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c         |    2 
 drivers/gpu/drm/i915/i915_gem_context.c |   97 +++++---------------------------
 drivers/gpu/drm/i915/i915_gem_request.c |   33 +++++++---
 drivers/gpu/drm/i915/i915_gem_request.h |   11 ---
 drivers/gpu/drm/i915/intel_display.c    |    3 
 drivers/gpu/drm/i915/intel_engine_cs.c  |   19 +++++-
 drivers/gpu/drm/i915/intel_lrc.c        |   56 +++++-------------
 drivers/gpu/drm/i915/intel_lrc.h        |    5 -
 drivers/gpu/drm/i915/intel_pm.c         |    2 
 drivers/gpu/drm/i915/intel_ringbuffer.c |   57 +++++++++++-------
 drivers/gpu/drm/i915/intel_ringbuffer.h |   23 +++++++
 11 files changed, 138 insertions(+), 170 deletions(-)

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3012,7 +3012,7 @@ int i915_gem_wait_for_idle(struct drm_i9
 	int ret;
 
 	for_each_engine(engine, dev_priv) {
-		if (engine->last_context == NULL)
+		if (engine->last_retired_context == NULL)
 			continue;
 
 		ret = intel_engine_idle(engine, flags);
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -405,21 +405,6 @@ out:
 	return ctx;
 }
 
-static void i915_gem_context_unpin(struct i915_gem_context *ctx,
-				   struct intel_engine_cs *engine)
-{
-	if (i915.enable_execlists) {
-		intel_lr_context_unpin(ctx, engine);
-	} else {
-		struct intel_context *ce = &ctx->engine[engine->id];
-
-		if (ce->state)
-			i915_vma_unpin(ce->state);
-
-		i915_gem_context_put(ctx);
-	}
-}
-
 int i915_gem_context_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -478,10 +463,13 @@ void i915_gem_context_lost(struct drm_i9
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
 	for_each_engine(engine, dev_priv) {
-		if (engine->last_context) {
-			i915_gem_context_unpin(engine->last_context, engine);
-			engine->last_context = NULL;
-		}
+		engine->legacy_active_context = NULL;
+
+		if (!engine->last_retired_context)
+			continue;
+
+		engine->context_unpin(engine, engine->last_retired_context);
+		engine->last_retired_context = NULL;
 	}
 
 	/* Force the GPU state to be restored on enabling */
@@ -703,7 +691,7 @@ static inline bool skip_rcs_switch(struc
 	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 		return false;
 
-	return to == engine->last_context;
+	return to == engine->legacy_active_context;
 }
 
 static bool
@@ -715,11 +703,11 @@ needs_pd_load_pre(struct i915_hw_ppgtt *
 		return false;
 
 	/* Always load the ppgtt on first use */
-	if (!engine->last_context)
+	if (!engine->legacy_active_context)
 		return true;
 
 	/* Same context without new entries, skip */
-	if (engine->last_context == to &&
+	if (engine->legacy_active_context == to &&
 	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 		return false;
 
@@ -754,35 +742,15 @@ static int do_rcs_switch(struct drm_i915
 	struct i915_gem_context *to = req->ctx;
 	struct intel_engine_cs *engine = req->engine;
 	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
-	struct i915_vma *vma = to->engine[RCS].state;
-	struct i915_gem_context *from;
+	struct i915_gem_context *from = engine->legacy_active_context;
 	u32 hw_flags;
 	int ret, i;
 
+	GEM_BUG_ON(engine->id != RCS);
+
 	if (skip_rcs_switch(ppgtt, engine, to))
 		return 0;
 
-	/* Clear this page out of any CPU caches for coherent swap-in/out. */
-	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
-		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
-		if (ret)
-			return ret;
-	}
-
-	/* Trying to pin first makes error handling easier. */
-	ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
-	if (ret)
-		return ret;
-
-	/*
-	 * Pin can switch back to the default context if we end up calling into
-	 * evict_everything - as a last ditch gtt defrag effort that also
-	 * switches to the default context. Hence we need to reload from here.
-	 *
-	 * XXX: Doing so is painfully broken!
-	 */
-	from = engine->last_context;
-
 	if (needs_pd_load_pre(ppgtt, engine, to)) {
 		/* Older GENs and non render rings still want the load first,
 		 * "PP_DCLV followed by PP_DIR_BASE register through Load
@@ -791,7 +759,7 @@ static int do_rcs_switch(struct drm_i915
 		trace_switch_mm(engine, to);
 		ret = ppgtt->switch_mm(ppgtt, req);
 		if (ret)
-			goto err;
+			return ret;
 	}
 
 	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
@@ -808,29 +776,10 @@ static int do_rcs_switch(struct drm_i915
 	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
 		ret = mi_set_context(req, hw_flags);
 		if (ret)
-			goto err;
-	}
+			return ret;
 
-	/* The backing object for the context is done after switching to the
-	 * *next* context. Therefore we cannot retire the previous context until
-	 * the next context has already started running. In fact, the below code
-	 * is a bit suboptimal because the retiring can occur simply after the
-	 * MI_SET_CONTEXT instead of when the next seqno has completed.
-	 */
-	if (from != NULL) {
-		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
-		 * whole damn pipeline, we don't need to explicitly mark the
-		 * object dirty. The only exception is that the context must be
-		 * correct in case the object gets swapped out. Ideally we'd be
-		 * able to defer doing this until we know the object would be
-		 * swapped, but there is no way to do that yet.
-		 */
-		i915_vma_move_to_active(from->engine[RCS].state, req, 0);
-		/* state is kept alive until the next request */
-		i915_vma_unpin(from->engine[RCS].state);
-		i915_gem_context_put(from);
+		engine->legacy_active_context = to;
 	}
-	engine->last_context = i915_gem_context_get(to);
 
 	/* GEN8 does *not* require an explicit reload if the PDPs have been
 	 * setup, and we do not wish to move them.
@@ -871,10 +820,6 @@ static int do_rcs_switch(struct drm_i915
 	}
 
 	return 0;
-
-err:
-	i915_vma_unpin(vma);
-	return ret;
 }
 
 /**
@@ -914,12 +859,6 @@ int i915_switch_context(struct drm_i915_
 			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
 		}
 
-		if (to != engine->last_context) {
-			if (engine->last_context)
-				i915_gem_context_put(engine->last_context);
-			engine->last_context = i915_gem_context_get(to);
-		}
-
 		return 0;
 	}
 
@@ -934,10 +873,10 @@ int i915_gem_switch_to_kernel_context(st
 		struct drm_i915_gem_request *req;
 		int ret;
 
-		if (engine->last_context == NULL)
+		if (engine->last_retired_context == NULL)
 			continue;
 
-		if (engine->last_context == dev_priv->kernel_context)
+		if (engine->last_retired_context == dev_priv->kernel_context)
 			continue;
 
 		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -162,6 +162,7 @@ void i915_gem_retire_noop(struct i915_ge
 
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
+	struct intel_engine_cs *engine = request->engine;
 	struct i915_gem_active *active, *next;
 
 	trace_i915_gem_request_retire(request);
@@ -207,13 +208,17 @@ static void i915_gem_request_retire(stru
 
 	i915_gem_request_remove_from_client(request);
 
-	if (request->previous_context) {
-		if (i915.enable_execlists)
-			intel_lr_context_unpin(request->previous_context,
-					       request->engine);
-	}
+	/* The backing object for the context is done after switching to the
+	 * *next* context. Therefore we cannot retire the previous context until
+	 * the next context has already started running. However, since we
+	 * cannot take the required locks at i915_gem_request_submit() we
+	 * defer the unpinning of the active context to now, retirement of
+	 * the subsequent request.
+	 */
+	if (engine->last_retired_context)
+		engine->context_unpin(engine, engine->last_retired_context);
+	engine->last_retired_context = request->ctx;
 
-	i915_gem_context_put(request->ctx);
 	i915_gem_request_put(request);
 }
 
@@ -368,6 +373,14 @@ i915_gem_request_alloc(struct intel_engi
 	if (ret)
 		return ERR_PTR(ret);
 
+	/* Pinning the contexts may generate requests in order to acquire
+	 * GGTT space, so do this first before we reserve a seqno for
+	 * ourselves.
+	 */
+	ret = engine->context_pin(engine, ctx);
+	if (ret)
+		return ERR_PTR(ret);
+
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->request_list,
 				       typeof(*req), link);
@@ -422,10 +435,9 @@ i915_gem_request_alloc(struct intel_engi
 	INIT_LIST_HEAD(&req->active_list);
 	req->i915 = dev_priv;
 	req->engine = engine;
-	req->ctx = i915_gem_context_get(ctx);
+	req->ctx = ctx;
 
 	/* No zalloc, must clear what we need by hand */
-	req->previous_context = NULL;
 	req->file_priv = NULL;
 	req->batch = NULL;
 
@@ -443,7 +455,7 @@ i915_gem_request_alloc(struct intel_engi
 	else
 		ret = intel_ring_alloc_request_extras(req);
 	if (ret)
-		goto err_ctx;
+		goto err;
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
@@ -454,10 +466,9 @@ i915_gem_request_alloc(struct intel_engi
 
 	return req;
 
-err_ctx:
-	i915_gem_context_put(ctx);
 err:
 	kmem_cache_free(dev_priv->requests, req);
+	engine->context_unpin(engine, ctx);
 	return ERR_PTR(ret);
 }
 
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -111,17 +111,6 @@ struct drm_i915_gem_request {
 	/** Preallocate space in the ring for the emitting the request */
 	u32 reserved_space;
 
-	/**
-	 * Context related to the previous request.
-	 * As the contexts are accessed by the hardware until the switch is
-	 * completed to a new context, the hardware may still be writing
-	 * to the context object after the breadcrumb is visible. We must
-	 * not unpin/unbind/prune that object whilst still active and so
-	 * we keep the previous context pinned until the following (this)
-	 * request is retired.
-	 */
-	struct i915_gem_context *previous_context;
-
 	/** Batch buffer related to this request if any (used for
 	 * error state dump only).
 	 */
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12306,7 +12306,8 @@ static int intel_crtc_page_flip(struct d
 							    &obj->base.dev->struct_mutex);
 		schedule_work(&work->mmio_work);
 	} else {
-		request = i915_gem_request_alloc(engine, engine->last_context);
+		request = i915_gem_request_alloc(engine,
+						 dev_priv->kernel_context);
 		if (IS_ERR(request)) {
 			ret = PTR_ERR(request);
 			goto cleanup_unpin;
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -297,11 +297,26 @@ int intel_engine_init_common(struct inte
 {
 	int ret;
 
-	ret = intel_engine_init_breadcrumbs(engine);
+	/* We may need to do things with the shrinker which
+	 * require us to immediately switch back to the default
+	 * context. This can cause a problem as pinning the
+	 * default context also requires GTT space which may not
+	 * be available. To avoid this we always pin the default
+	 * context.
+	 */
+	ret = engine->context_pin(engine, engine->i915->kernel_context);
 	if (ret)
 		return ret;
 
+	ret = intel_engine_init_breadcrumbs(engine);
+	if (ret)
+		goto err_unpin;
+
 	return 0;
+
+err_unpin:
+	engine->context_unpin(engine, engine->i915->kernel_context);
+	return ret;
 }
 
 /**
@@ -318,4 +333,6 @@ void intel_engine_cleanup_common(struct
 	intel_engine_fini_breadcrumbs(engine);
 	intel_engine_cleanup_cmd_parser(engine);
 	i915_gem_batch_pool_fini(&engine->batch_pool);
+
+	engine->context_unpin(engine, engine->i915->kernel_context);
 }
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -230,8 +230,6 @@ enum {
 
 static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
 					    struct intel_engine_cs *engine);
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
-				struct intel_engine_cs *engine);
 static void execlists_init_reg_state(u32 *reg_state,
 				     struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine,
@@ -627,10 +625,6 @@ int intel_logical_ring_alloc_request_ext
 
 	request->ring = ce->ring;
 
-	ret = intel_lr_context_pin(request->ctx, engine);
-	if (ret)
-		return ret;
-
 	if (i915.enable_guc_submission) {
 		/*
 		 * Check that the GuC has space for the request before
@@ -639,7 +633,7 @@ int intel_logical_ring_alloc_request_ext
 		 */
 		ret = i915_guc_wq_reserve(request);
 		if (ret)
-			goto err_unpin;
+			goto err;
 	}
 
 	ret = intel_ring_begin(request, 0);
@@ -667,8 +661,7 @@ int intel_logical_ring_alloc_request_ext
 err_unreserve:
 	if (i915.enable_guc_submission)
 		i915_guc_wq_unreserve(request);
-err_unpin:
-	intel_lr_context_unpin(request->ctx, engine);
+err:
 	return ret;
 }
 
@@ -685,7 +678,6 @@ static int
 intel_logical_ring_advance(struct drm_i915_gem_request *request)
 {
 	struct intel_ring *ring = request->ring;
-	struct intel_engine_cs *engine = request->engine;
 
 	intel_ring_advance(ring);
 	request->tail = ring->tail;
@@ -700,20 +692,11 @@ intel_logical_ring_advance(struct drm_i9
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 	request->wa_tail = ring->tail;
-
-	/* We keep the previous context alive until we retire the following
-	 * request. This ensures that any the context object is still pinned
-	 * for any residual writes the HW makes into it on the context switch
-	 * into the next object following the breadcrumb. Otherwise, we may
-	 * retire the context too early.
-	 */
-	request->previous_context = engine->last_context;
-	engine->last_context = request->ctx;
 	return 0;
 }
 
-static int intel_lr_context_pin(struct i915_gem_context *ctx,
-				struct intel_engine_cs *engine)
+static int execlists_context_pin(struct intel_engine_cs *engine,
+				 struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 	void *vaddr;
@@ -724,6 +707,12 @@ static int intel_lr_context_pin(struct i
 	if (ce->pin_count++)
 		return 0;
 
+	if (!ce->state) {
+		ret = execlists_context_deferred_alloc(ctx, engine);
+		if (ret)
+			goto err;
+	}
+
 	ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN,
 			   PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL);
 	if (ret)
@@ -765,8 +754,8 @@ err:
 	return ret;
 }
 
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine)
+static void execlists_context_unpin(struct intel_engine_cs *engine,
+				    struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 
@@ -1670,13 +1659,12 @@ void intel_logical_ring_cleanup(struct i
 	if (engine->cleanup)
 		engine->cleanup(engine);
 
-	intel_engine_cleanup_common(engine);
-
 	if (engine->status_page.vma) {
 		i915_gem_object_unpin_map(engine->status_page.vma->obj);
 		engine->status_page.vma = NULL;
 	}
-	intel_lr_context_unpin(dev_priv->kernel_context, engine);
+
+	intel_engine_cleanup_common(engine);
 
 	lrc_destroy_wa_ctx_obj(engine);
 	engine->i915 = NULL;
@@ -1696,6 +1684,10 @@ logical_ring_default_vfuncs(struct intel
 	/* Default vfuncs which can be overriden by each engine. */
 	engine->init_hw = gen8_init_common_ring;
 	engine->reset_hw = reset_common_ring;
+
+	engine->context_pin = execlists_context_pin;
+	engine->context_unpin = execlists_context_unpin;
+
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_request = gen8_emit_request;
 	engine->submit_request = execlists_submit_request;
@@ -1776,18 +1768,6 @@ logical_ring_init(struct intel_engine_cs
 	if (ret)
 		goto error;
 
-	ret = execlists_context_deferred_alloc(dctx, engine);
-	if (ret)
-		goto error;
-
-	/* As this is the default context, always pin it */
-	ret = intel_lr_context_pin(dctx, engine);
-	if (ret) {
-		DRM_ERROR("Failed to pin context for %s: %d\n",
-			  engine->name, ret);
-		goto error;
-	}
-
 	/* And setup the hardware status page. */
 	ret = lrc_setup_hws(engine, dctx->engine[engine->id].state);
 	if (ret) {
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -79,13 +79,10 @@ int intel_engines_init(struct drm_device
 #define LRC_PPHWSP_PN	(LRC_GUCSHR_PN + 1)
 #define LRC_STATE_PN	(LRC_PPHWSP_PN + 1)
 
+struct drm_i915_private;
 struct i915_gem_context;
 
 uint32_t intel_lr_context_size(struct intel_engine_cs *engine);
-void intel_lr_context_unpin(struct i915_gem_context *ctx,
-			    struct intel_engine_cs *engine);
-
-struct drm_i915_private;
 
 void intel_lr_context_resume(struct drm_i915_private *dev_priv);
 uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -6828,7 +6828,7 @@ static void __intel_autoenable_gt_powers
 		goto out;
 
 	rcs = &dev_priv->engine[RCS];
-	if (rcs->last_context)
+	if (rcs->last_retired_context)
 		goto out;
 
 	if (!rcs->init_context)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2019,8 +2019,26 @@ intel_ring_free(struct intel_ring *ring)
 	kfree(ring);
 }
 
-static int intel_ring_context_pin(struct i915_gem_context *ctx,
-				  struct intel_engine_cs *engine)
+static int context_pin(struct i915_gem_context *ctx, unsigned int flags)
+{
+	struct i915_vma *vma = ctx->engine[RCS].state;
+	int ret;
+
+	/* Clear this page out of any CPU caches for coherent swap-in/out.
+	 * We only want to do this on the first bind so that we do not stall
+	 * on an active context (which by nature is already on the GPU).
+	 */
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+		if (ret)
+			return ret;
+	}
+
+	return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags);
+}
+
+static int intel_ring_context_pin(struct intel_engine_cs *engine,
+				  struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 	int ret;
@@ -2031,12 +2049,13 @@ static int intel_ring_context_pin(struct
 		return 0;
 
 	if (ce->state) {
-		ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false);
-		if (ret)
-			goto error;
+		unsigned int flags;
+
+		flags = 0;
+		if (ctx == ctx->i915->kernel_context)
+			flags = PIN_HIGH;
 
-		ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment,
-				   PIN_GLOBAL | PIN_HIGH);
+		ret = context_pin(ctx, flags);
 		if (ret)
 			goto error;
 	}
@@ -2059,12 +2078,13 @@ error:
 	return ret;
 }
 
-static void intel_ring_context_unpin(struct i915_gem_context *ctx,
-				     struct intel_engine_cs *engine)
+static void intel_ring_context_unpin(struct intel_engine_cs *engine,
+				     struct i915_gem_context *ctx)
 {
 	struct intel_context *ce = &ctx->engine[engine->id];
 
 	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
+	GEM_BUG_ON(ce->pin_count == 0);
 
 	if (--ce->pin_count)
 		return;
@@ -2092,17 +2112,6 @@ static int intel_init_ring_buffer(struct
 	if (ret)
 		goto error;
 
-	/* We may need to do things with the shrinker which
-	 * require us to immediately switch back to the default
-	 * context. This can cause a problem as pinning the
-	 * default context also requires GTT space which may not
-	 * be available. To avoid this we always pin the default
-	 * context.
-	 */
-	ret = intel_ring_context_pin(dev_priv->kernel_context, engine);
-	if (ret)
-		goto error;
-
 	ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE);
 	if (IS_ERR(ring)) {
 		ret = PTR_ERR(ring);
@@ -2164,8 +2173,6 @@ void intel_engine_cleanup(struct intel_e
 
 	intel_engine_cleanup_common(engine);
 
-	intel_ring_context_unpin(dev_priv->kernel_context, engine);
-
 	engine->i915 = NULL;
 }
 
@@ -2183,12 +2190,15 @@ int intel_ring_alloc_request_extras(stru
 {
 	int ret;
 
+	GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count);
+
 	/* Flush enough space to reduce the likelihood of waiting after
 	 * we start building the request - in which case we will just
 	 * have to repeat work.
 	 */
 	request->reserved_space += LEGACY_REQUEST_SIZE;
 
+	GEM_BUG_ON(!request->engine->buffer);
 	request->ring = request->engine->buffer;
 
 	ret = intel_ring_begin(request, 0);
@@ -2666,6 +2676,9 @@ static void intel_ring_default_vfuncs(st
 	engine->init_hw = init_ring_common;
 	engine->reset_hw = reset_ring_common;
 
+	engine->context_pin = intel_ring_context_pin;
+	engine->context_unpin = intel_ring_context_unpin;
+
 	engine->emit_request = i9xx_emit_request;
 	if (i915.semaphores)
 		engine->emit_request = gen6_sema_emit_request;
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -212,6 +212,10 @@ struct intel_engine_cs {
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	int		(*context_pin)(struct intel_engine_cs *engine,
+				       struct i915_gem_context *ctx);
+	void		(*context_unpin)(struct intel_engine_cs *engine,
+					 struct i915_gem_context *ctx);
 	int		(*init_context)(struct drm_i915_gem_request *req);
 
 	int		(*emit_flush)(struct drm_i915_gem_request *request,
@@ -337,7 +341,24 @@ struct intel_engine_cs {
 	 */
 	struct i915_gem_active last_request;
 
-	struct i915_gem_context *last_context;
+	/* Contexts are pinned whilst they are active on the GPU. The last
+	 * context executed remains active whilst the GPU is idle - the
+	 * switch away and write to the context object only occurs on the
+	 * next execution.  Contexts are only unpinned on retirement of the
+	 * following request ensuring that we can always write to the object
+	 * on the context switch even after idling. Across suspend, we switch
+	 * to the kernel context and trash it as the save may not happen
+	 * before the hardware is powered down.
+	 */
+	struct i915_gem_context *last_retired_context;
+
+	/* We track the current MI_SET_CONTEXT in order to eliminate
+	 * redudant context switches. This presumes that requests are not
+	 * reordered! Or when they are the tracking is updated along with
+	 * the emission of individual requests into the legacy command
+	 * stream (ring).
+	 */
+	struct i915_gem_context *legacy_active_context;
 
 	struct intel_engine_hangcheck hangcheck;