File melonds_hires-software-renderer.patch of Package melonds

From fdfbfce11bb0be6d45bf23538c6d33a7ec0c622c Mon Sep 17 00:00:00 2001
From: v-fox <virtuousfox@gmail.com>
Date: Mon, 18 Sep 2023 00:28:52 +0500
Subject: [PATCH] rebase of "Hi-res software rendering" PR from
 https://github.com/melonDS-emu/melonDS/pull/1009

---
 src/GPU.cpp                                 |  26 +-
 src/GPU.h                                   |   3 +-
 src/GPU2D.cpp                               |  38 +--
 src/GPU2D.h                                 |   7 +-
 src/GPU2D_Soft.cpp                          | 269 +++++++++++++-------
 src/GPU2D_Soft.h                            |  20 +-
 src/GPU3D.cpp                               | 197 +++++++-------
 src/GPU3D.h                                 |   5 +
 src/GPU3D_OpenGL.cpp                        |  43 ++--
 src/GPU3D_OpenGL.h                          |   2 +-
 src/GPU3D_Soft.cpp                          | 126 ++++++---
 src/GPU3D_Soft.h                            |  23 +-
 src/GPU_OpenGL.cpp                          |   2 +-
 src/Savestate.h                             |   2 +-
 src/frontend/FrontendUtil.h                 |   1 +
 src/frontend/Util_Video.cpp                 |  25 +-
 src/frontend/qt_sdl/VideoSettingsDialog.cpp |  23 +-
 src/frontend/qt_sdl/VideoSettingsDialog.h   |   2 +-
 src/frontend/qt_sdl/VideoSettingsDialog.ui  |  44 +++-
 src/frontend/qt_sdl/main.cpp                | 176 +++++++------
 src/frontend/qt_sdl/main.h                  |   9 +-
 21 files changed, 630 insertions(+), 413 deletions(-)

diff --git a/src/GPU.cpp b/src/GPU.cpp
index f51748b..16eabd1 100644
--- a/src/GPU.cpp
+++ b/src/GPU.cpp
@@ -87,6 +87,7 @@ u8* VRAMPtr_BOBJ[0x8];
 int FrontBuffer;
 u32* Framebuffer[2][2];
 int Renderer = 0;
+int ScaleFactor; // resolution multiplier; zero until SetRenderSettings() copies it from RenderSettings::ScaleFactor
 
 GPU2D::Unit GPU2D_A(0);
 GPU2D::Unit GPU2D_B(1);
@@ -253,9 +254,9 @@ void Reset()
 
     size_t fbsize;
     if (GPU3D::CurrentRenderer->Accelerated)
-        fbsize = (256*3 + 1) * 192;
+        fbsize = (NATIVE_WIDTH*3 + 1) * NATIVE_HEIGHT;
     else
-        fbsize = 256 * 192;
+        fbsize = NATIVE_WIDTH * ScaleFactor * NATIVE_HEIGHT * ScaleFactor;
 
     for (size_t i = 0; i < fbsize; i++)
     {
@@ -287,9 +288,9 @@ void Stop()
 {
     int fbsize;
     if (GPU3D::CurrentRenderer->Accelerated)
-        fbsize = (256*3 + 1) * 192;
+        fbsize = (NATIVE_WIDTH*3 + 1) * NATIVE_HEIGHT;
     else
-        fbsize = 256 * 192;
+        fbsize = NATIVE_WIDTH * ScaleFactor * NATIVE_HEIGHT * ScaleFactor;
 
     memset(Framebuffer[0][0], 0, fbsize*4);
     memset(Framebuffer[0][1], 0, fbsize*4);
@@ -451,11 +452,12 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
         InitRenderer(renderer);
     }
 
+    ScaleFactor = settings.ScaleFactor;
     int fbsize;
     if (GPU3D::CurrentRenderer->Accelerated)
-        fbsize = (256*3 + 1) * 192;
+        fbsize = (NATIVE_WIDTH*3 + 1) * NATIVE_HEIGHT;
     else
-        fbsize = 256 * 192;
+        fbsize = NATIVE_WIDTH * ScaleFactor * NATIVE_HEIGHT * ScaleFactor;
 
     if (Framebuffer[0][0]) { delete[] Framebuffer[0][0]; Framebuffer[0][0] = nullptr; }
     if (Framebuffer[1][0]) { delete[] Framebuffer[1][0]; Framebuffer[1][0] = nullptr; }
@@ -474,6 +476,8 @@ void SetRenderSettings(int renderer, RenderSettings& settings)
 
     AssignFramebuffers();
 
+    GPU2D_Renderer->SetRenderSettings(ScaleFactor);
+
     if (Renderer == 0)
     {
         GPU3D::CurrentRenderer->SetRenderSettings(settings);
@@ -1012,7 +1016,7 @@ void DisplayFIFO(u32 x)
             GPU2D_A.SampleFIFO(x-11, 8);
     }
 
-    if (x < 256)
+    if (x < NATIVE_WIDTH)
     {
         // transfer the next 8 pixels
         NDS::CheckDMAs(0, 0x04);
@@ -1038,18 +1042,18 @@ void StartHBlank(u32 line)
     DispStat[0] |= (1<<1);
     DispStat[1] |= (1<<1);
 
-    if (VCount < 192)
+    if (VCount < NATIVE_HEIGHT)
     {
         // draw
         // note: this should start 48 cycles after the scanline start
-        if (line < 192)
+        if (line < NATIVE_HEIGHT)
         {
             GPU2D_Renderer->DrawScanline(line, &GPU2D_A);
             GPU2D_Renderer->DrawScanline(line, &GPU2D_B);
         }
 
         // sprites are pre-rendered one scanline in advance
-        if (line < 191)
+        if (line < NATIVE_HEIGHT - 1)
         {
             GPU2D_Renderer->DrawSprites(line+1, &GPU2D_A);
             GPU2D_Renderer->DrawSprites(line+1, &GPU2D_B);
@@ -1130,7 +1134,7 @@ void StartScanline(u32 line)
     else if (VCount == 194)
         NDS::StopDMAs(0, 0x03);
 
-    if (line < 192)
+    if (line < NATIVE_HEIGHT)
     {
         if (line == 0)
         {
diff --git a/src/GPU.h b/src/GPU.h
index 9686704..e09a436 100644
--- a/src/GPU.h
+++ b/src/GPU.h
@@ -79,6 +79,7 @@ extern GPU2D::Unit GPU2D_A;
 extern GPU2D::Unit GPU2D_B;
 
 extern int Renderer;
+extern int ScaleFactor;
 
 const u32 VRAMDirtyGranularity = 512;
 
@@ -158,7 +159,7 @@ struct RenderSettings
 {
     bool Soft_Threaded;
 
-    int GL_ScaleFactor;
+    int ScaleFactor;
     bool GL_BetterPolygons;
 };
 
diff --git a/src/GPU2D.cpp b/src/GPU2D.cpp
index d5df992..5549263 100644
--- a/src/GPU2D.cpp
+++ b/src/GPU2D.cpp
@@ -132,7 +132,7 @@ void Unit::Reset()
     DispFIFOReadPtr = 0;
     DispFIFOWritePtr = 0;
 
-    memset(DispFIFOBuffer, 0, 256*2);
+    memset(DispFIFOBuffer, 0, sizeof(DispFIFOBuffer));
 
     CaptureCnt = 0;
     CaptureLatch = false;
@@ -182,7 +182,7 @@ void Unit::DoSavestate(Savestate* file)
         file->Var32(&DispFIFOReadPtr);
         file->Var32(&DispFIFOWritePtr);
 
-        file->VarArray(DispFIFOBuffer, 256*2);
+        file->VarArray(DispFIFOBuffer, sizeof(DispFIFOBuffer));
 
         file->Var32(&CaptureCnt);
     }
@@ -422,21 +422,21 @@ void Unit::Write16(u32 addr, u16 val)
     case 0x026: BGRotD[0] = val; return;
     case 0x028:
         BGXRef[0] = (BGXRef[0] & 0xFFFF0000) | val;
-        if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0];
+        if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[0] = BGXRef[0];
         return;
     case 0x02A:
         if (val & 0x0800) val |= 0xF000;
         BGXRef[0] = (BGXRef[0] & 0xFFFF) | (val << 16);
-        if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0];
+        if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[0] = BGXRef[0];
         return;
     case 0x02C:
         BGYRef[0] = (BGYRef[0] & 0xFFFF0000) | val;
-        if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0];
+        if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[0] = BGYRef[0];
         return;
     case 0x02E:
         if (val & 0x0800) val |= 0xF000;
         BGYRef[0] = (BGYRef[0] & 0xFFFF) | (val << 16);
-        if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0];
+        if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[0] = BGYRef[0];
         return;
 
     case 0x030: BGRotA[1] = val; return;
@@ -445,21 +445,21 @@ void Unit::Write16(u32 addr, u16 val)
     case 0x036: BGRotD[1] = val; return;
     case 0x038:
         BGXRef[1] = (BGXRef[1] & 0xFFFF0000) | val;
-        if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1];
+        if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[1] = BGXRef[1];
         return;
     case 0x03A:
         if (val & 0x0800) val |= 0xF000;
         BGXRef[1] = (BGXRef[1] & 0xFFFF) | (val << 16);
-        if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1];
+        if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[1] = BGXRef[1];
         return;
     case 0x03C:
         BGYRef[1] = (BGYRef[1] & 0xFFFF0000) | val;
-        if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1];
+        if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[1] = BGYRef[1];
         return;
     case 0x03E:
         if (val & 0x0800) val |= 0xF000;
         BGYRef[1] = (BGYRef[1] & 0xFFFF) | (val << 16);
-        if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1];
+        if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[1] = BGYRef[1];
         return;
 
     case 0x040:
@@ -541,23 +541,23 @@ void Unit::Write32(u32 addr, u32 val)
         case 0x028:
             if (val & 0x08000000) val |= 0xF0000000;
             BGXRef[0] = val;
-            if (GPU::VCount < 192) BGXRefInternal[0] = BGXRef[0];
+            if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[0] = BGXRef[0];
             return;
         case 0x02C:
             if (val & 0x08000000) val |= 0xF0000000;
             BGYRef[0] = val;
-            if (GPU::VCount < 192) BGYRefInternal[0] = BGYRef[0];
+            if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[0] = BGYRef[0];
             return;
 
         case 0x038:
             if (val & 0x08000000) val |= 0xF0000000;
             BGXRef[1] = val;
-            if (GPU::VCount < 192) BGXRefInternal[1] = BGXRef[1];
+            if (GPU::VCount < NATIVE_HEIGHT) BGXRefInternal[1] = BGXRef[1];
             return;
         case 0x03C:
             if (val & 0x08000000) val |= 0xF0000000;
             BGYRef[1] = val;
-            if (GPU::VCount < 192) BGYRefInternal[1] = BGYRef[1];
+            if (GPU::VCount < NATIVE_HEIGHT) BGYRefInternal[1] = BGYRef[1];
             return;
         }
     }
@@ -625,7 +625,7 @@ void Unit::SampleFIFO(u32 offset, u32 num)
 
 u16* Unit::GetBGExtPal(u32 slot, u32 pal)
 {
-    const u32 PaletteSize = 256 * 2;
+    const u32 PaletteSize = 256 * 2; // bytes per extended palette (256 u16 colours); a palette size, not the screen width
     const u32 SlotSize = PaletteSize * 16;
     return (u16*)&(Num == 0
          ? GPU::VRAMFlat_ABGExtPal
@@ -650,13 +650,13 @@ void Unit::CheckWindows(u32 line)
 
 void Unit::CalculateWindowMask(u32 line, u8* windowMask, u8* objWindow)
 {
-    for (u32 i = 0; i < 256; i++)
+    for (u32 i = 0; i < NATIVE_WIDTH; i++)
         windowMask[i] = WinCnt[2]; // window outside
 
     if (DispCnt & (1<<15))
     {
         // OBJ window
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             if (objWindow[i])
                 windowMask[i] = WinCnt[3];
@@ -669,7 +669,7 @@ void Unit::CalculateWindowMask(u32 line, u8* windowMask, u8* objWindow)
         u8 x1 = Win1Coords[0];
         u8 x2 = Win1Coords[1];
 
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             if (i == x2)      Win1Active &= ~0x2;
             else if (i == x1) Win1Active |=  0x2;
@@ -684,7 +684,7 @@ void Unit::CalculateWindowMask(u32 line, u8* windowMask, u8* objWindow)
         u8 x1 = Win0Coords[0];
         u8 x2 = Win0Coords[1];
 
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             if (i == x2)      Win0Active &= ~0x2;
             else if (i == x1) Win0Active |=  0x2;
diff --git a/src/GPU2D.h b/src/GPU2D.h
index 5edc0a2..bf64fb5 100644
--- a/src/GPU2D.h
+++ b/src/GPU2D.h
@@ -19,6 +19,9 @@
 #ifndef GPU2D_H
 #define GPU2D_H
 
+#define NATIVE_WIDTH 256
+#define NATIVE_HEIGHT 192
+
 #include "types.h"
 #include "Savestate.h"
 
@@ -79,7 +82,7 @@ public:
     u32 DispFIFOReadPtr;
     u32 DispFIFOWritePtr;
 
-    u16 DispFIFOBuffer[256];
+    u16 DispFIFOBuffer[NATIVE_WIDTH];
 
     u32 DispCnt;
     u16 BGCnt[4];
@@ -133,6 +136,8 @@ public:
         Framebuffer[0] = unitA;
         Framebuffer[1] = unitB;
     }
+
+    virtual void SetRenderSettings(int scale) {}
 protected:
     u32* Framebuffer[2];
 
diff --git a/src/GPU2D_Soft.cpp b/src/GPU2D_Soft.cpp
index 070079a..ef6d0f4 100644
--- a/src/GPU2D_Soft.cpp
+++ b/src/GPU2D_Soft.cpp
@@ -28,12 +28,27 @@ SoftRenderer::SoftRenderer()
     // initialize mosaic table
     for (int m = 0; m < 16; m++)
     {
-        for (int x = 0; x < 256; x++)
+        for (int x = 0; x < NATIVE_WIDTH; x++)
         {
             int offset = x % (m+1);
             MosaicTable[m][x] = offset;
         }
     }
+
+    BGOBJLine = NULL;
+}
+
+SoftRenderer::~SoftRenderer()
+{
+    if (BGOBJLine) operator delete[](BGOBJLine, std::align_val_t(8)); // aligned form, matching the aligned new[] in SetRenderSettings()
+    BGOBJLine = NULL;
+}
+
+void SoftRenderer::SetRenderSettings(int scale)
+{
+    if (BGOBJLine) { operator delete[](BGOBJLine, std::align_val_t(8)); BGOBJLine = NULL; } // stay null if new[] below throws, so the destructor cannot free it twice
+    int len = GPU3D::CurrentRenderer->Accelerated ? NATIVE_WIDTH * 3 : NATIVE_WIDTH * scale * scale * 2; // 3 native-width layers when accelerated, else 2 layers of scale x scale upscaled pixels
+    BGOBJLine = new(std::align_val_t(8)) u32[len];
+}
 
 u32 SoftRenderer::ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb)
@@ -165,10 +180,12 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 {
     CurUnit = unit;
 
-    int stride = GPU3D::CurrentRenderer->Accelerated ? (256*3 + 1) : 256;
+    // software path: each native scanline becomes ScaleFactor output rows of NATIVE_WIDTH * ScaleFactor pixels, hence ScaleFactor squared
+    int pixelCount = GPU3D::CurrentRenderer->Accelerated ? NATIVE_WIDTH : NATIVE_WIDTH * GPU::ScaleFactor * GPU::ScaleFactor;
+    int stride = GPU3D::CurrentRenderer->Accelerated ? pixelCount*3 + 1 : pixelCount;
     u32* dst = &Framebuffer[CurUnit->Num][stride * line];
 
-    int n3dline = line;
+    int n3dline = GPU3D::CurrentRenderer->Accelerated ? line : line * GPU::ScaleFactor;
     line = GPU::VCount;
 
     if (CurUnit->Num == 0)
@@ -194,7 +211,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
     // scanlines that end up outside of the GPU drawing range
     // (as a result of writing to VCount) are filled white
-    if (line > 192) forceblank = true;
+    if (line > NATIVE_HEIGHT) forceblank = true;
 
     // GPU B can be completely disabled by POWCNT1
     // oddly that's not the case for GPU A
@@ -216,12 +233,12 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
     if (forceblank)
     {
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < pixelCount; i++)
             dst[i] = 0xFFFFFFFF;
 
         if (GPU3D::CurrentRenderer->Accelerated)
         {
-            dst[256*3] = 0;
+            dst[NATIVE_WIDTH*3] = 0;
         }
         return;
     }
@@ -237,7 +254,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
     {
     case 0: // screen off
         {
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < pixelCount; i++)
                 dst[i] = 0x003F3F3F;
         }
         break;
@@ -256,9 +273,9 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
             if (GPU::VRAMMap_LCDC & (1<<vrambank))
             {
                 u16* vram = (u16*)GPU::VRAM[vrambank];
-                vram = &vram[line * 256];
+                vram = &vram[line * NATIVE_WIDTH];
 
-                for (int i = 0; i < 256; i++)
+                for (int i = 0; i < NATIVE_WIDTH; i++)
                 {
                     u16 color = vram[i];
                     u8 r = (color & 0x001F) << 1;
@@ -267,10 +284,11 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
                     dst[i] = r | (g << 8) | (b << 16);
                 }
+                ExpandLine(dst);
             }
             else
             {
-                for (int i = 0; i < 256; i++)
+                for (int i = 0; i < pixelCount; i++)
                 {
                     dst[i] = 0;
                 }
@@ -280,7 +298,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
     case 3: // FIFO display
         {
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 u16 color = CurUnit->DispFIFOBuffer[i];
                 u8 r = (color & 0x001F) << 1;
@@ -289,6 +307,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
                 dst[i] = r | (g << 8) | (b << 16);
             }
+            ExpandLine(dst);
         }
         break;
     }
@@ -313,7 +332,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
 
     if (GPU3D::CurrentRenderer->Accelerated)
     {
-        dst[256*3] = masterBrightness | (CurUnit->DispCnt & 0x30000);
+        dst[NATIVE_WIDTH*3] = masterBrightness | (CurUnit->DispCnt & 0x30000);
         return;
     }
 
@@ -326,7 +345,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
             u32 factor = masterBrightness & 0x1F;
             if (factor > 16) factor = 16;
 
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < pixelCount; i++)
             {
                 dst[i] = ColorBrightnessUp(dst[i], factor, 0x0);
             }
@@ -337,7 +356,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
             u32 factor = masterBrightness & 0x1F;
             if (factor > 16) factor = 16;
 
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < pixelCount; i++)
             {
                 dst[i] = ColorBrightnessDown(dst[i], factor, 0xF);
             }
@@ -347,7 +366,7 @@ void SoftRenderer::DrawScanline(u32 line, Unit* unit)
     // convert to 32-bit BGRA
     // note: 32-bit RGBA would be more straightforward, but
     // BGRA seems to be more compatible (Direct2D soft, cairo...)
-    for (int i = 0; i < 256; i+=2)
+    for (int i = 0; i < pixelCount; i+=2)
     {
         u64 c = *(u64*)&dst[i];
 
@@ -373,6 +392,29 @@ void SoftRenderer::VBlankEnd(Unit* unitA, Unit* unitB)
 #endif
 }
 
+void SoftRenderer::ExpandLine(u32* line)
+{
+    // Upscales the NATIVE_WIDTH pixels at the start of `line` in place into ScaleFactor rows
+    // of NATIVE_WIDTH * ScaleFactor pixels each; does nothing when accelerated or at 1x.
+    const int scale = GPU::ScaleFactor;
+    if (GPU3D::CurrentRenderer->Accelerated || scale == 1)
+        return;
+
+    const u32 rowLen = NATIVE_WIDTH * scale;
+    u32* const lastRow = line + (scale - 1) * rowLen;
+
+    // stretch horizontally into the last row first, so the source pixels are not overwritten
+    for (int x = 0; x < NATIVE_WIDTH; x++)
+    {
+        for (int rep = 0; rep < scale; rep++)
+            lastRow[x * scale + rep] = line[x];
+    }
+
+    // then replicate that finished row over all the rows above it
+    for (int row = 0; row < scale - 1; row++)
+        memcpy(line + row * rowLen, lastRow, rowLen * sizeof(u32));
+}
+
 void SoftRenderer::DoCapture(u32 line, u32 width)
 {
     u32 captureCnt = CurUnit->CaptureCnt;
@@ -389,6 +431,7 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
     // TODO: handle 3D in GPU3D::CurrentRenderer->Accelerated mode!!
 
     u32* srcA;
+    u32 srcAScale = GPU3D::CurrentRenderer->Accelerated ? 1 : GPU::ScaleFactor;
     if (captureCnt & (1<<24))
     {
         srcA = _3DLine;
@@ -402,11 +445,11 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
             // but when doing display capture, we do need the composited output
             // so we do it here
 
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 u32 val1 = BGOBJLine[i];
-                u32 val2 = BGOBJLine[256+i];
-                u32 val3 = BGOBJLine[512+i];
+                u32 val2 = BGOBJLine[NATIVE_WIDTH+i];
+                u32 val3 = BGOBJLine[NATIVE_WIDTH*2+i];
 
                 u32 compmode = (val3 >> 24) & 0xF;
 
@@ -458,7 +501,7 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
     }
 
     u16* srcB = NULL;
-    u32 srcBaddr = line * 256;
+    u32 srcBaddr = line * NATIVE_WIDTH;
 
     if (captureCnt & (1<<25))
     {
@@ -487,7 +530,8 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
         {
             for (u32 i = 0; i < width; i++)
             {
-                u32 val = srcA[i];
+                u32 val = *srcA;
+                srcA += srcAScale;
 
                 // TODO: check what happens when alpha=0
 
@@ -538,7 +582,8 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
             {
                 for (u32 i = 0; i < width; i++)
                 {
-                    u32 val = srcA[i];
+                    u32 val = *srcA;
+                    srcA += srcAScale;
 
                     // TODO: check what happens when alpha=0
 
@@ -572,7 +617,8 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
             {
                 for (u32 i = 0; i < width; i++)
                 {
-                    u32 val = srcA[i];
+                    u32 val = *srcA;
+                    srcA += srcAScale;
 
                     // TODO: check what happens when alpha=0
 
@@ -601,12 +647,14 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
         if ((bgCnt[num] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \
         { \
             if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<true, DrawPixel_Accel>(line, num); \
-            else DrawBG_##type<true, DrawPixel_Normal>(line, num); \
+            else if (GPU::ScaleFactor == 1) DrawBG_##type<true, DrawPixel_Normal>(line, num); \
+            else DrawBG_##type<true, DrawPixel_HiRes>(line, num); \
         } \
         else \
         { \
             if (GPU3D::CurrentRenderer->Accelerated) DrawBG_##type<false, DrawPixel_Accel>(line, num); \
-            else DrawBG_##type<false, DrawPixel_Normal>(line, num); \
+            else if (GPU::ScaleFactor == 1) DrawBG_##type<false, DrawPixel_Normal>(line, num); \
+            else DrawBG_##type<false, DrawPixel_HiRes>(line, num); \
         } \
     } while (false)
 
@@ -616,17 +664,21 @@ void SoftRenderer::DoCapture(u32 line, u32 width)
         if ((bgCnt[2] & 0x0040) && (CurUnit->BGMosaicSize[0] > 0)) \
         { \
             if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<true, DrawPixel_Accel>(line); \
-            else DrawBG_Large<true, DrawPixel_Normal>(line); \
+            else if (GPU::ScaleFactor == 1) DrawBG_Large<true, DrawPixel_Normal>(line); \
+            else DrawBG_Large<true, DrawPixel_HiRes>(line); \
         } \
         else \
         { \
             if (GPU3D::CurrentRenderer->Accelerated) DrawBG_Large<false, DrawPixel_Accel>(line); \
-            else DrawBG_Large<false, DrawPixel_Normal>(line); \
+            else if (GPU::ScaleFactor == 1) DrawBG_Large<false, DrawPixel_Normal>(line); \
+            else DrawBG_Large<false, DrawPixel_HiRes>(line); \
         } \
     } while (false)
 
 #define DoInterleaveSprites(prio) \
-    if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); else InterleaveSprites<DrawPixel_Normal>(prio);
+    if (GPU3D::CurrentRenderer->Accelerated) InterleaveSprites<DrawPixel_Accel>(prio); \
+    else if (GPU::ScaleFactor == 1) InterleaveSprites<DrawPixel_Normal>(prio); \
+    else InterleaveSprites<DrawPixel_HiRes>(prio);
 
 template<u32 bgmode>
 void SoftRenderer::DrawScanlineBGMode(u32 line)
@@ -746,10 +798,11 @@ void SoftRenderer::DrawScanlineBGMode7(u32 line)
 
 void SoftRenderer::DrawScanline_BGOBJ(u32 line)
 {
+    int pixelCount = GPU3D::CurrentRenderer->Accelerated ? NATIVE_WIDTH : NATIVE_WIDTH * GPU::ScaleFactor * GPU::ScaleFactor;
     // forced blank disables BG/OBJ compositing
     if (CurUnit->DispCnt & (1<<7))
     {
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < pixelCount; i++)
             BGOBJLine[i] = 0xFF3F3F3F;
 
         return;
@@ -767,14 +820,14 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
         backdrop = r | (g << 8) | (b << 16) | 0x20000000;
         backdrop |= (backdrop << 32);
 
-        for (int i = 0; i < 256; i+=2)
+        for (int i = 0; i < pixelCount; i+=2)
             *(u64*)&BGOBJLine[i] = backdrop;
     }
 
     if (CurUnit->DispCnt & 0xE000)
         CurUnit->CalculateWindowMask(line, WindowMask, OBJWindow[CurUnit->Num]);
     else
-        memset(WindowMask, 0xFF, 256);
+        memset(WindowMask, 0xFF, NATIVE_WIDTH);
 
     ApplySpriteMosaicX();
     CurBGXMosaicTable = MosaicTable[CurUnit->BGMosaicSize[0]];
@@ -796,23 +849,23 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
 
     if (!GPU3D::CurrentRenderer->Accelerated)
     {
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < pixelCount; i++)
         {
             u32 val1 = BGOBJLine[i];
-            u32 val2 = BGOBJLine[256+i];
+            u32 val2 = BGOBJLine[pixelCount+i];
 
-            BGOBJLine[i] = ColorComposite(i, val1, val2);
+            BGOBJLine[i] = ColorComposite((i / GPU::ScaleFactor) & 0xFF, val1, val2);
         }
     }
     else
     {
         if (CurUnit->Num == 0)
         {
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 u32 val1 = BGOBJLine[i];
-                u32 val2 = BGOBJLine[256+i];
-                u32 val3 = BGOBJLine[512+i];
+                u32 val2 = BGOBJLine[NATIVE_WIDTH+i];
+                u32 val3 = BGOBJLine[NATIVE_WIDTH*2+i];
 
                 u32 flag1 = val1 >> 24;
                 u32 flag2 = val2 >> 24;
@@ -834,8 +887,8 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
                     // 3D on top, blending
 
                     BGOBJLine[i]     = val2;
-                    BGOBJLine[256+i] = ColorComposite(i, val2, val3);
-                    BGOBJLine[512+i] = 0x04000000;
+                    BGOBJLine[NATIVE_WIDTH+i] = ColorComposite(i, val2, val3);
+                    BGOBJLine[NATIVE_WIDTH*2+i] = 0x04000000;
                 }
                 else if ((flag1 & 0xC0) == 0x40)
                 {
@@ -846,8 +899,8 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
                     if (!(WindowMask[i] & 0x20))       bldcnteffect = 0;
 
                     BGOBJLine[i]     = val2;
-                    BGOBJLine[256+i] = ColorComposite(i, val2, val3);
-                    BGOBJLine[512+i] = (bldcnteffect << 24) | (CurUnit->EVY << 8);
+                    BGOBJLine[NATIVE_WIDTH+i] = ColorComposite(i, val2, val3);
+                    BGOBJLine[NATIVE_WIDTH*2+i] = (bldcnteffect << 24) | (CurUnit->EVY << 8);
                 }
                 else if (((flag2 & 0xC0) == 0x40) && ((CurUnit->BlendCnt & 0x01C0) == 0x0140))
                 {
@@ -869,29 +922,29 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
                         bldcnteffect = 7;
 
                     BGOBJLine[i]     = val1;
-                    BGOBJLine[256+i] = ColorComposite(i, val1, val3);
-                    BGOBJLine[512+i] = (bldcnteffect << 24) | (CurUnit->EVB << 16) | (CurUnit->EVA << 8);
+                    BGOBJLine[NATIVE_WIDTH+i] = ColorComposite(i, val1, val3);
+                    BGOBJLine[NATIVE_WIDTH*2+i] = (bldcnteffect << 24) | (CurUnit->EVB << 16) | (CurUnit->EVA << 8);
                 }
                 else
                 {
                     // no potential 3D pixel involved
 
                     BGOBJLine[i]     = ColorComposite(i, val1, val2);
-                    BGOBJLine[256+i] = 0;
-                    BGOBJLine[512+i] = 0x07000000;
+                    BGOBJLine[NATIVE_WIDTH+i] = 0;
+                    BGOBJLine[NATIVE_WIDTH*2+i] = 0x07000000;
                 }
             }
         }
         else
         {
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 u32 val1 = BGOBJLine[i];
-                u32 val2 = BGOBJLine[256+i];
+                u32 val2 = BGOBJLine[NATIVE_WIDTH+i];
 
                 BGOBJLine[i]     = ColorComposite(i, val1, val2);
-                BGOBJLine[256+i] = 0;
-                BGOBJLine[512+i] = 0x07000000;
+                BGOBJLine[NATIVE_WIDTH+i] = 0;
+                BGOBJLine[NATIVE_WIDTH*2+i] = 0x07000000;
             }
         }
     }
@@ -914,54 +967,92 @@ void SoftRenderer::DrawScanline_BGOBJ(u32 line)
 }
 
 
-void SoftRenderer::DrawPixel_Normal(u32* dst, u16 color, u32 flag)
+void SoftRenderer::DrawPixel_Normal(u32* dst, u32 index, u16 color, u32 flag)
 {
+    dst += index; // callers now pass the line base plus a native x index instead of a pre-offset pointer
+
     u8 r = (color & 0x001F) << 1;
     u8 g = (color & 0x03E0) >> 4;
     u8 b = (color & 0x7C00) >> 9;
     //g |= ((color & 0x8000) >> 15);
 
-    *(dst+256) = *dst;
+    *(dst+NATIVE_WIDTH) = *dst; // push the previous top pixel down to the second layer
     *dst = r | (g << 8) | (b << 16) | flag;
 }
 
-void SoftRenderer::DrawPixel_Accel(u32* dst, u16 color, u32 flag)
+void SoftRenderer::DrawPixel_HiRes(u32* dst, u32 index, u16 color, u32 flag)
 {
+    // Software hi-res variant of DrawPixel_Normal: paints one native pixel as a ScaleFactor x ScaleFactor block.
+    // dst is always BGOBJLine; index is the native x position (as if ScaleFactor were 1).
+    dst += index * GPU::ScaleFactor; // left edge of the block in the first upscaled row
+
     u8 r = (color & 0x001F) << 1;
     u8 g = (color & 0x03E0) >> 4;
     u8 b = (color & 0x7C00) >> 9;
+    u32 value = r | (g << 8) | (b << 16) | flag;
 
-    *(dst+512) = *(dst+256);
-    *(dst+256) = *dst;
+    u32 lineLength = NATIVE_WIDTH * GPU::ScaleFactor; // pixels in one upscaled row
+    u32 pixelCount = lineLength * GPU::ScaleFactor; // pixels in one layer; the second layer starts at this offset
+    for (int y = 0; y < GPU::ScaleFactor; y++)
+    {
+        u32* addr = dst + y * lineLength;
+        for (int x = 0; x < GPU::ScaleFactor; x++)
+        {
+            addr[x + pixelCount] = addr[x]; // push the previous top pixel down to the second layer
+            addr[x] = value;
+        }
+    }
+}
+
+void SoftRenderer::DrawPixel_Accel(u32* dst, u32 index, u16 color, u32 flag)
+{
+    dst += index; // callers pass the line base plus a native x index
+
+    u8 r = (color & 0x001F) << 1;
+    u8 g = (color & 0x03E0) >> 4;
+    u8 b = (color & 0x7C00) >> 9;
+
+    *(dst+NATIVE_WIDTH*2) = *(dst+NATIVE_WIDTH); // shift the second layer down to the third
+    *(dst+NATIVE_WIDTH) = *dst; // shift the top layer down to the second
     *dst = r | (g << 8) | (b << 16) | flag;
 }
 
 void SoftRenderer::DrawBG_3D()
 {
-    int i = 0;
-
     if (GPU3D::CurrentRenderer->Accelerated)
     {
-        for (i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             if (!(WindowMask[i] & 0x01)) continue;
 
-            BGOBJLine[i+512] = BGOBJLine[i+256];
-            BGOBJLine[i+256] = BGOBJLine[i];
+            BGOBJLine[i+NATIVE_WIDTH*2] = BGOBJLine[i+NATIVE_WIDTH];
+            BGOBJLine[i+NATIVE_WIDTH] = BGOBJLine[i];
             BGOBJLine[i] = 0x40000000; // 3D-layer placeholder
         }
     }
     else
     {
-        for (i = 0; i < 256; i++)
+        int pixelCount = NATIVE_WIDTH * GPU::ScaleFactor * GPU::ScaleFactor; // pixels in one layer; the second layer starts at this offset
+        // rows of the 3D line buffer are stride3D apart (soft renderer has a 1-pixel border), not NATIVE_WIDTH * ScaleFactor
+        u32 stride3D = GPU3D::CurrentRenderer->GetStride();
+        for (int x = 0; x < NATIVE_WIDTH; x++)
         {
-            u32 c = _3DLine[i];
+            if (!(WindowMask[x] & 0x01)) // the window test stays per native pixel
+                continue;
 
-            if ((c >> 24) == 0) continue;
-            if (!(WindowMask[i] & 0x01)) continue;
+            for (int y = 0; y < GPU::ScaleFactor; y++)
+            {
+                u32* src = _3DLine + y * stride3D + x * GPU::ScaleFactor;
+                u32* dst = BGOBJLine + y * NATIVE_WIDTH * GPU::ScaleFactor + x * GPU::ScaleFactor;
+                for (int i = 0; i < GPU::ScaleFactor; i++)
+                {
+                    u32 c = src[i];
+                    if ((c >> 24) == 0) continue; // skip 3D pixels whose top byte is zero (presumably alpha)
 
-            BGOBJLine[i+256] = BGOBJLine[i];
-            BGOBJLine[i] = c | 0x40000000;
+                    dst[i+pixelCount] = dst[i]; // push the current top pixel down to the second layer
+                    dst[i] = c | 0x40000000;
+                }
+            }
         }
     }
 }
@@ -1041,7 +1132,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum)
 
         if (mosaic) lastxpos = xoff;
 
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             u32 xpos;
             if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
@@ -1069,7 +1160,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum)
                 color = bgvram[(pixelsaddr + tilexoff) & bgvrammask];
 
                 if (color)
-                    drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+                    drawPixel(BGOBJLine, i, curpal[color], 0x01000000<<bgnum);
             }
 
             xoff++;
@@ -1090,7 +1181,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum)
 
         if (mosaic) lastxpos = xoff;
 
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             u32 xpos;
             if (mosaic) xpos = xoff - CurBGXMosaicTable[i];
@@ -1122,7 +1213,7 @@ void SoftRenderer::DrawBG_Text(u32 line, u32 bgnum)
                 }
 
                 if (color)
-                    drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+                    drawPixel(BGOBJLine, i, curpal[color], 0x01000000<<bgnum);
             }
 
             xoff++;
@@ -1191,7 +1282,7 @@ void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum)
 
     yshift -= 3;
 
-    for (int i = 0; i < 256; i++)
+    for (int i = 0; i < NATIVE_WIDTH; i++)
     {
         if (WindowMask[i] & (1<<bgnum))
         {
@@ -1219,7 +1310,7 @@ void SoftRenderer::DrawBG_Affine(u32 line, u32 bgnum)
                 color = bgvram[(tilesetaddr + (curtile << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
 
                 if (color)
-                    drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
+                    drawPixel(BGOBJLine, i, pal[color], 0x01000000<<bgnum);
             }
         }
 
@@ -1296,7 +1387,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
 
             u16 color;
 
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 if (WindowMask[i] & (1<<bgnum))
                 {
@@ -1318,7 +1409,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
                         color = *(u16*)&bgvram[(tilemapaddr + (((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) << 1)) & bgvrammask];
 
                         if (color & 0x8000)
-                            drawPixel(&BGOBJLine[i], color, 0x01000000<<bgnum);
+                            drawPixel(BGOBJLine, i, color, 0x01000000<<bgnum);
                     }
                 }
 
@@ -1335,7 +1426,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
 
             u8 color;
 
-            for (int i = 0; i < 256; i++)
+            for (int i = 0; i < NATIVE_WIDTH; i++)
             {
                 if (WindowMask[i] & (1<<bgnum))
                 {
@@ -1357,7 +1448,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
                         color = bgvram[(tilemapaddr + (((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
 
                         if (color)
-                            drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<bgnum);
+                            drawPixel(BGOBJLine, i, pal[color], 0x01000000<<bgnum);
                     }
                 }
 
@@ -1405,7 +1496,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
 
         yshift -= 3;
 
-        for (int i = 0; i < 256; i++)
+        for (int i = 0; i < NATIVE_WIDTH; i++)
         {
             if (WindowMask[i] & (1<<bgnum))
             {
@@ -1439,7 +1530,7 @@ void SoftRenderer::DrawBG_Extended(u32 line, u32 bgnum)
                     color = bgvram[(tilesetaddr + ((curtile & 0x03FF) << 6) + (tileyoff << 3) + tilexoff) & bgvrammask];
 
                     if (color)
-                        drawPixel(&BGOBJLine[i], curpal[color], 0x01000000<<bgnum);
+                        drawPixel(BGOBJLine, i, curpal[color], 0x01000000<<bgnum);
                 }
             }
 
@@ -1512,7 +1603,7 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2
 
     u8 color;
 
-    for (int i = 0; i < 256; i++)
+    for (int i = 0; i < NATIVE_WIDTH; i++)
     {
         if (WindowMask[i] & (1<<2))
         {
@@ -1534,7 +1625,7 @@ void SoftRenderer::DrawBG_Large(u32 line) // BG is always BG2
                 color = bgvram[((((finalY & ymask) >> 8) << yshift) + ((finalX & xmask) >> 8)) & bgvrammask];
 
                 if (color)
-                    drawPixel(&BGOBJLine[i], pal[color], 0x01000000<<2);
+                    drawPixel(BGOBJLine, i, pal[color], 0x01000000<<2);
             }
         }
 
@@ -1566,7 +1657,7 @@ void SoftRenderer::ApplySpriteMosaicX()
 
     u32 lastcolor = objLine[0];
 
-    for (u32 i = 1; i < 256; i++)
+    for (u32 i = 1; i < NATIVE_WIDTH; i++)
     {
         u32 currentcolor = objLine[i];
 
@@ -1587,7 +1678,7 @@ void SoftRenderer::InterleaveSprites(u32 prio)
     {
         u16* extpal = CurUnit->GetOBJExtPal();
 
-        for (u32 i = 0; i < 256; i++)
+        for (u32 i = 0; i < NATIVE_WIDTH; i++)
         {
             if ((objLine[i] & 0x70000) != prio) continue;
             if (!(WindowMask[i] & 0x10))        continue;
@@ -1602,14 +1693,14 @@ void SoftRenderer::InterleaveSprites(u32 prio)
             else
                 color = extpal[pixel & 0xFFF];
 
-            drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
+            drawPixel(BGOBJLine, i, color, pixel & 0xFF000000);
         }
     }
     else
     {
         // optimized no-extpal version
 
-        for (u32 i = 0; i < 256; i++)
+        for (u32 i = 0; i < NATIVE_WIDTH; i++)
         {
             if ((objLine[i] & 0x70000) != prio) continue;
             if (!(WindowMask[i] & 0x10))        continue;
@@ -1622,7 +1713,7 @@ void SoftRenderer::InterleaveSprites(u32 prio)
             else
                 color = pal[pixel & 0xFF];
 
-            drawPixel(&BGOBJLine[i], color, pixel & 0xFF000000);
+            drawPixel(BGOBJLine, i, color, pixel & 0xFF000000);
         }
     }
 }
@@ -1665,8 +1756,8 @@ void SoftRenderer::DrawSprites(u32 line, Unit* unit)
     }
 
     NumSprites[CurUnit->Num] = 0;
-    memset(OBJLine[CurUnit->Num], 0, 256*4);
-    memset(OBJWindow[CurUnit->Num], 0, 256);
+    memset(OBJLine[CurUnit->Num], 0, sizeof(OBJLine[0]));     // clear only this unit's row; sizeof(OBJLine) spans both units
+    memset(OBJWindow[CurUnit->Num], 0, sizeof(OBJWindow[0])); // same: one row, otherwise unit B's clear runs past the array
     if (!(CurUnit->DispCnt & 0x1000)) return;
 
     u16* oam = (u16*)&GPU::OAM[CurUnit->Num ? 0x400 : 0];
@@ -1794,8 +1885,8 @@ void SoftRenderer::DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight,
     if (xpos >= 0)
     {
         xoff = 0;
-        if ((xpos+boundwidth) > 256)
-            boundwidth = 256-xpos;
+        if ((xpos+boundwidth) > NATIVE_WIDTH)
+            boundwidth = NATIVE_WIDTH-xpos;
     }
     else
     {
@@ -2007,8 +2098,8 @@ void SoftRenderer::DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s
     if (xpos >= 0)
     {
         xoff = 0;
-        if ((xpos+xend) > 256)
-            xend = 256-xpos;
+        if ((xpos+xend) > NATIVE_WIDTH)
+            xend = NATIVE_WIDTH-xpos;
     }
     else
     {
diff --git a/src/GPU2D_Soft.h b/src/GPU2D_Soft.h
index a9fff97..6f0098c 100644
--- a/src/GPU2D_Soft.h
+++ b/src/GPU2D_Soft.h
@@ -27,24 +27,24 @@ class SoftRenderer : public Renderer2D
 {
 public:
     SoftRenderer();
-    ~SoftRenderer() override {}
+    ~SoftRenderer() override;
 
     void DrawScanline(u32 line, Unit* unit) override;
     void DrawSprites(u32 line, Unit* unit) override;
     void VBlankEnd(Unit* unitA, Unit* unitB) override;
 private:
-    alignas(8) u32 BGOBJLine[256*3];
+    u32* BGOBJLine;
     u32* _3DLine;
 
-    alignas(8) u8 WindowMask[256];
+    alignas(8) u8 WindowMask[NATIVE_WIDTH];
 
-    alignas(8) u32 OBJLine[2][256];
-    alignas(8) u8 OBJWindow[2][256];
+    alignas(8) u32 OBJLine[2][NATIVE_WIDTH];
+    alignas(8) u8 OBJWindow[2][NATIVE_WIDTH];
 
     u32 NumSprites[2];
 
     u8* CurBGXMosaicTable;
-    u8 MosaicTable[16][256];
+    u8 MosaicTable[16][NATIVE_WIDTH];
 
     u32 ColorBlend4(u32 val1, u32 val2, u32 eva, u32 evb);
     u32 ColorBlend5(u32 val1, u32 val2);
@@ -57,10 +57,11 @@ private:
     void DrawScanlineBGMode7(u32 line);
     void DrawScanline_BGOBJ(u32 line);
 
-    static void DrawPixel_Normal(u32* dst, u16 color, u32 flag);
-    static void DrawPixel_Accel(u32* dst, u16 color, u32 flag);
+    static void DrawPixel_Normal(u32* dst, u32 index, u16 color, u32 flag);
+    static void DrawPixel_HiRes(u32* dst, u32 index, u16 color, u32 flag);
+    static void DrawPixel_Accel(u32* dst, u32 index, u16 color, u32 flag);
 
-    typedef void (*DrawPixel)(u32* dst, u16 color, u32 flag);
+    typedef void (*DrawPixel)(u32* dst, u32 index, u16 color, u32 flag);
 
     void DrawBG_3D();
     template<bool mosaic, DrawPixel drawPixel> void DrawBG_Text(u32 line, u32 bgnum);
@@ -74,6 +75,7 @@ private:
     template<bool window> void DrawSprite_Rotscale(u32 num, u32 boundwidth, u32 boundheight, u32 width, u32 height, s32 xpos, s32 ypos);
     template<bool window> void DrawSprite_Normal(u32 num, u32 width, u32 height, s32 xpos, s32 ypos);
 
+    void ExpandLine(u32* line);
     void DoCapture(u32 line, u32 width);
 };
 
diff --git a/src/GPU3D.cpp b/src/GPU3D.cpp
index dcd5bd7..8f16d25 100644
--- a/src/GPU3D.cpp
+++ b/src/GPU3D.cpp
@@ -275,6 +275,7 @@ u32 RenderNumPolygons;
 u32 FlushRequest;
 u32 FlushAttributes;
 
+u32 CalculatePolygonMetadata(Polygon* poly);
 std::unique_ptr<GPU3D::Renderer3D> CurrentRenderer = {};
 
 bool AbortFrame;
@@ -514,7 +515,14 @@ void DoSavestate(Savestate* file)
 
         file->Bool32(&vtx->Clipped);
 
-        file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
+        if (file->IsAtleastVersion(7, 3))
+        {
+            file->VarArray(vtx->HiresPosition, sizeof(s32)*2);
+            vtx->FinalPosition[0] = (vtx->HiresPosition[0] * GPU::ScaleFactor) >> HD_SHIFT;
+            vtx->FinalPosition[1] = (vtx->HiresPosition[1] * GPU::ScaleFactor) >> HD_SHIFT;
+        }
+        else
+            file->VarArray(vtx->FinalPosition, sizeof(s32)*2);
         file->VarArray(vtx->FinalColor, sizeof(s32)*3);
     }
 
@@ -567,27 +575,33 @@ void DoSavestate(Savestate* file)
         else
             poly->Type = 0;
 
-        file->Var32(&poly->VTop);
-        file->Var32(&poly->VBottom);
-        file->Var32((u32*)&poly->YTop);
-        file->Var32((u32*)&poly->YBottom);
-        file->Var32((u32*)&poly->XTop);
-        file->Var32((u32*)&poly->XBottom);
-
-        file->Var32(&poly->SortKey);
-
-        if (!file->Saving)
+        if (!file->IsAtleastVersion(10, 3))
         {
             poly->Degenerate = false;
+            file->Var32(&poly->VTop);
+            file->Var32(&poly->VBottom);
+            file->Var32((u32*)&poly->YTop);
+            file->Var32((u32*)&poly->YBottom);
+            file->Var32((u32*)&poly->XTop);
+            file->Var32((u32*)&poly->XBottom);
 
-            for (u32 j = 0; j < poly->NumVertices; j++)
+            file->Var32(&poly->SortKey);
+
+            if (!file->Saving)
             {
-                if (poly->Vertices[j]->Position[3] == 0)
-                    poly->Degenerate = true;
-            }
+                poly->Degenerate = false;
+                
+                for (u32 j = 0; j < poly->NumVertices; j++)
+                {
+                    if (poly->Vertices[j]->Position[3] == 0)
+                        poly->Degenerate = true;
+                }
 
-            if (poly->YBottom > 192) poly->Degenerate = true;
+                if (poly->YBottom > 192) poly->Degenerate = true;
+            }
         }
+        else if (!file->Saving) // bounds/SortKey are no longer stored: rebuild them from the loaded vertices
+        { poly->Degenerate = false; CalculatePolygonMetadata(poly); } // flag is not serialized and the helper only ever sets it, so clear it first
     }
 
     // probably not worth storing the vblank-latched Renderxxxxxx variables
@@ -1182,7 +1196,7 @@ void SubmitPolygon()
         // note: the DS performs these divisions using a 32-bit divider
         // thus, if W is greater than 0xFFFF, some precision is sacrificed
         // to make the numbers fit into the divider
-        u32 posX, posY;
+        u64 posX, posY;
         u32 w = vtx->Position[3];
         if (w == 0)
         {
@@ -1203,23 +1217,15 @@ void SubmitPolygon()
             }
 
             den <<= 1;
-            posX = ((posX * Viewport[4]) / den) + Viewport[0];
-            posY = ((posY * Viewport[5]) / den) + Viewport[3];
+            posX = (((posX * Viewport[4]) << HD_SHIFT) / den) + (Viewport[0] << HD_SHIFT);
+            posY = (((posY * Viewport[5]) << HD_SHIFT) / den) + (Viewport[3] << HD_SHIFT);
         }
 
-        vtx->FinalPosition[0] = posX & 0x1FF;
-        vtx->FinalPosition[1] = posY & 0xFF;
-
-        // hi-res positions
-        // to consider: only do this when using the GL renderer? apply the aforementioned quirk to this?
-        if (w != 0)
-        {
-            posX = ((((s64)(vtx->Position[0] + w) * Viewport[4]) << 4) / (((s64)w) << 1)) + (Viewport[0] << 4);
-            posY = ((((s64)(-vtx->Position[1] + w) * Viewport[5]) << 4) / (((s64)w) << 1)) + (Viewport[3] << 4);
+        vtx->HiresPosition[0] = posX & (0x200 << HD_SHIFT) - 1;
+        vtx->HiresPosition[1] = posY & (0x100 << HD_SHIFT) - 1;
 
-            vtx->HiresPosition[0] = posX & 0x1FFF;
-            vtx->HiresPosition[1] = posY & 0xFFF;
-        }
+        vtx->FinalPosition[0] = (vtx->HiresPosition[0] * GPU::ScaleFactor) >> HD_SHIFT;
+        vtx->FinalPosition[1] = (vtx->HiresPosition[1] * GPU::ScaleFactor) >> HD_SHIFT;
     }
 
     // zero-dot W check:
@@ -1336,51 +1342,9 @@ void SubmitPolygon()
         if (vtx->FinalColor[2]) vtx->FinalColor[2] = ((vtx->FinalColor[2] << 4) + 0xF);
     }
 
-    // determine bounds of the polygon
-    // also determine the W shift and normalize W
-    // normalization works both ways
-    // (ie two W's that span 12 bits or less will be brought to 16 bits)
-
-    u32 vtop = 0, vbot = 0;
-    s32 ytop = 192, ybot = 0;
-    s32 xtop = 256, xbot = 0;
-    u32 wsize = 0;
-
-    for (int i = 0; i < nverts; i++)
-    {
-        Vertex* vtx = poly->Vertices[i];
-
-        if (vtx->FinalPosition[1] < ytop || (vtx->FinalPosition[1] == ytop && vtx->FinalPosition[0] < xtop))
-        {
-            xtop = vtx->FinalPosition[0];
-            ytop = vtx->FinalPosition[1];
-            vtop = i;
-        }
-        if (vtx->FinalPosition[1] > ybot || (vtx->FinalPosition[1] == ybot && vtx->FinalPosition[0] > xbot))
-        {
-            xbot = vtx->FinalPosition[0];
-            ybot = vtx->FinalPosition[1];
-            vbot = i;
-        }
-
-        u32 w = (u32)vtx->Position[3];
-        if (w == 0) poly->Degenerate = true;
-
-        while ((w >> wsize) && (wsize < 32))
-            wsize += 4;
-    }
-
-    poly->VTop = vtop; poly->VBottom = vbot;
-    poly->YTop = ytop; poly->YBottom = ybot;
-    poly->XTop = xtop; poly->XBottom = xbot;
-
-    if (ybot > 192) poly->Degenerate = true;
-
-    poly->SortKey = (ybot << 8) | ytop;
-    if (poly->Translucent) poly->SortKey |= 0x10000;
-
     poly->WBuffer = (FlushAttributes & 0x2);
 
+    u32 wsize = CalculatePolygonMetadata(poly);
     for (int i = 0; i < nverts; i++)
     {
         Vertex* vtx = poly->Vertices[i];
@@ -1423,6 +1387,53 @@ void SubmitPolygon()
     else
         LastStripPolygon = NULL;
 }
+u32 CalculatePolygonMetadata(Polygon* poly)
+{
+    // determine bounds of the polygon
+    // also determine the W shift and normalize W
+    // normalization works both ways
+    // (ie two W's that span 12 bits or less will be brought to 16 bits)
+
+    u32 vtop = 0, vbot = 0;
+    s32 ytop = 192 * GPU::ScaleFactor, ybot = 0;
+    s32 xtop = 256 * GPU::ScaleFactor, xbot = 0;
+    u32 wsize = 0;
+
+    for (int i = 0; i < poly->NumVertices; i++)
+    {
+        Vertex* vtx = poly->Vertices[i];
+
+        if (vtx->FinalPosition[1] < ytop || (vtx->FinalPosition[1] == ytop && vtx->FinalPosition[0] < xtop))
+        {
+            xtop = vtx->FinalPosition[0];
+            ytop = vtx->FinalPosition[1];
+            vtop = i;
+        }
+        if (vtx->FinalPosition[1] > ybot || (vtx->FinalPosition[1] == ybot && vtx->FinalPosition[0] > xbot))
+        {
+            xbot = vtx->FinalPosition[0];
+            ybot = vtx->FinalPosition[1];
+            vbot = i;
+        }
+
+        u32 w = (u32)vtx->Position[3];
+        if (w == 0) poly->Degenerate = true;
+
+        while ((w >> wsize) && (wsize < 32))
+            wsize += 4;
+    }
+
+    poly->VTop = vtop; poly->VBottom = vbot;
+    poly->YTop = ytop; poly->YBottom = ybot;
+    poly->XTop = xtop; poly->XBottom = xbot;
+
+    if (ybot > 192 * GPU::ScaleFactor) poly->Degenerate = true;
+
+    poly->SortKey = (ybot << (8 + HD_SHIFT)) | ytop;
+    if (poly->Translucent) poly->SortKey |= 0x10000 << (HD_SHIFT * 2);
+
+    return wsize;
+}
 
 void SubmitVertex()
 {
@@ -2615,7 +2626,7 @@ void SetRenderXPos(u16 xpos)
     RenderXPos = xpos & 0x01FF;
 }
 
-u32 ScrolledLine[256];
+std::vector<u32> ScrolledLine;
 
 u32* GetLine(int line)
 {
@@ -2626,30 +2637,42 @@ u32* GetLine(int line)
         if (RenderXPos == 0) return rawline;
 
         // apply X scroll
+        int scale = GPU3D::CurrentRenderer->Accelerated ? 1 : GPU::ScaleFactor;
+        int pixelCount = GPU3D::CurrentRenderer->GetStride() * scale;
+        if (ScrolledLine.size() != pixelCount)
+            ScrolledLine.resize(pixelCount);
+        u32* dst = ScrolledLine.data();
 
         if (RenderXPos & 0x100)
         {
-            int i = 0, j = RenderXPos;
-            for (; j < 512; i++, j++)
-                ScrolledLine[i] = 0;
-            for (j = 0; i < 256; i++, j++)
-                ScrolledLine[i] = rawline[j];
+            // IDK why this isn't working
+            for (int y = 0; y < scale; y++)
+            {
+                int blank = (NATIVE_WIDTH*2 - RenderXPos) * scale;
+                memset(dst, 0, blank * sizeof(u32));
+                memcpy(dst+blank, rawline, (NATIVE_WIDTH * scale - blank) * sizeof(u32));
+                dst += GPU3D::CurrentRenderer->GetStride();
+                rawline += GPU3D::CurrentRenderer->GetStride();
+            }
         }
         else
         {
-            int i = 0, j = RenderXPos;
-            for (; j < 256; i++, j++)
-                ScrolledLine[i] = rawline[j];
-            for (; i < 256; i++)
-                ScrolledLine[i] = 0;
+            for (int y = 0; y < scale; y++)
+            {
+                int nonBlank = (NATIVE_WIDTH - RenderXPos) * scale;
+                memcpy(dst, rawline + (RenderXPos * scale), nonBlank * sizeof(u32));
+                memset(dst+nonBlank, 0, (NATIVE_WIDTH * scale - nonBlank) * sizeof(u32));
+                dst += GPU3D::CurrentRenderer->GetStride();
+                rawline += GPU3D::CurrentRenderer->GetStride();
+            }
         }
     }
     else
     {
-        memset(ScrolledLine, 0, 256*4);
+        ScrolledLine.assign(GPU3D::CurrentRenderer->GetStride() * (GPU3D::CurrentRenderer->Accelerated ? 1 : GPU::ScaleFactor), 0); // size and zero the vector (same pixel count as the scroll path); memset on the vector object itself is invalid
     }
 
-    return ScrolledLine;
+    return ScrolledLine.data();
 }
 
 
diff --git a/src/GPU3D.h b/src/GPU3D.h
index 4a7bfdc..5aacfb0 100644
--- a/src/GPU3D.h
+++ b/src/GPU3D.h
@@ -25,6 +25,10 @@
 #include "GPU.h"
 #include "Savestate.h"
 
+// should be able to be as high as 7
+// limiting factor: SortKey must contain ytop and ybottom and translucent
+#define HD_SHIFT 7
+
 namespace GPU3D
 {
 
@@ -159,6 +163,7 @@ public:
     virtual void RenderFrame() = 0;
     virtual void RestartFrame() {};
     virtual u32* GetLine(int line) = 0;
+    virtual u32 GetStride() { return NATIVE_WIDTH; }
 };
 
 extern int Renderer;
diff --git a/src/GPU3D_OpenGL.cpp b/src/GPU3D_OpenGL.cpp
index ebabd51..006b2d4 100644
--- a/src/GPU3D_OpenGL.cpp
+++ b/src/GPU3D_OpenGL.cpp
@@ -255,9 +255,9 @@ bool GLRenderer::Init()
     SetupDefaultTexParams(FramebufferTex[5]);
     SetupDefaultTexParams(FramebufferTex[7]);
 
-    // downscale framebuffer for display capture (always 256x192)
+    // downscale framebuffer for display capture (always native)
     SetupDefaultTexParams(FramebufferTex[3]);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, 192, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, NATIVE_WIDTH, NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
 
     glEnable(GL_BLEND);
     glBlendEquationSeparate(GL_FUNC_ADD, GL_MAX);
@@ -315,13 +315,12 @@ void GLRenderer::Reset()
 
 void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings)
 {
-    int scale = settings.GL_ScaleFactor;
+    int scale = settings.ScaleFactor;
 
-    ScaleFactor = scale;
     BetterPolygons = settings.GL_BetterPolygons;
 
-    ScreenW = 256 * scale;
-    ScreenH = 192 * scale;
+    ScreenW = NATIVE_WIDTH * scale;
+    ScreenH = NATIVE_HEIGHT * scale;
 
     glBindTexture(GL_TEXTURE_2D, FramebufferTex[0]);
     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, ScreenW, ScreenH, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
@@ -358,7 +357,7 @@ void GLRenderer::SetRenderSettings(GPU::RenderSettings& settings)
     glBindFramebuffer(GL_FRAMEBUFFER, FramebufferID[0]);
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, PixelbufferID);
-    glBufferData(GL_PIXEL_PACK_BUFFER, 256*192*4, NULL, GL_DYNAMIC_READ);
+    glBufferData(GL_PIXEL_PACK_BUFFER, NATIVE_WIDTH*NATIVE_HEIGHT*4, NULL, GL_DYNAMIC_READ);
 
     glBindFramebuffer(GL_FRAMEBUFFER, 0);
 
@@ -425,10 +424,10 @@ u32* GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u
     while (z > 0xFFFF) { z >>= 1; zshift++; }
 
     u32 x, y;
-    if (ScaleFactor > 1)
+    if (GPU::ScaleFactor > 1)
     {
-        x = (vtx->HiresPosition[0] * ScaleFactor) >> 4;
-        y = (vtx->HiresPosition[1] * ScaleFactor) >> 4;
+        x = vtx->FinalPosition[0];
+        y = vtx->FinalPosition[1];
     }
     else
     {
@@ -450,10 +449,7 @@ u32* GLRenderer::SetupVertex(Polygon* poly, int vid, Vertex* vtx, u32 vtxattr, u
         if ((vtop->FinalPosition[1] < vtx->FinalPosition[1]) &&
             (vtx->FinalPosition[0] == vtop->FinalPosition[0]-1))
         {
-            if (ScaleFactor > 1)
-                x = (vtop->HiresPosition[0] * ScaleFactor) >> 4;
-            else
-                x = vtop->FinalPosition[0];
+            x = vtop->FinalPosition[0];
         }
     }*/
 
@@ -591,8 +587,8 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys
                 {
                     Vertex* vtx = poly->Vertices[j];
 
-                    cX += vtx->HiresPosition[0];
-                    cY += vtx->HiresPosition[1];
+                    cX += vtx->FinalPosition[0];
+                    cY += vtx->FinalPosition[1];
 
                     float fw = (float)poly->FinalW[j] * poly->NumVertices;
                     cW += 1.0f / fw;
@@ -623,9 +619,6 @@ void GLRenderer::BuildPolygons(GLRenderer::RendererPolygon* polygons, int npolys
                 cS *= cW;
                 cT *= cW;
 
-                cX = (cX * ScaleFactor) >> 4;
-                cY = (cY * ScaleFactor) >> 4;
-
                 u32 w = (u32)cW;
 
                 u32 z = (u32)cZ;
@@ -752,7 +745,7 @@ void GLRenderer::RenderSceneChunk(int y, int h)
     u32 flags = 0;
     if (RenderPolygonRAM[0]->WBuffer) flags |= RenderFlag_WBuffer;
 
-    if (h != 192) glScissor(0, y<<ScaleFactor, 256<<ScaleFactor, h<<ScaleFactor);
+    if (h != NATIVE_HEIGHT) glScissor(0, y * GPU::ScaleFactor, NATIVE_WIDTH * GPU::ScaleFactor, h * GPU::ScaleFactor);
 
     GLboolean fogenable = (RenderDispCnt & (1<<7)) ? GL_TRUE : GL_FALSE;
 
@@ -1280,7 +1273,7 @@ void GLRenderer::RenderFrame()
         glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, NumIndices * 2, IndexBuffer);
         glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, EdgeIndicesOffset * 2, NumEdgeIndices * 2, IndexBuffer + EdgeIndicesOffset);
 
-        RenderSceneChunk(0, 192);
+        RenderSceneChunk(0, NATIVE_HEIGHT);
     }
 
     FrontBuffer = FrontBuffer ? 0 : 1;
@@ -1295,20 +1288,20 @@ void GLRenderer::PrepareCaptureFrame()
     glReadBuffer(GL_COLOR_ATTACHMENT0);
     glBindFramebuffer(GL_DRAW_FRAMEBUFFER, FramebufferID[3]);
     glDrawBuffer(GL_COLOR_ATTACHMENT0);
-    glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, 256, 192, GL_COLOR_BUFFER_BIT, GL_NEAREST);
+    glBlitFramebuffer(0, 0, ScreenW, ScreenH, 0, 0, NATIVE_WIDTH, NATIVE_HEIGHT, GL_COLOR_BUFFER_BIT, GL_NEAREST);
 
     glBindFramebuffer(GL_READ_FRAMEBUFFER, FramebufferID[3]);
-    glReadPixels(0, 0, 256, 192, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
+    glReadPixels(0, 0, NATIVE_WIDTH, NATIVE_HEIGHT, GL_BGRA, GL_UNSIGNED_BYTE, NULL);
 }
 
 u32* GLRenderer::GetLine(int line)
 {
-    int stride = 256;
+    int stride = NATIVE_WIDTH;
 
     if (line == 0)
     {
         u8* data = (u8*)glMapBuffer(GL_PIXEL_PACK_BUFFER, GL_READ_ONLY);
-        if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*192);
+        if (data) memcpy(&Framebuffer[stride*0], data, 4*stride*NATIVE_HEIGHT);
         glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
     }
 
diff --git a/src/GPU3D_OpenGL.h b/src/GPU3D_OpenGL.h
index a9dd62c..b4e493b 100644
--- a/src/GPU3D_OpenGL.h
+++ b/src/GPU3D_OpenGL.h
@@ -145,7 +145,7 @@ private:
     GLuint FramebufferTex[8];
     int FrontBuffer;
     GLuint FramebufferID[4], PixelbufferID;
-    u32 Framebuffer[256*192];
+    u32 Framebuffer[NATIVE_WIDTH*NATIVE_HEIGHT];
 
 
 };
diff --git a/src/GPU3D_Soft.cpp b/src/GPU3D_Soft.cpp
index 98514ab..afe686f 100644
--- a/src/GPU3D_Soft.cpp
+++ b/src/GPU3D_Soft.cpp
@@ -77,11 +77,17 @@ SoftRenderer::SoftRenderer()
 
 }
 
+SoftRenderer::~SoftRenderer()
+{
+    DeInit();
+}
+
 bool SoftRenderer::Init()
 {
     Sema_RenderStart = Platform::Semaphore_Create();
     Sema_RenderDone = Platform::Semaphore_Create();
     Sema_ScanlineCount = Platform::Semaphore_Create();
+    Mutex_Buffer = Platform::Mutex_Create();
 
     Threaded = false;
     RenderThreadRunning = false;
@@ -94,9 +100,25 @@ void SoftRenderer::DeInit()
 {
     StopRenderThread();
 
-    Platform::Semaphore_Free(Sema_RenderStart);
-    Platform::Semaphore_Free(Sema_RenderDone);
-    Platform::Semaphore_Free(Sema_ScanlineCount);
+    if (Sema_RenderStart) Platform::Semaphore_Free(Sema_RenderStart);
+    if (Sema_RenderDone) Platform::Semaphore_Free(Sema_RenderDone);
+    if (Sema_ScanlineCount) Platform::Semaphore_Free(Sema_ScanlineCount);
+    if (Mutex_Buffer) Platform::Mutex_Free(Mutex_Buffer);
+
+    Sema_RenderStart = NULL;
+    Sema_RenderDone = NULL;
+    Sema_ScanlineCount = NULL;
+    Mutex_Buffer = NULL;
+
+    if (ColorBuffer) delete[] ColorBuffer;
+    if (DepthBuffer) delete[] DepthBuffer;
+    if (AttrBuffer) delete[] AttrBuffer;
+    if (StencilBuffer) delete[] StencilBuffer;
+
+    ColorBuffer = NULL;
+    DepthBuffer = NULL;
+    AttrBuffer = NULL;
+    StencilBuffer = NULL;
 }
 
 void SoftRenderer::Reset()
@@ -112,7 +134,30 @@ void SoftRenderer::Reset()
 
 void SoftRenderer::SetRenderSettings(GPU::RenderSettings& settings)
 {
+    Platform::Mutex_Lock(Mutex_Buffer);
+
     Threaded = settings.Soft_Threaded;
+
+    int scale = settings.ScaleFactor;
+    RenderWidth = NATIVE_WIDTH * scale;
+    RenderHeight = NATIVE_HEIGHT * scale;
+
+    ScanlineWidth = RenderWidth + 2;
+    NumScanlines = RenderHeight + 2;
+    BufferSize = ScanlineWidth * NumScanlines;
+    FirstPixelOffset = ScanlineWidth + 1;
+
+    if (ColorBuffer) delete[] ColorBuffer;
+    if (DepthBuffer) delete[] DepthBuffer;
+    if (AttrBuffer) delete[] AttrBuffer;
+    ColorBuffer = new u32[BufferSize * 2];
+    DepthBuffer = new u32[BufferSize * 2];
+    AttrBuffer = new u32[BufferSize * 2];
+
+    if (StencilBuffer) delete[] StencilBuffer;
+    StencilBuffer = new u8[NATIVE_WIDTH * scale * 2];
+
+    Platform::Mutex_Unlock(Mutex_Buffer);
     SetupRenderThread();
 }
 
@@ -695,7 +740,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         fnDepthTest = DepthTest_LessThan;
 
     if (!PrevIsShadowMask)
-        memset(&StencilBuffer[256 * (y&0x1)], 0, 256);
+        memset(&StencilBuffer[RenderWidth * (y&0x1)], 0, RenderWidth);
 
     PrevIsShadowMask = true;
 
@@ -824,7 +869,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
     edge = yedge | 0x1;
     xlimit = xstart+l_edgelen;
     if (xlimit > xend+1) xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
 
     if (!l_filledge) x = xlimit;
     else
@@ -838,13 +883,13 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         u32 dstattr = AttrBuffer[pixeladdr];
 
         if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
-            StencilBuffer[256*(y&0x1) + x] = 1;
+            StencilBuffer[RenderWidth*(y&0x1) + x] = 1;
 
         if (dstattr & 0xF)
         {
             pixeladdr += BufferSize;
             if (!fnDepthTest(DepthBuffer[pixeladdr], z, AttrBuffer[pixeladdr]))
-                StencilBuffer[256*(y&0x1) + x] |= 0x2;
+                StencilBuffer[RenderWidth*(y&0x1) + x] |= 0x2;
         }
     }
 
@@ -852,7 +897,7 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
     edge = yedge;
     xlimit = xend-r_edgelen+1;
     if (xlimit > xend+1) xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
     if (wireframe && !edge) x = std::max(x, xlimit);
     else for (; x < xlimit; x++)
     {
@@ -864,20 +909,20 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         u32 dstattr = AttrBuffer[pixeladdr];
 
         if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
-            StencilBuffer[256*(y&0x1) + x] = 1;
+            StencilBuffer[RenderWidth*(y&0x1) + x] = 1;
 
         if (dstattr & 0xF)
         {
             pixeladdr += BufferSize;
             if (!fnDepthTest(DepthBuffer[pixeladdr], z, AttrBuffer[pixeladdr]))
-                StencilBuffer[256*(y&0x1) + x] |= 0x2;
+                StencilBuffer[RenderWidth*(y&0x1) + x] |= 0x2;
         }
     }
 
     // part 3: right edge
     edge = yedge | 0x2;
     xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
     
     if (r_filledge)
     for (; x < xlimit; x++)
@@ -890,13 +935,13 @@ void SoftRenderer::RenderShadowMaskScanline(RendererPolygon* rp, s32 y)
         u32 dstattr = AttrBuffer[pixeladdr];
 
         if (!fnDepthTest(DepthBuffer[pixeladdr], z, dstattr))
-            StencilBuffer[256*(y&0x1) + x] = 1;
+            StencilBuffer[RenderWidth*(y&0x1) + x] = 1;
 
         if (dstattr & 0xF)
         {
             pixeladdr += BufferSize;
             if (!fnDepthTest(DepthBuffer[pixeladdr], z, AttrBuffer[pixeladdr]))
-                StencilBuffer[256*(y&0x1) + x] |= 0x2;
+                StencilBuffer[RenderWidth*(y&0x1) + x] |= 0x2;
         }
     }
 
@@ -1071,7 +1116,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     edge = yedge | 0x1;
     xlimit = xstart+l_edgelen;
     if (xlimit > xend+1) xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
     if (l_edgecov & (1<<31))
     {
         xcov = (l_edgecov >> 12) & 0x3FF;
@@ -1088,7 +1133,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         // check stencil buffer for shadows
         if (polygon->IsShadow)
         {
-            u8 stencil = StencilBuffer[256*(y&0x1) + x];
+            u8 stencil = StencilBuffer[RenderWidth*(y&0x1) + x];
             if (!stencil)
                 continue;
             if (!(stencil & 0x1))
@@ -1172,7 +1217,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     edge = yedge;
     xlimit = xend-r_edgelen+1;
     if (xlimit > xend+1) xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
 
     if (wireframe && !edge) x = std::max(x, xlimit);
     else
@@ -1184,7 +1229,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         // check stencil buffer for shadows
         if (polygon->IsShadow)
         {
-            u8 stencil = StencilBuffer[256*(y&0x1) + x];
+            u8 stencil = StencilBuffer[RenderWidth*(y&0x1) + x];
             if (!stencil)
                 continue;
             if (!(stencil & 0x1))
@@ -1260,7 +1305,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
     // part 3: right edge
     edge = yedge | 0x2;
     xlimit = xend+1;
-    if (xlimit > 256) xlimit = 256;
+    if (xlimit > RenderWidth) xlimit = RenderWidth;
     if (r_edgecov & (1<<31))
     {
         xcov = (r_edgecov >> 12) & 0x3FF;
@@ -1276,7 +1321,7 @@ void SoftRenderer::RenderPolygonScanline(RendererPolygon* rp, s32 y)
         // check stencil buffer for shadows
         if (polygon->IsShadow)
         {
-            u8 stencil = StencilBuffer[256*(y&0x1) + x];
+            u8 stencil = StencilBuffer[RenderWidth*(y&0x1) + x];
             if (!stencil)
                 continue;
             if (!(stencil & 0x1))
@@ -1427,7 +1472,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y)
         // edge marking
         // only applied to topmost pixels
 
-        for (int x = 0; x < 256; x++)
+        for (int x = 0; x < RenderWidth; x++)
         {
             u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
 
@@ -1475,7 +1520,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y)
         u32 fogB = (RenderFogColor >> 9) & 0x3E; if (fogB) fogB++;
         u32 fogA = (RenderFogColor >> 16) & 0x1F;
 
-        for (int x = 0; x < 256; x++)
+        for (int x = 0; x < RenderWidth; x++)
         {
             u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
             u32 density, srccolor, srcR, srcG, srcB, srcA;
@@ -1540,7 +1585,7 @@ void SoftRenderer::ScanlineFinalPass(s32 y)
         // edges were flagged and their coverages calculated during rendering
         // this is where such edge pixels are blended with the pixels underneath
 
-        for (int x = 0; x < 256; x++)
+        for (int x = 0; x < RenderWidth; x++)
         {
             u32 pixeladdr = FirstPixelOffset + (y*ScanlineWidth) + x;
 
@@ -1624,9 +1669,9 @@ void SoftRenderer::ClearBuffers()
         u8 xoff = (RenderClearAttr2 >> 16) & 0xFF;
         u8 yoff = (RenderClearAttr2 >> 24) & 0xFF;
 
-        for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
+        for (int y = 0; y < ScanlineWidth * RenderHeight; y+=ScanlineWidth)
         {
-            for (int x = 0; x < 256; x++)
+            for (int x = 0; x < RenderWidth; x++)
             {
                 u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
                 u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
@@ -1645,10 +1690,12 @@ void SoftRenderer::ClearBuffers()
                 DepthBuffer[pixeladdr] = z;
                 AttrBuffer[pixeladdr] = polyid | (val3 & 0x8000);
 
-                xoff++;
+                if ((x + 1) % GPU::ScaleFactor == 0) // step the source texel only after ScaleFactor output pixels used it
+                    xoff++;
             }
 
-            yoff++;
+            if ((y / ScanlineWidth + 1) % GPU::ScaleFactor == 0) // y is a buffer offset; turn it into a row index first
+                yoff++;
         }
     }
     else
@@ -1662,9 +1709,9 @@ void SoftRenderer::ClearBuffers()
 
         polyid |= (RenderClearAttr1 & 0x8000);
 
-        for (int y = 0; y < ScanlineWidth*192; y+=ScanlineWidth)
+        for (int y = 0; y < ScanlineWidth * RenderHeight; y+=ScanlineWidth)
         {
-            for (int x = 0; x < 256; x++)
+            for (int x = 0; x < RenderWidth; x++)
             {
                 u32 pixeladdr = FirstPixelOffset + y + x;
                 ColorBuffer[pixeladdr] = color;
@@ -1686,16 +1733,16 @@ void SoftRenderer::RenderPolygons(bool threaded, Polygon** polygons, int npolys)
 
     RenderScanline(0, j);
 
-    for (s32 y = 1; y < 192; y++)
+    for (s32 y = 1; y < RenderHeight; y++)
     {
         RenderScanline(y, j);
         ScanlineFinalPass(y-1);
 
-        if (threaded)
+        if (threaded && (y % GPU::ScaleFactor == 0))
             Platform::Semaphore_Post(Sema_ScanlineCount);
     }
 
-    ScanlineFinalPass(191);
+    ScanlineFinalPass(RenderHeight - 1);
 
     if (threaded)
         Platform::Semaphore_Post(Sema_ScanlineCount);
@@ -1723,8 +1770,11 @@ void SoftRenderer::RenderFrame()
     }
     else if (!FrameIdentical)
     {
+        // It is possible that the UI will try to update settings mid-render.
+        Platform::Mutex_Lock(Mutex_Buffer);
         ClearBuffers();
         RenderPolygons(false, &RenderPolygonRAM[0], RenderNumPolygons);
+        Platform::Mutex_Unlock(Mutex_Buffer);
     }
 }
 
@@ -1740,16 +1790,18 @@ void SoftRenderer::RenderThreadFunc()
         Platform::Semaphore_Wait(Sema_RenderStart);
         if (!RenderThreadRunning) return;
 
+        Platform::Mutex_Lock(Mutex_Buffer);
         RenderThreadRendering = true;
         if (FrameIdentical)
         {
-            Platform::Semaphore_Post(Sema_ScanlineCount, 192);
+            Platform::Semaphore_Post(Sema_ScanlineCount, NATIVE_HEIGHT);
         }
         else
         {
             ClearBuffers();
             RenderPolygons(true, &RenderPolygonRAM[0], RenderNumPolygons);
         }
+        Platform::Mutex_Unlock(Mutex_Buffer);
 
         Platform::Semaphore_Post(Sema_RenderDone);
         RenderThreadRendering = false;
@@ -1759,12 +1811,14 @@ void SoftRenderer::RenderThreadFunc()
 u32* SoftRenderer::GetLine(int line)
 {
     if (RenderThreadRunning.load(std::memory_order_relaxed))
-    {
-        if (line < 192)
-            Platform::Semaphore_Wait(Sema_ScanlineCount);
-    }
+        Platform::Semaphore_Wait(Sema_ScanlineCount);
 
     return &ColorBuffer[(line * ScanlineWidth) + FirstPixelOffset];
 }
 
+u32 SoftRenderer::GetStride()
+{
+    return ScanlineWidth;
+}
+
 }
diff --git a/src/GPU3D_Soft.h b/src/GPU3D_Soft.h
index 0c6ca43..6c9062a 100644
--- a/src/GPU3D_Soft.h
+++ b/src/GPU3D_Soft.h
@@ -29,7 +29,7 @@ class SoftRenderer : public Renderer3D
 {
 public:
     SoftRenderer();
-    virtual ~SoftRenderer() override {};
+    virtual ~SoftRenderer() override;
     virtual bool Init() override;
     virtual void DeInit() override;
     virtual void Reset() override;
@@ -40,6 +40,7 @@ public:
     virtual void RenderFrame() override;
     virtual void RestartFrame() override;
     virtual u32* GetLine(int line) override;
+    virtual u32 GetStride() override;
 
     void SetupRenderThread();
     void StopRenderThread();
@@ -474,14 +475,17 @@ private:
     // TODO: check if the hardware can accidentally plot pixels
     // offscreen in that border
 
-    static constexpr int ScanlineWidth = 258;
-    static constexpr int NumScanlines = 194;
-    static constexpr int BufferSize = ScanlineWidth * NumScanlines;
-    static constexpr int FirstPixelOffset = ScanlineWidth + 1;
+    int ScanlineWidth = NATIVE_WIDTH + 2;
+    int NumScanlines = NATIVE_HEIGHT + 2;
+    int BufferSize = ScanlineWidth * NumScanlines;
+    int FirstPixelOffset = ScanlineWidth + 1;
 
-    u32 ColorBuffer[BufferSize * 2];
-    u32 DepthBuffer[BufferSize * 2];
-    u32 AttrBuffer[BufferSize * 2];
+    int RenderWidth = NATIVE_WIDTH;
+    int RenderHeight = NATIVE_HEIGHT;
+
+    u32* ColorBuffer;
+    u32* DepthBuffer;
+    u32* AttrBuffer;
 
     // attribute buffer:
     // bit0-3: edge flags (left/right/top/bottom)
@@ -492,7 +496,7 @@ private:
     // bit22: translucent flag
     // bit24-29: polygon ID for opaque pixels
 
-    u8 StencilBuffer[256*2];
+    u8* StencilBuffer;
     bool PrevIsShadowMask;
 
     bool Enabled;
@@ -508,5 +512,6 @@ private:
     Platform::Semaphore* Sema_RenderStart;
     Platform::Semaphore* Sema_RenderDone;
     Platform::Semaphore* Sema_ScanlineCount;
+    Platform::Mutex* Mutex_Buffer;
 };
 }
\ No newline at end of file
diff --git a/src/GPU_OpenGL.cpp b/src/GPU_OpenGL.cpp
index 837d224..6f98721 100644
--- a/src/GPU_OpenGL.cpp
+++ b/src/GPU_OpenGL.cpp
@@ -143,7 +143,7 @@ void GLCompositor::Reset()
 
 void GLCompositor::SetRenderSettings(RenderSettings& settings)
 {
-    int scale = settings.GL_ScaleFactor;
+    int scale = settings.ScaleFactor;
 
     Scale = scale;
     ScreenW = 256 * scale;
diff --git a/src/Savestate.h b/src/Savestate.h
index 0aef517..03c9d88 100644
--- a/src/Savestate.h
+++ b/src/Savestate.h
@@ -25,7 +25,7 @@
 #include "types.h"
 
 #define SAVESTATE_MAJOR 10
-#define SAVESTATE_MINOR 0
+#define SAVESTATE_MINOR 3
 
 class Savestate
 {
diff --git a/src/frontend/FrontendUtil.h b/src/frontend/FrontendUtil.h
index 51f8f61..5665497 100644
--- a/src/frontend/FrontendUtil.h
+++ b/src/frontend/FrontendUtil.h
@@ -117,3 +117,4 @@ void Mic_SetExternalBuffer(s16* buffer, u32 len);
 }
 
 #endif // FRONTENDUTIL_H
+    int scaleFactor,
diff --git a/src/frontend/Util_Video.cpp b/src/frontend/Util_Video.cpp
index e4c49e9..b99489f 100644
--- a/src/frontend/Util_Video.cpp
+++ b/src/frontend/Util_Video.cpp
@@ -126,6 +126,7 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
     ScreenLayout screenLayout,
     ScreenRotation rotation,
     ScreenSizing sizing,
+    int scaleFactor,
     int screenGap,
     bool integerScale,
     bool swapScreens,
@@ -141,12 +142,15 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
         topAspect = botAspect = 1;
         HybPrevTouchScreen = 0;
     }
+    // kept as float: both values feed the float refpoints initializer and the M23_* calls (int there is a narrowing conversion)
+    const float width = 256.f * scaleFactor;
+    const float height = 192.f * scaleFactor;
 
     float refpoints[6][2] =
     {
-        {0, 0}, {256, 192},
-        {0, 0}, {256, 192},
-        {0, 0}, {256, 192}
+        {0, 0}, {width, height},
+        {0, 0}, {width, height},
+        {0, 0}, {width, height}
     };
 
     int layout = screenLayout == screenLayout_Natural
@@ -162,8 +166,8 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
     M23_Identity(BotScreenMtx);
     M23_Identity(HybScreenMtx);
 
-    M23_Translate(TopScreenMtx, -256/2, -192/2);
-    M23_Translate(BotScreenMtx, -256/2, -192/2);
+    M23_Translate(TopScreenMtx, -width/2, -height/2);
+    M23_Translate(BotScreenMtx, -width/2, -height/2);
 
     M23_Scale(TopScreenMtx, topAspect, 1);
     M23_Scale(BotScreenMtx, botAspect, 1);
@@ -223,8 +227,8 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
 
             bool moveV = rotation % 2 == layout;
 
-            float offsetBot = (moveV ? 192.0 : 256.0 * botAspect) / 2.0 + screenGap / 2.0;
-            float offsetTop = -((moveV ? 192.0 : 256.0 * topAspect) / 2.0 + screenGap / 2.0);
+            float offsetBot = (moveV ? height : width * botAspect) / 2.0 + screenGap / 2.0;
+            float offsetTop = -((moveV ? height : width * topAspect) / 2.0 + screenGap / 2.0);
 
             if ((rotation == 1 || rotation == 2) ^ swapScreens)
             {
@@ -435,7 +439,10 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
         M23_Multiply(TouchMtx, rotmtx, TouchMtx);
 
         M23_Scale(TouchMtx, 1.f/botAspect, 1);
-        M23_Translate(TouchMtx, 256/2, 192/2);
+        M23_Translate(TouchMtx, width/2, height/2);
+
+        // it should also undo the scaleFactor
+        M23_Scale(TouchMtx, 1.f / scaleFactor);
 
         if (HybEnable && HybScreen == 1)
         {
@@ -445,6 +452,8 @@ void SetupScreenLayout(int screenWidth, int screenHeight,
             M23_Scale(HybTouchMtx, 1.f/hybScale);
             M23_Multiply(HybTouchMtx, rotmtx, HybTouchMtx);
         }
+
+            M23_Scale(HybTouchMtx, 1.f / scaleFactor);
     }
 }
 
diff --git a/src/frontend/qt_sdl/VideoSettingsDialog.cpp b/src/frontend/qt_sdl/VideoSettingsDialog.cpp
index 95ec7d3..b8ad4f5 100644
--- a/src/frontend/qt_sdl/VideoSettingsDialog.cpp
+++ b/src/frontend/qt_sdl/VideoSettingsDialog.cpp
@@ -46,7 +46,7 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
     oldVSync = Config::ScreenVSync;
     oldVSyncInterval = Config::ScreenVSyncInterval;
     oldSoftThreaded = Config::Threaded3D;
-    oldGLScale = Config::GL_ScaleFactor;
+    oldGLScale = Config::ScaleFactor;
     oldGLBetterPolygons = Config::GL_BetterPolygons;
 
     grp3DRenderer = new QButtonGroup(this);
@@ -71,8 +71,8 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
     ui->cbSoftwareThreaded->setChecked(Config::Threaded3D != 0);
 
     for (int i = 1; i <= 16; i++)
-        ui->cbxGLResolution->addItem(QString("%1x native (%2x%3)").arg(i).arg(256*i).arg(192*i));
-    ui->cbxGLResolution->setCurrentIndex(Config::GL_ScaleFactor-1);
+        ui->cbxResolution->addItem(QString("%1x native (%2x%3)").arg(i).arg(256*i).arg(192*i));
+    ui->cbxResolution->setCurrentIndex(Config::ScaleFactor-1);
 
     ui->cbBetterPolygons->setChecked(Config::GL_BetterPolygons != 0);
 
@@ -84,14 +84,12 @@ VideoSettingsDialog::VideoSettingsDialog(QWidget* parent) : QDialog(parent), ui(
     {
         ui->cbGLDisplay->setEnabled(true);
         ui->cbSoftwareThreaded->setEnabled(true);
-        ui->cbxGLResolution->setEnabled(false);
         ui->cbBetterPolygons->setEnabled(false);
     }
     else
     {
         ui->cbGLDisplay->setEnabled(false);
         ui->cbSoftwareThreaded->setEnabled(false);
-        ui->cbxGLResolution->setEnabled(true);
         ui->cbBetterPolygons->setEnabled(true);
     }
 }
@@ -117,10 +115,11 @@ void VideoSettingsDialog::on_VideoSettingsDialog_rejected()
     Config::ScreenVSync = oldVSync;
     Config::ScreenVSyncInterval = oldVSyncInterval;
     Config::Threaded3D = oldSoftThreaded;
+    const int old_scale = Config::ScaleFactor; // scale in effect before the revert; compared against the restored value below
-    Config::GL_ScaleFactor = oldGLScale;
+    Config::ScaleFactor = oldGLScale;
     Config::GL_BetterPolygons = oldGLBetterPolygons;
 
-    emit updateVideoSettings(old_gl != UsesGL());
+    emit updateVideoSettings(old_gl != UsesGL() || old_scale != Config::ScaleFactor);
 
     closeDlg();
 }
@@ -141,14 +140,12 @@ void VideoSettingsDialog::onChange3DRenderer(int renderer)
     {
         ui->cbGLDisplay->setEnabled(true);
         ui->cbSoftwareThreaded->setEnabled(true);
-        ui->cbxGLResolution->setEnabled(false);
         ui->cbBetterPolygons->setEnabled(false);
     }
     else
     {
         ui->cbGLDisplay->setEnabled(false);
         ui->cbSoftwareThreaded->setEnabled(false);
-        ui->cbxGLResolution->setEnabled(true);
         ui->cbBetterPolygons->setEnabled(true);
     }
 
@@ -187,12 +184,12 @@ void VideoSettingsDialog::on_cbSoftwareThreaded_stateChanged(int state)
     emit updateVideoSettings(false);
 }
 
-void VideoSettingsDialog::on_cbxGLResolution_currentIndexChanged(int idx)
+void VideoSettingsDialog::on_cbxResolution_currentIndexChanged(int idx)
 {
     // prevent a spurious change
-    if (ui->cbxGLResolution->count() < 16) return;
+    if (ui->cbxResolution->count() < 16) return;
 
-    Config::GL_ScaleFactor = idx+1;
+    Config::ScaleFactor = idx+1;
 
     setVsyncControlEnable(UsesGL());
 
@@ -203,5 +200,5 @@ void VideoSettingsDialog::on_cbBetterPolygons_stateChanged(int state)
 {
     Config::GL_BetterPolygons = (state != 0);
 
-    emit updateVideoSettings(false);
+    emit updateVideoSettings(true);
 }
diff --git a/src/frontend/qt_sdl/VideoSettingsDialog.h b/src/frontend/qt_sdl/VideoSettingsDialog.h
index 7fee5bb..16ca438 100644
--- a/src/frontend/qt_sdl/VideoSettingsDialog.h
+++ b/src/frontend/qt_sdl/VideoSettingsDialog.h
@@ -63,7 +63,7 @@ private slots:
     void on_cbVSync_stateChanged(int state);
     void on_sbVSyncInterval_valueChanged(int val);
 
-    void on_cbxGLResolution_currentIndexChanged(int idx);
+    void on_cbxResolution_currentIndexChanged(int idx);
     void on_cbBetterPolygons_stateChanged(int state);
 
     void on_cbSoftwareThreaded_stateChanged(int state);
diff --git a/src/frontend/qt_sdl/VideoSettingsDialog.ui b/src/frontend/qt_sdl/VideoSettingsDialog.ui
index 11cfe3d..ad086bd 100644
--- a/src/frontend/qt_sdl/VideoSettingsDialog.ui
+++ b/src/frontend/qt_sdl/VideoSettingsDialog.ui
@@ -33,20 +33,6 @@
      </property>
      <layout class="QGridLayout" name="gridLayout_4">
       <item row="0" column="0">
-       <widget class="QLabel" name="label_3">
-        <property name="text">
-         <string>Internal resolution:</string>
-        </property>
-       </widget>
-      </item>
-      <item row="1" column="0">
-       <widget class="QComboBox" name="cbxGLResolution">
-        <property name="whatsThis">
-         <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;The resolution at which the 3D graphics will be rendered. Higher resolutions improve graphics quality when the main window is enlarged, but may also cause glitches.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
-        </property>
-       </widget>
-      </item>
-      <item row="2" column="0">
        <widget class="QCheckBox" name="cbBetterPolygons">
         <property name="whatsThis">
          <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Enabling this may help reduce distortion on quads and more complex polygons, but may also reduce performance.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
@@ -94,6 +80,36 @@
       <string>Display settings</string>
      </property>
      <layout class="QGridLayout" name="gridLayout_2">
+      <item row="9" column="0">
+       <widget class="QComboBox" name="cbxResolution">
+        <property name="whatsThis">
+         <string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;The resolution at which the 3D graphics will be rendered. Higher resolutions improve graphics quality when the main window is enlarged, but may also cause glitches.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
+        </property>
+       </widget>
+      </item>
+      <item row="8" column="0">
+       <widget class="QLabel" name="label_3">
+        <property name="text">
+         <string>3D resolution:</string>
+        </property>
+       </widget>
+      </item>
+      <item row="7" column="0" colspan="2">
+       <spacer name="verticalSpacer">
+        <property name="orientation">
+         <enum>Qt::Vertical</enum>
+        </property>
+        <property name="sizeType">
+         <enum>QSizePolicy::Fixed</enum>
+        </property>
+        <property name="sizeHint" stdset="0">
+         <size>
+          <width>20</width>
+          <height>20</height>
+         </size>
+        </property>
+       </spacer>
+      </item>
       <item row="6" column="0">
        <widget class="QLabel" name="label_2">
         <property name="sizePolicy">
diff --git a/src/frontend/qt_sdl/main.cpp b/src/frontend/qt_sdl/main.cpp
index 7f0c0ef..3bfcb80 100644
--- a/src/frontend/qt_sdl/main.cpp
+++ b/src/frontend/qt_sdl/main.cpp
@@ -151,6 +151,9 @@ const QStringList ArchiveExtensions
 #endif
 };
 
+int screenWidth;
+int screenHeight;
+
 
 bool RunningSomething;
 
@@ -244,24 +247,26 @@ void EmuThread::initOpenGL()
 
     // to prevent bleeding between both parts of the screen
     // with bilinear filtering enabled
-    const int paddedHeight = 192*2+2;
+    const int paddedHeight = screenHeight*2+2;
     const float padPixels = 1.f / paddedHeight;
 
-    const float vertices[] =
-    {
-        0.f,   0.f,    0.f, 0.f,
-        0.f,   192.f,  0.f, 0.5f - padPixels,
-        256.f, 192.f,  1.f, 0.5f - padPixels,
-        0.f,   0.f,    0.f, 0.f,
-        256.f, 192.f,  1.f, 0.5f - padPixels,
-        256.f, 0.f,    1.f, 0.f,
-
-        0.f,   0.f,    0.f, 0.5f + padPixels,
-        0.f,   192.f,  0.f, 1.f,
-        256.f, 192.f,  1.f, 1.f,
-        0.f,   0.f,    0.f, 0.5f + padPixels,
-        256.f, 192.f,  1.f, 1.f,
-        256.f, 0.f,    1.f, 0.5f + padPixels
+    float w = screenWidth;
+    float h = screenHeight;
+    float vertices[] =
+    {
+        0.f, 0.f,  0.f, 0.f,
+        0.f, h  ,  0.f, 0.5f - padPixels,
+        w  , h  ,  1.f, 0.5f - padPixels,
+        0.f, 0.f,  0.f, 0.f,
+        w  , h  ,  1.f, 0.5f - padPixels,
+        w  , 0.f,  1.f, 0.f,
+
+        0.f, 0.f,  0.f, 0.5f + padPixels,
+        0.f, h  ,  0.f, 1.f,
+        w  , h  ,  1.f, 1.f,
+        0.f, 0.f,  0.f, 0.5f + padPixels,
+        w  , h  ,  1.f, 1.f,
+        w  , 0.f,  1.f, 0.5f + padPixels
     };
 
     glGenBuffers(1, &screenVertexBuffer);
@@ -282,11 +287,12 @@ void EmuThread::initOpenGL()
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 256, paddedHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screenWidth, paddedHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
     // fill the padding
-    u8 zeroData[256*4*4];
-    memset(zeroData, 0, sizeof(zeroData));
-    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192, 256, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData);
+    u8* zeroData = new u8[screenWidth*4*4]; // heap buffer: the width is only known at runtime, and it is released below
+    memset(zeroData, 0, screenWidth*4*4);
+    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, screenHeight, screenWidth, 2, GL_RGBA, GL_UNSIGNED_BYTE, zeroData);
+    delete[] zeroData;
 
     OSD::Init(true);
 
@@ -310,6 +316,42 @@ void EmuThread::deinitOpenGL()
     lastScreenWidth = lastScreenHeight = -1;
 }
 
+// Pushes the current video configuration to the GPU when needed, then emits windowUpdate().
+// forceInit: apply the settings even if nothing was flagged dirty (used for the initial call from run()).
+void EmuThread::updateDisplay(bool forceInit)
+{
+    // update render settings if needed
+    // HACK:
+    // once the fast forward hotkey is released, we need to update vsync
+    // to the old setting again
+    if (forceInit || videoSettingsDirty || Input::HotkeyReleased(HK_FastForward))
+    {
+        if (oglContext)
+        {
+            oglContext->SetSwapInterval(Config::ScreenVSync ? Config::ScreenVSyncInterval : 0);
+            videoRenderer = Config::_3DRenderer;
+        }
+        else
+        {
+            videoRenderer = 0;
+        }
+
+        videoSettingsDirty = false;
+
+        videoSettings.Soft_Threaded = Config::Threaded3D != 0;
+        videoSettings.ScaleFactor = Config::ScaleFactor;
+        videoSettings.GL_BetterPolygons = Config::GL_BetterPolygons;
+
+        // paintEvent copies GPU::Framebuffer under this same lock;
+        // presumably SetRenderSettings may resize those buffers -- TODO confirm
+        FrontBufferLock.lock();
+        GPU::SetRenderSettings(videoRenderer, videoSettings);
+        FrontBufferLock.unlock();
+    }
+
+    emit windowUpdate();
+}
+
 void EmuThread::run()
 {
     u32 mainScreenPos[3];
@@ -321,23 +363,8 @@ void EmuThread::run()
     mainScreenPos[2] = 0;
     autoScreenSizing = 0;
 
-    videoSettingsDirty = false;
-    videoSettings.Soft_Threaded = Config::Threaded3D != 0;
-    videoSettings.GL_ScaleFactor = Config::GL_ScaleFactor;
-    videoSettings.GL_BetterPolygons = Config::GL_BetterPolygons;
-
-    if (mainWindow->hasOGL)
-    {
-        initOpenGL();
-        videoRenderer = Config::_3DRenderer;
-    }
-    else
-    {
-        videoRenderer = 0;
-    }
-
     GPU::InitRenderer(videoRenderer);
-    GPU::SetRenderSettings(videoRenderer, videoSettings);
+    updateDisplay(true);
 
     SPU::SetInterpolation(Config::AudioInterp);
 
@@ -431,35 +458,6 @@ void EmuThread::run()
             EmuStatus = emuStatus_Running;
             if (EmuRunning == emuStatus_FrameStep) EmuRunning = emuStatus_Paused;
 
-            // update render settings if needed
-            // HACK:
-            // once the fast forward hotkey is released, we need to update vsync
-            // to the old setting again
-            if (videoSettingsDirty || Input::HotkeyReleased(HK_FastForward))
-            {
-                if (oglContext)
-                {
-                    oglContext->SetSwapInterval(Config::ScreenVSync ? Config::ScreenVSyncInterval : 0);
-                    videoRenderer = Config::_3DRenderer;
-                }
-#ifdef OGLRENDERER_ENABLED
-                else
-#endif
-                {
-                    videoRenderer = 0;
-                }
-
-                videoRenderer = oglContext ? Config::_3DRenderer : 0;
-
-                videoSettingsDirty = false;
-
-                videoSettings.Soft_Threaded = Config::Threaded3D != 0;
-                videoSettings.GL_ScaleFactor = Config::GL_ScaleFactor;
-                videoSettings.GL_BetterPolygons = Config::GL_BetterPolygons;
-
-                GPU::SetRenderSettings(videoRenderer, videoSettings);
-            }
-
             // process input and hotkeys
             NDS::SetKeyMask(Input::InputMask);
 
@@ -617,7 +615,7 @@ void EmuThread::run()
             lastTime = SDL_GetPerformanceCounter() * perfCountsSec;
             lastMeasureTime = lastTime;
 
-            emit windowUpdate();
+            updateDisplay();
 
             EmuStatus = EmuRunning;
 
@@ -766,9 +764,9 @@ void EmuThread::drawScreenGL()
 
         if (GPU::Framebuffer[frontbuf][0] && GPU::Framebuffer[frontbuf][1])
         {
-            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 256, 192, GL_RGBA,
+            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, screenWidth, screenHeight, GL_RGBA,
                             GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][0]);
-            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 192+2, 256, 192, GL_RGBA,
+            glTexSubImage2D(GL_TEXTURE_2D, 0, 0, screenHeight+2, screenWidth, screenHeight, GL_RGBA,
                             GL_UNSIGNED_BYTE, GPU::Framebuffer[frontbuf][1]);
         }
     }
@@ -835,6 +833,7 @@ void ScreenHandler::screenSetupLayout(int w, int h)
                                 static_cast<Frontend::ScreenRotation>(Config::ScreenRotation),
                                 static_cast<Frontend::ScreenSizing>(sizing),
                                 Config::ScreenGap,
+                                Config::ScaleFactor,
                                 Config::IntegerScaling != 0,
                                 Config::ScreenSwap != 0,
                                 aspectTop,
@@ -849,8 +848,8 @@ QSize ScreenHandler::screenGetMinSize(int factor = 1)
         || Config::ScreenRotation == Frontend::screenRot_270Deg);
     int gap = Config::ScreenGap * factor;
 
-    int w = 256 * factor;
-    int h = 192 * factor;
+    int w = NATIVE_WIDTH * factor;
+    int h = NATIVE_HEIGHT * factor;
 
     if (Config::ScreenSizing == Frontend::screenSizing_TopOnly
         || Config::ScreenSizing == Frontend::screenSizing_BotOnly)
@@ -1013,8 +1012,8 @@ QTimer* ScreenHandler::setupMouseTimer()
 
 ScreenPanelNative::ScreenPanelNative(QWidget* parent) : QWidget(parent), ScreenHandler(this)
 {
-    screen[0] = QImage(256, 192, QImage::Format_RGB32);
-    screen[1] = QImage(256, 192, QImage::Format_RGB32);
+    screen[0] = QImage(screenWidth, screenHeight, QImage::Format_RGB32);
+    screen[1] = QImage(screenWidth, screenHeight, QImage::Format_RGB32);
 
     screenTrans[0].reset();
     screenTrans[1].reset();
@@ -1045,6 +1044,9 @@ void ScreenPanelNative::setupScreenLayout()
 
 void ScreenPanelNative::paintEvent(QPaintEvent* event)
 {
+    if (videoSettingsDirty)
+        return;
+
     QPainter painter(this);
 
     // fill background
@@ -1052,7 +1054,7 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event)
 
     if (emuThread->emuIsActive())
     {
-        emuThread->FrontBufferLock.lock();
+        emuThread->FrontBufferLock.lock(); // held while GPU::Framebuffer is copied into the screen images below
         int frontbuf = emuThread->FrontBuffer;
         if (!GPU::Framebuffer[frontbuf][0] || !GPU::Framebuffer[frontbuf][1])
         {
@@ -1060,11 +1062,11 @@ void ScreenPanelNative::paintEvent(QPaintEvent* event)
             return;
         }
 
-        memcpy(screen[0].scanLine(0), GPU::Framebuffer[frontbuf][0], 256 * 192 * 4);
-        memcpy(screen[1].scanLine(0), GPU::Framebuffer[frontbuf][1], 256 * 192 * 4);
+        memcpy(screen[0].scanLine(0), GPU::Framebuffer[frontbuf][0], screenWidth * screenHeight * 4);
+        memcpy(screen[1].scanLine(0), GPU::Framebuffer[frontbuf][1], screenWidth * screenHeight * 4);
         emuThread->FrontBufferLock.unlock();
 
-        QRect screenrc(0, 0, 256, 192);
+        QRect screenrc(0, 0, screenWidth, screenHeight);
 
         for (int i = 0; i < numScreens; i++)
         {
@@ -1116,7 +1118,7 @@ bool ScreenPanelNative::event(QEvent* event)
 
 void ScreenPanelNative::onScreenLayoutChanged()
 {
-    setMinimumSize(screenGetMinSize());
+    setMinimumSize(screenGetMinSize(Config::ScaleFactor));
     setupScreenLayout();
 }
 
@@ -1221,6 +1223,9 @@ QPaintEngine* ScreenPanelGL::paintEngine() const
 
 void ScreenPanelGL::setupScreenLayout()
 {
+    if (videoSettingsDirty)
+        return;
+
     int w = width();
     int h = height();
 
@@ -1238,6 +1243,7 @@ void ScreenPanelGL::resizeEvent(QResizeEvent* event)
 
 void ScreenPanelGL::mousePressEvent(QMouseEvent* event)
 {
+    bool locked = false;
     screenOnMousePress(event);
 }
 
@@ -1277,7 +1283,7 @@ void ScreenPanelGL::transferLayout(EmuThread* thread)
 
 void ScreenPanelGL::onScreenLayoutChanged()
 {
-    setMinimumSize(screenGetMinSize());
+    setMinimumSize(screenGetMinSize(Config::ScaleFactor));
     setupScreenLayout();
 }
 
@@ -1860,6 +1866,8 @@ void MainWindow::closeEvent(QCloseEvent* event)
 void MainWindow::createScreenPanel()
 {
     hasOGL = (Config::ScreenUseGL != 0) || (Config::_3DRenderer != 0);
+    screenWidth = NATIVE_WIDTH * Config::ScaleFactor;
+    screenHeight = NATIVE_HEIGHT * Config::ScaleFactor;
 
     if (hasOGL)
     {
@@ -3171,9 +3179,11 @@ void MainWindow::onEmuStop()
     actTitleManager->setEnabled(!Config::DSiNANDPath.empty());
 }
 
-void MainWindow::onUpdateVideoSettings(bool glchange)
+void MainWindow::onUpdateVideoSettings(bool displayChange)
 {
-    if (glchange)
+    videoSettingsDirty = true;
+
+    if (displayChange)
     {
         emuThread->emuPause();
         if (hasOGL) emuThread->deinitContext();
@@ -3183,9 +3193,7 @@ void MainWindow::onUpdateVideoSettings(bool glchange)
         connect(emuThread, SIGNAL(windowUpdate()), panelWidget, SLOT(repaint()));
     }
 
-    videoSettingsDirty = true;
-
-    if (glchange)
+    if (displayChange)
     {
         if (hasOGL) emuThread->initContext();
         emuThread->emuUnpause();
@@ -3280,7 +3288,7 @@ int main(int argc, char** argv)
     #endif
     );
     SANITIZE(Config::ScreenVSyncInterval, 1, 20);
-    SANITIZE(Config::GL_ScaleFactor, 1, 16);
+    SANITIZE(Config::ScaleFactor, 1, 16);
     SANITIZE(Config::AudioInterp, 0, 3);
     SANITIZE(Config::AudioVolume, 0, 256);
     SANITIZE(Config::MicInputType, 0, (int)micInputType_MAX);
diff --git a/src/frontend/qt_sdl/main.h b/src/frontend/qt_sdl/main.h
index 073a4da..f9ea1f7 100644
--- a/src/frontend/qt_sdl/main.h
+++ b/src/frontend/qt_sdl/main.h
@@ -89,6 +89,9 @@ signals:
 
     void syncVolumeLevel();
 
+public: // leave the signals section: the members below are a plain field and a method implemented in main.cpp
+    bool hasOGL;
+    void updateDisplay(bool forceInit = false);
 private:
     void drawScreenGL();
     void initOpenGL();
@@ -261,6 +264,8 @@ public:
     bool preloadROMs(QStringList file, QStringList gbafile, bool boot);
     QStringList splitArchivePath(const QString& filename, bool useMemberSyntax);
 
+    void createScreenPanel();
+
     void onAppStateChanged(Qt::ApplicationState state);
 
 protected:
@@ -348,7 +353,7 @@ private slots:
     void onEmuStart();
     void onEmuStop();
 
-    void onUpdateVideoSettings(bool glchange);
+    void onUpdateVideoSettings(bool displayChange);
 
     void onFullscreenToggled();
     void onScreenEmphasisToggled();
@@ -367,8 +372,6 @@ private:
     QStringList pickROM(bool gba);
     void updateCartInserted(bool gba);
 
-    void createScreenPanel();
-
     bool pausedManually = false;
 
     int oldW, oldH;
-- 
2.42.0

openSUSE Build Service is sponsored by