File CVE-2020-29050.patch of Package sphinx

From 17245f7dd75d47c7365797b3d5feba8976367c6e Mon Sep 17 00:00:00 2001
From: klirichek <alexey@manticoresearch.com>
Date: Tue, 2 Jul 2019 19:06:09 +0700
Subject: [PATCH] Fix random file reading by scattered snippets

Issue was that load_files_scattered option just concatenated prefix with
given name and then opened the file. So, if you provide something like
'/etc/password' as file, or '../../../etc/passwd' even with non-zero
prefix, it will unfortunately work. After this commit such behavior
possible ONLY if user explicitly set `snippets_file_prefix` to empty
string or '/'; another cases will not cause uncontroled reading.
That fixes #866.

[basilgello: Back-port to 2.2.11:
 - Refactor sphNormalizePath to use only available classes.
 - Do not use 'g_sSnippetsFilePrefix' in src/sphinxexcerpt.h,
   use 'tOptions.m_sFilePrefix' instead, as for 2.2.11, all
   two instances are assigned to 'g_sSnippetsFilePrefix'.
 - Adjust context. ]

The following test passes, adapted from commit
66b5761ad258c60b1866a8e1333f86e74f48035 at
https://github.com/manticoresoftware/manticoresearch :

====
/*
 cd test &&
 g++ -o test test.cpp -I../src -I../config -I/usr/include/postgresql \
	-I/usr/include/mariadb -DHAVE_CONFIG_H -L../src -lsphinx -pthread \
	-lm -ldl -lstemmer -lmariadb -lpq -lexpat -lz
*/

void ASSERT_STREQ(const char *src, const char *dst)
{
	auto normalized = sphNormalizePath(src);
	if (!strcmp(dst, normalized.scstr())) {
		auto _src = src;
		auto _dst = dst;

		if (src == nullptr) _src = "nullptr";
		if (dst == nullptr) _dst = "nullptr";

		std::cout << "OK  : '" << _src << "' = '" << _dst <<
			"'" << std::endl;
	} else {
		auto _src = src;
		auto _dst = dst;

		if (src == nullptr) _src = "nullptr";
		if (dst == nullptr) _dst = "nullptr";

		std::cout << "FAIL: '" << _src << "' != '" << _dst << "' ( = '" <<
			normalized.scstr() << "')" << std::endl;
	}
}

int main()
{
	ASSERT_STREQ ( "/", "/" );
	ASSERT_STREQ ( "/..//bbb", "/bbb" );
	ASSERT_STREQ ( "/quite/long/path/../../../etc/passwd", "/etc/passwd" );
	ASSERT_STREQ ( "/aaa/bbb/ccc/ddd/../../../../../../../", "/" );

	ASSERT_STREQ ( "",  "" );
	ASSERT_STREQ ( nullptr, "" );
	ASSERT_STREQ ( "aaa/", "aaa" );
	ASSERT_STREQ ( "aaa/.", "aaa" );
	ASSERT_STREQ ( "aaa/././././////././", "aaa" );
	ASSERT_STREQ ( "aaa/////", "aaa" );
	ASSERT_STREQ ( "aaa/bbb/ccc", "aaa/bbb/ccc" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/..", "aaa/bbb/ccc" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/../../..", "aaa" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/../../../xxx", "aaa/xxx" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/../../../..", "" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/../../../../", "" );
	ASSERT_STREQ ( "aaa/bbb/ccc/ddd/../../../../../../../", "../../.." );
	ASSERT_STREQ ( "..//bbb", "../bbb" );
	return 0;
}
====

Fixes CVE-2020-29050.

Signed-off-by: Vasyl Gello <vasek.gello@gmail.com>
---
 doc/sphinx.html        | 19 ++++++++++-
 doc/sphinx.txt         | 19 ++++++++++-
 sphinx.conf.in         |  4 +--
 src/searchd.cpp        |  8 ++++-
 src/sphinx.cpp         | 71 ++++++++++++++++++++++++++++++++++++++++++
 src/sphinxexcerpt.cpp  | 14 +++++++++
 src/sphinxexcerpt.h    |  3 ++
 src/sphinxstd.h        |  3 ++
 test/test_130/test.xml |  7 +++--
 9 files changed, 140 insertions(+), 8 deletions(-)

diff --git a/doc/sphinx.html b/doc/sphinx.html
index b56e445..8ad44dd 100644
--- a/doc/sphinx.html
+++ b/doc/sphinx.html
@@ -11652,7 +11652,7 @@ binlog_max_log_size = 16M
 <div class="sect2" title="12.4.28.&nbsp;snippets_file_prefix"><div class="titlepage"><div><div><h3 class="title"><a name="conf-snippets-file-prefix"></a>12.4.28.&nbsp;snippets_file_prefix</h3></div></div></div>
 <p>
 A prefix to prepend to the local file names when generating snippets.
-Optional, default is empty.
+Optional, default is current working folder.
 Introduced in version 2.1.1-beta.
 </p><p>
 This prefix can be used in distributed snippets generation along with
@@ -11663,6 +11663,19 @@ is set to "server1" and the request refers to "file23", <code class="filename">s
 will attempt to open "server1file23" (all of that without quotes). So if you
 need it to be a path, you have to mention the trailing slash.
 </p><p>
+After constructing final file path, daemon unwinds all relative dirs and
+compares final result with the value of ``snippet_file_prefix``. If result
+is not begin with the prefix, such file will be rejected with error message.
+
+So, if you set it to '/mnt/data' and somebody calls snippet generation with file
+'../../../etc/passwd', as the source, it will get error message
+`File '/mnt/data/../../../etc/passwd' escapes '/mnt/data/' scope`
+instead of content of the file.
+
+Also, with non-set parameter and reading '/etc/passwd' it will actually read
+/daemon/working/folder/etc/passwd since default for param is exactly daemon's
+working folder.
+</p><p>
 Note also that this is a local option, it does not affect the agents anyhow.
 So you can safely set a prefix on a master server. The requests routed to the
 agents will not be affected by the master's setting. They will however be affected
@@ -11673,6 +11686,10 @@ This might be useful, for instance, when the document storage locations
 </p><h4><a name="idp33320288"></a>Example:</h4><pre class="programlisting">
 snippets_file_prefix = /mnt/common/server1/
 </pre></div>
+<p><span class="bold"><strong>WARNING:</strong></span>
+If you still want to access files from the FS root, you have to explicitly set
+'snippets_file_prefix' to empty value (by 'snippets_file_prefix=' line), or to
+root (by 'snippets_file_prefix=/').
 <div class="sect2" title="12.4.29.&nbsp;collation_server"><div class="titlepage"><div><div><h3 class="title"><a name="conf-collation-server"></a>12.4.29.&nbsp;collation_server</h3></div></div></div>
 <p>
 Default server collation.
diff --git a/doc/sphinx.txt b/doc/sphinx.txt
index ed994f9..f750f4e 100644
--- a/doc/sphinx.txt
+++ b/doc/sphinx.txt
@@ -12832,7 +12832,7 @@ Example:
 -----------------------------
 
 A prefix to prepend to the local file names when generating snippets.
-Optional, default is empty. Introduced in version 2.1.1-beta.
+Optional, default is current working folder. Introduced in version 2.1.1-beta.
 
 This prefix can be used in distributed snippets generation along with
 load_files or load_files_scattered options.
@@ -12842,6 +12842,19 @@ to "server1" and the request refers to "file23", searchd will attempt to
 open "server1file23" (all of that without quotes). So if you need it to be
 a path, you have to mention the trailing slash.
 
+After constructing final file path, daemon unwinds all relative dirs and
+compares final result with the value of ``snippet_file_prefix``. If result
+is not begin with the prefix, such file will be rejected with error message.
+
+So, if you set it to '/mnt/data' and somebody calls snippet generation with file
+'../../../etc/passwd', as the source, it will get error message
+`File '/mnt/data/../../../etc/passwd' escapes '/mnt/data/' scope`
+instead of content of the file.
+
+Also, with non-set parameter and reading '/etc/passwd' it will actually read
+/daemon/working/folder/etc/passwd since default for param is exactly daemon's
+working folder.
+
 Note also that this is a local option, it does not affect the agents
 anyhow. So you can safely set a prefix on a master server. The requests
 routed to the agents will not be affected by the master's setting. They
@@ -12855,6 +12868,10 @@ Example:
 
    | snippets_file_prefix = /mnt/common/server1/
 
+WARNING: If you still want to access files from the FS root, you have to
+explicitly set 'snippets_file_prefix' to empty value (by 'snippets_file_prefix='
+line), or to root (by 'snippets_file_prefix=/').
+
 12.4.29. collation_server
 -------------------------
 
diff --git a/sphinx.conf.in b/sphinx.conf.in
index 6ba2dc9..527bbd8 100644
--- a/sphinx.conf.in
+++ b/sphinx.conf.in
@@ -604,7 +604,7 @@ index test1
 	# snippet document file name prefix
 	# preprended to file names when generating snippets using load_files option
 	# WARNING, this is a prefix (not a path), trailing slash matters!
-	# optional, default is empty
+	# optional, default is current working directory of a running process
 	#
 	# snippets_file_prefix	= /mnt/mydocs/server1
 
@@ -1042,7 +1042,7 @@ searchd
 
 	# a prefix to prepend to the local file names when creating snippets
 	# with load_files and/or load_files_scatter options
-	# optional, default is empty
+	# optional, default is current working directory of a running process
 	#
 	# snippets_file_prefix		= /mnt/common/server1/
 }
diff --git a/src/searchd.cpp b/src/searchd.cpp
index 85b1cd6..6462b16 100644
--- a/src/searchd.cpp
+++ b/src/searchd.cpp
@@ -13330,6 +13330,12 @@ bool MakeSnippets ( CSphString sIndex, CSphVector<ExcerptQuery_t> & dQueries, CS
 				struct stat st;
 				CSphString sFilename;
 				sFilename.SetSprintf ( "%s%s", g_sSnippetsFilePrefix.cstr(), dQueries[i].m_sSource.cstr() );
+				if ( !TestEscaping ( g_sSnippetsFilePrefix, sFilename ))
+				{
+					sError.SetSprintf( "File '%s' escapes '%s' scope",
+						sFilename.scstr(), g_sSnippetsFilePrefix.scstr());
+					return false;
+				}
 				if ( ::stat ( sFilename.cstr(), &st )<0 )
 				{
 					if ( !bScattered )
@@ -23719,7 +23725,7 @@ int WINAPI ServiceMain ( int argc, char **argv )
 	if ( hSearchd.Exists ( "snippets_file_prefix" ) )
 		g_sSnippetsFilePrefix = hSearchd["snippets_file_prefix"].cstr();
 	else
-		g_sSnippetsFilePrefix = "";
+		g_sSnippetsFilePrefix.SetSprintf("%s/", sphGetCwd().scstr());
 
 	const char* sLogFormat = hSearchd.GetStr ( "query_log_format", "plain" );
 	if ( !strcmp ( sLogFormat, "sphinxql" ) )
diff --git a/src/sphinx.cpp b/src/sphinx.cpp
index d6a7b9d..492c908 100644
--- a/src/sphinx.cpp
+++ b/src/sphinx.cpp
@@ -81,9 +81,11 @@
 	#include <io.h> // for open()
 
 	// workaround Windows quirks
+	#include <direct.h>
 	#define popen		_popen
 	#define pclose		_pclose
 	#define snprintf	_snprintf
+	#define getcwd          _getcwd
 	#define sphSeek		_lseeki64
 
 	#define stat		_stat64
@@ -12420,6 +12422,75 @@ static bool sphTruncate ( int iFD )
 #endif
 }
 
+CSphString sphNormalizePath( const CSphString& sOrigPath )
+{
+	CSphVector<CSphString> dChunks;
+	const char* sBegin = sOrigPath.scstr();
+	const char* sEnd = sBegin + sOrigPath.Length();
+	const char* sPath = sBegin;
+	int iLevel = 0;
+
+	while ( sPath<sEnd )
+	{
+		const char* sSlash = ( char* ) memchr( sPath, '/', sEnd - sPath );
+		if ( !sSlash )
+			sSlash = sEnd;
+
+		auto iChunkLen = sSlash - sPath;
+
+		switch ( iChunkLen )
+		{
+		case 0: // empty chunk skipped
+			++sPath;
+			continue;
+		case 1: // simple dot chunk skipped
+			if ( *sPath=='.' )
+			{
+				sPath += 2;
+				continue;
+			}
+			break;
+		case 2: // double dot abandons chunks, then decrease level
+			if ( sPath[0]=='.' && sPath[1]=='.' )
+			{
+				if ( dChunks.GetLength() <= 0 )
+					--iLevel;
+				else
+					dChunks.Pop();
+				sPath += 3;
+				continue;
+			}
+		default: break;
+		}
+		CSphString temp( "" );
+		temp.SetBinary( sPath, iChunkLen );
+		dChunks.Add( temp );
+		sPath = sSlash + 1;
+	}
+
+	CSphStringBuilder sResult;
+	if ( *sBegin=='/' )
+		sResult += "/";
+	else
+		while ( iLevel++<0 )
+			dChunks.Insert(0, "..");
+
+	int i;
+	for ( i=0; i<dChunks.GetLength(); i++ ) {
+		sResult += dChunks[i].scstr();
+		if (i<dChunks.GetLength()-1)
+			sResult += "/";
+	}
+
+	return sResult.cstr();
+}
+
+CSphString sphGetCwd()
+{
+	CSphVector<char> sBuf (65536);
+	return getcwd( sBuf.Begin(), sBuf.GetLength());
+}
+
 class DeleteOnFail : public ISphNoncopyable
 {
 public:
diff --git a/src/sphinxexcerpt.cpp b/src/sphinxexcerpt.cpp
index b42c6a2..ff593f0 100644
--- a/src/sphinxexcerpt.cpp
+++ b/src/sphinxexcerpt.cpp
@@ -3817,6 +3817,11 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, cons
 		{
 			CSphString sFilename;
 			sFilename.SetSprintf ( "%s%s", tOptions.m_sFilePrefix.cstr(), tOptions.m_sSource.cstr() );
+			if ( !TestEscaping( tOptions.m_sFilePrefix.scstr(), sFilename ))
+			{
+				sError.SetSprintf( "File '%s' escapes '%s' scope", sFilename.scstr(), tOptions.m_sFilePrefix.scstr());
+				return;
+			}
 			if ( tFile.Open ( sFilename.cstr(), SPH_O_READ, sError )<0 )
 				return;
 		} else if ( tOptions.m_sSource.IsEmpty() )
@@ -3859,6 +3864,15 @@ void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, cons
 		sWarning, sError, pQueryTokenizer, tOptions.m_dRes );
 }
 
+// check whether filepath from sPath does not escape area of sPrefix
+bool TestEscaping( const CSphString& sPrefix, const CSphString& sPath )
+{
+	if ( sPrefix.IsEmpty() || sPrefix==sPath )
+		return true;
+	auto sNormalized = sphNormalizePath( sPath );
+	return sPrefix==sNormalized.SubString( 0, sPrefix.Length());
+}
+
 //
 // $Id$
 //
diff --git a/src/sphinxexcerpt.h b/src/sphinxexcerpt.h
index cf48b1f..87a55d4 100644
--- a/src/sphinxexcerpt.h
+++ b/src/sphinxexcerpt.h
@@ -81,6 +81,9 @@ struct XQQuery_t;
 void sphBuildExcerpt ( ExcerptQuery_t & tOptions, const CSphIndex * pIndex, const CSphHTMLStripper * pStripper, const XQQuery_t & tExtQuery,
 						DWORD eExtQuerySPZ, CSphString & sWarning, CSphString & sError, CSphDict * pDict, ISphTokenizer * pDocTokenizer, ISphTokenizer * pQueryTokenizer );
 
+// helper whether filepath from sPath does not escape area of sPrefix
+bool TestEscaping( const CSphString& sPrefix, const CSphString& sPath );
+
 #endif // _sphinxexcerpt_
 
 //
diff --git a/src/sphinxstd.h b/src/sphinxstd.h
index 39cc7ee..c1f15c1 100644
--- a/src/sphinxstd.h
+++ b/src/sphinxstd.h
@@ -2294,6 +2294,9 @@ int				sphOpenFile ( const char * sFile, CSphString & sError );
 /// return size of file descriptor
 int64_t			sphGetFileSize ( int iFD, CSphString & sError );
 
+// unwind different tricks like "../../../etc/passwd"
+CSphString		sphNormalizePath ( const CSphString& sOrigPath );
+CSphString		sphGetCwd();
 
 /// buffer trait that neither own buffer nor clean-up it on destroy
 template < typename T >
diff --git a/test/test_130/test.xml b/test/test_130/test.xml
index d8f746c..41e4961 100644
--- a/test/test_130/test.xml
+++ b/test/test_130/test.xml
@@ -7,6 +7,7 @@
 searchd
 {
 	<searchd_settings/>
+	snippets_file_prefix=<this_test/>/
 }
 
 source test
@@ -30,15 +31,15 @@ index test
 
 $results = array();
 
-$docs = array( 'test_130/load_file.txt' );
+$docs = array( "load_file.txt" );
 $opts = array( 'load_files'=>true, 'limit'=>0 );
 
 $results[] = $client->BuildExcerpts($docs, 'test', 'end point', $opts );
 $results[] = $client->BuildExcerpts($docs, 'test', 'not_found', $opts );
-$results[] = $client->BuildExcerpts(array( 'test_130/empty.txt' ), 'test', 'end point', $opts );
+$results[] = $client->BuildExcerpts(array( 'empty.txt' ), 'test', 'end point', $opts );
 $results[] = $client->BuildExcerpts(array( '' ), 'test', 'not_found', $opts );
 $results[] = $client->GetLastError();
-$results[] = $client->BuildExcerpts ( array ( 'test_130/512k.xml' ), 'test', 'it builds', array ( "limit" => 100, "load_files" => true ) );
+$results[] = $client->BuildExcerpts ( array ( '512k.xml' ), 'test', 'it builds', array ( "limit" => 100, "load_files" => true ) );
 
 ]]></custom_test>
 
-- 
2.17.1

openSUSE Build Service is sponsored by