Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
DISCONTINUED:openSUSE:11.1
ntfs-3g
ntfs-3g-utf8-fallback.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File ntfs-3g-utf8-fallback.patch of Package ntfs-3g
- bk@suse.de: # # NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough # for now]) for path names, but the Unicode code points need to be # converted before a path can be accessed under NTFS. For 7-bit ASCII/ANSI, # glibc does this even without a locale in a hard-coded fashion, as that # is easy because the low 7-bit ASCII range is # available # in all charsets, but it does not convert anything if # there was some error with the locale setup or none was set up, as # when mount is called during early boot where (by policy) locales are # not used (and may not be available if /usr is not yet mounted), # so this patch fixes the resulting issues for systems which use # UTF-8; for others, specifying the locale in fstab brings them # the encoding which they want. # # If no locale is defined or there was a problem with setting one # up, and whenever nl_langinfo(CODESET) returns a string starting with # "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix # the bug where NTFS-3G does not show any path names which include # international characters (and also fails on creating them) as a result. # # Author: Bernhard Kaindl <bk@suse.de> # --- include/ntfs-3g/unistr.h +++ include/ntfs-3g/unistr.h @@ -26,6 +26,8 @@ #include "types.h" #include "layout.h" +extern int use_utf8; + extern BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len, const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, const ntfschar *upcase, const u32 upcase_size); --- libntfs-3g/unistr.c +++ libntfs-3g/unistr.c @@ -47,6 +47,8 @@ #include "logging.h" #include "misc.h" +int use_utf8; + /* * IMPORTANT * ========= @@ -373,6 +375,85 @@ int ntfs_file_values_compare(const FILE_ err_val, ic, upcase, upcase_len); } +/* Return the number of UTF-8 bytes needed (without the terminating + * null) to store the given UCS-2LE string, or -1 if it does + * not fit into outs_len bytes. + * TODO: Extend this with a function to support UTF-16LE. 
+*/ +static int ucs2_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len) +{ + int i; + int count = 0; + + for (i = 0; i < ins_len && ins[i]; i++) { + unsigned short c = le16_to_cpu(ins[i]); + if (c < 0x80) + count++; + else + count += (c & 0xf800) ? 3 : 2; + if (count > outs_len) + goto fail; + } + return count; +fail: + return -1; +} + +/* + * ntfs_ucs_to_utf8 - convert a little endian Unicode string to an UTF-8 string + * @ins: input Unicode string buffer + * @ins_len: length of input string in Unicode characters + * @outs: on return contains the (allocated) output multibyte string + * @outs_len: length of output buffer in bytes + * TODO: Replace this with a function which converts from UTF-16LE because + * NTFS uses UTF-16LE. UTF-16 supports more rare/unusual characters than UCS-2 + */ +int ntfs_ucs_to_utf8(const ntfschar *ins, const int ins_len, char **outs, int outs_len) +{ + char *t, *end; + int i, size; + + if (!*outs) + outs_len = PATH_MAX; + + size = ucs2_to_utf8_size(ins, ins_len, outs_len); + + if (size < 0) { + errno = ENAMETOOLONG; + goto fail; + } + if (!*outs) + *outs = ntfs_malloc((outs_len = size + 1)); + + t = *outs; + end = t + outs_len; + + for (i = 0; i < ins_len && ins[i]; i++) { + unsigned short c = le16_to_cpu(ins[i]); + if (c < 0x80) { + *t++ = c; + if (t == end) + goto fail; + } else { + if (c & 0xf800) { + if (t+3 >= end) + goto fail; + *t++ = 0xe0 | (c >> 12); + *t++ = 0x80 | ((c >> 6) & 0x3f); + } else { + if (t+2 >= end) + goto fail; + *t++ = (0xc0 | ((c >> 6) & 0x3f)); + } + *t++ = 0x80 | (c & 0x3f); + } + } + *t = '\0'; + return t - *outs; +fail: + return -1; +} + /** * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string * @ins: input Unicode string buffer @@ -397,6 +478,8 @@ int ntfs_file_values_compare(const FILE_ * sequence according to the current locale. * ENAMETOOLONG Destination buffer is too small for input string. * ENOMEM Not enough memory to allocate destination buffer. 
+ * TODO: Replace this with a function which converts from UTF-16LE because + * NTFS uses UTF-16LE. UTF-16 supports more rare/unusual characters than UCS-2 */ int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs, int outs_len) @@ -419,12 +502,15 @@ int ntfs_ucstombs(const ntfschar *ins, c errno = ENAMETOOLONG; return -1; } + if (use_utf8) + return ntfs_ucs_to_utf8(ins, ins_len, outs, outs_len); if (!mbs) { mbs_len = (ins_len + 1) * MB_CUR_MAX; mbs = ntfs_malloc(mbs_len); if (!mbs) return -1; } + #ifdef HAVE_MBSINIT memset(&mbstate, 0, sizeof(mbstate)); #else @@ -487,6 +573,107 @@ err_out: return -1; } +/* Return the number of 16-bit UCS-2 elements needed (without + * the terminating null) to store the given UTF-8 string, or -1 if it does + * not fit into PATH_MAX or contains a lead byte >= 0xF0 (outside UCS-2). + * TODO: Extend this with a function to support UTF-16LE. +*/ +static int utf8_to_ucs2_size(const char *s) +{ + unsigned int byte; + size_t count = 0; + + while ((byte = *((unsigned char *)s++))) { + if (++count >= PATH_MAX || byte >= 0xF0) + goto fail; + if (!*s) break; + if (byte >= 0xC0) s++; + if (!*s) break; + if (byte >= 0xE0) s++; + } + return count; +fail: + return -1; +} +/* This converts one UTF-8 sequence to cpu-endian UCS-2 + * TODO: Replace this with a function which converts to UTF-16LE because + * NTFS uses UTF-16LE. 
UTF-16 supports more rare/unusual characters than UCS-2 +*/ +static int utf8toucs2(wchar_t *wc, const char *s) +{ + unsigned int byte = *((unsigned char *)s); + + if (byte == 0) { + *wc = (wchar_t) 0; + return 0; + } else if (byte < 0xC0) { + *wc = (wchar_t) byte; + return 1; + } else if (byte < 0xE0) { + if(strlen(s) < 2) + goto fail; + if ((s[1] & 0xC0) == 0x80) { + *wc = (wchar_t) (((byte & 0x1F) << 6) | (s[1] & 0x3F)); + return 2; + } else + goto fail; + } else if (byte < 0xF0) { + if(strlen(s) < 3) + goto fail; + if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) { + *wc = (wchar_t) (((byte & 0x0F) << 12) + | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F)); + /* Surrogates range */ + if((*wc >= 0xD800 && *wc <= 0xDFFF) || + (*wc == 0xFFFE || *wc == 0xFFFF)) + goto fail; + return 3; + } + } +fail: + return -1; +} + +/** + * ntfs_utf8_to_ucs - convert a UTF-8 string to a UCS-2LE Unicode string + * @ins: input multibyte string buffer + * @outs: on return contains the (allocated) output Unicode string + * @outs_len: length of output buffer in Unicode characters + * TODO: Replace this with a function which converts to UTF-16LE because + * NTFS uses UTF-16LE. UTF-16 supports more rare/unusual characters than UCS-2 + */ +int ntfs_utf8_to_ucs(const char *ins, ntfschar **outs) +{ + const char *t = ins; + wchar_t wc; + ntfschar *outpos; + int shorts = utf8_to_ucs2_size(ins); + + if (shorts < 0) { + errno = EILSEQ; + goto fail; + } + if (!*outs) + *outs = ntfs_malloc((shorts+1) * sizeof(ntfschar)); + + outpos = *outs; + + while(1) { + int m = utf8toucs2(&wc, t); + if (m < 0) { + errno = EILSEQ; + goto fail; + } + *outpos++ = cpu_to_le16(wc); + if (m == 0) + break; + t += m; + } + return --outpos - *outs; +fail: + return -1; +} + /** * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string * @ins: input multibyte string buffer @@ -509,6 +696,8 @@ err_out: * string according to the current locale. 
* ENAMETOOLONG Destination buffer is too small for input string. * ENOMEM Not enough memory to allocate destination buffer. + * TODO: Replace this with a function which converts to UTF-16LE because + * NTFS uses UTF-16LE. UTF-16 supports more rare/unusual characters than UCS-2 */ int ntfs_mbstoucs(const char *ins, ntfschar **outs) { @@ -524,6 +713,8 @@ int ntfs_mbstoucs(const char *ins, ntfsc errno = EINVAL; return -1; } + if (use_utf8) + return ntfs_utf8_to_ucs(ins, outs); /* Determine the size of the multi-byte string in bytes. */ ins_size = strlen(ins); --- src/ntfs-3g.c +++ src/ntfs-3g.c @@ -69,6 +69,7 @@ #include <getopt.h> #include <syslog.h> #include <sys/wait.h> +#include <langinfo.h> #ifdef HAVE_SETXATTR #include <sys/xattr.h> @@ -2224,6 +2225,15 @@ static void setup_logging(char *parsed_o ntfs_log_info("Mount options: %s\n", parsed_options); } +void check_codeset() { + char *codeset = nl_langinfo(CODESET); + if (!codeset || !strncmp(codeset, "ANSI", 4)) { + ntfs_log_info("Locale invalid or has ANSI codeset: " + "Using UTF-8 for international characters.\n"); + use_utf8 = 1; + } +} + int main(int argc, char *argv[]) { char *parsed_options = NULL; @@ -2260,6 +2270,8 @@ int main(int argc, char *argv[]) err = NTFS_VOLUME_SYNTAX_ERROR; goto err_out; } + + check_codeset(); #if defined(linux) || defined(__uClinux__) fstype = get_fuse_fstype();
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor