ncmdump/win32_init.cpp

296 lines
8.0 KiB
C++

#include <windows.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "platform.h"
#if defined(CP_UTF8)
static UINT saved_cp;
static int newargc;
static char **newargv;
static void cleanup(void)
{
int i;
SetConsoleOutputCP(saved_cp);
for (i = 0; i < newargc; i++)
free(newargv[i]);
free(newargv);
}
/*
* Incrementally [re]allocate newargv and keep it NULL-terminated.
*/
static int validate_argv(int argc)
{
static int size = 0;
if (argc >= size) {
char **ptr;
while (argc >= size)
size += 64;
ptr = (char**)realloc(newargv, size * sizeof(newargv[0]));
if (ptr == NULL)
return 0;
(newargv = ptr)[argc] = NULL;
} else {
newargv[argc] = NULL;
}
return 1;
}
static int process_glob(WCHAR *wstr, int wlen)
{
int i, slash, udlen;
WCHAR saved_char;
WIN32_FIND_DATAW data;
HANDLE h;
/*
* Note that we support wildcard characters only in filename part
* of the path, and not in directories. Windows users are used to
* this, that's why recursive glob processing is not implemented.
*/
/*
* Start by looking for last slash or backslash, ...
*/
for (slash = 0, i = 0; i < wlen; i++)
if (wstr[i] == L'/' || wstr[i] == L'\\')
slash = i + 1;
/*
* ... then look for asterisk or question mark in the file name.
*/
for (i = slash; i < wlen; i++)
if (wstr[i] == L'*' || wstr[i] == L'?')
break;
if (i == wlen)
return 0; /* definitely not a glob */
saved_char = wstr[wlen];
wstr[wlen] = L'\0';
h = FindFirstFileW(wstr, &data);
wstr[wlen] = saved_char;
if (h == INVALID_HANDLE_VALUE)
return 0; /* not a valid glob, just pass... */
if (slash)
udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
NULL, 0, NULL, NULL);
else
udlen = 0;
do {
int uflen;
char *arg;
/*
* skip over . and ..
*/
if (data.cFileName[0] == L'.') {
if ((data.cFileName[1] == L'\0') ||
(data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
continue;
}
if (!validate_argv(newargc + 1))
break;
/*
* -1 below means "scan for trailing '\0' *and* count it",
* so that |uflen| covers even trailing '\0'.
*/
uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
NULL, 0, NULL, NULL);
arg = (char*)malloc(udlen + uflen);
if (arg == NULL)
break;
if (udlen)
WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
arg, udlen, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
arg + udlen, uflen, NULL, NULL);
newargv[newargc++] = arg;
} while (FindNextFileW(h, &data));
CloseHandle(h);
return 1;
}
void win32_utf8argv(int *argc, char **argv[])
{
const WCHAR *wcmdline;
WCHAR *warg, *wend, *p;
int wlen, ulen, valid = 1;
char *arg;
newargc = 0;
newargv = NULL;
if (!validate_argv(newargc))
return;
wcmdline = GetCommandLineW();
if (wcmdline == NULL) return;
/*
* make a copy of the command line, since we might have to modify it...
*/
wlen = wcslen(wcmdline);
p = (WCHAR*)_alloca((wlen + 1) * sizeof(WCHAR));
wcscpy(p, wcmdline);
while (*p != L'\0') {
int in_quote = 0;
if (*p == L' ' || *p == L'\t') {
p++; /* skip over whitespace */
continue;
}
/*
* Note: because we may need to fiddle with the number of backslashes,
* the argument string is copied into itself. This is safe because
* the number of characters will never expand.
*/
warg = wend = p;
while (*p != L'\0'
&& (in_quote || (*p != L' ' && *p != L'\t'))) {
switch (*p) {
case L'\\':
/*
* Microsoft documentation on how backslashes are treated
* is:
*
* + Backslashes are interpreted literally, unless they
* immediately precede a double quotation mark.
* + If an even number of backslashes is followed by a double
* quotation mark, one backslash is placed in the argv array
* for every pair of backslashes, and the double quotation
* mark is interpreted as a string delimiter.
* + If an odd number of backslashes is followed by a double
* quotation mark, one backslash is placed in the argv array
* for every pair of backslashes, and the double quotation
* mark is "escaped" by the remaining backslash, causing a
* literal double quotation mark (") to be placed in argv.
*
* Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
*
* Though referred page doesn't mention it, multiple qouble
* quotes are also special. Pair of double quotes in quoted
* string is counted as single double quote.
*/
{
const WCHAR *q = p;
int i;
while (*p == L'\\')
p++;
if (*p == L'"') {
int i;
for (i = (p - q) / 2; i > 0; i--)
*wend++ = L'\\';
/*
* if odd amount of backslashes before the quote,
* said quote is part of the argument, not a delimiter
*/
if ((p - q) % 2 == 1)
*wend++ = *p++;
} else {
for (i = p - q; i > 0; i--)
*wend++ = L'\\';
}
}
break;
case L'"':
/*
* Without the preceding backslash (or when preceded with an
* even number of backslashes), the double quote is a simple
* string delimiter and just slightly change the parsing state
*/
if (in_quote && p[1] == L'"')
*wend++ = *p++;
else
in_quote = !in_quote;
p++;
break;
default:
/*
* Any other non-delimiter character is just taken verbatim
*/
*wend++ = *p++;
}
}
wlen = wend - warg;
if (wlen == 0 || !process_glob(warg, wlen)) {
if (!validate_argv(newargc + 1)) {
valid = 0;
break;
}
ulen = 0;
if (wlen > 0) {
ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
NULL, 0, NULL, NULL);
if (ulen <= 0)
continue;
}
arg = (char*)malloc(ulen + 1);
if (arg == NULL) {
valid = 0;
break;
}
if (wlen > 0)
WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
arg, ulen, NULL, NULL);
arg[ulen] = '\0';
newargv[newargc++] = arg;
}
}
if (valid) {
saved_cp = GetConsoleOutputCP();
SetConsoleOutputCP(CP_UTF8);
*argc = newargc;
*argv = newargv;
atexit(cleanup);
} else if (newargv != NULL) {
int i;
for (i = 0; i < newargc; i++)
free(newargv[i]);
free(newargv);
newargc = 0;
newargv = NULL;
}
return;
}
#else
void win32_utf8argv(int *argc, char **argv[])
{ return; }
#endif