fix: use utf-8 as filename encoding

This commit is contained in:
TaurusXin 2024-03-22 12:07:32 +08:00
parent 348057bd09
commit c12be04007
7 changed files with 625 additions and 275 deletions

View File

@ -10,6 +10,10 @@ set(WITH_ZLIB OFF)
set(BUILD_TESTING OFF)
add_subdirectory(taglib)
if(MSVC)
add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/execution-charset:utf-8>")
endif()
FILE(GLOB HEADERS ./*.h)
FILE(GLOB SOURCES ./*.cpp)
add_executable(ncmdump

6
aes.h
View File

@ -1,9 +1,10 @@
#pragma once
#pragma once
#include <string.h>
#include <stdio.h>
class AES {
class AES
{
public:
AES();
@ -53,5 +54,4 @@ private:
// get the secret key
void getKeyAt(unsigned char key[][4], int i);
};

View File

@ -6,7 +6,7 @@
#include <filesystem>
#if defined(_WIN32)
#include <Windows.h>
#include "platform.h"
#endif
#include "color.h"
@ -25,26 +25,26 @@ void processFile(const fs::path &filePath)
{
if (fs::exists(filePath) == false)
{
std::cerr << BOLDRED << "Error: " << RESET << "file '" << filePath.string() << "' does not exist." << std::endl;
std::cerr << BOLDRED << "Error: " << RESET << "file '" << filePath.u8string() << "' does not exist." << std::endl;
return;
}
try
{
NeteaseCrypt crypt(filePath.string());
NeteaseCrypt crypt(filePath.u8string());
crypt.Dump();
crypt.FixMetadata();
std::cout << BOLDGREEN << "Done: '" << RESET << crypt.dumpFilepath().string() << "'" << std::endl;
std::cout << BOLDGREEN << "Done: " << RESET << "'" << crypt.dumpFilepath().u8string() << "'" << std::endl;
}
catch (const std::invalid_argument &e)
{
std::cerr << BOLDRED << "Exception: " << RESET << RED << e.what() << RESET << " '" << filePath.string() << "'" << std::endl;
}
catch (...)
{
std::cerr << BOLDRED << "Unexpected exception while processing file: " << RESET << filePath.string() << std::endl;
std::cerr << BOLDRED << "Exception: " << RESET << RED << e.what() << RESET << " '" << filePath.u8string() << "'" << std::endl;
}
// catch (...)
// {
// std::cerr << BOLDRED << "Unexpected exception while processing file: " << RESET << filePath.u8string() << std::endl;
// }
}
void processFilesInFolder(const fs::path &folderPath)
@ -58,22 +58,9 @@ void processFilesInFolder(const fs::path &folderPath)
}
}
#if defined(_WIN32)
int wmain(int argc, wchar_t *wideargv[])
#else
int main(int argc, char **argv)
#endif
{
#if defined(_WIN32)
SetConsoleOutputCP(CP_UTF8);
char **argv = (char **)malloc(sizeof(char *) * argc);
for (int i = 0; i < argc; ++i)
{
int utf8_size = WideCharToMultiByte(CP_UTF8, 0, wideargv[i], -1, NULL, 0, NULL, NULL);
argv[i] = (char *)malloc(utf8_size);
WideCharToMultiByte(CP_UTF8, 0, wideargv[i], -1, argv[i], utf8_size, NULL, NULL);
}
#endif
win32_utf8argv(&argc, &argv);
if (argc <= 1)
{
@ -115,7 +102,7 @@ int main(int argc, char **argv)
}
else
{
fs::path path(argv[i]);
fs::path path = fs::u8path(argv[i]);
files.push_back(path);
}
}

View File

@ -13,7 +13,6 @@
#include <stdexcept>
#include <string>
#include <filesystem>
const unsigned char NeteaseCrypt::sCoreKey[17] = {0x68, 0x7A, 0x48, 0x52, 0x41, 0x6D, 0x73, 0x6F, 0x35, 0x6B, 0x49, 0x6E, 0x62, 0x61, 0x78, 0x57, 0};
@ -21,7 +20,8 @@ const unsigned char NeteaseCrypt::sModifyKey[17] = {0x23, 0x31, 0x34, 0x6C, 0x6A
const unsigned char NeteaseCrypt::mPng[8] = {0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A};
static void aesEcbDecrypt(const unsigned char *key, std::string& src, std::string& dst) {
static void aesEcbDecrypt(const unsigned char *key, std::string &src, std::string &dst)
{
int n, i;
unsigned char out[16];
@ -32,35 +32,42 @@ static void aesEcbDecrypt(const unsigned char *key, std::string& src, std::strin
AES aes(key);
for (i = 0; i < n-1; i++) {
for (i = 0; i < n - 1; i++)
{
aes.decrypt((unsigned char *)src.c_str() + (i << 4), out);
dst += std::string((char *)out, 16);
}
aes.decrypt((unsigned char *)src.c_str() + (i << 4), out);
char pad = out[15];
if (pad > 16) {
if (pad > 16)
{
pad = 0;
}
dst += std::string((char *)out, 16 - pad);
}
static void replace(std::string& str, const std::string& from, const std::string& to) {
static void replace(std::string &str, const std::string &from, const std::string &to)
{
if (from.empty())
return;
size_t start_pos = 0;
while((start_pos = str.find(from, start_pos)) != std::string::npos) {
while ((start_pos = str.find(from, start_pos)) != std::string::npos)
{
str.replace(start_pos, from.length(), to);
start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx'
}
}
NeteaseMusicMetadata::~NeteaseMusicMetadata() {
NeteaseMusicMetadata::~NeteaseMusicMetadata()
{
cJSON_Delete(mRaw);
}
NeteaseMusicMetadata::NeteaseMusicMetadata(cJSON* raw) {
if (!raw) {
NeteaseMusicMetadata::NeteaseMusicMetadata(cJSON *raw)
{
if (!raw)
{
return;
}
@ -70,21 +77,25 @@ NeteaseMusicMetadata::NeteaseMusicMetadata(cJSON* raw) {
mRaw = raw;
swap = cJSON_GetObjectItem(raw, "musicName");
if (swap) {
if (swap)
{
mName = std::string(cJSON_GetStringValue(swap));
}
swap = cJSON_GetObjectItem(raw, "album");
if (swap) {
if (swap)
{
mAlbum = std::string(cJSON_GetStringValue(swap));
}
swap = cJSON_GetObjectItem(raw, "artist");
if (swap) {
if (swap)
{
artistLen = cJSON_GetArraySize(swap);
i = 0;
for (i = 0; i < artistLen-1; i++) {
for (i = 0; i < artistLen - 1; i++)
{
mArtist += std::string(cJSON_GetStringValue(cJSON_GetArrayItem(cJSON_GetArrayItem(swap, i), 0)));
mArtist += "/";
}
@ -92,61 +103,75 @@ NeteaseMusicMetadata::NeteaseMusicMetadata(cJSON* raw) {
}
swap = cJSON_GetObjectItem(raw, "bitrate");
if (swap) {
if (swap)
{
mBitrate = swap->valueint;
}
swap = cJSON_GetObjectItem(raw, "duration");
if (swap) {
if (swap)
{
mDuration = swap->valueint;
}
swap = cJSON_GetObjectItem(raw, "format");
if (swap) {
if (swap)
{
mFormat = std::string(cJSON_GetStringValue(swap));
}
}
bool NeteaseCrypt::openFile(std::filesystem::path const& path) {
mFile.open(path, std::ios::in | std::ios::binary);
if (!mFile.is_open()) {
bool NeteaseCrypt::openFile(std::string const &path)
{
mFile.open(std::filesystem::u8path(path), std::ios::in | std::ios::binary);
if (!mFile.is_open())
{
return false;
} else {
}
else
{
return true;
}
}
bool NeteaseCrypt::isNcmFile() {
bool NeteaseCrypt::isNcmFile()
{
unsigned int header;
mFile.read(reinterpret_cast<char *>(&header), sizeof(header));
if (header != (unsigned int)0x4e455443) {
if (header != (unsigned int)0x4e455443)
{
return false;
}
mFile.read(reinterpret_cast<char *>(&header), sizeof(header));
if (header != (unsigned int)0x4d414446) {
if (header != (unsigned int)0x4d414446)
{
return false;
}
return true;
}
int NeteaseCrypt::read(char *s, std::streamsize n) {
int NeteaseCrypt::read(char *s, std::streamsize n)
{
mFile.read(s, n);
int gcount = mFile.gcount();
if (gcount <= 0) {
if (gcount <= 0)
{
throw std::invalid_argument("Can't read file");
}
return gcount;
}
void NeteaseCrypt::buildKeyBox(unsigned char *key, int keyLen) {
void NeteaseCrypt::buildKeyBox(unsigned char *key, int keyLen)
{
int i;
for (i = 0; i < 256; ++i) {
for (i = 0; i < 256; ++i)
{
mKeyBox[i] = (unsigned char)i;
}
@ -159,34 +184,38 @@ void NeteaseCrypt::buildKeyBox(unsigned char *key, int keyLen) {
{
swap = mKeyBox[i];
c = ((swap + last_byte + key[key_offset++]) & 0xff);
if (key_offset >= keyLen) key_offset = 0;
mKeyBox[i] = mKeyBox[c]; mKeyBox[c] = swap;
if (key_offset >= keyLen)
key_offset = 0;
mKeyBox[i] = mKeyBox[c];
mKeyBox[c] = swap;
last_byte = c;
}
}
std::string NeteaseCrypt::mimeType(std::string& data) {
if (memcmp(data.c_str(), mPng, 8) == 0) {
std::string NeteaseCrypt::mimeType(std::string &data)
{
if (memcmp(data.c_str(), mPng, 8) == 0)
{
return std::string("image/png");
}
return std::string("image/jpeg");
}
void NeteaseCrypt::FixMetadata() {
if (mDumpFilepath.string().length() <= 0) {
throw std::invalid_argument("must dump before");
}
void NeteaseCrypt::FixMetadata()
{
TagLib::File *audioFile;
TagLib::Tag *tag;
TagLib::ByteVector vector(mImageData.c_str(), mImageData.length());
if (mFormat == NeteaseCrypt::MP3) {
if (mFormat == NeteaseCrypt::MP3)
{
audioFile = new TagLib::MPEG::File(mDumpFilepath.c_str());
tag = dynamic_cast<TagLib::MPEG::File *>(audioFile)->ID3v2Tag(true);
if (mImageData.length() > 0) {
if (mImageData.length() > 0)
{
TagLib::ID3v2::AttachedPictureFrame *frame = new TagLib::ID3v2::AttachedPictureFrame;
frame->setMimeType(mimeType(mImageData));
@ -194,11 +223,14 @@ void NeteaseCrypt::FixMetadata() {
dynamic_cast<TagLib::ID3v2::Tag *>(tag)->addFrame(frame);
}
} else if (mFormat == NeteaseCrypt::FLAC) {
}
else if (mFormat == NeteaseCrypt::FLAC)
{
audioFile = new TagLib::FLAC::File(mDumpFilepath.c_str());
tag = audioFile->tag();
if (mImageData.length() > 0) {
if (mImageData.length() > 0)
{
TagLib::FLAC::Picture *cover = new TagLib::FLAC::Picture;
cover->setMimeType(mimeType(mImageData));
cover->setType(TagLib::FLAC::Picture::FrontCover);
@ -208,7 +240,8 @@ void NeteaseCrypt::FixMetadata() {
}
}
if (mMetaData != NULL) {
if (mMetaData != NULL)
{
tag->setTitle(TagLib::String(mMetaData->name(), TagLib::String::UTF8));
tag->setArtist(TagLib::String(mMetaData->artist(), TagLib::String::UTF8));
tag->setAlbum(TagLib::String(mMetaData->album(), TagLib::String::UTF8));
@ -219,29 +252,36 @@ void NeteaseCrypt::FixMetadata() {
audioFile->save();
}
void NeteaseCrypt::Dump() {
mDumpFilepath = mFilepath;
void NeteaseCrypt::Dump()
{
mDumpFilepath = std::filesystem::u8path(mFilepath);
std::vector<unsigned char> buffer(0x8000);
std::ofstream output;
while (!mFile.eof()) {
while (!mFile.eof())
{
int n = read((char *)buffer.data(), buffer.size());
for (int i = 0; i < n; i++) {
for (int i = 0; i < n; i++)
{
int j = (i + 1) & 0xff;
buffer[i] ^= mKeyBox[(mKeyBox[j] + mKeyBox[(mKeyBox[j] + j) & 0xff]) & 0xff];
}
if (!output.is_open()) {
if (!output.is_open())
{
// identify format
// ID3 format mp3
if (buffer[0] == 0x49 && buffer[1] == 0x44 && buffer[2] == 0x33) {
mDumpFilepath.replace_extension(".mp3");
if (buffer[0] == 0x49 && buffer[1] == 0x44 && buffer[2] == 0x33)
{
mDumpFilepath = mDumpFilepath.replace_extension("mp3");
mFormat = NeteaseCrypt::MP3;
} else {
mDumpFilepath.replace_extension(".flac");
}
else
{
mDumpFilepath = mDumpFilepath.replace_extension("flac");
mFormat = NeteaseCrypt::FLAC;
}
@ -255,24 +295,30 @@ void NeteaseCrypt::Dump() {
output.close();
}
NeteaseCrypt::~NeteaseCrypt() {
if (mMetaData != NULL) {
NeteaseCrypt::~NeteaseCrypt()
{
if (mMetaData != NULL)
{
delete mMetaData;
}
mFile.close();
}
NeteaseCrypt::NeteaseCrypt(std::filesystem::path const& path) {
if (!openFile(path)) {
NeteaseCrypt::NeteaseCrypt(std::string const &path)
{
if (!openFile(path))
{
throw std::invalid_argument("Can't open file");
}
if (!isNcmFile()) {
if (!isNcmFile())
{
throw std::invalid_argument("Not netease protected file");
}
if (!mFile.seekg(2, mFile.cur)) {
if (!mFile.seekg(2, mFile.cur))
{
throw std::invalid_argument("Can't seek file");
}
@ -281,14 +327,16 @@ NeteaseCrypt::NeteaseCrypt(std::filesystem::path const& path) {
unsigned int n;
read(reinterpret_cast<char *>(&n), sizeof(n));
if (n <= 0) {
if (n <= 0)
{
throw std::invalid_argument("Broken NCM file");
}
std::vector<char> keydata(n);
read(keydata.data(), n);
for (size_t i = 0; i < n; i++) {
for (size_t i = 0; i < n; i++)
{
keydata[i] ^= 0x64;
}
@ -301,15 +349,19 @@ NeteaseCrypt::NeteaseCrypt(std::filesystem::path const& path) {
read(reinterpret_cast<char *>(&n), sizeof(n));
if (n <= 0) {
std::cout << "[Warn] " << path.string() << " missing metadata infomation can't fix some infomation!" << std::endl;
if (n <= 0)
{
std::cout << "[Warn] " << path << " missing metadata infomation can't fix some infomation!" << std::endl;
mMetaData = NULL;
} else {
}
else
{
std::vector<char> modifyData(n);
read(modifyData.data(), n);
for (size_t i = 0; i < n; i++) {
for (size_t i = 0; i < n; i++)
{
modifyData[i] ^= 0x63;
}
@ -333,18 +385,22 @@ NeteaseCrypt::NeteaseCrypt(std::filesystem::path const& path) {
}
// skip crc32 & unuse charset
if (!mFile.seekg(9, mFile.cur)) {
if (!mFile.seekg(9, mFile.cur))
{
throw std::invalid_argument("can't seek file");
}
read(reinterpret_cast<char *>(&n), sizeof(n));
if (n > 0) {
if (n > 0)
{
char *imageData = (char *)malloc(n);
read(imageData, n);
mImageData = std::string(imageData, n);
} else {
std::cout << "[Warn] " << path.string() << " missing album can't fix album image!" << std::endl;
}
else
{
std::cout << "[Warn] " << path << " missing album can't fix album image!" << std::endl;
}
}

View File

@ -42,7 +42,7 @@ private:
enum NcmFormat { MP3, FLAC };
private:
std::filesystem::path mFilepath;
std::string mFilepath;
std::filesystem::path mDumpFilepath;
NcmFormat mFormat;
std::string mImageData;
@ -52,17 +52,17 @@ private:
private:
bool isNcmFile();
bool openFile(std::filesystem::path const&);
bool openFile(std::string const&);
int read(char *s, std::streamsize n);
void buildKeyBox(unsigned char *key, int keyLen);
std::string mimeType(std::string& data);
public:
const std::filesystem::path& filepath() const { return mFilepath; }
const std::filesystem::path& dumpFilepath() const { return mDumpFilepath; }
const std::string& filepath() const { return mFilepath; }
const std::filesystem::path dumpFilepath() const { return mDumpFilepath; }
public:
NeteaseCrypt(std::filesystem::path const&);
NeteaseCrypt(std::string const&);
~NeteaseCrypt();
public:

7
platform.h Normal file
View File

@ -0,0 +1,7 @@
#pragma once

# ifdef _WIN32
/*
 * Win32-specific argv initialization that splits OS-supplied UNICODE
 * command line string to array of UTF8-encoded strings.
 *
 * argc - in/out: receives the number of UTF-8 arguments produced.
 * argv - in/out: receives the NULL-terminated array of UTF-8 arguments.
 *
 * Declared only when _WIN32 is defined, so callers must guard the call
 * with the same condition.
 */
void win32_utf8argv(int *argc, char **argv[]);
# endif

296
win32_init.cpp Normal file
View File

@ -0,0 +1,296 @@
#include <windows.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "platform.h"
#if defined(CP_UTF8)
static UINT saved_cp;
static int newargc;
static char **newargv;
/*
 * Restores the console output code page saved in saved_cp and releases
 * every string held in newargv, followed by the array itself.
 */
static void cleanup(void)
{
    int idx = 0;

    SetConsoleOutputCP(saved_cp);

    while (idx < newargc) {
        free(newargv[idx]);
        ++idx;
    }
    free(newargv);
}
/*
 * Incrementally [re]allocate newargv and keep it NULL-terminated.
 *
 * argc - index that must be addressable in newargv; newargv[argc] is set
 *        to NULL on success.
 *
 * Returns 1 on success, 0 if the reallocation failed (newargv is left
 * unchanged in that case).
 */
static int validate_argv(int argc)
{
    /* Capacity of newargv in elements; grows in steps of 64. */
    static int size = 0;

    /*
     * |size| is static and therefore survives newargv being reset to NULL
     * by the caller. Treat a NULL array as "needs allocation" so that the
     * else branch never writes through a NULL pointer.
     */
    if (newargv == NULL || argc >= size) {
        char **ptr;

        while (argc >= size)
            size += 64;

        ptr = (char**)realloc(newargv, size * sizeof(newargv[0]));
        if (ptr == NULL)
            return 0;

        (newargv = ptr)[argc] = NULL;
    } else {
        newargv[argc] = NULL;
    }

    return 1;
}
/*
 * Expands a wildcard argument and appends every match to newargv as a
 * UTF-8 string.
 *
 * wstr - argument text; wstr[wlen] must be writable because it is
 *        temporarily overwritten with a terminator for FindFirstFileW.
 * wlen - number of WCHARs in the argument.
 *
 * Returns 0 if the argument contains no '*' or '?' in its file name part,
 * or if FindFirstFileW finds nothing; the caller then passes the argument
 * through verbatim. Returns 1 once a search has been started, even if an
 * allocation failure cut the enumeration short.
 */
static int process_glob(WCHAR *wstr, int wlen)
{
    int i, slash, udlen;
    WCHAR saved_char;
    WIN32_FIND_DATAW data;
    HANDLE h;

    /*
     * Note that we support wildcard characters only in filename part
     * of the path, and not in directories. Windows users are used to
     * this, that's why recursive glob processing is not implemented.
     */
    /*
     * Start by looking for last slash or backslash, ...
     */
    for (slash = 0, i = 0; i < wlen; i++)
        if (wstr[i] == L'/' || wstr[i] == L'\\')
            slash = i + 1;
    /*
     * ... then look for asterisk or question mark in the file name.
     */
    for (i = slash; i < wlen; i++)
        if (wstr[i] == L'*' || wstr[i] == L'?')
            break;

    if (i == wlen)
        return 0;   /* definitely not a glob */

    /* Terminate the argument in place for the API call, then restore it. */
    saved_char = wstr[wlen];
    wstr[wlen] = L'\0';
    h = FindFirstFileW(wstr, &data);
    wstr[wlen] = saved_char;
    if (h == INVALID_HANDLE_VALUE)
        return 0;   /* not a valid glob, just pass... */

    /* UTF-8 length of the directory prefix (everything up to the slash). */
    if (slash)
        udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                    NULL, 0, NULL, NULL);
    else
        udlen = 0;

    do {
        int uflen;
        char *arg;

        /*
         * skip over . and ..
         */
        if (data.cFileName[0] == L'.') {
            if ((data.cFileName[1] == L'\0') ||
                (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
                continue;
        }

        if (!validate_argv(newargc + 1))
            break;

        /*
         * -1 below means "scan for trailing '\0' *and* count it",
         * so that |uflen| covers even trailing '\0'.
         */
        uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                                    NULL, 0, NULL, NULL);

        arg = (char*)malloc(udlen + uflen);
        if (arg == NULL)
            break;

        if (udlen)
            WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
                                arg, udlen, NULL, NULL);

        WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
                            arg + udlen, uflen, NULL, NULL);

        newargv[newargc++] = arg;
    } while (FindNextFileW(h, &data));

    /*
     * Search handles returned by FindFirstFileW must be released with
     * FindClose, not CloseHandle.
     */
    FindClose(h);

    return 1;
}
/*
 * Rebuilds the program's argument vector from the wide-character command
 * line returned by GetCommandLineW(), converting each argument to UTF-8.
 *
 * argc - out: overwritten with the number of arguments produced.
 * argv - out: overwritten with the NULL-terminated UTF-8 argument array.
 *
 * On success the console output code page is switched to CP_UTF8 and
 * cleanup() is registered with atexit(). On any allocation failure
 * everything built so far is freed and *argc / *argv are left untouched.
 */
void win32_utf8argv(int *argc, char **argv[])
{
const WCHAR *wcmdline;
WCHAR *warg, *wend, *p;
int wlen, ulen, valid = 1;
char *arg;
/* Start from an empty vector; validate_argv() allocates the initial array. */
newargc = 0;
newargv = NULL;
if (!validate_argv(newargc))
return;
wcmdline = GetCommandLineW();
if (wcmdline == NULL) return;
/*
* make a copy of the command line, since we might have to modify it...
*/
wlen = wcslen(wcmdline);
/* The copy is allocated with _alloca and is never passed to free(). */
p = (WCHAR*)_alloca((wlen + 1) * sizeof(WCHAR));
wcscpy(p, wcmdline);
/* One iteration of this loop consumes one argument. */
while (*p != L'\0') {
int in_quote = 0;
if (*p == L' ' || *p == L'\t') {
p++; /* skip over whitespace */
continue;
}
/*
* Note: because we may need to fiddle with the number of backslashes,
* the argument string is copied into itself. This is safe because
* the number of characters will never expand.
*/
/* warg marks the start of the argument, wend the write position. */
warg = wend = p;
while (*p != L'\0'
&& (in_quote || (*p != L' ' && *p != L'\t'))) {
switch (*p) {
case L'\\':
/*
* Microsoft documentation on how backslashes are treated
* is:
*
* + Backslashes are interpreted literally, unless they
* immediately precede a double quotation mark.
* + If an even number of backslashes is followed by a double
* quotation mark, one backslash is placed in the argv array
* for every pair of backslashes, and the double quotation
* mark is interpreted as a string delimiter.
* + If an odd number of backslashes is followed by a double
* quotation mark, one backslash is placed in the argv array
* for every pair of backslashes, and the double quotation
* mark is "escaped" by the remaining backslash, causing a
* literal double quotation mark (") to be placed in argv.
*
* Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
*
* Though referred page doesn't mention it, multiple double
* quotes are also special. Pair of double quotes in quoted
* string is counted as single double quote.
*/
{
/* q remembers where the run of backslashes started. */
const WCHAR *q = p;
int i;
while (*p == L'\\')
p++;
if (*p == L'"') {
int i;
/* Emit one backslash per pair. */
for (i = (p - q) / 2; i > 0; i--)
*wend++ = L'\\';
/*
* if odd amount of backslashes before the quote,
* said quote is part of the argument, not a delimiter
*/
if ((p - q) % 2 == 1)
*wend++ = *p++;
} else {
/* Not followed by a quote: copy the backslashes literally. */
for (i = p - q; i > 0; i--)
*wend++ = L'\\';
}
}
break;
case L'"':
/*
* Without the preceding backslash (or when preceded with an
* even number of backslashes), the double quote is a simple
* string delimiter and just slightly change the parsing state
*/
if (in_quote && p[1] == L'"')
*wend++ = *p++;
else
in_quote = !in_quote;
p++;
break;
default:
/*
* Any other non-delimiter character is just taken verbatim
*/
*wend++ = *p++;
}
}
wlen = wend - warg;
/*
* Empty arguments and arguments that process_glob() declines are
* converted to UTF-8 and appended as a single entry.
*/
if (wlen == 0 || !process_glob(warg, wlen)) {
if (!validate_argv(newargc + 1)) {
valid = 0;
break;
}
ulen = 0;
if (wlen > 0) {
ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
NULL, 0, NULL, NULL);
/* Conversion failed: drop this argument and move on. */
if (ulen <= 0)
continue;
}
arg = (char*)malloc(ulen + 1);
if (arg == NULL) {
valid = 0;
break;
}
if (wlen > 0)
WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
arg, ulen, NULL, NULL);
arg[ulen] = '\0';
newargv[newargc++] = arg;
}
}
if (valid) {
/* Remember the current code page so cleanup() can restore it at exit. */
saved_cp = GetConsoleOutputCP();
SetConsoleOutputCP(CP_UTF8);
*argc = newargc;
*argv = newargv;
atexit(cleanup);
} else if (newargv != NULL) {
/* Allocation failed part-way: release everything built so far. */
int i;
for (i = 0; i < newargc; i++)
free(newargv[i]);
free(newargv);
newargc = 0;
newargv = NULL;
}
return;
}
#else
/*
 * Fallback used when CP_UTF8 is not defined: does nothing and leaves the
 * caller's argc/argv exactly as they were.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
#endif