如何让 minizip 包装器支持 Unicode 文件名?

我正在尝试使用 zlib(http://www.zlib.net/)附带的 minizip 包装器来压缩文件夹。只要文件名是英文,它就可以正常工作。有没有人尝试过修改 minizip 来支持 Unicode?

修改后的代码发布在下面。问题出在下面这个函数上:它的第二个参数(文件名)只接受 const char *。当我把宽字符文件名转换成 char 字符串时会丢失数据,压缩包里的文件名与原来的不一致。

例如:文件名“中文 – 统一码.txt”在 zip 中变成了“中文-t +ƒS+Çtáü.txt”。

err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi, NULL,0,NULL,0,NULL /* comment*/, (opt_compress_level != 0) ? Z_DEFLATED : 0, opt_compress_level,0, /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, password,crcFile, zip64); minizip.c Version 1.1, February 14h, 2010 sample part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) Modifications of Unzip for Zip64 Copyright (C) 2007-2008 Even Rouault Modifications for Zip64 support on both zip and unzip Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) */ #if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__)) #ifndef __USE_FILE_OFFSET64 #define __USE_FILE_OFFSET64 #endif #ifndef __USE_LARGEFILE64 #define __USE_LARGEFILE64 #endif #ifndef _LARGEFILE64_SOURCE #define _LARGEFILE64_SOURCE #endif #ifndef _FILE_OFFSET_BIT #define _FILE_OFFSET_BIT 64 #endif #endif #ifdef __APPLE__ // In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions #define FOPEN_FUNC(filename, mode) fopen(filename, mode) #define FTELLO_FUNC(stream) ftello(stream) #define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin) #else #define FOPEN_FUNC(filename, mode) fopen64(filename, mode) #define FTELLO_FUNC(stream) ftello64(stream) #define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin) #endif #include  #include  #include  #include  #include  #include  #ifdef _WIN32 # include  # include  #define GetCurrentDir _getcwd #else # include  # include  # include  # include  #endif #include "zip.h" #include "Shlwapi.h" #ifdef _WIN32 #define USEWIN32IOAPI #include "iowin32.h" #endif #include  #include  #include  #include  #include  #include  #include  using namespace std; #define WRITEBUFFERSIZE (16384) #define MAXFILENAME (256) #ifdef _WIN32 uLong filetime( wchar_t *f, /* 
name of file to get info on */ tm_zip *tmzip, /* return value: access, modific. and creation times */ uLong *dt) /* dostime */ { int ret = 0; { FILETIME ftLocal; HANDLE hFind; _WIN32_FIND_DATAW ff32; hFind = FindFirstFileW(f,&ff32); if (hFind != INVALID_HANDLE_VALUE) { FileTimeToLocalFileTime(&(ff32.ftLastWriteTime),&ftLocal); FileTimeToDosDateTime(&ftLocal,((LPWORD)dt)+1,((LPWORD)dt)+0); FindClose(hFind); ret = 1; } } return ret; } #else #ifdef unix || __APPLE__ uLong filetime(f, tmzip, dt) char *f; /* name of file to get info on */ tm_zip *tmzip; /* return value: access, modific. and creation times */ uLong *dt; /* dostime */ { int ret=0; struct stat s; /* results of stat() */ struct tm* filedate; time_t tm_t=0; if (strcmp(f,"-")!=0) { char name[MAXFILENAME+1]; int len = strlen(f); if (len > MAXFILENAME) len = MAXFILENAME; strncpy(name, f,MAXFILENAME-1); /* strncpy doesnt append the trailing NULL, of the string is too long. */ name[ MAXFILENAME ] = '\0'; if (name[len - 1] == '/') name[len - 1] = '\0'; /* not all systems allow stat'ing a file with / appended */ if (stat(name,&s)==0) { tm_t = s.st_mtime; ret = 1; } } filedate = localtime(&tm_t); tmzip->tm_sec = filedate->tm_sec; tmzip->tm_min = filedate->tm_min; tmzip->tm_hour = filedate->tm_hour; tmzip->tm_mday = filedate->tm_mday; tmzip->tm_mon = filedate->tm_mon ; tmzip->tm_year = filedate->tm_year; return ret; } #else uLong filetime(f, tmzip, dt) char *f; /* name of file to get info on */ tm_zip *tmzip; /* return value: access, modific. 
and creation times */ uLong *dt; /* dostime */ { return 0; } #endif #endif void addFileToList(list& fileList, const wstring& directory, const set& excludeFilterSet, const wstring& fileName ) { wstring fileExtension = fileName.substr(fileName.find_last_of(L".") + 1); if (!fileExtension.empty()) { if (excludeFilterSet.find(fileExtension) != excludeFilterSet.end()) return; } fileList.push_back(directory + fileName); } void GetFileListing(list& fileList, wstring directory,const set& excludeFilterSet,bool recursively=true) { directory = directory + L"\\"; wstring filter = directory + L"*"; _WIN32_FIND_DATAW FindFileData; HANDLE hFind = FindFirstFileW(filter.c_str(), &FindFileData); if (hFind == INVALID_HANDLE_VALUE) { DWORD dwError = GetLastError(); if (dwError != ERROR_FILE_NOT_FOUND) { //cout << "Invalid file handle for filter " << filter << ". Error is " << GetLastError() << endl; } return; } do { if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { if ((recursively) && (wcscmp(FindFileData.cFileName, L".") != 0) && (wcscmp(FindFileData.cFileName, L"..") != 0)) { GetFileListing(fileList, directory + FindFileData.cFileName, excludeFilterSet); } } else { addFileToList(fileList, directory, excludeFilterSet,FindFileData.cFileName); } } while (FindNextFileW(hFind, &FindFileData) != 0); DWORD dwError = GetLastError(); FindClose(hFind); if (dwError != ERROR_NO_MORE_FILES) { //cout << "FindNextFile error. 
Error is "<< dwError << endl; } } int check_exist_file(wchar_t* filename) { FILE* ftestexist; int ret = 1; //ftestexist = FOPEN_FUNC(filename,"rb"); ftestexist = _wfopen(filename,L"rb"); if (ftestexist==NULL) ret = 0; else fclose(ftestexist); return ret; } /* calculate the CRC32 of a file, because to encrypt a file, we need known the CRC32 of the file before */ int getFileCrc(const wchar_t * filenameinzip,void*buf,unsigned long size_buf,unsigned long* result_crc) { unsigned long calculate_crc=0; int err=ZIP_OK; //FILE * fin = FOPEN_FUNC(filenameinzip,"rb"); FILE * fin = _wfopen(filenameinzip,L"rb"); unsigned long size_read = 0; unsigned long total_read = 0; if (fin==NULL) { err = ZIP_ERRNO; } if (err == ZIP_OK) do { err = ZIP_OK; size_read = (int)fread(buf,1,size_buf,fin); if (size_read 0) calculate_crc = crc32(calculate_crc,(const Bytef *)buf,size_read); total_read += size_read; } while ((err == ZIP_OK) && (size_read>0)); if (fin) fclose(fin); *result_crc=calculate_crc; printf("file %s crc %lx\n", filenameinzip, calculate_crc); return err; } int isLargeFile(const wchar_t * filename) { int largeFile = 0; ZPOS64_T pos = 0; //FILE* pFile = FOPEN_FUNC(filename, "rb"); FILE* pFile = _wfopen(filename, L"rb"); if(pFile != NULL) { int n = FSEEKO_FUNC(pFile, 0, SEEK_END); pos = FTELLO_FUNC(pFile); printf("File : %s is %lld bytes\n", filename, pos); if(pos >= 0xffffffff) largeFile = 1; fclose(pFile); } return largeFile; } void split( const wstring& text, wchar_t delimiter,set& result ) { wstring::size_type start = 0; wstring::size_type end = text.find( delimiter, start ); wstring token; while( end != wstring::npos ) { token = text.substr( start, end - start ); token.erase(0,2); result.insert( token ); start = end + 1; end = text.find( delimiter, start ); } // Add the last string token = text.substr(start); token = token.erase(0,2); result.insert(token); } //Do not call me. 
long getUTF8size(const wchar_t *string){ if (!string) return 0; long res=0; for (;*string;string++){ if (*string<0x80) res++; else if (*string<0x800) res+=2; else res+=3; } return res; } char *WChar_to_UTF8(const wchar_t *string){ long fSize=getUTF8size(string); char *res=new char[fSize+1]; res[fSize]=0; if (!string) return res; long b=0; for (;*string;string++,b++){ if (*string<0x80) res[b]=(char)*string; else if (*string>6)|192; res[b]=*string&63|128; }else{ res[b++]=(*string>>12)|224; res[b++]=((*string&4095)>>6)|128; res[b]=*string&63|128; } } return res; } std::string utf8_encode(const std::wstring &wstr) { int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL); std::string strTo( size_needed, 0 ); WideCharToMultiByte (CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL); return strTo; } wstring zipper( const wstring& directoryPath, const wstring& strExcludeFilter, wstring & zipFileName ) { int opt_overwrite=0,opt_compress_level=Z_BEST_COMPRESSION,opt_exclude_path=0,err=0,size_buf=0; void* buf=NULL; const char* password=NULL; list fileList; DWORD dwRet; wchar_t cCurrentPath[MAX_PATH]; dwRet = GetCurrentDirectoryW(MAX_PATH, cCurrentPath); if( dwRet == 0 ) { return wstring(); } // Change the directory to the current folder _wchdir(directoryPath.c_str()); set excludeFilterSet; split(strExcludeFilter,'|',excludeFilterSet); GetFileListing(fileList, directoryPath,excludeFilterSet); opt_overwrite = 1; size_buf = WRITEBUFFERSIZE; buf = (void*)malloc(size_buf); if (buf==NULL) return wstring(); wchar_t tempDirPath[MAX_PATH]; dwRet = GetTempPathW (MAX_PATH, tempDirPath); if( dwRet == 0 ) return wstring(); wstring directoryName,zipFilePath; _WIN32_FIND_DATAW FindFileData; HANDLE hFind = FindFirstFileW(directoryPath.c_str(), &FindFileData); if (FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { directoryName = FindFileData.cFileName; } zipFilePath = wstring(tempDirPath)+L"\\"+directoryName+L".zip"; 
zipFile zf; int errclose; #ifdef USEWIN32IOAPI zlib_filefunc64_def ffunc; fill_win32_filefunc64W (&ffunc); zf = zipOpen2_64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0,NULL,&ffunc); # else zf = zipOpen64(zipFilePath.c_str(),(opt_overwrite==2) ? 2 : 0); # endif if (zf == NULL) { //printf("error opening %s\n",filename_try); err= ZIP_ERRNO; } else { //printf("creating %s\n",filename_try); } for(list::iterator it = fileList.begin() ; it!=fileList.end();++it) { FILE * fin; int size_read; //const char* filenameinzip = (*it).c_str(); wstring filenameinzip = (*it).c_str(); wchar_t szOut[MAX_PATH]; PathRelativePathToW(szOut, directoryPath.c_str(), FILE_ATTRIBUTE_DIRECTORY, filenameinzip.c_str(), FILE_ATTRIBUTE_NORMAL); wchar_t *savefilenameinzip; zip_fileinfo zi; unsigned long crcFile=0; int zip64 = 0; zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour = zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0; zi.dosDate = 0; zi.internal_fa = 0; zi.external_fa = 0; filetime(szOut,&zi.tmz_date,&zi.dosDate); if ((password != NULL) && (err==ZIP_OK)) err = getFileCrc(szOut,buf,size_buf,&crcFile); zip64 = isLargeFile(szOut); /* The path name saved, should not include a leading slash. */ /*if it did, windows/xp and dynazip couldn't read the zip file. */ savefilenameinzip = szOut; while( savefilenameinzip[0] == '\\' || savefilenameinzip[0] == '/' ) { savefilenameinzip++; } string outstr = utf8_encode(savefilenameinzip); //char * op = (char*)outstr.c_str(); err = zipOpenNewFileInZip3_64( zf,outstr.c_str(),&zi, NULL,0,NULL,0,NULL /* comment*/, (opt_compress_level != 0) ? 
Z_DEFLATED : 0, opt_compress_level,0, /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, password,crcFile, zip64); if (err != ZIP_OK) { //printf("error in opening %s in zipfile\n",szOut); } else { //fin = FOPEN_FUNC(szOut,"rb"); fin = _wfopen(szOut,L"rb"); if (fin==NULL) { err=ZIP_ERRNO; //printf("error in opening %s for reading\n",szOut); } } if (err == ZIP_OK) do { err = ZIP_OK; size_read = (int)fread(buf,1,size_buf,fin); if (size_read 0) { err = zipWriteInFileInZip (zf,buf,size_read); if (err0)); if (fin) fclose(fin); if (err<0) err=ZIP_ERRNO; else { err = zipCloseFileInZip(zf); if (err!=ZIP_OK) { //printf("error in closing %s in the zipfile\n",szOut); } } } errclose = zipClose(zf,NULL); if (errclose != ZIP_OK) { //printf("error in closing %s\n",filename_try); } free(buf); // Change back the executabe context _wchdir(cCurrentPath); return zipFilePath; } 

根据 ZIP 标准,在 ZIP 文件中存储 UTF-8 文件名的官方方法是设置“通用标志位 11”(general purpose bit 11)。查看 minizip 的源代码,在我看来 minizip 在任何情况下都不会替你设置这个位,而 zipOpenNewFileInZip3_64 也没有办法传入这个位。不过还有一个 zipOpenNewFileInZip4_64,它多接受两个参数:versionMadeBy 和 flagBase。因此,您可以把调用改成下面这样,以符合标准的方式存储 UTF-8 文件名:

 err = zipOpenNewFileInZip4_64(zf, outstr.c_str(), […], crcFile, 36, 1<<11, zip64); 

这里假设 outstr 确实包含文件名的有效 UTF-8 编码;从源代码看应该如此,但我没有验证过。我建议您把 outstr 的各个字节以十六进制打印出来进行验证。如果我没有算错,繁体文件名“統一碼.txt”的 UTF-8 编码用十六进制表示应为 e7 b5 b1 e4 b8 80 e7 a2 bc 2e 74 78 74;如果文件名是简体的“统一码.txt”,则应为 e7 bb 9f e4 b8 80 e7 a0 81 2e 74 78 74。

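有关 versionMadeBy 字段(我在调用中将其设置为 36)的详细信息,请参阅标准的第 4.4.2 节。它的取值取决于您使用的平台、zipfi 参数(在您的情况下为 &zi)中文件属性的格式,以及您所采用的标准版本。既然您使用 Unicode 文件名,我认为您采用的是标准的 6.3.* 版本,因此低位字节应为 36。(审阅注:按照标准第 4.4.2 节的编码规则,低位字节的值除以 10 为主版本号、对 10 取余为次版本号,因此 6.3 版似乎应写作 63 而不是 36;此处保留原答案的取值,使用前请对照标准确认。)由于 minizip.c 包装器根本不存储任何文件属性,因此您不必在那里指定平台。从下面这几行可以看出它没有存储属性: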

  zi.internal_fa = 0; zi.external_fa = 0; 

请注意,尽管标准确实提供了一种表示 Unicode 文件名的方法,但这一部分直到 2006 年才加入,可能仍有许多 ZIP 应用程序不支持它。因此,即使您的压缩包是正确的,您的解压缩工具仍可能错误地解压其中的文件,把 UTF-8 字节当作代码页 437、Latin-1 或类似的编码来解释。

修改程序使其正确处理 UTF-8 并非易事,请参阅 Unicode 常见问题解答(Unicode FAQ)。处理可能含有恶意数据的程序(例如压缩/解压缩程序)必须格外小心。这项工作值得去做,但绝不简单。