在过去的几个月里,我一直在做一个导出项目:应用程序需要从 Blob 存储中获取文件,把它们合并成一个文件,压缩为一个 zip 压缩包,再上传回 Blob 存储。我把这个过程拆成了几个步骤。性能非常好,整个流程都能正常工作,但当我导出大量文件时,最后一步会崩溃(因为我的环境只有 15 GB 内存,而文件总量比这更大)。有什么主意吗?
以下是最后一步的简单说明及相应代码:
- 从 Blob 存储中获取所有相关文件,并以文件路径为键、byte[] 为值存入字典中
/// <summary>
/// Downloads every blob under the configured prefix in parallel and returns
/// the results as a dictionary keyed by blob name.
/// NOTE(review): this buffers every file as a byte[] in memory, which is the
/// root cause of the out-of-memory crash on large exports — consider streaming
/// each blob directly into the zip archive instead of materializing all of
/// them here first.
/// </summary>
/// <param name="exportId">Identifier of the export being assembled (forwarded to the download helper).</param>
/// <returns>Blob name -> file contents for every blob listed under the prefix.</returns>
public async Task<Dictionary<string, byte[]>> DownloadManyAsync(Guid exportId)
{
    var files = new ConcurrentDictionary<string, byte[]>();
    var container = _blobServiceClient.GetBlobContainerClient("");
    var options = BlobStorageTools.GetOptions();

    // Bound the number of simultaneous downloads. The original enqueued one
    // task per blob with no limit, which spikes both memory and connection
    // usage on large exports.
    using var throttle = new SemaphoreSlim(16);
    var tasks = new List<Task>();

    // Use the async pager instead of the blocking GetBlobs: this method is
    // async, so listing should not block a thread on network I/O.
    await foreach (var blob in container.GetBlobsAsync(prefix: ""))
    {
        await throttle.WaitAsync();
        tasks.Add(Task.Run(async () =>
        {
            try
            {
                await DownloadAndEnlist(container.GetBlobClient(blob.Name), files, options, exportId);
            }
            finally
            {
                throttle.Release();
            }
        }));
    }

    await Task.WhenAll(tasks);
    return files.ToDictionary(x => x.Key, x => x.Value, files.Comparer);
}
/// <summary>
/// Downloads one blob into memory and records its bytes in the shared map,
/// keyed by the blob's name.
/// </summary>
/// <param name="blob">Client for the blob to fetch.</param>
/// <param name="files">Shared blob-name -> contents map the result is added to.</param>
/// <param name="options">Transfer options forwarded to the download call.</param>
/// <param name="exportId">Identifier of the export (not used in the body shown here).</param>
public async Task DownloadAndEnlist(BlobClient blob, ConcurrentDictionary<string, byte[]> files, StorageTransferOptions options, Guid exportId)
{
    using var buffer = new MemoryStream();
    await blob.DownloadToAsync(buffer, default, options);

    // ToArray yields a right-sized copy of everything written to the stream.
    var contents = buffer.ToArray();
    files.TryAdd(blob.Name, contents);
}
- 创建一个 Zip 存档,并把各文件的字节写入其中
// Build the zip archive in memory.
// NOTE(review): MemoryTributary still keeps the entire archive in RAM, which is
// what blows the 15 GB limit. To actually fix the OOM, write the archive to a
// temp FileStream (or stream it directly to blob storage) instead of an
// in-memory stream.
using var memoryStream = new MemoryTributary();
using (var archive = new ZipArchive(memoryStream, ZipArchiveMode.Create, true))
{
    // Snapshot the keys so entries can be removed while iterating. The original
    // reverse-index loop called files.ElementAt(i), which is O(i) on a
    // dictionary and made the whole loop O(n^2).
    foreach (var name in files.Keys.ToList())
    {
        var bytes = files[name];
        var zipArchiveEntry = archive.CreateEntry(name, CompressionLevel.Fastest);
        using var zipStream = zipArchiveEntry.Open();
        zipStream.Write(bytes, 0, bytes.Length);

        // Drop the entry as soon as it is zipped so the raw bytes become
        // collectible and memory pressure falls as the archive grows.
        files.Remove(name);
    }
}
- 将压缩文件保存到 Blob 存储中
/// <summary>
/// Uploads a (potentially very large) stream to blob storage as
/// "{fileName}.zip" by staging fixed-size blocks in parallel, then committing
/// the block list.
/// </summary>
/// <param name="fileName">Target blob name without the ".zip" extension.</param>
/// <param name="file">Source stream; it is rewound to position 0 before reading.</param>
/// <returns>The URI of the committed blob.</returns>
public async Task<string> SaveExport(string fileName, Stream file)
{
    const int BlockSize = 8_000_000;
    var cloudBlockBlob = _blobClient.GetContainerReference("").GetBlockBlobReference($"{fileName}.zip");

    // BUG FIX: the block list MUST be committed in file order. The original
    // collected ids in a BlockingCollection as uploads *completed*, so a slow
    // early block could be committed out of order and corrupt the blob.
    // Build the ordered list here in the read loop instead.
    var blockList = new List<string>();
    var tasks = new Queue<Task>();

    // Bound the uploads in flight so we don't hold every 8 MB buffer in
    // memory at once.
    using var throttle = new SemaphoreSlim(4);

    if (file.Position != 0) file.Position = 0;

    int blockNumber = 0;
    while (true)
    {
        byte[] buffer = new byte[BlockSize];

        // BUG FIX: Stream.ReadAsync may return fewer bytes than requested even
        // before EOF, so the original `while (bytesRead == 8000000)` could
        // truncate the upload on a short read. Keep reading until the buffer
        // is full or the stream really ends.
        int bytesRead = 0;
        while (bytesRead < BlockSize)
        {
            int n = await file.ReadAsync(buffer.AsMemory(bytesRead, BlockSize - bytesRead));
            if (n == 0) break;
            bytesRead += n;
        }

        // BUG FIX: when the stream length is an exact multiple of BlockSize the
        // original staged a zero-length final block; skip it instead.
        if (bytesRead == 0) break;

        blockNumber++;
        string base64BlockId = Convert.ToBase64String(Encoding.UTF8.GetBytes($"{blockNumber:000000000}"));
        blockList.Add(base64BlockId); // recorded in file order, not completion order

        int count = bytesRead; // capture a stable copy for the closure
        await throttle.WaitAsync();
        tasks.Enqueue(Task.Run(async () =>
        {
            try
            {
                await cloudBlockBlob.PutBlockAsync(base64BlockId, new MemoryStream(buffer, 0, count), null);
            }
            finally
            {
                throttle.Release();
            }
        }));

        if (bytesRead < BlockSize) break; // short final block => EOF reached
    }

    await Task.WhenAll(tasks);
    await cloudBlockBlob.PutBlockListAsync(blockList);
    return cloudBlockBlob.Uri.ToString();
}
我考虑过使用 Azure Functions,但函数也有 15 GB 的内存限制,我还是会遇到同样的问题。