在许多情况下,文件迭代是可以轻松并行执行的操作。 主题“如何:使用 PLINQ 循环访问文件目录”介绍了如何在许多情况下以最简单的方式执行此任务。 不过,如果代码必须处理访问文件系统时可能出现的多种异常,情况就会变得复杂。 下面的示例展示了一种解决此问题的方法。 它使用基于堆栈的迭代遍历指定目录下的所有文件和文件夹,并让代码能够捕获和处理各种异常。 当然,如何处理这些异常取决于你自己的选择。
下面的示例按顺序循环访问目录,但会并行处理文件。 当文件数与目录数之比很大时,这可能是最佳方法。 也可以并行执行目录迭代,并按顺序访问每个文件。 同时并行执行这两个循环的效率可能并不高,除非专门面向具有大量处理器的计算机。 不过,与所有情况一样,应彻底测试应用,以确定最佳方法。
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Security;
using System.Threading;
using System.Threading.Tasks;
/// <summary>
/// Sample program that walks a directory tree with an explicit stack and
/// hands every file it finds to a caller-supplied action, running the action
/// in parallel for directories that contain enough files.
/// </summary>
class Program
{
    /// <summary>
    /// Traverses <c>C:\Program Files</c>, reads every file and prints its name.
    /// </summary>
    static void Main()
    {
        try
        {
            TraverseTreeParallelForEach(@"C:\Program Files", (f) =>
            {
                // Exceptions are no-ops: a file that cannot be read is
                // deliberately skipped, but its name is still printed below.
                try
                {
                    // Do nothing with the data except read it.
                    File.ReadAllBytes(f);
                }
                catch (FileNotFoundException) { }
                catch (IOException) { }
                catch (UnauthorizedAccessException) { }
                catch (SecurityException) { }

                // Display the filename.
                Console.WriteLine(f);
            });
        }
        catch (ArgumentException)
        {
            Console.WriteLine(@"The directory 'C:\Program Files' does not exist.");
        }

        // Keep the console window open.
        Console.ReadKey();
    }

    /// <summary>
    /// Visits every directory under <paramref name="root"/> sequentially
    /// (depth-first, using an explicit stack) and invokes
    /// <paramref name="action"/> once per file path. Directories that cannot
    /// be listed are reported on the console and skipped.
    /// </summary>
    /// <param name="root">Directory at which the traversal starts.</param>
    /// <param name="action">Callback invoked with the full path of each file.
    /// It may be called concurrently from several threads.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="root"/> does not exist.
    /// </exception>
    /// <remarks>
    /// An <see cref="UnauthorizedAccessException"/> thrown by
    /// <paramref name="action"/> is written to the console and the traversal
    /// continues with the next directory. Any other exception thrown by
    /// <paramref name="action"/> propagates to the caller (wrapped in an
    /// <see cref="AggregateException"/> when it came from the parallel loop).
    /// </remarks>
    public static void TraverseTreeParallelForEach(string root, Action<string> action)
    {
        // Count of files traversed and timer for diagnostic output.
        int fileCount = 0;
        var sw = Stopwatch.StartNew();

        // Determine whether to parallelize file processing on each folder based on processor count.
        int procCount = System.Environment.ProcessorCount;

        // Data structure to hold names of subfolders to be examined for files.
        Stack<string> dirs = new Stack<string>();

        if (!Directory.Exists(root))
        {
            throw new ArgumentException(
                "The given root directory doesn't exist.", nameof(root));
        }
        dirs.Push(root);

        while (dirs.Count > 0)
        {
            string currentDir = dirs.Pop();
            string[] subDirs = {};
            string[] files = {};

            // Both listings share one handler set: a failure of either call
            // means the directory is reported and skipped entirely.
            try
            {
                subDirs = Directory.GetDirectories(currentDir);
                files = Directory.GetFiles(currentDir);
            }
            // Thrown if we do not have discovery permission on the directory.
            catch (UnauthorizedAccessException e)
            {
                Console.WriteLine(e.Message);
                continue;
            }
            // Covers DirectoryNotFoundException (another process deleted the
            // directory after we retrieved its name) as well as other I/O
            // failures such as an over-long path.
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
                continue;
            }

            // Execute in parallel if there are enough files in the directory.
            // Otherwise, execute sequentially. Files are opened and processed
            // synchronously but this could be modified to perform async I/O.
            try
            {
                if (files.Length < procCount)
                {
                    foreach (var file in files)
                    {
                        action(file);
                        fileCount++;
                    }
                }
                else
                {
                    // Each worker counts locally and publishes its subtotal
                    // once, so the shared counter is touched only in the
                    // final delegate.
                    Parallel.ForEach(files, () => 0, (file, loopState, localCount) =>
                                                 {
                                                     action(file);
                                                     return (int) ++localCount;
                                                 },
                                     (c) =>
                                     {
                                         Interlocked.Add(ref fileCount, c);
                                     });
                }
            }
            catch (AggregateException ae)
            {
                ae.Handle((ex) =>
                {
                    if (ex is UnauthorizedAccessException)
                    {
                        // Here we just output a message and go on.
                        Console.WriteLine(ex.Message);
                        return true;
                    }
                    // Handle other exceptions here if necessary...

                    return false;
                });
            }
            // Sequential path: treat an access failure exactly like the
            // parallel path does, instead of aborting the whole traversal.
            catch (UnauthorizedAccessException ex)
            {
                Console.WriteLine(ex.Message);
            }

            // Push the subdirectories onto the stack for traversal.
            // This could also be done before handing the files.
            foreach (string str in subDirs)
            {
                dirs.Push(str);
            }
        }

        // For diagnostic purposes.
        Console.WriteLine("Processed {0} files in {1} milliseconds", fileCount, sw.ElapsedMilliseconds);
    }
}
Imports System.Collections.Generic
Imports System.Diagnostics
Imports System.IO
Imports System.Security
Imports System.Threading
Imports System.Threading.Tasks
''' <summary>
''' Sample module that walks a directory tree with an explicit stack and
''' hands every file it finds to a caller-supplied action, running the action
''' in parallel for directories that contain enough files.
''' </summary>
Module Example
    ''' <summary>
    ''' Traverses C:\Program Files, reads every file and prints its name.
    ''' </summary>
    Sub Main()
        Try
            TraverseTreeParallelForEach("C:\Program Files",
                                        Sub(f)
                                            ' Exceptions are no-ops: a file that cannot be read is
                                            ' deliberately skipped, but its name is still printed below.
                                            Try
                                                ' Do nothing with the data except read it.
                                                File.ReadAllBytes(f)
                                                ' In the event the file has been deleted.
                                            Catch e As FileNotFoundException
                                                ' General I/O exception, especially if the file is in use.
                                            Catch e As IOException
                                                ' Lack of adequate permissions.
                                            Catch e As UnauthorizedAccessException
                                                ' Lack of adequate permissions.
                                            Catch e As SecurityException
                                            End Try
                                            ' Display the filename.
                                            Console.WriteLine(f)
                                        End Sub)
        Catch e As ArgumentException
            Console.WriteLine("The directory 'C:\Program Files' does not exist.")
        End Try
        ' Keep the console window open.
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Visits every directory under <paramref name="root"/> sequentially
    ''' (depth-first, using an explicit stack) and invokes
    ''' <paramref name="action"/> once per file path. Directories that cannot
    ''' be listed are reported on the console and skipped.
    ''' </summary>
    ''' <param name="root">Directory at which the traversal starts.</param>
    ''' <param name="action">Callback invoked with the full path of each file.
    ''' It may be called concurrently from several threads.</param>
    ''' <exception cref="ArgumentException">
    ''' <paramref name="root"/> does not exist.
    ''' </exception>
    ''' <remarks>
    ''' An UnauthorizedAccessException thrown by <paramref name="action"/> is
    ''' written to the console and the traversal continues with the next
    ''' directory. Any other exception thrown by <paramref name="action"/>
    ''' propagates to the caller (wrapped in an AggregateException when it
    ''' came from the parallel loop).
    ''' </remarks>
    Public Sub TraverseTreeParallelForEach(ByVal root As String, ByVal action As Action(Of String))
        ' Count of files traversed and timer for diagnostic output.
        Dim fileCount As Integer = 0
        Dim sw As Stopwatch = Stopwatch.StartNew()

        ' Determine whether to parallelize file processing on each folder based on processor count.
        Dim procCount As Integer = System.Environment.ProcessorCount

        ' Data structure to hold names of subfolders to be examined for files.
        Dim dirs As New Stack(Of String)

        If Not Directory.Exists(root) Then
            Throw New ArgumentException(
                "The given root directory doesn't exist.", NameOf(root))
        End If
        dirs.Push(root)

        While (dirs.Count > 0)
            Dim currentDir As String = dirs.Pop()
            Dim subDirs() As String = Nothing
            Dim files() As String = Nothing

            ' Both listings share one handler set: a failure of either call
            ' means the directory is reported and skipped entirely.
            Try
                subDirs = Directory.GetDirectories(currentDir)
                files = Directory.GetFiles(currentDir)
                ' Thrown if we do not have discovery permission on the directory.
            Catch e As UnauthorizedAccessException
                Console.WriteLine(e.Message)
                Continue While
                ' Covers DirectoryNotFoundException (another process deleted the
                ' directory after we retrieved its name) as well as other I/O
                ' failures such as an over-long path.
            Catch e As IOException
                Console.WriteLine(e.Message)
                Continue While
            End Try

            ' Execute in parallel if there are enough files in the directory.
            ' Otherwise, execute sequentially. Files are opened and processed
            ' synchronously but this could be modified to perform async I/O.
            Try
                If files.Length < procCount Then
                    For Each filePath In files
                        action(filePath)
                        fileCount += 1
                    Next
                Else
                    ' Each worker counts locally and publishes its subtotal once,
                    ' so the shared counter is touched only in the final delegate.
                    Parallel.ForEach(files, Function() 0,
                                     Function(filePath, loopState, localCount)
                                         action(filePath)
                                         localCount = localCount + 1
                                         Return localCount
                                     End Function,
                                     Sub(c)
                                         Interlocked.Add(fileCount, c)
                                     End Sub)
                End If
            Catch ae As AggregateException
                ae.Handle(Function(ex)
                              If TypeOf (ex) Is UnauthorizedAccessException Then
                                  ' Here we just output a message and go on.
                                  Console.WriteLine(ex.Message)
                                  Return True
                              End If
                              ' Handle other exceptions here if necessary...

                              Return False
                          End Function)
                ' Sequential path: treat an access failure exactly like the
                ' parallel path does, instead of aborting the whole traversal.
            Catch ex As UnauthorizedAccessException
                Console.WriteLine(ex.Message)
            End Try

            ' Push the subdirectories onto the stack for traversal.
            ' This could also be done before handing the files.
            For Each str As String In subDirs
                dirs.Push(str)
            Next
        End While

        ' For diagnostic purposes: report the totals once, after the whole
        ' tree has been visited, rather than once per directory.
        Console.WriteLine("Processed {0} files in {1} milliseconds", fileCount, sw.ElapsedMilliseconds)
    End Sub
End Module
在此示例中,文件 I/O 是同步执行的。 如果要处理大文件,或者网络连接速度较慢,最好异步访问文件。 可以将异步 I/O 技术与并行迭代结合使用。 有关详细信息,请参阅 TPL 和传统 .NET 异步编程。
此示例使用局部变量 fileCount 来维护已处理文件总数的计数。 由于多个任务可能并发访问此变量,因此通过调用 Interlocked.Add 方法来同步对它的访问。
请注意,如果主线程抛出异常,ForEach 方法启动的线程可能会继续运行。 若要停止这些线程,可以在异常处理程序中设置布尔变量,并在并行循环每次迭代时检查此变量的值。 如果此值指明异常已抛出,请使用 ParallelLoopState 变量停止或中断循环。 有关详细信息,请参阅如何:停止或中断 Parallel.For 循环。