{ ILoveJS }

Recursively walk a directory

typescript

An async generator function that recursively traverses a directory tree, yielding an entry (path, relative path, type and depth) for each file and directory that matches the include/exclude glob patterns, up to a configurable maximum depth.

node, fs, recursion, files

Code

typescript
import { readdir, stat } from "node:fs/promises";
import { join, relative, sep } from "node:path";

// Options accepted by walkDir. Every field is optional.
interface WalkOptions {
  // Glob patterns an entry must match (at least one) to be yielded.
  // Matched against the path relative to the root directory; directories
  // are matched with a trailing "/". Default: ["**/*"].
  include?: string[];
  // Glob patterns that remove an entry from the walk. An excluded directory
  // is neither yielded nor descended into. Default: [] (nothing excluded).
  exclude?: string[];
  // Deepest level to visit; entries directly inside the root directory are
  // at depth 0. Default: Infinity.
  maxDepth?: number;
}

// One entry produced by walkDir.
interface WalkResult {
  // Root directory joined with the entry's location (absolute only if the
  // root directory passed to walkDir was absolute).
  path: string;
  // The same entry expressed relative to the root directory.
  relativePath: string;
  // True when the entry is a directory according to its Dirent.
  isDirectory: boolean;
  // Nesting level of the entry; 0 for entries directly inside the root.
  depth: number;
}

// Compiles a glob pattern into an anchored regular expression.
//
// Supported syntax:
//   "*"   any run of characters except "/"
//   "?"   exactly one character except "/"
//   "**"  any run of characters, including "/"
//   "**/" at the start of a path segment: zero or more whole directories,
//         so "**/*.ts" matches both "index.ts" and "src/deep/index.ts"
//
// Every other regex metacharacter is escaped and matched literally.
// Negation ("!"), brace expansion and character classes are not supported.
//
// The pattern is translated in a single pass, so no placeholder text is
// injected that could collide with the pattern's own characters.
function globToRegex(pattern: string): RegExp {
  const source = pattern.replace(
    /\*\*\/|\*\*|\*|\?|[.+^${}()|[\]\\]/g,
    (token: string, offset: number): string => {
      switch (token) {
        case "**/": {
          // Only a globstar that occupies a whole path segment may match
          // "zero directories"; otherwise keep the mandatory separator.
          const startsSegment = offset === 0 || pattern[offset - 1] === "/";
          return startsSegment ? "(?:.*/)?" : ".*/";
        }
        case "**":
          return ".*";
        case "*":
          return "[^/]*";
        case "?":
          return "[^/]";
        default:
          // Regex metacharacter: match it literally.
          return `\\${token}`;
      }
    }
  );
  return new RegExp(`^${source}$`);
}

// Reports whether `path` satisfies at least one of the glob `patterns`.
// An empty pattern list never matches.
function matchesPatterns(path: string, patterns: string[]): boolean {
  for (const glob of patterns) {
    if (globToRegex(glob).test(path)) {
      return true;
    }
  }
  return false;
}

/**
 * Lazily walks the directory tree rooted at `dir`, yielding one result per
 * file or directory that passes the include/exclude filters.
 *
 * Filtering rules:
 * - Patterns are tested against the entry's path relative to `dir`, always
 *   using "/" as the separator regardless of platform.
 * - Directories are tested with a trailing "/" appended.
 * - Exclude patterns win over include patterns; an excluded directory is
 *   skipped entirely and never descended into.
 * - A directory that does not match `include` is not yielded but is still
 *   descended into.
 *
 * @param dir - Root directory of the walk.
 * @param options - Include/exclude patterns and depth limit; see WalkOptions.
 * @returns An async generator of results in the order `readdir` reports
 *   entries, with each directory's contents following the directory itself.
 * @throws Any `readdir` error other than EACCES. Directories that fail with
 *   EACCES are silently skipped.
 */
async function* walkDir(
  dir: string,
  options: WalkOptions = {}
): AsyncGenerator<WalkResult, void, unknown> {
  const { include = ["**/*"], exclude = [], maxDepth = Infinity } = options;
  const baseDir = dir;

  async function* walk(
    currentDir: string,
    currentDepth: number
  ): AsyncGenerator<WalkResult, void, unknown> {
    // Entries directly inside the root are depth 0, so depths 0..maxDepth
    // are visited and anything deeper is ignored.
    if (currentDepth > maxDepth) {
      return;
    }

    let entries;
    try {
      // withFileTypes yields Dirent objects, avoiding a stat call per entry.
      entries = await readdir(currentDir, { withFileTypes: true });
    } catch (error) {
      // Best effort: unreadable directories are skipped, everything else
      // is surfaced to the caller.
      if ((error as NodeJS.ErrnoException).code === "EACCES") {
        return;
      }
      throw error;
    }

    for (const entry of entries) {
      const fullPath = join(currentDir, entry.name);
      const relativePath = relative(baseDir, fullPath);
      const isDirectory = entry.isDirectory();

      // Glob patterns are written with "/", but path.relative() uses the
      // platform separator ("\" on Windows). Normalise for matching only;
      // the yielded relativePath keeps its native form.
      const globPath = relativePath.split(sep).join("/");
      const pathToMatch = isDirectory ? `${globPath}/` : globPath;

      if (matchesPatterns(pathToMatch, exclude)) {
        continue;
      }

      if (isDirectory) {
        const dirResult: WalkResult = {
          path: fullPath,
          relativePath,
          isDirectory: true,
          depth: currentDepth,
        };

        if (matchesPatterns(pathToMatch, include)) {
          yield dirResult;
        }

        yield* walk(fullPath, currentDepth + 1);
      } else if (matchesPatterns(globPath, include)) {
        yield {
          path: fullPath,
          relativePath,
          isDirectory: false,
          depth: currentDepth,
        };
      }
    }
  }

  yield* walk(dir, 0);
}

/**
 * Demonstrates walkDir against the current working directory: first by
 * streaming TypeScript entries to the console, then by gathering JSON file
 * paths into an array.
 */
async function main(): Promise<void> {
  const root = process.cwd();

  // Demo 1: print each matching entry as soon as it is produced.
  const tsOptions: WalkOptions = {
    include: ["**/*.ts"],
    exclude: ["node_modules/**", "dist/**"],
    maxDepth: 2,
  };
  console.log("=== All TypeScript files (max depth 2) ===");
  for await (const { depth, relativePath } of walkDir(root, tsOptions)) {
    console.log(`[${depth}] ${relativePath}`);
  }

  // Demo 2: accumulate matching files, ignoring directory entries.
  console.log("\n=== Collecting results into array ===");
  const jsonFiles: string[] = [];
  const jsonOptions: WalkOptions = { include: ["**/*.json"], maxDepth: 1 };
  for await (const found of walkDir(root, jsonOptions)) {
    if (found.isDirectory) {
      continue;
    }
    jsonFiles.push(found.relativePath);
  }
  console.log("JSON files found:", jsonFiles);
}

// Runs the demo whenever this module is evaluated (including on import);
// any rejection is reported on stderr instead of being left unhandled.
main().catch(console.error);

// WalkOptions and WalkResult are interfaces with no runtime value, so they
// are exported as types only; this keeps the export list valid under
// single-file transpilation and verbatimModuleSyntax.
export { walkDir };
export type { WalkOptions, WalkResult };

How It Works

This implementation uses an async generator function, which is ideal for directory traversal because it allows lazy evaluation — entries are yielded one at a time rather than collecting the entire tree into memory first. This matters when dealing with large directory structures containing thousands of files: memory usage is bounded by the largest single directory listing and the depth of the tree, not by the total number of entries.

The glob pattern matching is implemented through a custom globToRegex converter that handles the most common glob syntax: * matches any run of characters except path separators, ** matches any run of characters including path separators (for recursive matching), and ? matches a single character other than a path separator. The function escapes regex special characters and replaces the glob tokens with their regex equivalents. This approach handles patterns like **/*.ts and src/**/test?.js. Negation patterns such as !node_modules, brace expansion, and character classes are not supported; use the exclude option to filter paths out instead.

The recursive walking logic uses an inner generator function that tracks the current depth. Entries directly inside the root directory are at depth 0; once the current depth exceeds maxDepth, the function returns early without reading the directory or descending further. The exclude patterns are checked before include patterns, and an excluded directory is skipped entirely, including everything beneath it. Directories get a trailing slash appended for pattern matching, allowing patterns like node_modules/ to specifically target directories. Permission errors (EACCES) are caught and silently skipped, which is essential for real-world usage where some directories may be inaccessible; any other error is rethrown to the caller.

Each yielded result includes both the absolute path and the relative path from the base directory, plus metadata about whether it's a directory and its depth level. This rich result object gives consumers flexibility in how they process the files. The withFileTypes option on readdir is used to avoid separate stat calls for determining if entries are directories, significantly improving performance.

Use this pattern when building CLI tools, build systems, or any application that needs to process files across a directory tree. Avoid it when you need to process files in a specific order based on their content, as the traversal order depends on the filesystem. For extremely deep directory structures, consider using an iterative approach with an explicit stack to avoid potential stack overflow issues, though in practice the async nature of this implementation handles most real-world scenarios well.