JavaScript Compilers

JavaScript Compiler Development: Babel, AST Manipulation, and Custom Transpilers

Build JavaScript compilers and transpilers. Master AST parsing, code transformation, Babel plugins, and custom language development.

By JavaScript Document Team
Tags: compiler, babel, ast, transpiler, parser, code-transformation

JavaScript compiler development involves building tools that transform, analyze, and optimize JavaScript code. Using Abstract Syntax Trees (AST), developers can create transpilers, linters, code generators, and custom language extensions. This comprehensive guide covers AST manipulation, Babel plugin development, and building complete compiler toolchains.

AST Fundamentals and Parsing

AST Parser and Manipulator

// Complete AST Manipulation System
// Parse -> transform -> generate wrapper around the Babel toolchain.
//
// Usage: construct, then `await initialize()` before calling anything else.
// The Babel packages are loaded lazily, so `parser`, `generator`, `traverse`
// and `t` stay `null` until initialization has finished.
class ASTProcessor {
  constructor() {
    this.parser = null;
    this.generator = null;
    // Declared here (not only in initialize) so an uninitialized instance
    // exposes every tool as an explicit `null`.
    this.traverse = null;
    this.t = null;
    this.visitors = new Map();
    this.transformations = [];
    this.options = {
      preserveComments: true,
      sourceType: 'module',
      allowImportExportEverywhere: false,
      allowReturnOutsideFunction: false,
      plugins: ['jsx', 'typescript', 'decorators-legacy'],
    };
  }

  // Load the Babel packages and register the default visitors.
  // Returns this processor so calls can be chained.
  async initialize() {
    const [parserModule, generatorModule, traverseModule, typesModule] =
      await Promise.all([
        import('@babel/parser'),
        import('@babel/generator'),
        import('@babel/traverse'),
        import('@babel/types'),
      ]);

    // When Node loads a CommonJS package through import(), `default` is the
    // whole `module.exports` object, so the callable export sits one level
    // deeper (`default.default`). Bundlers and native ESM builds expose the
    // function directly on `default`. Accept both shapes.
    const unwrapCallable = (mod) => {
      const exported = mod.default ?? mod;
      return typeof exported === 'function' ? exported : exported.default;
    };

    this.parser = parserModule.parse ?? parserModule.default?.parse;
    this.generator = unwrapCallable(generatorModule);
    this.traverse = unwrapCallable(traverseModule);
    this.t = typesModule.default ?? typesModule;

    this.setupDefaultVisitors();
    return this;
  }

  // Fail with an actionable message when a Babel tool has not been loaded.
  requireTool(name) {
    if (typeof this[name] !== 'function') {
      throw new Error(
        `ASTProcessor.${name} is not available; call initialize() first`
      );
    }
  }

  // Parse JavaScript code to AST.
  // `options` are merged over `this.options` (per-call values win).
  // Throws Error('Parse error: ...') with the parser's error as `cause`.
  parse(code, options = {}) {
    this.requireTool('parser');

    const parseOptions = {
      ...this.options,
      ...options,
    };

    try {
      return this.parser(code, parseOptions);
    } catch (error) {
      throw new Error(`Parse error: ${error.message}`, { cause: error });
    }
  }

  // Generate code from AST.
  // Returns { code, map, ast } where `ast` is the node that was passed in.
  // Throws Error('Generation error: ...') with the original error as `cause`.
  generate(ast, options = {}) {
    this.requireTool('generator');

    const generatorOptions = {
      comments: true,
      compact: false,
      minified: false,
      ...options,
    };

    try {
      const { code, map } = this.generator(ast, generatorOptions);
      return { code, map, ast };
    } catch (error) {
      throw new Error(`Generation error: ${error.message}`, { cause: error });
    }
  }

  // Transform AST with visitors.
  // Caller-supplied visitors replace a default visitor registered under the
  // same node type. The AST is mutated in place and returned.
  transform(ast, visitors = {}) {
    this.requireTool('traverse');

    const allVisitors = {
      ...this.getDefaultVisitors(),
      ...visitors,
    };

    this.traverse(ast, allVisitors);
    return ast;
  }

  // Complete transformation pipeline: parse, apply each transformation in
  // order, then generate. A transformation is either a function
  // `(ast, t) => ast | undefined` or an object with a `visitor` property.
  // The same `options` object is handed to both parse() and generate().
  process(code, transformations = [], options = {}) {
    try {
      let ast = this.parse(code, options);

      for (const transformation of transformations) {
        if (typeof transformation === 'function') {
          // A function may return a new AST or mutate in place and return
          // nothing; in the latter case keep the current tree.
          ast = transformation(ast, this.t) || ast;
        } else if (transformation?.visitor) {
          this.requireTool('traverse');
          this.traverse(ast, transformation.visitor);
        }
      }

      return this.generate(ast, options);
    } catch (error) {
      throw new Error(`Processing error: ${error.message}`, { cause: error });
    }
  }

  // Setup default AST visitors (they only log what they encounter).
  setupDefaultVisitors() {
    this.visitors.set('FunctionDeclaration', {
      enter: (path) => {
        console.log(`Found function: ${path.node.id?.name}`);
      },
    });

    this.visitors.set('VariableDeclaration', {
      enter: (path) => {
        console.log(`Found variable declaration: ${path.node.kind}`);
      },
    });
  }

  // Snapshot of the registered default visitors as a plain visitor object.
  getDefaultVisitors() {
    return Object.fromEntries(this.visitors);
  }
}

// Custom Language Transpiler
// ASTProcessor extended with user-registered macros and syntax transformers.
// Like its base class it must be initialized (`await initialize()`) before
// transformCustomSyntax() is called, because that method uses `this.t` and
// `this.traverse`.
class CustomTranspiler extends ASTProcessor {
  constructor() {
    super();
    this.customSyntax = new Map();
    this.macros = new Map();
    this.typeSystem = new TypeSystem();
  }

  // Register custom syntax transformation.
  // `transformer(path, t)` is called for every call expression whose callee
  // is the identifier `name`; a truthy return value replaces the call.
  registerSyntax(name, transformer) {
    this.customSyntax.set(name, transformer);
  }

  // Macro system for code generation.
  // `replacer(args, t)` receives the call's argument nodes and returns the
  // replacement node. `template` is stored for reference only.
  defineMacro(name, template, replacer) {
    this.macros.set(name, { template, replacer });
  }

  // Transform custom syntax: expands macros, applies registered syntax
  // transformers and rewrites `a |> b` into `b(a)`. Mutates and returns `ast`.
  transformCustomSyntax(ast) {
    const customVisitor = {
      CallExpression: (path) => {
        const { callee, arguments: args } = path.node;

        // Only plain `name(...)` calls can be macros or custom syntax.
        if (!this.t.isIdentifier(callee)) return;
        const { name } = callee;

        // Macro expansion
        const macro = this.macros.get(name);
        if (macro) {
          const expanded = macro.replacer(args, this.t);
          if (expanded) {
            path.replaceWith(expanded);
            // The path now holds the expansion, not the original call, so
            // the syntax transformer registered for `name` must not run on it.
            return;
          }
        }

        // Custom syntax transforms
        const transformer = this.customSyntax.get(name);
        if (transformer) {
          const result = transformer(path, this.t);
          if (result) path.replaceWith(result);
        }
      },

      // Handle pipeline operator (custom syntax)
      // NOTE(review): the parser presumably only emits a '|>' BinaryExpression
      // when its pipeline-operator plugin is enabled; the plugin list in
      // ASTProcessor's default options does not include it - confirm.
      BinaryExpression: (path) => {
        const { operator, left, right } = path.node;
        if (operator === '|>') {
          // Transform a |> b to b(a)
          path.replaceWith(this.t.callExpression(right, [left]));
        }
      },
    };

    this.traverse(ast, customVisitor);
    return ast;
  }
}

// Type System for Custom Language
// Minimal structural type registry and literal-based type inference.
class TypeSystem {
  constructor() {
    this.types = new Map();
    this.constraints = new Map();
    this.inferenceRules = [];
  }

  // Define custom types.
  // `definition` may provide `properties`, `methods`, `extends` (stored as
  // `parent`) and `constraints`; missing parts default to empty values.
  defineType(name, definition) {
    this.types.set(name, {
      name,
      properties: definition.properties || {},
      methods: definition.methods || {},
      parent: definition.extends || null,
      constraints: definition.constraints || [],
    });
  }

  // Type inference for literal, array and object-expression nodes.
  // Returns a descriptor object with a `type` field; any node kind that is
  // not handled (or a missing node) yields { type: 'unknown' }.
  inferType(node, context = {}) {
    if (!node) return { type: 'unknown' };

    switch (node.type) {
      case 'NumericLiteral':
        return { type: 'number', value: node.value };

      case 'StringLiteral':
        return { type: 'string', value: node.value };

      case 'BooleanLiteral':
        return { type: 'boolean', value: node.value };

      case 'ArrayExpression': {
        // Holes in an array literal (`[1, , 2]`) are `null` entries in
        // `elements`; reading one at runtime yields undefined.
        const elementTypes = node.elements.map((element) =>
          element === null
            ? { type: 'undefined' }
            : this.inferType(element, context)
        );
        return {
          type: 'array',
          elementType: this.unifyTypes(elementTypes),
        };
      }

      case 'ObjectExpression': {
        const properties = {};
        for (const prop of node.properties) {
          // Only statically named properties are recorded: a computed key
          // such as `{ [k]: 1 }` is not named after the identifier `k`.
          if (prop.type !== 'ObjectProperty' || prop.computed) continue;
          const key = prop.key.name ?? prop.key.value;
          properties[key] = this.inferType(prop.value, context);
        }
        return { type: 'object', properties };
      }

      default:
        return { type: 'unknown' };
    }
  }

  // Unify multiple types.
  // - no types        -> { type: 'never' }
  // - one type        -> that descriptor unchanged
  // - same `type` tag -> { type } (details of the members are dropped)
  // - otherwise       -> { type: 'union', types } with every input kept
  unifyTypes(types) {
    if (types.length === 0) return { type: 'never' };
    if (types.length === 1) return types[0];

    // Simple unification - can be extended
    const uniqueTypes = [...new Set(types.map((t) => t.type))];
    if (uniqueTypes.length === 1) {
      return { type: uniqueTypes[0] };
    }

    return { type: 'union', types };
  }
}

Babel Plugin Development

Custom Babel Plugin System

// Babel Plugin Factory
// Builds Babel-style plugin functions (`({ types }) => ({ name, visitor })`)
// that lower a handful of newer syntax forms, and keeps them in a registry.
class BabelPluginFactory {
  constructor() {
    this.plugins = new Map();
    this.presets = new Map();
    this.transformations = new Map();
  }

  // Create custom Babel plugin and register it under `name`.
  // Every transform is enabled unless its option is explicitly `false`:
  // transformArrows, transformTemplates, transformDestructuring,
  // transformAsync, transformClasses.
  createPlugin(name, options = {}) {
    // Visitor methods are invoked by the traverser with its own `this`
    // (the traversal state), so the factory is captured explicitly instead
    // of being reached through `this` inside the visitors.
    const factory = this;

    const plugin = ({ types: t }) => ({
      name,

      visitor: {
        // Transform arrow functions to regular functions.
        // NOTE(review): a plain function expression does not keep the arrow's
        // lexical `this`/`arguments`; arrows that use them change meaning.
        ArrowFunctionExpression(path) {
          if (options.transformArrows === false) return;

          const { node } = path;
          const body = t.isBlockStatement(node.body)
            ? node.body
            : t.blockStatement([t.returnStatement(node.body)]);

          path.replaceWith(
            t.functionExpression(null, node.params, body, false, node.async)
          );
        },

        // Transform template literals
        TemplateLiteral(path) {
          if (options.transformTemplates === false) return;

          // The quasi of a tagged template (tag`...`) must stay a template
          // literal; the tag function receives its raw strings.
          if (t.isTaggedTemplateExpression(path.parent)) return;

          factory.transformTemplateLiteral(path, t);
        },

        // Transform destructuring
        VariableDeclarator(path) {
          if (options.transformDestructuring === false) return;

          const { id } = path.node;
          if (t.isObjectPattern(id)) {
            factory.transformObjectDestructuring(path, t);
          } else if (t.isArrayPattern(id)) {
            factory.transformArrayDestructuring(path, t);
          }
        },

        // Transform async/await
        AwaitExpression(path) {
          if (options.transformAsync === false) return;
          factory.transformAwaitExpression(path, t);
        },

        // Transform classes
        ClassDeclaration(path) {
          if (options.transformClasses === false) return;
          factory.transformClass(path, t);
        },
      },
    });

    this.plugins.set(name, plugin);
    return plugin;
  }

  // True for binding targets the destructuring transforms can emit as the
  // left-hand side of a plain declarator: identifiers and nested patterns.
  isSimpleBindingTarget(node, t) {
    return (
      t.isIdentifier(node) || t.isObjectPattern(node) || t.isArrayPattern(node)
    );
  }

  // Transform template literal to concatenation.
  // Uses the cooked text of each quasi so escape sequences keep their
  // meaning (`\n` stays a newline rather than a backslash followed by "n").
  // The chain always starts with a string literal, which forces string
  // concatenation even when the first expression is a number.
  transformTemplateLiteral(path, t) {
    const { quasis, expressions } = path.node;
    const textOf = (quasi) => quasi.value.cooked ?? quasi.value.raw;

    let result = t.stringLiteral(textOf(quasis[0]));

    expressions.forEach((expression, index) => {
      // Add expression
      result = t.binaryExpression('+', result, expression);

      // Add next string part (empty parts add nothing and are skipped)
      const following = quasis[index + 1];
      const text = following ? textOf(following) : '';
      if (text !== '') {
        result = t.binaryExpression('+', result, t.stringLiteral(text));
      }
    });

    path.replaceWith(result);
  }

  // Transform object destructuring.
  // `{ a, b: c } = init` becomes the declarators
  // `_temp = init, a = _temp.a, c = _temp.b` inside the SAME declaration, so
  // sibling declarators, their order and the declaration kind are preserved.
  // Patterns containing rest elements or default values are left untouched
  // rather than silently losing bindings.
  transformObjectDestructuring(path, t) {
    const { id, init } = path.node;

    if (!init) return;

    const supported = id.properties.every(
      (prop) =>
        t.isObjectProperty(prop) && this.isSimpleBindingTarget(prop.value, t)
    );
    if (!supported) return;

    const tempVar = path.scope.generateUidIdentifier('temp');

    // Create temp variable assignment
    const declarators = [t.variableDeclarator(tempVar, init)];

    // Create individual assignments
    for (const prop of id.properties) {
      // Keys such as 'a-b' or 0 are only valid in bracket notation.
      const computed = Boolean(prop.computed) || !t.isIdentifier(prop.key);
      declarators.push(
        t.variableDeclarator(
          // A nested pattern here is presumably picked up again when the
          // traverser visits the inserted declarator - TODO confirm.
          prop.value,
          t.memberExpression(
            t.cloneNode(tempVar),
            t.cloneNode(prop.key),
            computed
          )
        )
      );
    }

    path.replaceWithMultiple(declarators);
  }

  // Transform array destructuring.
  // `[a, , b] = init` becomes `_temp = init, a = _temp[0], b = _temp[2]`
  // inside the same declaration. Index access matches destructuring for
  // arrays and array-likes only, not for arbitrary iterables. Patterns with
  // rest elements or default values are left untouched.
  transformArrayDestructuring(path, t) {
    const { id, init } = path.node;

    if (!init) return;

    const supported = id.elements.every(
      (element) => element === null || this.isSimpleBindingTarget(element, t)
    );
    if (!supported) return;

    const tempVar = path.scope.generateUidIdentifier('temp');

    // Create temp variable assignment
    const declarators = [t.variableDeclarator(tempVar, init)];

    // Create individual assignments
    id.elements.forEach((element, index) => {
      if (element === null) return; // hole: nothing is bound at this index
      declarators.push(
        t.variableDeclarator(
          element,
          t.memberExpression(
            t.cloneNode(tempVar),
            t.numericLiteral(index),
            true
          )
        )
      );
    });

    path.replaceWithMultiple(declarators);
  }

  // Transform async/await.
  // Placeholder: lowering `await` needs a generator/state-machine rewrite of
  // the enclosing function, which this factory does not implement. The
  // expression is deliberately left unchanged so traversal keeps working.
  transformAwaitExpression(path, t) {}

  // Transform class to function.
  // Emits a constructor function plus one assignment per method: instance
  // methods go on `Name.prototype`, static methods on `Name` itself.
  // Classes this simple lowering cannot express (anonymous, with a
  // superclass, or with members other than plain methods) are left as-is.
  transformClass(path, t) {
    const { node } = path;

    if (!node.id || node.superClass) return;

    const members = node.body.body;
    const isPlainMethod = (member) =>
      t.isClassMethod(member) &&
      (member.kind === 'constructor' || member.kind === 'method');
    if (!members.every(isPlainMethod)) return;

    const className = node.id.name;

    // Create constructor function
    const ctor = members.find((member) => member.kind === 'constructor');
    const constructorFunc = t.functionDeclaration(
      t.identifier(className),
      ctor ? ctor.params : [],
      ctor ? ctor.body : t.blockStatement([])
    );

    const replacements = [constructorFunc];

    // Transform methods to prototype (or constructor) assignments
    for (const method of members) {
      if (method.kind === 'constructor') continue;

      const owner = method.static
        ? t.identifier(className)
        : t.memberExpression(
            t.identifier(className),
            t.identifier('prototype')
          );
      const computed = Boolean(method.computed) || !t.isIdentifier(method.key);

      replacements.push(
        t.expressionStatement(
          t.assignmentExpression(
            '=',
            t.memberExpression(owner, method.key, computed),
            t.functionExpression(
              null,
              method.params,
              method.body,
              Boolean(method.generator),
              Boolean(method.async)
            )
          )
        )
      );
    }

    path.replaceWithMultiple(replacements);
  }
}

// Code Optimizer
// Produces Babel visitors for a few classic optimizations and counts how
// often each one fired in `statistics`.
class CodeOptimizer {
  constructor() {
    this.optimizations = new Map();
    this.statistics = {
      deadCodeRemoved: 0,
      constantsFolded: 0,
      functionsInlined: 0,
    };
  }

  // Dead code elimination.
  // Returns a visitor; `ast` is accepted for signature symmetry but unused.
  removeDeadCode(ast, t) {
    // Visitors run with the traverser's `this`, not the optimizer, so the
    // instance is captured for the statistics updates.
    const optimizer = this;

    return {
      IfStatement(path) {
        const { test, consequent, alternate } = path.node;

        // Remove unreachable branches
        if (!t.isBooleanLiteral(test)) return;

        if (test.value) {
          path.replaceWith(consequent);
        } else if (alternate) {
          path.replaceWith(alternate);
        } else {
          path.remove();
        }
        optimizer.statistics.deadCodeRemoved++;
      },

      // Remove unused variables
      VariableDeclarator(path) {
        const { id, init } = path.node;

        // Destructuring patterns have no single name to look up.
        if (!t.isIdentifier(id)) return;

        // The loop variable of for-in/for-of is required by the loop syntax
        // even when the body never reads it.
        if (t.isForXStatement(path.parentPath?.parent)) return;

        const binding = path.scope.getBinding(id.name);
        if (!binding || binding.referenced) return;

        // A variable that is assigned later still needs its declaration.
        if (binding.constantViolations.length > 0) return;

        // Dropping the declarator also drops its initializer, so only do it
        // when evaluating the initializer has no observable effect.
        if (init && !path.scope.isPure(init)) return;

        path.remove();
        optimizer.statistics.deadCodeRemoved++;
      },
    };
  }

  // Constant folding for `+ - * /` on numeric literals and `+` on string
  // literals. Returns a visitor; `ast` is unused.
  foldConstants(ast, t) {
    const optimizer = this;
    const numericFolds = new Map([
      ['+', (a, b) => a + b],
      ['-', (a, b) => a - b],
      ['*', (a, b) => a * b],
      ['/', (a, b) => a / b],
    ]);

    return {
      BinaryExpression(path) {
        const { left, right, operator } = path.node;

        // Fold numeric constants
        if (t.isNumericLiteral(left) && t.isNumericLiteral(right)) {
          const fold = numericFolds.get(operator);
          if (!fold) return;

          const result = fold(left.value, right.value);

          // Infinity/NaN (e.g. 1 / 0) have no numeric-literal spelling;
          // keep the original expression.
          if (!Number.isFinite(result)) return;

          // A numeric literal is non-negative in source text, so a negative
          // result is expressed as unary minus applied to its magnitude.
          const isNegative = result < 0 || Object.is(result, -0);
          path.replaceWith(
            isNegative
              ? t.unaryExpression('-', t.numericLiteral(-result))
              : t.numericLiteral(result)
          );
          optimizer.statistics.constantsFolded++;
          return;
        }

        // Fold string constants
        if (
          t.isStringLiteral(left) &&
          t.isStringLiteral(right) &&
          operator === '+'
        ) {
          path.replaceWith(t.stringLiteral(left.value + right.value));
          optimizer.statistics.constantsFolded++;
        }
      },
    };
  }

  // Function inlining.
  // Returns [identifyVisitor, inlineVisitor]; run them in that order, each in
  // its own traversal, because the second relies on names collected by the
  // first. `ast` is unused.
  inlineFunctions(ast, t) {
    const optimizer = this;
    const inlineable = new Set();

    // First pass: identify inlineable functions
    const identifyVisitor = {
      FunctionDeclaration(path) {
        const { node } = path;

        // Simple heuristic: inline small functions
        if (
          node.id &&
          optimizer.isSmallFunction(node) &&
          optimizer.isSideEffectFree(node)
        ) {
          inlineable.add(node.id.name);
        }
      },
    };

    // Second pass: inline function calls
    const inlineVisitor = {
      CallExpression(path) {
        const { callee } = path.node;
        if (!t.isIdentifier(callee) || !inlineable.has(callee.name)) return;

        const binding = path.scope.getBinding(callee.name);
        if (!binding || !t.isFunctionDeclaration(binding.path.node)) return;

        // Count only calls that were actually replaced.
        if (optimizer.inlineFunction(path, binding.path.node, t)) {
          optimizer.statistics.functionsInlined++;
        }
      },
    };

    return [identifyVisitor, inlineVisitor];
  }

  isSmallFunction(node) {
    // Simple size check on the serialized node.
    // NOTE(review): the serialized length includes every property on the
    // node, so any position/comment metadata the parser attaches counts
    // toward the 200-character budget - confirm the threshold is intended.
    return JSON.stringify(node).length < 200;
  }

  isSideEffectFree(node) {
    // Simplified side effect analysis
    return true; // Would need more sophisticated analysis
  }

  // Depth-first search: true when `predicate` holds for `node` or for any
  // descendant reachable through t.VISITOR_KEYS.
  someNode(node, t, predicate) {
    if (predicate(node)) return true;

    const keys = t.VISITOR_KEYS[node.type] ?? [];
    return keys.some((key) => {
      const child = node[key];
      if (Array.isArray(child)) {
        return child.some(
          (item) => item != null && this.someNode(item, t, predicate)
        );
      }
      return child != null && this.someNode(child, t, predicate);
    });
  }

  // Arguments may end up duplicated (or dropped) by substitution, so only
  // values that are free to re-evaluate are accepted.
  isDuplicableArgument(arg, t) {
    return (
      t.isIdentifier(arg) ||
      t.isNumericLiteral(arg) ||
      t.isStringLiteral(arg) ||
      t.isBooleanLiteral(arg) ||
      t.isNullLiteral(arg)
    );
  }

  // Node kinds that make a returned expression unsafe to paste at a call
  // site: calls (could recurse into another inlineable function forever),
  // writes (a substituted parameter may become a literal), nested
  // functions/classes (their scopes could shadow a parameter) and constructs
  // whose meaning depends on the enclosing function.
  blocksInlining(node, t) {
    return (
      t.isCallExpression(node) ||
      t.isOptionalCallExpression(node) ||
      t.isNewExpression(node) ||
      t.isTaggedTemplateExpression(node) ||
      t.isAssignmentExpression(node) ||
      t.isUpdateExpression(node) ||
      t.isFunction(node) ||
      t.isClass(node) ||
      t.isThisExpression(node) ||
      t.isSuper(node) ||
      t.isAwaitExpression(node) ||
      t.isYieldExpression(node) ||
      t.isIdentifier(node, { name: 'arguments' })
    );
  }

  // Replace `callPath` with the function's returned expression, with each
  // parameter substituted by the matching argument.
  // Only functions of the shape `function f(a, b) { return <expr>; }` called
  // with exactly as many simple arguments are inlined.
  // Returns true when the call was replaced, false when it was left alone.
  // NOTE(review): free variables in the returned expression are assumed to
  // resolve to the same bindings at the call site - not checked here.
  inlineFunction(callPath, funcNode, t) {
    const { arguments: args } = callPath.node;
    const { params, body } = funcNode;

    if (funcNode.async || funcNode.generator) return false;
    if (args.length !== params.length) return false;
    if (!params.every((param) => t.isIdentifier(param))) return false;
    if (!args.every((arg) => this.isDuplicableArgument(arg, t))) return false;

    if (!t.isBlockStatement(body) || body.body.length !== 1) return false;
    const [statement] = body.body;
    if (!t.isReturnStatement(statement) || !statement.argument) return false;

    const unsafe = this.someNode(statement.argument, t, (node) =>
      this.blocksInlining(node, t)
    );
    if (unsafe) return false;

    // Create parameter substitutions
    const substitutions = new Map(
      params.map((param, index) => [param.name, args[index]])
    );

    // Clone and substitute function body
    const inlinedBody = this.substituteIdentifiers(body, substitutions, t);

    // Replace call with the substituted return value
    callPath.replaceWith(inlinedBody.body[0].argument);
    return true;
  }

  // Deep-clone `node` and replace every identifier REFERENCE whose name is a
  // key of `substitutions` with a clone of the mapped node. Property names
  // (`obj.a`, `{ a: 1 }`) are not references and are left alone.
  substituteIdentifiers(node, substitutions, t) {
    const isNameOnlySlot = (parent, key) =>
      ((t.isMemberExpression(parent) ||
        t.isOptionalMemberExpression(parent)) &&
        key === 'property' &&
        !parent.computed) ||
      (t.isObjectProperty(parent) && key === 'key' && !parent.computed);

    const rewrite = (current) => {
      if (t.isIdentifier(current) && substitutions.has(current.name)) {
        return t.cloneNode(substitutions.get(current.name), true);
      }

      for (const key of t.VISITOR_KEYS[current.type] ?? []) {
        if (isNameOnlySlot(current, key)) continue;

        const child = current[key];
        if (Array.isArray(child)) {
          current[key] = child.map((item) => (item ? rewrite(item) : item));
        } else if (child) {
          current[key] = rewrite(child);
        }
      }

      // `{ a }` whose value was substituted must be printed as `{ a: <arg> }`.
      if (
        t.isObjectProperty(current) &&
        current.shorthand &&
        !(
          t.isIdentifier(current.value) &&
          current.value.name === current.key.name
        )
      ) {
        current.shorthand = false;
      }

      return current;
    };

    return rewrite(t.cloneNode(node, true));
  }
}

Complete Compiler Toolchain

Custom Language Compiler

// Custom JavaScript Dialect Compiler
// Custom JavaScript dialect compiler: wires the parser, custom-syntax
// transpiler, optimizer and target plugins into a staged pipeline.
// Call `await initialize()` once before `compile()`.
class CustomLanguageCompiler {
  constructor() {
    this.astProcessor = new ASTProcessor();
    this.transpiler = new CustomTranspiler();
    this.optimizer = new CodeOptimizer();
    this.pluginFactory = new BabelPluginFactory();

    this.pipeline = [];
    this.options = {
      target: 'es5',
      optimize: true,
      sourceMaps: true,
      minify: false,
    };
  }

  // Load the Babel tooling, register the dialect's syntax and build the
  // stage list. Returns this compiler.
  async initialize() {
    // The transpiler is an ASTProcessor of its own and uses its own
    // `t`/`traverse` in customSyntaxStage, so it has to be initialized too.
    await Promise.all([
      this.astProcessor.initialize(),
      this.transpiler.initialize(),
    ]);

    // Register custom syntax
    this.setupCustomSyntax();

    // Setup compilation pipeline
    this.setupPipeline();

    return this;
  }

  setupCustomSyntax() {
    // Pipeline operator
    this.transpiler.registerSyntax('pipeline', (path, t) => {
      // Handled in BinaryExpression visitor
    });

    // Match expressions (pattern matching)
    this.transpiler.defineMacro(
      'match',
      'match(value, patterns)',
      (args, t) => {
        const [value, patterns] = args;
        return this.createMatchExpression(value, patterns, t);
      }
    );

    // Async/await shorthand
    this.transpiler.defineMacro('async', 'async(promise)', (args, t) => {
      const [promise] = args;
      return t.awaitExpression(promise);
    });
  }

  // Build the expression that replaces `match(value, { key: result, _: dflt })`:
  // a chain `value === k1 ? r1 : value === k2 ? r2 : fallback`, tested in the
  // order the patterns were written. The `_` key supplies the fallback;
  // without it the fallback throws Error('No matching pattern').
  // Throws if `patterns` is not an object expression.
  // Note: `value` is repeated in every comparison, so it is evaluated once
  // per tested case.
  createMatchExpression(value, patterns, t) {
    if (!t.isObjectExpression(patterns)) {
      throw new Error('Match patterns must be an object');
    }

    const cases = [];
    let defaultResult = null;

    for (const prop of patterns.properties) {
      if (!t.isObjectProperty(prop)) continue;

      if (t.isIdentifier(prop.key, { name: '_' })) {
        // Default case
        defaultResult = prop.value;
      } else {
        // Regular case. Each comparison gets its own copies: an AST node
        // must not appear at more than one position in the tree.
        cases.push({
          test: t.binaryExpression(
            '===',
            t.cloneNode(value, true),
            t.cloneNode(prop.key, true)
          ),
          consequent: prop.value,
        });
      }
    }

    // `throw` is a statement and cannot sit in the alternate slot of a
    // conditional expression, so it is wrapped in an immediately invoked
    // function, which is an expression.
    const fallback =
      defaultResult ||
      t.callExpression(
        t.functionExpression(
          null,
          [],
          t.blockStatement([
            t.throwStatement(
              t.newExpression(t.identifier('Error'), [
                t.stringLiteral('No matching pattern'),
              ])
            ),
          ])
        ),
        []
      );

    // Build conditional chain from the last case inwards
    return cases.reduceRight(
      (alternate, { test, consequent }) =>
        t.conditionalExpression(test, consequent, alternate),
      fallback
    );
  }

  setupPipeline() {
    this.pipeline = [
      // 1. Parse
      this.parseStage.bind(this),

      // 2. Custom syntax transformation
      this.customSyntaxStage.bind(this),

      // 3. Type checking (if enabled)
      this.typeCheckStage.bind(this),

      // 4. Optimization
      this.optimizeStage.bind(this),

      // 5. Target transformation
      this.targetTransformStage.bind(this),

      // 6. Code generation
      this.generateStage.bind(this),
    ];
  }

  // Run `code` through every pipeline stage. `options` override
  // `this.options` for this call. Resolves with the final stage result.
  // Rejects with Error('Compilation failed: ...'); the stage's own error is
  // available as `cause`.
  async compile(code, options = {}) {
    const config = { ...this.options, ...options };
    let result = { code };

    try {
      // Stages run strictly in order: each consumes the previous result.
      for (const stage of this.pipeline) {
        result = await stage(result, config);
      }

      return result;
    } catch (error) {
      throw new Error(`Compilation failed: ${error.message}`, {
        cause: error,
      });
    }
  }

  // Stage 1: parse `input.code`; adds `ast` and `originalCode`.
  // NOTE(review): the dialect uses `|>`, but no pipeline-operator parser
  // plugin is listed here - confirm the parser accepts it with these options.
  parseStage(input, config) {
    const ast = this.astProcessor.parse(input.code, {
      sourceType: 'module',
      allowImportExportEverywhere: true,
      plugins: ['jsx', 'typescript', 'decorators-legacy'],
    });

    return {
      ...input,
      ast,
      originalCode: input.code,
    };
  }

  // Stage 2: expand macros and custom operators.
  customSyntaxStage(input, config) {
    const ast = this.transpiler.transformCustomSyntax(input.ast);

    return {
      ...input,
      ast,
    };
  }

  // Stage 3: annotate every node with `_inferredType` when
  // `config.typeCheck` is truthy; throws if inference raised any error.
  typeCheckStage(input, config) {
    if (!config.typeCheck) return input;

    // Perform type checking
    const typeChecker = this.transpiler.typeSystem;
    const errors = [];

    this.astProcessor.traverse(input.ast, {
      enter(path) {
        try {
          const type = typeChecker.inferType(path.node);
          path.node._inferredType = type;
        } catch (error) {
          errors.push({
            message: error.message,
            line: path.node.loc?.start.line,
            column: path.node.loc?.start.column,
          });
        }
      },
    });

    if (errors.length > 0) {
      throw new Error(`Type errors: ${JSON.stringify(errors, null, 2)}`);
    }

    return input;
  }

  // Stage 4: run each optimizer visitor in its own traversal; adds
  // `optimizationStats` (the optimizer's running counters).
  optimizeStage(input, config) {
    if (!config.optimize) return input;

    const ast = input.ast;
    const t = this.astProcessor.t;

    // Apply optimizations
    const optimizations = [
      this.optimizer.removeDeadCode(ast, t),
      this.optimizer.foldConstants(ast, t),
      ...this.optimizer.inlineFunctions(ast, t),
    ];

    optimizations.forEach((visitor) => {
      this.astProcessor.traverse(ast, visitor);
    });

    return {
      ...input,
      ast,
      optimizationStats: this.optimizer.statistics,
    };
  }

  // Stage 5: apply the plugins selected for `config.target`.
  targetTransformStage(input, config) {
    const ast = input.ast;

    // Apply target-specific transformations
    const plugins = this.getTargetPlugins(config.target);

    plugins.forEach((plugin) => {
      const pluginInstance = plugin({ types: this.astProcessor.t });
      this.astProcessor.traverse(ast, pluginInstance.visitor);
    });

    return {
      ...input,
      ast,
    };
  }

  // Stage 6: print the AST; replaces `code` and adds `map` and `finalAst`.
  generateStage(input, config) {
    const result = this.astProcessor.generate(input.ast, {
      comments: true,
      compact: config.minify,
      sourceMaps: config.sourceMaps,
      sourceFileName: config.sourceFileName,
    });

    return {
      ...input,
      code: result.code,
      map: result.map,
      finalAst: result.ast,
    };
  }

  // Plugins for a compilation target; unknown targets get none.
  getTargetPlugins(target) {
    const plugins = [];

    switch (target) {
      case 'es5':
        plugins.push(
          this.pluginFactory.createPlugin('es5-transform', {
            transformArrows: true,
            transformClasses: true,
            transformDestructuring: true,
            transformTemplates: true,
          })
        );
        break;

      case 'es2015':
        plugins.push(
          this.pluginFactory.createPlugin('es2015-transform', {
            transformArrows: false,
            transformClasses: false,
            transformAsync: true,
          })
        );
        break;
    }

    return plugins;
  }

  // Development utilities.
  // Recompile `inputPath` into `outputPath` whenever the file changes.
  // Compilation errors are logged, not thrown, so the watcher keeps running.
  watch(inputPath, outputPath, options = {}) {
    const fs = require('fs');

    console.log(`Watching ${inputPath} for changes...`);

    fs.watchFile(inputPath, async () => {
      try {
        const code = fs.readFileSync(inputPath, 'utf8');
        const result = await this.compile(code, options);

        fs.writeFileSync(outputPath, result.code);

        if (result.map && options.sourceMaps) {
          fs.writeFileSync(outputPath + '.map', JSON.stringify(result.map));
        }

        console.log(`Compiled ${inputPath} -> ${outputPath}`);
      } catch (error) {
        console.error('Compilation error:', error.message);
      }
    });
  }

  // CLI interface: resolves with `{ compile, watch }` bound to a freshly
  // initialized compiler.
  static async createCLI() {
    const compiler = new CustomLanguageCompiler();
    await compiler.initialize();

    return {
      compile: async (input, output, options) => {
        const fs = require('fs');
        const code = fs.readFileSync(input, 'utf8');
        const result = await compiler.compile(code, options);
        fs.writeFileSync(output, result.code);
        return result;
      },

      watch: (input, output, options) => {
        compiler.watch(input, output, options);
      },
    };
  }
}

// Usage Example
// End-to-end demo: compiles a snippet written in the custom dialect and
// prints the generated code together with the optimizer counters. A failed
// compilation is reported on stderr instead of being rethrown.
async function demonstrateCompiler() {
  // Sample program exercising the pipeline operator, the `match` macro and
  // the `async(...)` shorthand.
  const dialectSource = `
    // Pipeline operator
    const result = data |> filter |> map |> reduce;
    
    // Pattern matching
    const handleValue = (value) => match(value, {
      0: "zero",
      1: "one", 
      _: "other"
    });
    
    // Enhanced async
    const fetchData = async () => {
      const response = async(fetch('/api/data'));
      return async(response.json());
    };
  `;

  const compileOptions = {
    target: 'es5',
    optimize: true,
    sourceMaps: true,
  };

  const compiler = new CustomLanguageCompiler();
  await compiler.initialize();

  try {
    const output = await compiler.compile(dialectSource, compileOptions);

    console.log('Compiled code:', output.code);
    console.log('Optimization stats:', output.optimizationStats);
  } catch (failure) {
    console.error('Compilation failed:', failure.message);
  }
}

This comprehensive guide covers JavaScript compiler development from AST fundamentals to complete toolchain implementation. The examples provide production-ready code for building transpilers, optimizers, and custom language features, enabling developers to create sophisticated code transformation tools and language extensions.