Skip to content

Node.js Examples

Complete examples for using HTML Layout Parser in Node.js environments.

Setup

Before using these examples, copy the Node.js bundle to your project:

bash
# Copy Node.js bundle to your project
cp -r node_modules/html-layout-parser/node ./src/lib/html-layout-parser

Basic Node.js Usage

typescript
// Import the Node.js entry point (adjust the specifier if you import from the copied bundle instead)
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';

/**
 * Minimal end-to-end walkthrough: initialise the parser, register a font
 * read from disk as the default, lay out one HTML snippet and print the
 * position of every resulting character.
 *
 * The parser is destroyed in `finally`, so it is released whether or not
 * font loading or parsing throws.
 *
 * @returns The character layouts produced for the sample HTML.
 */
async function basicNodeExample() {
  const layoutParser = new HtmlLayoutParser();
  // Initialize with explicit WASM path
  await layoutParser.init('./lib/html-layout-parser/html_layout_parser.js');

  try {
    // Read the font file and hand its raw bytes to the parser.
    const arialBytes = await fs.readFile(path.join(__dirname, 'fonts', 'arial.ttf'));
    const arialId = layoutParser.loadFont(new Uint8Array(arialBytes), 'Arial');
    layoutParser.setDefaultFont(arialId);

    // Lay out a single styled element against an 800px-wide viewport.
    const markup = '<div style="font-size: 24px; color: #333333FF;">Hello from Node.js!</div>';
    const glyphs: CharLayout[] = layoutParser.parse(markup, { viewportWidth: 800 });

    console.log(`Parsed ${glyphs.length} characters`);

    glyphs.forEach((glyph) => {
      const position = `(${glyph.x.toFixed(1)}, ${glyph.y.toFixed(1)})`;
      console.log(`'${glyph.character}' at ${position}`);
    });

    return glyphs;
  } finally {
    layoutParser.destroy();
  }
}

// Surface any failure on stderr instead of leaving the promise unhandled.
basicNodeExample().catch((error) => console.error(error));

File-Based Font Loading

This example loads fonts directly from disk with the Node.js-only loadFontFromFile method.

typescript
import { HtmlLayoutParser } from 'html-layout-parser/node';
import * as path from 'path';

/**
 * Loads several fonts from the local `fonts` directory with
 * `loadFontFromFile`, makes Arial the default when it loaded, and lays out
 * a snippet that references two of the font families.
 *
 * A font whose load throws is reported with a warning and skipped; a font
 * whose returned ID is not greater than zero is skipped without a message.
 *
 * @returns The layouts produced for the sample HTML.
 */
async function fileFontLoadingExample() {
  const layoutParser = new HtmlLayoutParser();
  await layoutParser.init('./lib/html-layout-parser/html_layout_parser.js');

  try {
    const fontDirectory = path.join(__dirname, 'fonts');

    // File name on disk paired with the family name to register it under.
    const fontSpecs = [
      { file: 'arial.ttf', name: 'Arial' },
      { file: 'times.ttf', name: 'Times New Roman' },
      { file: 'courier.ttf', name: 'Courier New' }
    ];

    const loadedIds = new Map<string, number>();

    for (const { file, name } of fontSpecs) {
      const location = path.join(fontDirectory, file);

      try {
        // loadFontFromFile is only available in Node.js
        const id = await layoutParser.loadFontFromFile(location, name);

        // Only IDs greater than zero are recorded.
        if (!(id > 0)) {
          continue;
        }

        loadedIds.set(name, id);
        console.log(`✓ Loaded ${name} (ID: ${id})`);
      } catch (error) {
        console.warn(`✗ Failed to load ${name}:`, error);
      }
    }

    // Arial becomes the default font only if it was actually loaded.
    const arialId = loadedIds.get('Arial');
    if (arialId) {
      layoutParser.setDefaultFont(arialId);
    }

    // Parse HTML with multiple fonts
    const html = `
      <div style="font-family: Arial; font-size: 20px;">Arial text</div>
      <div style="font-family: 'Times New Roman'; font-size: 20px;">Times text</div>
    `;

    const parsed = layoutParser.parse(html, { viewportWidth: 600 });
    console.log(`\nParsed ${parsed.length} characters`);

    return parsed;
  } finally {
    layoutParser.destroy();
  }
}

Batch Processing

Processing multiple HTML files efficiently.

typescript
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';

/** Outcome of processing a single HTML file in the batch example. */
interface ProcessingResult {
  /** File name as returned by the input directory listing. */
  file: string;
  /** Number of layout entries returned by the parser; stays 0 when processing failed. */
  characterCount: number;
  /** Elapsed time for this file, measured with `performance.now()` (milliseconds). */
  processingTime: number;
  /** True only when the file was read, parsed and its JSON output written. */
  success: boolean;
  /** Message of the error that aborted processing, when there was one. */
  error?: string;
}

/**
 * Lays out every `.html` file found in `<__dirname>/input` and writes the
 * result as pretty-printed JSON into `<__dirname>/output`, reusing a single
 * parser instance and a single loaded font for the whole batch.
 *
 * A failure on one file (read, parse or write) is recorded in that file's
 * result and does not stop the batch. A per-file status line and a final
 * summary are printed to stdout.
 *
 * @returns One `ProcessingResult` per HTML file, in directory-listing order.
 */
async function batchProcessingExample() {
  const HTML_EXTENSION = '.html';

  const parser = new HtmlLayoutParser();
  await parser.init('./lib/html-layout-parser/html_layout_parser.js');

  try {
    // Load font once
    const fontPath = path.join(__dirname, 'fonts', 'arial.ttf');
    const fontId = await parser.loadFontFromFile(fontPath, 'Arial');
    parser.setDefaultFont(fontId);

    const inputDir = path.join(__dirname, 'input');
    const outputDir = path.join(__dirname, 'output');

    await fs.mkdir(outputDir, { recursive: true });

    const files = await fs.readdir(inputDir);
    const htmlFiles = files.filter((f) => f.endsWith(HTML_EXTENSION));

    console.log(`Processing ${htmlFiles.length} HTML files...`);

    const results: ProcessingResult[] = [];

    for (const file of htmlFiles) {
      const startTime = performance.now();
      const result: ProcessingResult = {
        file,
        characterCount: 0,
        processingTime: 0,
        success: false
      };

      try {
        const htmlPath = path.join(inputDir, file);
        const html = await fs.readFile(htmlPath, 'utf-8');

        const layouts = parser.parse(html, { viewportWidth: 800 });

        // Swap only the trailing extension. A plain string replace() would
        // rewrite the FIRST ".html" in the name, turning "a.html.v2.html"
        // into "a.json.v2.html" instead of "a.html.v2.json".
        const outputName = `${file.slice(0, -HTML_EXTENSION.length)}.json`;
        const outputPath = path.join(outputDir, outputName);
        await fs.writeFile(outputPath, JSON.stringify(layouts, null, 2));

        result.characterCount = layouts.length;
        result.success = true;
      } catch (error) {
        // Keep going: the failure is reported through the result entry.
        result.error = error instanceof Error ? error.message : String(error);
      }

      result.processingTime = performance.now() - startTime;
      results.push(result);

      const status = result.success ? '✓' : '✗';
      console.log(`${status} ${file} (${result.processingTime.toFixed(1)}ms)`);
    }

    // Summary
    const successful = results.filter((r) => r.success);
    const totalChars = successful.reduce((sum, r) => sum + r.characterCount, 0);
    const totalTime = results.reduce((sum, r) => sum + r.processingTime, 0);

    console.log('\n=== Summary ===');
    console.log(`Processed: ${successful.length}/${results.length} files`);
    console.log(`Total characters: ${totalChars}`);
    console.log(`Total time: ${totalTime.toFixed(1)}ms`);

    return results;
  } finally {
    parser.destroy();
  }
}

Server-Side Rendering (Express.js)

typescript
import express, { Request, Response } from 'express';
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as path from 'path';

// Parser singleton
/**
 * Lazily-initialised wrapper that shares one `HtmlLayoutParser` (with Arial
 * set as the default font) between all callers.
 *
 * The parser is published on `this.parser` only after initialisation has
 * fully succeeded, so callers arriving while start-up is still in flight wait
 * on the same promise instead of receiving a half-initialised instance. If
 * start-up fails, the pending promise is cleared so the next call retries.
 */
class ParserService {
  private parser: HtmlLayoutParser | null = null;
  private initPromise: Promise<HtmlLayoutParser> | null = null;

  /**
   * Returns the shared parser, starting initialisation on first use.
   *
   * @returns The fully initialised parser.
   * @throws Whatever error aborted initialisation (init or font loading).
   */
  async ensureInitialized(): Promise<HtmlLayoutParser> {
    if (this.parser) return this.parser;

    if (!this.initPromise) {
      const pending = this.initialize();
      this.initPromise = pending;
      // Forget a failed attempt so a later call can retry. The identity
      // check avoids clobbering a newer attempt started in the meantime.
      pending.catch(() => {
        if (this.initPromise === pending) {
          this.initPromise = null;
        }
      });
    }

    return this.initPromise;
  }

  /**
   * Creates the parser, initialises it and registers the default font.
   * The instance is stored on `this.parser` only once every step succeeded.
   */
  private async initialize(): Promise<HtmlLayoutParser> {
    const parser = new HtmlLayoutParser();

    try {
      await parser.init('./lib/html-layout-parser/html_layout_parser.js');

      const fontPath = path.join(__dirname, 'fonts', 'arial.ttf');
      const fontId = await parser.loadFontFromFile(fontPath, 'Arial');
      parser.setDefaultFont(fontId);
    } catch (error) {
      // Release the partially set-up instance; the original error is the
      // useful one, so a failure during clean-up is deliberately ignored.
      try {
        parser.destroy();
      } catch {
        // ignored
      }
      throw error;
    }

    this.parser = parser;
    console.log('Parser service initialized');
    return parser;
  }

  /**
   * Lays out `html` with the shared parser, initialising it if needed.
   *
   * @param html - Markup to lay out.
   * @param options - Viewport width and optional external CSS.
   * @returns The parser's layout result.
   */
  async parse(html: string, options: { viewportWidth: number; css?: string }): Promise<CharLayout[]> {
    const parser = await this.ensureInitialized();
    return parser.parse(html, options);
  }

  /**
   * Destroys the shared parser, if one has been initialised. A later call to
   * `parse` or `ensureInitialized` creates a fresh instance.
   */
  destroy(): void {
    if (this.parser) {
      this.parser.destroy();
      this.parser = null;
      this.initPromise = null;
    }
  }
}

const parserService = new ParserService();
const app = express();
app.use(express.json({ limit: '10mb' }));

// Parse HTML endpoint.
// The request body is user-controlled, so every field is validated before it
// reaches the parser; invalid input is answered with 400 rather than 500.
app.post('/api/parse', async (req: Request, res: Response) => {
  try {
    // req.body can be undefined when no JSON body was sent.
    const { html, css, viewportWidth = 800 } = req.body ?? {};

    if (!html) {
      return res.status(400).json({ error: 'HTML content is required' });
    }
    if (typeof html !== 'string') {
      return res.status(400).json({ error: 'HTML content must be a string' });
    }
    if (css != null && typeof css !== 'string') {
      return res.status(400).json({ error: 'css must be a string' });
    }

    // Accept numeric strings (e.g. "800") as well as numbers.
    const width = typeof viewportWidth === 'string' ? Number(viewportWidth) : viewportWidth;
    if (typeof width !== 'number' || !Number.isFinite(width) || width <= 0) {
      return res.status(400).json({ error: 'viewportWidth must be a positive number' });
    }

    const result = await parserService.parse(html, {
      viewportWidth: width,
      css: css ?? undefined
    });

    res.json({
      success: true,
      characterCount: result.length,
      data: result
    });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error'
    });
  }
});

// Health check
app.get('/health', (_req: Request, res: Response) => {
  res.json({ status: 'ok' });
});

const PORT = process.env.PORT || 3000;
const server = app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});

// Graceful shutdown: stop accepting connections, then release the parser
// once the server has closed.
process.on('SIGTERM', () => {
  server.close(() => {
    parserService.destroy();
    process.exit(0);
  });
});

CLI Tool Example

typescript
#!/usr/bin/env node
import { HtmlLayoutParser } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';

/** Options collected from the command line by `parseArgs`. */
interface CliOptions {
  /** Input HTML file (`-i`/`--input`, or the first positional argument); empty when none was given. */
  input: string;
  /** Output JSON file (`-o`/`--output`); when omitted the result is printed to stdout. */
  output?: string;
  /** Font file to load and use as the default font (`-f`/`--font`). */
  font?: string;
  /** Viewport width passed to the parser (`-w`/`--width`); defaults to 800. */
  width: number;
  /** Output mode passed to the parser (`-m`/`--mode`); defaults to 'flat'. */
  mode: 'flat' | 'byRow' | 'simple' | 'full';
  /** Path of an external CSS file whose contents are passed to the parser (`-c`/`--css`). */
  css?: string;
  /** Pretty-print the JSON output with two-space indentation (`-p`/`--pretty`). */
  pretty: boolean;
}

/**
 * Parses `process.argv` into `CliOptions`.
 *
 * Defaults: width 800, mode 'flat', compact JSON, no input. The first
 * argument that does not start with `-` is used as the input file when no
 * input has been set yet. `-h`/`--help` prints the help text and exits with
 * status 0.
 *
 * Invalid values never end up in the result: a flag without a value, a width
 * that is not a positive integer, or an unknown mode is reported on stderr
 * and the corresponding default (or previously parsed value) is kept.
 *
 * @returns The parsed options.
 */
function parseArgs(): CliOptions {
  const outputModes = ['flat', 'byRow', 'simple', 'full'] as const;
  const args = process.argv.slice(2);
  const options: CliOptions = {
    input: '',
    width: 800,
    mode: 'flat',
    pretty: false
  };

  // Returns the argument following the flag at `flagIndex`, warning when the
  // flag is the last argument and therefore has no value.
  const valueAfter = (flagIndex: number): string | undefined => {
    const value = args[flagIndex + 1];
    if (value === undefined) {
      console.error(`Warning: option ${args[flagIndex]} requires a value; ignoring it`);
    }
    return value;
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === undefined) continue;

    switch (arg) {
      case '-i':
      case '--input': {
        const value = valueAfter(i++);
        if (value !== undefined) options.input = value;
        break;
      }
      case '-o':
      case '--output': {
        const value = valueAfter(i++);
        if (value !== undefined) options.output = value;
        break;
      }
      case '-f':
      case '--font': {
        const value = valueAfter(i++);
        if (value !== undefined) options.font = value;
        break;
      }
      case '-w':
      case '--width': {
        const value = valueAfter(i++);
        if (value === undefined) break;
        const width = Number.parseInt(value, 10);
        if (Number.isFinite(width) && width > 0) {
          options.width = width;
        } else {
          console.error(`Warning: invalid width "${value}"; using ${options.width}`);
        }
        break;
      }
      case '-m':
      case '--mode': {
        const value = valueAfter(i++);
        if (value === undefined) break;
        // find() narrows the raw string to the literal union without a cast.
        const mode = outputModes.find((candidate) => candidate === value);
        if (mode) {
          options.mode = mode;
        } else {
          console.error(
            `Warning: unknown mode "${value}" (expected ${outputModes.join(', ')}); using ${options.mode}`
          );
        }
        break;
      }
      case '-c':
      case '--css': {
        const value = valueAfter(i++);
        if (value !== undefined) options.css = value;
        break;
      }
      case '-p':
      case '--pretty':
        options.pretty = true;
        break;
      case '-h':
      case '--help':
        printHelp();
        process.exit(0);
      default:
        // First bare argument doubles as the input file.
        if (!options.input && !arg.startsWith('-')) {
          options.input = arg;
        }
    }
  }

  return options;
}

/**
 * Prints the CLI usage text (synopsis, option list and examples) to stdout
 * with a single `console.log` call. The text starts and ends with a newline.
 */
function printHelp(): void {
  const optionLines = [
    '  -i, --input <file>    Input HTML file',
    '  -o, --output <file>   Output JSON file (default: stdout)',
    '  -f, --font <file>     Font file to use (TTF/OTF)',
    '  -w, --width <number>  Viewport width (default: 800)',
    '  -m, --mode <mode>     Output mode: flat, byRow, simple, full',
    '  -c, --css <file>      External CSS file',
    '  -p, --pretty          Pretty print JSON output',
    '  -h, --help            Show this help message'
  ];

  const exampleLines = [
    '  html-layout-parser input.html',
    '  html-layout-parser -i input.html -o output.json -w 1024',
    '  html-layout-parser -i input.html -f arial.ttf -m full -p'
  ];

  // The empty first and last entries produce the leading and trailing newline.
  const helpText = [
    '',
    'HTML Layout Parser CLI',
    '',
    'Usage: html-layout-parser [options] <input-file>',
    '',
    'Options:',
    ...optionLines,
    '',
    'Examples:',
    ...exampleLines,
    ''
  ].join('\n');

  console.log(helpText);
}

/**
 * CLI entry point: parses the command line, optionally loads a font, lays
 * out the input HTML (with optional external CSS) and writes the JSON result
 * to the requested output file or to stdout.
 *
 * Exit status: 1 when no input file was given or when any step fails. On
 * failure the status is set through `process.exitCode` rather than
 * `process.exit()`, because exiting inside `catch` would terminate the
 * process before `finally` runs and the parser would never be destroyed.
 */
async function main(): Promise<void> {
  const options = parseArgs();

  if (!options.input) {
    console.error('Error: Input file is required');
    printHelp();
    process.exit(1);
  }

  const parser = new HtmlLayoutParser();

  try {
    await parser.init('./lib/html-layout-parser/html_layout_parser.js');

    // Load font; the file's base name (without extension) is used as the font name.
    if (options.font) {
      const fontPath = path.resolve(options.font);
      const fontName = path.basename(fontPath, path.extname(fontPath));
      const fontId = await parser.loadFontFromFile(fontPath, fontName);
      if (fontId > 0) parser.setDefaultFont(fontId);
    }

    // Read input HTML
    const inputPath = path.resolve(options.input);
    const html = await fs.readFile(inputPath, 'utf-8');

    // Read CSS if provided
    let css: string | undefined;
    if (options.css) {
      css = await fs.readFile(path.resolve(options.css), 'utf-8');
    }

    // Parse HTML
    const result = parser.parse(html, {
      viewportWidth: options.width,
      mode: options.mode,
      css
    });

    // Format output
    const output = options.pretty
      ? JSON.stringify(result, null, 2)
      : JSON.stringify(result);

    // Write output; the confirmation goes to stderr so stdout stays reserved for JSON.
    if (options.output) {
      await fs.writeFile(path.resolve(options.output), output);
      console.error(`Output written to: ${options.output}`);
    } else {
      console.log(output);
    }

  } catch (error) {
    console.error('Error:', error instanceof Error ? error.message : error);
    // Defer the non-zero exit so the finally block below still runs.
    process.exitCode = 1;
  } finally {
    parser.destroy();
  }
}

// Run the CLI. Anything that escapes main() is reported and turned into a
// non-zero exit status instead of an unhandled promise rejection.
main().catch((error: unknown) => {
  console.error('Error:', error instanceof Error ? error.message : error);
  process.exitCode = 1;
});

Usage Examples

bash
# Basic usage
html-layout-parser input.html

# With custom font and output file
html-layout-parser -i input.html -f ./fonts/arial.ttf -o output.json

# Full mode with pretty printing
html-layout-parser -i input.html -m full -p

# With external CSS
html-layout-parser -i input.html -c styles.css -w 1024 -o output.json

Released under the MIT License.