Node.js 示例
在 Node.js 环境中使用 HTML Layout Parser 的完整示例。
基础 Node.js 用法
typescript
// 从环境特定入口点导入
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';
/**
 * Minimal end-to-end example: load one font from disk, parse a fixed HTML
 * snippet at a viewport width of 800, and log every laid-out character.
 *
 * @returns The character layouts produced by the parser.
 */
async function basicNodeExample() {
  const layoutParser = new HtmlLayoutParser();
  await layoutParser.init();

  try {
    // Read the font file into memory and register it as the default font.
    const fontBytes = new Uint8Array(
      await fs.readFile(path.join(__dirname, 'fonts', 'arial.ttf'))
    );
    layoutParser.setDefaultFont(layoutParser.loadFont(fontBytes, 'Arial'));

    // Parse the HTML snippet.
    const markup = '<div style="font-size: 24px; color: #333333FF;">来自 Node.js 的问候!</div>';
    const layouts: CharLayout[] = layoutParser.parse(markup, { viewportWidth: 800 });

    console.log(`解析了 ${layouts.length} 个字符`);
    layouts.forEach((glyph) => {
      console.log(`'${glyph.character}' 位于 (${glyph.x.toFixed(1)}, ${glyph.y.toFixed(1)})`);
    });

    return layouts;
  } finally {
    // Always release the parser, whether parsing succeeded or threw.
    layoutParser.destroy();
  }
}
basicNodeExample().catch(console.error);
基于文件的字体加载
使用 Node.js 特有的 loadFontFromFile 方法。
typescript
import { HtmlLayoutParser } from 'html-layout-parser/node';
import * as path from 'path';
/**
 * Loads several fonts with the Node.js-only `loadFontFromFile` method, makes
 * Arial the default when it loaded, and parses HTML that uses two font families.
 *
 * A font that fails to load is logged (or skipped when its ID is not positive)
 * and does not abort the example.
 *
 * @returns The character layouts produced by the parser.
 */
async function fileFontLoadingExample() {
  const parser = new HtmlLayoutParser();
  await parser.init();

  try {
    const fontsDir = path.join(__dirname, 'fonts');

    // Font files to load, paired with the family name to register them under.
    const fontFiles = [
      { file: 'arial.ttf', name: 'Arial' },
      { file: 'times.ttf', name: 'Times New Roman' },
      { file: 'courier.ttf', name: 'Courier New' }
    ];

    const loadedIds = new Map<string, number>();
    for (const { file, name } of fontFiles) {
      try {
        // loadFontFromFile is only available in Node.js.
        const id = await parser.loadFontFromFile(path.join(fontsDir, file), name);
        if (!(id > 0)) {
          continue;
        }
        loadedIds.set(name, id);
        console.log(`✓ 已加载 ${name} (ID: ${id})`);
      } catch (error) {
        console.warn(`✗ 加载 ${name} 失败:`, error);
      }
    }

    // Use Arial as the default font when it was loaded.
    const arialId = loadedIds.get('Arial');
    if (arialId) {
      parser.setDefaultFont(arialId);
    }

    // Parse HTML that references more than one font family.
    const html = [
      '',
      '<div style="font-family: Arial; font-size: 20px;">Arial 文本</div>',
      '<div style="font-family: \'Times New Roman\'; font-size: 20px;">Times 文本</div>',
      ''
    ].join('\n');

    const layouts = parser.parse(html, { viewportWidth: 600 });
    console.log(`\n解析了 ${layouts.length} 个字符`);
    return layouts;
  } finally {
    parser.destroy();
  }
}
批量处理
高效处理多个 HTML 文件。
typescript
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';
/** Outcome of processing a single HTML file in the batch example. */
interface ProcessingResult {
  /** Name of the input file, as listed in the input directory. */
  file: string;
  /** Whether the file was parsed and its JSON output written. */
  success: boolean;
  /** Number of character layouts produced; stays 0 when processing failed. */
  characterCount: number;
  /** Elapsed time for this file, in milliseconds. */
  processingTime: number;
  /** Error message, set only when processing failed. */
  error?: string;
}
/**
 * Parses every `.html` file in `./input` with a single shared parser and
 * writes each result as JSON into `./output`.
 *
 * A failure on one file is recorded in its result and does not stop the batch.
 * A summary is logged once all files have been attempted.
 *
 * @returns One result entry per HTML file, in processing order.
 * @throws If the parser cannot be initialised, the font cannot be loaded, or
 *         the input/output directories cannot be accessed.
 */
async function batchProcessingExample() {
  const parser = new HtmlLayoutParser();
  await parser.init();

  try {
    // Load the font once and reuse it for every file.
    const fontPath = path.join(__dirname, 'fonts', 'arial.ttf');
    const fontId = await parser.loadFontFromFile(fontPath, 'Arial');
    // The other examples in this file treat a non-positive ID as a failed load.
    if (!(fontId > 0)) {
      throw new Error(`字体加载失败: ${fontPath}`);
    }
    parser.setDefaultFont(fontId);

    const inputDir = path.join(__dirname, 'input');
    const outputDir = path.join(__dirname, 'output');
    await fs.mkdir(outputDir, { recursive: true });

    const files = await fs.readdir(inputDir);
    const htmlFiles = files.filter((f) => f.endsWith('.html'));
    console.log(`处理 ${htmlFiles.length} 个 HTML 文件...`);

    const results: ProcessingResult[] = [];
    for (const file of htmlFiles) {
      const startTime = performance.now();
      const result: ProcessingResult = {
        file,
        characterCount: 0,
        processingTime: 0,
        success: false
      };

      try {
        const html = await fs.readFile(path.join(inputDir, file), 'utf-8');
        const layouts = parser.parse(html, { viewportWidth: 800 });

        // Swap only the trailing ".html" extension. String.replace with a
        // string pattern would rewrite the first ".html" anywhere in the name
        // (e.g. "a.html.v2.html" -> "a.json.v2.html").
        const outputName = `${path.basename(file, '.html')}.json`;
        await fs.writeFile(
          path.join(outputDir, outputName),
          JSON.stringify(layouts, null, 2)
        );

        result.characterCount = layouts.length;
        result.success = true;
      } catch (error) {
        result.error = error instanceof Error ? error.message : String(error);
      }

      result.processingTime = performance.now() - startTime;
      results.push(result);

      const status = result.success ? '✓' : '✗';
      console.log(`${status} ${file} (${result.processingTime.toFixed(1)}ms)`);
    }

    // Summary
    const successful = results.filter((r) => r.success);
    const totalChars = successful.reduce((sum, r) => sum + r.characterCount, 0);
    const totalTime = results.reduce((sum, r) => sum + r.processingTime, 0);
    console.log('\n=== 摘要 ===');
    console.log(`已处理: ${successful.length}/${results.length} 个文件`);
    console.log(`总字符数: ${totalChars}`);
    console.log(`总耗时: ${totalTime.toFixed(1)}ms`);

    return results;
  } finally {
    parser.destroy();
  }
}
服务端渲染 (Express.js)
typescript
import express, { Request, Response } from 'express';
import { HtmlLayoutParser, CharLayout } from 'html-layout-parser/node';
import * as path from 'path';
/**
 * Lazily initialised, process-wide parser shared by all requests.
 *
 * The parser is published through `this.parser` only after initialisation has
 * fully completed, so concurrent callers never receive a parser that is still
 * initialising. A failed initialisation is not cached: the next call retries.
 */
class ParserService {
  private parser: HtmlLayoutParser | null = null;
  private initPromise: Promise<HtmlLayoutParser> | null = null;

  /**
   * Returns the shared parser, initialising it on first use.
   *
   * Concurrent callers share one in-flight initialisation.
   *
   * @throws Whatever initialisation throws; the failure is not remembered.
   */
  async ensureInitialized(): Promise<HtmlLayoutParser> {
    if (this.parser) return this.parser;

    if (!this.initPromise) {
      this.initPromise = this.initialize();
    }
    const pending = this.initPromise;

    try {
      return await pending;
    } catch (error) {
      // Forget the failed attempt so a later call can retry, unless another
      // attempt has already replaced it.
      if (this.initPromise === pending) {
        this.initPromise = null;
      }
      throw error;
    }
  }

  /**
   * Creates the parser, loads the default font, and only then stores the
   * parser on the instance.
   */
  private async initialize(): Promise<HtmlLayoutParser> {
    const parser = new HtmlLayoutParser();
    try {
      await parser.init();
      const fontPath = path.join(__dirname, 'fonts', 'arial.ttf');
      const fontId = await parser.loadFontFromFile(fontPath, 'Arial');
      // The other examples in this file treat a non-positive ID as a failed load.
      if (!(fontId > 0)) {
        throw new Error(`字体加载失败: ${fontPath}`);
      }
      parser.setDefaultFont(fontId);
    } catch (error) {
      // Do not keep a half-initialised parser around.
      parser.destroy();
      throw error;
    }

    this.parser = parser;
    console.log('解析器服务已初始化');
    return parser;
  }

  /**
   * Parses `html` with the shared parser.
   *
   * @param html - HTML markup to lay out.
   * @param options - Viewport width and optional CSS passed to the parser.
   * @returns The character layouts produced by the parser.
   */
  async parse(html: string, options: { viewportWidth: number; css?: string }): Promise<CharLayout[]> {
    const parser = await this.ensureInitialized();
    return parser.parse(html, options);
  }

  /** Destroys the shared parser, if one exists; a later call re-initialises. */
  destroy(): void {
    if (this.parser) {
      this.parser.destroy();
      this.parser = null;
      this.initPromise = null;
    }
  }
}
const parserService = new ParserService();
const app = express();
app.use(express.json({ limit: '10mb' }));

// POST /api/parse: lay out the HTML in the JSON request body.
// Body fields: `html` (required string), `css` (optional string),
// `viewportWidth` (optional positive number, default 800).
app.post('/api/parse', async (req: Request, res: Response): Promise<void> => {
  try {
    // The body is untrusted client input, and it can be undefined when no JSON
    // body was parsed (Express 5 no longer defaults it to {}).
    const body: Record<string, unknown> = req.body ?? {};
    const { html, css, viewportWidth = 800 } = body;

    if (typeof html !== 'string' || html.length === 0) {
      res.status(400).json({ error: '需要 HTML 内容' });
      return;
    }
    if (
      typeof viewportWidth !== 'number' ||
      !Number.isFinite(viewportWidth) ||
      viewportWidth <= 0
    ) {
      res.status(400).json({ error: 'viewportWidth 必须是正数' });
      return;
    }

    let stylesheet: string | undefined;
    if (typeof css === 'string') {
      stylesheet = css;
    } else if (css != null) {
      res.status(400).json({ error: 'css 必须是字符串' });
      return;
    }

    const result = await parserService.parse(html, { viewportWidth, css: stylesheet });
    res.json({
      success: true,
      characterCount: result.length,
      data: result
    });
  } catch (error) {
    res.status(500).json({
      success: false,
      error: error instanceof Error ? error.message : '未知错误'
    });
  }
});

// Health check
app.get('/health', (_req: Request, res: Response) => {
  res.json({ status: 'ok' });
});

const PORT = process.env.PORT || 3000;
const server = app.listen(PORT, () => {
  console.log(`服务器运行在端口 ${PORT}`);
});
// Graceful shutdown: close the HTTP server, then destroy the parser and exit
// with code 0.
process.on('SIGTERM', function handleSigterm() {
  server.close(function onServerClosed() {
    parserService.destroy();
    process.exit(0);
  });
});
CLI 工具示例
typescript
#!/usr/bin/env node
import { HtmlLayoutParser } from 'html-layout-parser/node';
import * as fs from 'fs/promises';
import * as path from 'path';
/** Command-line options accepted by the CLI example. */
interface CliOptions {
  /** Path of the input HTML file (`-i`/`--input`, or the first positional argument). */
  input: string;
  /** Viewport width passed to the parser (`-w`/`--width`). */
  width: number;
  /** Output mode passed to the parser (`-m`/`--mode`). */
  mode: 'flat' | 'byRow' | 'simple' | 'full';
  /** Whether the JSON output is indented (`-p`/`--pretty`). */
  pretty: boolean;
  /** Path of the output JSON file (`-o`/`--output`); stdout is used when omitted. */
  output?: string;
  /** Path of the font file to load (`-f`/`--font`). */
  font?: string;
  /** Path of an external CSS file (`-c`/`--css`). */
  css?: string;
}
/**
 * Parses `process.argv` into a {@link CliOptions} object.
 *
 * Defaults: `width` 800, `mode` 'flat', `pretty` false, `input` '' (the caller
 * reports a missing input). The first positional argument is used as the input
 * file when `-i`/`--input` was not given. Unrecognised flags and additional
 * positional arguments are ignored.
 *
 * Invalid command lines are reported on stderr and terminate the process with
 * exit code 1: a flag that needs a value but has none, a width that is not a
 * positive integer, or an unknown output mode. `-h`/`--help` prints the help
 * text and exits with code 0.
 *
 * @returns The parsed options.
 */
function parseArgs(): CliOptions {
  const pending = process.argv.slice(2);
  const options: CliOptions = {
    input: '',
    width: 800,
    mode: 'flat',
    pretty: false
  };
  const validModes: ReadonlyArray<CliOptions['mode']> = ['flat', 'byRow', 'simple', 'full'];

  // Reports a usage error and terminates the process.
  function fail(message: string): never {
    console.error(`错误: ${message}`);
    process.exit(1);
  }

  // Consumes the argument that follows `flag`, failing when there is none.
  function requireValue(flag: string): string {
    const value = pending.shift();
    if (value === undefined) {
      return fail(`选项 ${flag} 需要一个值`);
    }
    return value;
  }

  let arg: string | undefined;
  while ((arg = pending.shift()) !== undefined) {
    switch (arg) {
      case '-i':
      case '--input':
        options.input = requireValue(arg);
        break;
      case '-o':
      case '--output':
        options.output = requireValue(arg);
        break;
      case '-f':
      case '--font':
        options.font = requireValue(arg);
        break;
      case '-w':
      case '--width': {
        const raw = requireValue(arg);
        const width = Number.parseInt(raw, 10);
        // parseInt yields NaN for non-numeric text; never store that as a width.
        if (Number.isNaN(width) || width <= 0) {
          return fail(`无效的视口宽度: ${raw}`);
        }
        options.width = width;
        break;
      }
      case '-m':
      case '--mode': {
        const raw = requireValue(arg);
        // Accept only the modes declared on CliOptions, with no type assertion.
        const mode = validModes.find((candidate) => candidate === raw);
        if (mode === undefined) {
          return fail(`无效的输出模式: ${raw} (可选: ${validModes.join(', ')})`);
        }
        options.mode = mode;
        break;
      }
      case '-c':
      case '--css':
        options.css = requireValue(arg);
        break;
      case '-p':
      case '--pretty':
        options.pretty = true;
        break;
      case '-h':
      case '--help':
        printHelp();
        return process.exit(0);
      default:
        // The first bare argument is the input file when none was set yet.
        if (!options.input && !arg.startsWith('-')) {
          options.input = arg;
        }
    }
  }

  return options;
}
/** Prints the CLI usage text (options and example invocations) to stdout. */
function printHelp(): void {
  // The leading and trailing empty entries give the blank line before and
  // after the help text.
  const helpLines = [
    '',
    'HTML Layout Parser CLI',
    '用法: html-layout-parser [选项] <输入文件>',
    '选项:',
    '-i, --input <文件> 输入 HTML 文件',
    '-o, --output <文件> 输出 JSON 文件 (默认: stdout)',
    '-f, --font <文件> 要使用的字体文件 (TTF/OTF)',
    '-w, --width <数字> 视口宽度 (默认: 800)',
    '-m, --mode <模式> 输出模式: flat, byRow, simple, full',
    '-c, --css <文件> 外部 CSS 文件',
    '-p, --pretty 美化 JSON 输出',
    '-h, --help 显示此帮助信息',
    '示例:',
    'html-layout-parser input.html',
    'html-layout-parser -i input.html -o output.json -w 1024',
    'html-layout-parser -i input.html -f arial.ttf -m full -p',
    ''
  ];
  console.log(helpLines.join('\n'));
}
/**
 * CLI entry point: parses the command line, lays out the input HTML, and
 * writes the result as JSON to the output file or to stdout.
 *
 * Exits with code 1 when no input file was given. Any error during parsing or
 * I/O is reported on stderr and sets the process exit code to 1; the parser is
 * destroyed on both the success and the error path.
 */
async function main(): Promise<void> {
  const options = parseArgs();
  if (!options.input) {
    console.error('错误: 需要输入文件');
    printHelp();
    process.exit(1);
  }

  const parser = new HtmlLayoutParser();
  try {
    await parser.init();

    // Load the font, if one was given, and make it the default.
    if (options.font) {
      const fontPath = path.resolve(options.font);
      const fontName = path.basename(fontPath, path.extname(fontPath));
      const fontId = await parser.loadFontFromFile(fontPath, fontName);
      if (fontId > 0) parser.setDefaultFont(fontId);
    }

    // Read the input HTML.
    const inputPath = path.resolve(options.input);
    const html = await fs.readFile(inputPath, 'utf-8');

    // Read the external CSS, if one was given.
    let css: string | undefined;
    if (options.css) {
      css = await fs.readFile(path.resolve(options.css), 'utf-8');
    }

    // Parse the HTML.
    const result = parser.parse(html, {
      viewportWidth: options.width,
      mode: options.mode,
      css
    });

    // Serialise the result.
    const output = options.pretty
      ? JSON.stringify(result, null, 2)
      : JSON.stringify(result);

    // Write the result. The status message goes to stderr so that stdout
    // carries only the JSON.
    if (options.output) {
      await fs.writeFile(path.resolve(options.output), output);
      console.error(`输出已写入: ${options.output}`);
    } else {
      console.log(output);
    }
  } catch (error) {
    console.error('错误:', error instanceof Error ? error.message : error);
    // Set the exit code instead of calling process.exit(): exiting here would
    // end the process before `finally` runs and skip parser.destroy().
    process.exitCode = 1;
  } finally {
    parser.destroy();
  }
}
main();
使用示例
bash
# Basic usage: parse input.html and print the JSON result to stdout
html-layout-parser input.html
# Use a custom font file and write the result to output.json
html-layout-parser -i input.html -f ./fonts/arial.ttf -o output.json
# "full" output mode with pretty-printed JSON
html-layout-parser -i input.html -m full -p
# Apply an external CSS file, with a viewport width of 1024
html-layout-parser -i input.html -c styles.css -w 1024 -o output.json