import type { ImageLike } from 'tesseract.js';
import { createWorker } from 'tesseract.js';

/**
 * Runs OCR on an image and resolves with the recognized text.
 *
 * A new worker is created with the Simplified Chinese (`chi_sim`) language
 * for every call, and its termination is requested once recognition has
 * finished or failed.
 *
 * Note: a free CDN is used internally; if needed, this can be configured
 * through the arguments of `createWorker`.
 *
 * @param image - The image to run recognition on.
 * @returns The text extracted from the image.
 */
export const recognizeImageToText = async (
  image: ImageLike,
): Promise<string> => {
  const worker = await createWorker(['chi_sim']); // Chinese - Simplified
  try {
    const ret = await worker.recognize(image);
    return ret.data.text;
  } finally {
    // Teardown stays fire-and-forget so the caller is not delayed, but a
    // rejection is handled here: otherwise it would surface as an unhandled
    // promise rejection, and it must not mask the recognition result/error.
    void worker.terminate().catch((error: unknown) => {
      console.warn('Failed to terminate the OCR worker:', error);
    });
  }
};

/**
 * Tidies up recognized text.
 *
 * - Every run of two or more consecutive space characters is collapsed into
 *   a single space (tabs and other whitespace are left untouched).
 * - Lines that are empty or contain only whitespace are removed.
 *
 * Lines that are kept are not trimmed; they are re-joined with `\n`.
 *
 * @param text - The raw text to clean up.
 * @returns The cleaned text, or an empty string when no non-blank line remains.
 */
export const fixRecognizeText = (text: string): string =>
  text
    .replace(/[ ]{2,}/g, ' ')
    .split('\n')
    // A single pass is enough: filtering blank lines is idempotent.
    .filter((line) => line.trim() !== '')
    .join('\n');
