Hello, I need your help,
I'm using the processDocument method in the JavaScript example from GitHub (https://github.com/abbyysdk/ocrsdk.com/tree/master/JavaScript). Everything works when I upload an image, and I see the text output, but when I upload a PDF that contains only images, I get the following error:
Error: Non-whitespace before first tag. Line: 0 Column: 1 Char: 4
I guess the xml2js library is causing this error, but I'm not sure about that.
Here are my settings for processDocument in ocrsdk.js:
//PART 1
/**
 * Default processing settings for a recognition request.
 *
 * Each assignment sits on its own line so that the trailing comments cannot
 * swallow the statements that follow them.
 */
function ProcessingSettings() {
    // Recognition language, or a comma-separated list of languages.
    this.language = "English";
    // Output format, e.g. one of: txt, rtf, docx, xlsx.
    this.exportFormat = "txt";
    // Other custom options passed to the RESTful call.
    this.customOptions = '';
}
// PART 2 - perhaps the original file-conversion step did something more, so that in the end the data was passed without errors
/**
 * Uploads an in-memory document to the `/processDocument` endpoint.
 *
 * The payload is written to the request body as-is; nothing is read from
 * disk, so the caller is responsible for supplying the raw file contents.
 *
 * @param {Buffer} buffer - Raw contents of the document to upload.
 * @param {ProcessingSettings} [settings] - Processing options; a default
 *     ProcessingSettings instance is used when null or undefined.
 * @param {Function} userCallback - Handed to `_createTaskRequest`; also
 *     invoked here as `userCallback(error, null)` when `buffer` is missing
 *     or empty.
 */
ocrsdk.prototype.processImage = function(buffer, settings, userCallback) {
    // Validate the payload before a request is opened, so a missing or empty
    // buffer is reported through the callback instead of leaving a dangling
    // request or uploading an empty body.
    if (buffer == null || buffer.length === 0) {
        userCallback(new Error("buffer is empty or missing"), null);
        return;
    }

    if (settings == null) {
        settings = new ProcessingSettings();
    }
    var urlOptions = settings.asUrlParams();

    // Target the processDocument method of the service.
    var req = this._createTaskRequest('POST', '/processDocument' + urlOptions, userCallback);

    // Send the buffer unchanged as the request body.
    req.write(buffer);
    req.end();
};
コメント
2件のコメント
I guess the problem is with the encoding of the buffer.
I solved this by using a different library from npm (https://www.npmjs.com/package/nodejs-ocr).
Steps in Node-RED:
1. let client = new AbbyyClient(appId, appPwd, 'https://cloud.ocrsdk.com'); // Use https here if you'd like
client.submitImage("", msg.payload, ocrCompleteSubmitImage);
2. Next, once you have the task ID, use it:
// Create a client for the service URL using the application credentials.
let client = new AbbyyClient(appId, appPwd, 'https://cloud.ocrsdk.com'); // Use https here if you'd like
// Parameters handed to processDocument together with the message payload.
let apiParameters = {
// NOTE(review): the task id is read from the global context under the key "appId" — confirm this key really holds the task id.
taskId:global.get("appId"),
language: 'English',
profile: 'textExtraction',
textType: 'normal',
exportFormat: 'txt'
};
// ocrCompleteAppId is passed as the last argument; presumably the completion callback — defined outside this snippet.
client.processDocument(apiParameters, msg.payload, ocrCompleteAppId);
サインインしてコメントを残してください。