Skip to content

Commit

Permalink
feat: Add multibyte character support (#398)
Browse files Browse the repository at this point in the history
* add multibyte character support with TextDecoder

* create TextDecoder for each service instead of 1 for transmuxer

* remove debugger

* remove template strings for ie

* log message tweak

Co-authored-by: Gary Katsevman <git@gkatsev.com>

* remove for loop

Co-authored-by: Gary Katsevman <git@gkatsev.com>

* accept full captionServices option block, restructure multibyte character handling

* add default value

* options should default to empty object

Co-authored-by: Gary Katsevman <git@gkatsev.com>

* log any error thrown by TextDecoder

* add tests

* tweak test

* add TextDecoder support check

Co-authored-by: Gary Katsevman <git@gkatsev.com>
  • Loading branch information
alex-barstow and gkatsev committed Sep 21, 2021
1 parent 4354bd7 commit 0849e0a
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 8 deletions.
107 changes: 99 additions & 8 deletions lib/m2ts/caption-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ var CaptionStream = function(options) {
];

if (this.parse708captions_) {
this.cc708Stream_ = new Cea708Stream(); // eslint-disable-line no-use-before-define
this.cc708Stream_ = new Cea708Stream({captionServices: options.captionServices}); // eslint-disable-line no-use-before-define
}

this.reset();
Expand Down Expand Up @@ -371,11 +371,17 @@ Cea708Window.prototype.backspace = function() {
}
};

var Cea708Service = function(serviceNum) {
/**
 * State holder for a single CEA-708 caption service.
 *
 * @param {Integer} serviceNum Service number this object represents
 * @param {String} [encoding] Optional encoding label; when it is a string, a
 *        TextDecoder is requested for this service via `createTextDecoder`
 * @param {Object} [stream] Stored on `this.stream`; `createTextDecoder` triggers
 *        its `log` events on this object
 */
var Cea708Service = function(serviceNum, encoding, stream) {
  var wantsDecoder = typeof encoding === 'string';

  this.serviceNum = serviceNum;
  this.text = '';
  this.currentWindow = new Cea708Window(-1);
  this.windows = [];
  this.stream = stream;

  // Without a string `encoding` there is nothing more to set up
  if (!wantsDecoder) {
    return;
  }

  this.createTextDecoder(encoding);
};

/**
Expand Down Expand Up @@ -406,10 +412,46 @@ Cea708Service.prototype.setCurrentWindow = function(windowNum) {
this.currentWindow = this.windows[windowNum];
};

var Cea708Stream = function() {
/**
 * Try to create a TextDecoder for this service if it is natively supported.
 *
 * On success the decoder is stored on `this.textDecoder_`. If `TextDecoder` is not
 * defined, or its constructor throws for the given `encoding`, no decoder is stored
 * and a `log` event with level `warn` is triggered on `this.stream` instead.
 *
 * @param {String} encoding Encoding label handed to the TextDecoder constructor
 */
Cea708Service.prototype.createTextDecoder = function(encoding) {
  if (typeof TextDecoder === 'undefined') {
    this.stream.trigger('log', {
      level: 'warn',
      message: 'The `encoding` option is unsupported without TextDecoder support'
    });
    return;
  }

  try {
    this.textDecoder_ = new TextDecoder(encoding);
  } catch (error) {
    // Report the failure as a warning rather than letting it propagate
    this.stream.trigger('log', {
      level: 'warn',
      message: 'TextDecoder could not be created with ' + encoding + ' encoding. ' + error
    });
  }
};

var Cea708Stream = function(options) {
options = options || {};
Cea708Stream.prototype.init.call(this);

var self = this;
var captionServices = options.captionServices || {};
var captionServiceEncodings = {};
var serviceProps;

// Get service encodings from captionServices option block
Object.keys(captionServices).forEach(serviceName => {
serviceProps = captionServices[serviceName];

if (/^SERVICE/.test(serviceName)) {
captionServiceEncodings[serviceName] = serviceProps.encoding;
}
});

this.serviceEncodings = captionServiceEncodings;
this.current708Packet = null;
this.services = {};

Expand Down Expand Up @@ -519,6 +561,8 @@ Cea708Stream.prototype.pushServiceBlock = function(serviceNum, start, size) {

if (within708TextBlock(b)) {
i = this.handleText(i, service);
} else if (b === 0x18) {
i = this.multiByteCharacter(i, service);
} else if (b === 0x10) {
i = this.extendedCommands(i, service);
} else if (0x80 <= b && b <= 0x87) {
Expand Down Expand Up @@ -583,7 +627,7 @@ Cea708Stream.prototype.extendedCommands = function(i, service) {
var packetData = this.current708Packet.data;
var b = packetData[++i];
if (within708TextBlock(b)) {
i = this.handleText(i, service, true);
i = this.handleText(i, service, {isExtended: true});
} else {
// Unknown command
}
Expand All @@ -609,9 +653,16 @@ Cea708Stream.prototype.getPts = function(byteIndex) {
* @return {Service} Initialized service object
*/
Cea708Stream.prototype.initService = function(serviceNum, i) {
var serviceName = 'SERVICE' + serviceNum;
var self = this;
var serviceName;
var encoding;

if (serviceName in this.serviceEncodings) {
encoding = this.serviceEncodings[serviceName];
}

this.services[serviceNum] = new Cea708Service(serviceNum);
this.services[serviceNum] = new Cea708Service(serviceNum, encoding, self);
this.services[serviceNum].init(this.getPts(i), function(pts) {
self.flushDisplayed(pts, self.services[serviceNum]);
});
Expand All @@ -626,22 +677,62 @@ Cea708Stream.prototype.initService = function(serviceNum, i) {
* @param {Service} service The service object to be affected
* @return {Integer} New index after parsing
*/
Cea708Stream.prototype.handleText = function(i, service, isExtended) {
Cea708Stream.prototype.handleText = function(i, service, options) {
// Decodes the character at index `i` of the current 708 packet, appends it to the
// service's current window and returns the index of the last byte that was consumed.
// `options` may be omitted; its `isExtended` and `isMultiByte` flags are read defensively.
var isExtended = options && options.isExtended;
var isMultiByte = options && options.isMultiByte;
var packetData = this.current708Packet.data;
// NOTE(review): `b` is only read by the `char` initializer two lines below, and that
// initial `char` value is reassigned by both branches of the if/else further down.
// These two statements look like leftovers - confirm before removing them.
var b = packetData[i];
// When `isExtended` is set, 0x1000 is OR'ed into the code passed to get708CharFromCode
var extended = isExtended ? 0x1000 : 0x0000;
var char = get708CharFromCode(extended | b);
var currentByte = packetData[i];
var nextByte = packetData[i + 1];
var win = service.currentWindow;
var char;
var charCodeArray;

// Use the TextDecoder if one was created for this service
// (extended characters always go through get708CharFromCode instead)
if (service.textDecoder_ && !isExtended) {
if (isMultiByte) {
// A multibyte character is decoded from two bytes, so advance past the second one
charCodeArray = [currentByte, nextByte];
i++;
} else {
charCodeArray = [currentByte];
}

char = service.textDecoder_.decode(new Uint8Array(charCodeArray));
} else {
char = get708CharFromCode(extended | currentByte);
}

// Start the pending new line only if the window already contains something
if (win.pendingNewLine && !win.isEmpty()) {
win.newLine(this.getPts(i));
}

win.pendingNewLine = false;
win.addText(char);

return i;
};

/**
 * Handle a multibyte character: the two bytes following the byte at index `i`
 * are handed to `handleText` as one character, provided both of them pass
 * `within708TextBlock`. Otherwise nothing is consumed.
 *
 * @param {Integer} i Current index in the 708 packet
 * @param {Service} service The service object to be affected
 * @return {Integer} New index after parsing
 */
Cea708Stream.prototype.multiByteCharacter = function(i, service) {
  var bytes = this.current708Packet.data;
  var leadByte = bytes[i + 1];
  var trailByte = bytes[i + 2];
  var isTextPair = within708TextBlock(leadByte) && within708TextBlock(trailByte);

  if (!isTextPair) {
    // Unknown command
    return i;
  }

  return this.handleText(i + 1, service, {isMultiByte: true});
};

/**
* Parse and execute the CW# command.
*
Expand Down
91 changes: 91 additions & 0 deletions test/caption-stream.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
var segments = require('data-files!segments');

var
window = require('global/window'),
captionStream,
m2ts = require('../lib/m2ts'),
mp4 = require('../lib/mp4'),
Expand All @@ -14,6 +15,7 @@ var
packetHeader708 = seiNalUnitGenerator.packetHeader708,
displayWindows708 = seiNalUnitGenerator.displayWindows708,
cc708PinkUnderscore = require('./utils/cc708-pink-underscore'),
cc708Korean = require('./utils/cc708-korean'),
sintelCaptions = segments['sintel-captions.ts'](),
mixed608708Captions = require('./utils/mixed-608-708-captions.js'),
multiChannel608Captions = segments['multi-channel-608-captions.ts']();
Expand Down Expand Up @@ -2687,6 +2689,30 @@ QUnit.module('CEA 708 Stream', {
}
});

// Only the `encoding` of each SERVICEn entry should end up in `serviceEncodings`;
// the other per-service properties (language, label) must be left out.
QUnit.test('Filters encoding values out of captionServices option block', function(assert) {
  var expectedServiceEncodings = {
    SERVICE1: 'euc-kr',
    SERVICE2: 'utf-8'
  };

  cea708Stream = new m2ts.Cea708Stream({
    captionServices: {
      SERVICE1: {
        // ISO 639-1 code for Korean ('kr' is the country code, not the language)
        language: 'ko',
        label: 'Korean',
        encoding: 'euc-kr'
      },
      SERVICE2: {
        language: 'en',
        label: 'English',
        encoding: 'utf-8'
      }
    }
  });

  assert.deepEqual(cea708Stream.serviceEncodings, expectedServiceEncodings, 'filtered encodings correctly');
});

QUnit.test('parses 708 captions', function(assert) {
var captions = [];

Expand Down Expand Up @@ -2735,6 +2761,71 @@ QUnit.test('parses 708 captions', function(assert) {
}, 'parsed caption 234 correctly');
});

// Feeds the Korean fixture through a stream whose SERVICE1 is configured as euc-kr and
// checks the decoded caption text (or, without TextDecoder, that no decoder was created).
QUnit.test('Decodes multibyte characters if valid encoding option is provided and TextDecoder is supported', function(assert) {
  var emitted = [];
  var koreanService;

  cea708Stream = new m2ts.Cea708Stream({
    captionServices: {
      SERVICE1: {encoding: 'euc-kr'}
    }
  });

  cea708Stream.on('data', function(caption) {
    emitted.push(caption);
  });

  cc708Korean.forEach(cea708Stream.push, cea708Stream);
  cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]);

  assert.equal(emitted.length, 1, 'parsed single caption correctly');

  koreanService = cea708Stream.services[1];

  if (!window.TextDecoder) {
    assert.notOk(koreanService.textDecoder_, 'TextDecoder not created when unsupported');
    return;
  }

  assert.ok(koreanService.textDecoder_, 'TextDecoder created when supported');
  assert.equal(emitted[0].text, '니가 ', 'parsed multibyte characters correctly');
});

// Runs the same fixture through two streams: one with a valid encoding label and one
// with an invalid label, and checks whether each service ended up with a decoder.
QUnit.test('Creates TextDecoder only if valid encoding value is provided', function(assert) {
  var validOptions = {captionServices: {SERVICE1: {encoding: 'euc-kr'}}};
  var invalidOptions = {captionServices: {SERVICE1: {encoding: 'invalid'}}};
  var invalidStream;

  cea708Stream = new m2ts.Cea708Stream(validOptions);
  cc708Korean.forEach(cea708Stream.push, cea708Stream);
  cea708Stream.flushDisplayed(4721138662, cea708Stream.services[1]);

  // The positive case can only be checked where TextDecoder exists
  if (window.TextDecoder) {
    assert.ok(cea708Stream.services[1].textDecoder_, 'TextDecoder created successfully when encoding is valid');
  }

  invalidStream = new m2ts.Cea708Stream(invalidOptions);
  cc708Korean.forEach(invalidStream.push, invalidStream);
  invalidStream.flushDisplayed(4721138662, invalidStream.services[1]);

  assert.notOk(invalidStream.services[1].textDecoder_, 'TextDecoder not created when encoding is invalid');
});

QUnit.test('reset command', function(assert) {
var captions = [];

Expand Down
Loading

0 comments on commit 0849e0a

Please sign in to comment.