From 5d3eb0b620951d0451359e5ac42048b052d6af17 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 11:55:40 -0400 Subject: [PATCH 01/15] helper functions for base64 --- src/io/io_utils.ts | 100 ++++++++++++++++++++++++++++++++++++++++ src/io/io_utils_test.ts | 26 ++++++++++- 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/src/io/io_utils.ts b/src/io/io_utils.ts index 86540c2eb8..dd07fdc878 100644 --- a/src/io/io_utils.ts +++ b/src/io/io_utils.ts @@ -310,3 +310,103 @@ export function getModelArtifactsInfoForJSON(modelArtifacts: ModelArtifacts): modelArtifacts.weightData.byteLength, }; } + +/** + * Make Base64 string URL safe by replacing `+` with `-` and `/` with `_`. + * + * @param str Base64 string to make URL safe. + */ +export function urlSafeBase64(str: string): string { + return str.replace(/\+/g, '-').replace(/\//g, '_'); +} + +// revert Base64 URL safe replacement of + and / +/** + * Revert Base64 URL safe changes by replacing `-` with `+` and `_` with `/`. + * + * @param str URL safe Base64 string to revert changes. 
+ */ +export function urlUnsafeBase64(str: string): string { + return str.replace(/-/g, '+').replace(/_/g, '/'); +} + +/** + * Convert a string to an ArrayBuffer of UTF-8 multibyte sequence + * + * @param str A string to be converted + * @returns `ArrayBuffer` with the contents of `str` + */ +export function stringToArrayBuffer(str: string): ArrayBuffer { + // contains multibyte characters + if (/[\u0080-\uffff]/.test(str)) { + const arr = new Array(); + for (let i = 0; i < str.length; i++) { + // var cc = str.charCodeAt(i); + const cc = str.codePointAt(i); + if (cc < 0x80) { + // single byte + arr.push(cc); + } else { + // UTF-8 multibyte + if (cc < 0x800) { + // two bytes + arr.push(0xc0 | (cc >> 6)); + arr.push(0x80 | (cc & 0x3f)); + } else if (cc < 0x10000) { + // three bytes + arr.push(0xe0 | ((cc >> 12) & 0x0f)); + arr.push(0x80 | ((cc >> 6) & 0x3f)); + arr.push(0x80 | (cc & 0x3f)); + } else { + // four bytes + arr.push(0xf0 | ((cc >> 18) & 0x07)); + arr.push(0x80 | ((cc >> 12) & 0x3f)); + arr.push(0x80 | ((cc >> 6) & 0x3f)); + arr.push(0x80 | (cc & 0x3f)); + i++; + } + } + } + return (new Uint8Array(arr)).buffer; + } else { + const byteArray = new Uint8Array(str.length); + for (let i = str.length; i--;) { + byteArray[i] = str.charCodeAt(i); + } + return byteArray.buffer; + } +} + +/** + * Convert an ArrayBuffer of UTF-8 multibyte sequence to a string. + * + * @param buffer `ArrayBuffer` to be converted. + * @returns A string representation of `buffer`. 
+ */ +export function arrayBufferToString(buffer: ArrayBuffer): string { + const byteArray = new Uint8Array(buffer); + const arr = new Array(); + + for (let i = 0, len = byteArray.length; i < len; ++i) { + const v = byteArray[i]; + if (v < 0x80) { + // one byte + arr.push(v); + } else if (v >= 0xc0 && v < 0xe0) { + // two bytes + arr.push(((0x1f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); + } else if (v < 0xf0) { + // three bytes + arr.push( + ((0x0f & byteArray[i++]) << 12) + ((0x3f & byteArray[i++]) << 6) + + (0x3f & byteArray[i])); + } else if (v >= 0xf0 && v < 0xf7) { + // four bytes + arr.push( + ((0x07 & byteArray[i++]) << 18) + ((0x3f & byteArray[i++]) << 12) + + ((0x3f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); + } + } + + return String.fromCodePoint(...arr); +} diff --git a/src/io/io_utils_test.ts b/src/io/io_utils_test.ts index 5a6feaa07d..d7f960835e 100644 --- a/src/io/io_utils_test.ts +++ b/src/io/io_utils_test.ts @@ -22,7 +22,7 @@ import {NamedTensor, NamedTensorMap} from '../tensor_types'; import {expectArraysEqual} from '../test_util'; import {expectArraysClose} from '../test_util'; -import {arrayBufferToBase64String, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength} from './io_utils'; +import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength, stringToArrayBuffer} from './io_utils'; import {WeightsManifestEntry} from './types'; describe('concatenateTypedArrays', () => { @@ -542,3 +542,27 @@ describe('basename', () => { expect(basename('foo/bar/baz//')).toEqual('baz'); }); }); + +describe('stringToArrayBuffer-arrayBufferToString', () => { + it('round-trip', () => { + const len = Math.floor((Math.random() * 200) + 10); + const arr = new Array(); + + // Generate some random unicode code points + for (let i = 0; i < len; i++) { + const cp = Math.floor((Math.random() * 500) + 32); + if (cp 
> 126 && cp < 161) { + continue; + } + arr.push(cp); + } + + // turn code points into a string + const str = String.fromCodePoint(...arr); + + const aBuff = stringToArrayBuffer(str); + const str2 = arrayBufferToString(aBuff); + + expect(str2).toEqual(str); + }); +}); From b6c465e59b13d6b90b33fe1206f6029bc88b9668 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 11:56:33 -0400 Subject: [PATCH 02/15] backend for encodeBase64, decodeBase64 --- src/backends/backend.ts | 11 +++++- src/backends/cpu/backend_cpu.ts | 12 +++++- src/backends/string_shared.ts | 59 +++++++++++++++++++++++++++++ src/backends/webgl/backend_webgl.ts | 12 +++++- 4 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 src/backends/string_shared.ts diff --git a/src/backends/backend.ts b/src/backends/backend.ts index 2b15d2e263..d1906047b1 100644 --- a/src/backends/backend.ts +++ b/src/backends/backend.ts @@ -17,7 +17,7 @@ import {Conv2DInfo, Conv3DInfo} from '../ops/conv_util'; import {Activation} from '../ops/fused_util'; -import {Backend, DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; +import {Backend, DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; import {DataType, DataValues, PixelData, Rank, ShapeMap} from '../types'; export const EPSILON_FLOAT32 = 1e-7; @@ -623,4 +623,13 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer { dispose(): void { throw new Error('Not yet implemented'); } + + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + throw new Error('Not yet implemented'); + } + + decodeBase64(str: StringTensor|Tensor): T { + throw new Error('Not yet implemented'); + } } diff --git a/src/backends/cpu/backend_cpu.ts b/src/backends/cpu/backend_cpu.ts index b0fc21a092..2df51755df 100644 --- a/src/backends/cpu/backend_cpu.ts +++ b/src/backends/cpu/backend_cpu.ts @@ -33,7 +33,7 @@ import {buffer, scalar, tensor, tensor3d, tensor4d} 
from '../../ops/ops'; import * as scatter_nd_util from '../../ops/scatter_nd_util'; import * as selu_util from '../../ops/selu_util'; import {computeFlatOffset, getStridedSlicedInfo, isSliceContinous} from '../../ops/slice_util'; -import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor'; +import {DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor'; import {DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, ShapeMap, TypedArray, upcastType} from '../../types'; import * as util from '../../util'; import {getArrayFromDType, inferDtype, now, sizeFromShape} from '../../util'; @@ -42,6 +42,7 @@ import * as backend_util from '../backend_util'; import * as complex_util from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; +import {decodeBase64, encodeBase64} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -3183,6 +3184,15 @@ export class MathBackendCPU implements KernelBackend { dispose() {} + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + return encodeBase64(str, pad); + } + + decodeBase64(str: StringTensor|Tensor): T { + return decodeBase64(str); + } + floatPrecision(): 16|32 { return 32; } diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts new file mode 100644 index 0000000000..404b7dfb93 --- /dev/null +++ b/src/backends/string_shared.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2018 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; +import {StringTensor, Tensor} from '../tensor'; + +/** Shared implementation of the encodeBase64 kernel across WebGL and CPU. */ +export function encodeBase64( + str: StringTensor|Tensor, pad = false): T { + const resultValues = new Array(str.size); + const values = str.dataSync(); + + for (let i = 0; i < values.length; ++i) { + // Convert from string to ArrayBuffer of UTF-8 multibyte sequence + // tslint:disable-next-line: max-line-length + // https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem + const aBuff = stringToArrayBuffer(values[i].toString()); + + // Encode to Base64 and make URL safe + const bVal = urlSafeBase64(arrayBufferToBase64String(aBuff)); + + // Remove padding + resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); + } + + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; +} + +/** Shared implementation of the decodeBase64 kernel across WebGL and CPU. 
*/ +export function decodeBase64(str: StringTensor| + Tensor): T { + const resultValues = new Array(str.size); + const values = str.dataSync(); + + for (let i = 0; i < values.length; ++i) { + // Undo URL safe and decode from Base64 to ArrayBuffer + const aBuff = + base64StringToArrayBuffer(urlUnsafeBase64(values[i].toString())); + + // Convert from ArrayBuffer of UTF-8 multibyte sequence to string + resultValues[i] = arrayBufferToString(aBuff); + } + + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; +} diff --git a/src/backends/webgl/backend_webgl.ts b/src/backends/webgl/backend_webgl.ts index 85e4272235..cac9ecc0b1 100644 --- a/src/backends/webgl/backend_webgl.ts +++ b/src/backends/webgl/backend_webgl.ts @@ -35,7 +35,7 @@ import * as segment_util from '../../ops/segment_util'; import {computeFlatOffset, getStridedSlicedInfo, isSliceContinous} from '../../ops/slice_util'; import {softmax} from '../../ops/softmax'; import {range, scalar, tensor} from '../../ops/tensor_ops'; -import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../../tensor'; +import {DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../../tensor'; import {DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ShapeMap, sumOutType, TypedArray, upcastType} from '../../types'; import * as util from '../../util'; import {getArrayFromDType, getTypedArrayFromDType, inferDtype, sizeFromShape} from '../../util'; @@ -44,6 +44,7 @@ import * as backend_util from '../backend_util'; import {mergeRealAndImagArrays} from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; +import {decodeBase64, encodeBase64} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -2158,6 +2159,15 @@ export class MathBackendWebGL implements KernelBackend { return split(x, 
sizeSplits, axis); } + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + return encodeBase64(str, pad); + } + + decodeBase64(str: StringTensor|Tensor): T { + return decodeBase64(str); + } + scatterND( indices: Tensor, updates: Tensor, shape: ShapeMap[R]): Tensor { const {sliceRank, numUpdates, sliceSize, strides, outputSize} = From 21dcf437f2b5b406ae5be3b17580eb373d84c92b Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 13:59:42 -0400 Subject: [PATCH 03/15] encodeBase64, decodeBase64 ops --- src/ops/ops.ts | 1 + src/ops/string_ops.ts | 78 +++++++++++++++++++++++++++++++++++++++++++ src/tensor.ts | 12 +++++++ 3 files changed, 91 insertions(+) create mode 100644 src/ops/string_ops.ts diff --git a/src/ops/ops.ts b/src/ops/ops.ts index 7714ba320b..7b4b9252bf 100644 --- a/src/ops/ops.ts +++ b/src/ops/ops.ts @@ -46,6 +46,7 @@ export * from './sparse_to_dense'; export * from './gather_nd'; export * from './dropout'; export * from './signal_ops'; +export * from './string_ops'; export {op} from './operation'; diff --git a/src/ops/string_ops.ts b/src/ops/string_ops.ts new file mode 100644 index 0000000000..1bc21e6027 --- /dev/null +++ b/src/ops/string_ops.ts @@ -0,0 +1,78 @@ +/** + * @license + * Copyright 2018 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ +import {ENGINE} from '../engine'; +import {StringTensor, Tensor} from '../tensor'; +import {convertToTensor} from '../tensor_util_env'; + +import {op} from './operation'; + +/** + * Encodes the values of a `tf.Tensor` (of dtype `string`) to Base64. + * + * Given a String tensor, returns a new tensor with the values encoded into + * web-safe base64 format. + * + * Web-safe means that the encoder uses `-` and `_` instead of `+` and `/`: + * + * en.wikipedia.org/wiki/Base64 + * + * ```js + * const x = tf.tensor1d(['Hello world!'], 'string'); + * + * x.encodeBase64().print(); + * ``` + * @param str The input `tf.Tensor` of dtype `string` to encode. + * @param pad Whether to add padding (`=`) to the end of the encoded string. + */ +/** @doc {heading: 'Operations', subheading: 'String'} */ +function encodeBase64_( + str: StringTensor|Tensor, pad = false): T { + const $str = convertToTensor(str, 'str', 'encodeBase64', 'string'); + + const backwardsFunc = (dy: T) => ({$str: () => decodeBase64(dy)}); + + return ENGINE.runKernel( + backend => backend.encodeBase64($str, pad), {$str}, backwardsFunc); +} + +/** + * Decodes the values of a `tf.Tensor` (of dtype `string`) from Base64. + * + * Given a String tensor of Base64 encoded values, returns a new tensor with the + * decoded values. + * + * en.wikipedia.org/wiki/Base64 + * + * ```js + * const y = tf.scalar('SGVsbG8gd29ybGQh', 'string'); + * + * y.decodeBase64().print(); + * ``` + * @param str The input `tf.Tensor` of dtype `string` to decode. 
+ */ +/** @doc {heading: 'Operations', subheading: 'String'} */ +function decodeBase64_(str: StringTensor|Tensor): T { + const $str = convertToTensor(str, 'str', 'decodeBase64', 'string'); + + const backwardsFunc = (dy: T) => ({$str: () => encodeBase64(dy)}); + + return ENGINE.runKernel( + backend => backend.decodeBase64($str), {$str}, backwardsFunc); +} + +export const encodeBase64 = op({encodeBase64_}); +export const decodeBase64 = op({decodeBase64_}); diff --git a/src/tensor.ts b/src/tensor.ts index 676aa95b09..a49bb7bad6 100644 --- a/src/tensor.ts +++ b/src/tensor.ts @@ -367,6 +367,8 @@ export interface OpHandler { fft(x: Tensor): Tensor; ifft(x: Tensor): Tensor; rfft(x: Tensor): Tensor; irfft(x: Tensor): Tensor }; + encodeBase64(x: T, pad: boolean): T; + decodeBase64(x: T): T; } // For tracking tensor creation and disposal. @@ -1388,6 +1390,16 @@ export class Tensor { this.throwIfDisposed(); return opHandler.spectral.irfft(this); } + + encodeBase64(this: T, pad = false): T { + this.throwIfDisposed(); + return opHandler.encodeBase64(this, pad); + } + + decodeBase64(this: T): T { + this.throwIfDisposed(); + return opHandler.decodeBase64(this); + } } Object.defineProperty(Tensor, Symbol.hasInstance, { value: (instance: Tensor) => { From ac1530694c620d2517e10b5a08184416cc8460ac Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 13:59:48 -0400 Subject: [PATCH 04/15] test encodeBase64, decodeBase64 --- src/ops/string_ops_test.ts | 105 +++++++++++++++++++++++++++++++++++++ src/tests.ts | 1 + 2 files changed, 106 insertions(+) create mode 100644 src/ops/string_ops_test.ts diff --git a/src/ops/string_ops_test.ts b/src/ops/string_ops_test.ts new file mode 100644 index 0000000000..5da5213f21 --- /dev/null +++ b/src/ops/string_ops_test.ts @@ -0,0 +1,105 @@ +/** + * @license + * Copyright 2017 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import * as tf from '../index'; +import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysEqual} from '../test_util'; + +const txtArr = [ + 'Hello TensorFlow.js!', '𝌆', 'Pre\u2014trained models with Base64 ops\u002e', + 'how about these? 🌍💻🍕', 'https://www.tensorflow.org/js', 'àβÇdéf', + '你好, 世界', `Build, train, & deploy +ML models in JS` +]; +const urlSafeB64 = [ + 'SGVsbG8gVGVuc29yRmxvdy5qcyE', '8J2Mhg', + 'UHJl4oCUdHJhaW5lZCBtb2RlbHMgd2l0aCBCYXNlNjQgb3BzLg', + 'aG93IGFib3V0IHRoZXNlPyDwn4yN8J-Su_CfjZU', + 'aHR0cHM6Ly93d3cudGVuc29yZmxvdy5vcmcvanM', 'w6DOssOHZMOpZg', + '5L2g5aW9LCDkuJbnlYw', 'QnVpbGQsIHRyYWluLCAmIGRlcGxveQpNTCBtb2RlbHMgaW4gSlM' +]; +const urlSafeB64Pad = [ + 'SGVsbG8gVGVuc29yRmxvdy5qcyE=', '8J2Mhg==', + 'UHJl4oCUdHJhaW5lZCBtb2RlbHMgd2l0aCBCYXNlNjQgb3BzLg==', + 'aG93IGFib3V0IHRoZXNlPyDwn4yN8J-Su_CfjZU=', + 'aHR0cHM6Ly93d3cudGVuc29yZmxvdy5vcmcvanM=', 'w6DOssOHZMOpZg==', + '5L2g5aW9LCDkuJbnlYw=', 'QnVpbGQsIHRyYWluLCAmIGRlcGxveQpNTCBtb2RlbHMgaW4gSlM=' +]; + +describeWithFlags('encodeBase64', ALL_ENVS, () => { + it('scalar', async () => { + const a = tf.scalar(txtArr[1], 'string'); + const r = tf.encodeBase64(a); + expect(r.shape).toEqual([]); + expectArraysEqual(await r.data(), urlSafeB64[1]); + }); + it('1D padded', async () => { + const a = tf.tensor1d([txtArr[2]], 'string'); + const r = tf.encodeBase64(a, true); + expect(r.shape).toEqual([1]); + expectArraysEqual(await r.data(), [urlSafeB64Pad[2]]); + }); + it('2D', 
async () => { + const a = tf.tensor2d(txtArr, [2, 4], 'string'); + const r = tf.encodeBase64(a, false); + expect(r.shape).toEqual([2, 4]); + expectArraysEqual(await r.data(), urlSafeB64); + }); + it('3D padded', async () => { + const a = tf.tensor3d(txtArr, [2, 2, 2], 'string'); + const r = tf.encodeBase64(a, true); + expect(r.shape).toEqual([2, 2, 2]); + expectArraysEqual(await r.data(), urlSafeB64Pad); + }); +}); + +describeWithFlags('decodeBase64', ALL_ENVS, () => { + it('scalar', async () => { + const a = tf.scalar(urlSafeB64[1], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([]); + expectArraysEqual(await r.data(), txtArr[1]); + }); + it('1D padded', async () => { + const a = tf.tensor1d([urlSafeB64Pad[2]], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([1]); + expectArraysEqual(await r.data(), [txtArr[2]]); + }); + it('2D', async () => { + const a = tf.tensor2d(urlSafeB64, [2, 4], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([2, 4]); + expectArraysEqual(await r.data(), txtArr); + }); + it('3D padded', async () => { + const a = tf.tensor3d(urlSafeB64Pad, [2, 2, 2], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([2, 2, 2]); + expectArraysEqual(await r.data(), txtArr); + }); +}); + +describeWithFlags('encodeBase64-decodeBase64', ALL_ENVS, () => { + it('round-trip', async () => { + const s = [txtArr.join('')]; + const a = tf.tensor(s, [1], 'string'); + const b = tf.encodeBase64(a); + const c = tf.decodeBase64(b); + expectArraysEqual(await c.data(), s); + }); +}); diff --git a/src/tests.ts b/src/tests.ts index eba4c3b71b..6792238fef 100644 --- a/src/tests.ts +++ b/src/tests.ts @@ -84,6 +84,7 @@ import './ops/softmax_test'; import './ops/sparse_to_dense_test'; import './ops/spectral_ops_test'; import './ops/strided_slice_test'; +import './ops/string_ops_test'; import './ops/topk_test'; import './ops/transpose_test'; import './ops/unary_ops_test'; From 
0d7bddaabe3a94b899c4d8a55a5c329ef0646a34 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 11:55:40 -0400 Subject: [PATCH 05/15] helper functions for base64 --- src/io/io_utils.ts | 100 ++++++++++++++++++++++++++++++++++++++++ src/io/io_utils_test.ts | 26 ++++++++++- 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/src/io/io_utils.ts b/src/io/io_utils.ts index 86540c2eb8..dd07fdc878 100644 --- a/src/io/io_utils.ts +++ b/src/io/io_utils.ts @@ -310,3 +310,103 @@ export function getModelArtifactsInfoForJSON(modelArtifacts: ModelArtifacts): modelArtifacts.weightData.byteLength, }; } + +/** + * Make Base64 string URL safe by replacing `+` with `-` and `/` with `_`. + * + * @param str Base64 string to make URL safe. + */ +export function urlSafeBase64(str: string): string { + return str.replace(/\+/g, '-').replace(/\//g, '_'); +} + +// revert Base64 URL safe replacement of + and / +/** + * Revert Base64 URL safe changes by replacing `-` with `+` and `_` with `/`. + * + * @param str URL safe Base64 string to revert changes. 
+ */ +export function urlUnsafeBase64(str: string): string { + return str.replace(/-/g, '+').replace(/_/g, '/'); +} + +/** + * Convert a string to an ArrayBuffer of UTF-8 multibyte sequence + * + * @param str A string to be converted + * @returns `ArrayBuffer` with the contents of `str` + */ +export function stringToArrayBuffer(str: string): ArrayBuffer { + // contains multibyte characters + if (/[\u0080-\uffff]/.test(str)) { + const arr = new Array(); + for (let i = 0; i < str.length; i++) { + // var cc = str.charCodeAt(i); + const cc = str.codePointAt(i); + if (cc < 0x80) { + // single byte + arr.push(cc); + } else { + // UTF-8 multibyte + if (cc < 0x800) { + // two bytes + arr.push(0xc0 | (cc >> 6)); + arr.push(0x80 | (cc & 0x3f)); + } else if (cc < 0x10000) { + // three bytes + arr.push(0xe0 | ((cc >> 12) & 0x0f)); + arr.push(0x80 | ((cc >> 6) & 0x3f)); + arr.push(0x80 | (cc & 0x3f)); + } else { + // four bytes + arr.push(0xf0 | ((cc >> 18) & 0x07)); + arr.push(0x80 | ((cc >> 12) & 0x3f)); + arr.push(0x80 | ((cc >> 6) & 0x3f)); + arr.push(0x80 | (cc & 0x3f)); + i++; + } + } + } + return (new Uint8Array(arr)).buffer; + } else { + const byteArray = new Uint8Array(str.length); + for (let i = str.length; i--;) { + byteArray[i] = str.charCodeAt(i); + } + return byteArray.buffer; + } +} + +/** + * Convert an ArrayBuffer of UTF-8 multibyte sequence to a string. + * + * @param buffer `ArrayBuffer` to be converted. + * @returns A string representation of `buffer`. 
+ */ +export function arrayBufferToString(buffer: ArrayBuffer): string { + const byteArray = new Uint8Array(buffer); + const arr = new Array(); + + for (let i = 0, len = byteArray.length; i < len; ++i) { + const v = byteArray[i]; + if (v < 0x80) { + // one byte + arr.push(v); + } else if (v >= 0xc0 && v < 0xe0) { + // two bytes + arr.push(((0x1f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); + } else if (v < 0xf0) { + // three bytes + arr.push( + ((0x0f & byteArray[i++]) << 12) + ((0x3f & byteArray[i++]) << 6) + + (0x3f & byteArray[i])); + } else if (v >= 0xf0 && v < 0xf7) { + // four bytes + arr.push( + ((0x07 & byteArray[i++]) << 18) + ((0x3f & byteArray[i++]) << 12) + + ((0x3f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); + } + } + + return String.fromCodePoint(...arr); +} diff --git a/src/io/io_utils_test.ts b/src/io/io_utils_test.ts index 5a6feaa07d..d7f960835e 100644 --- a/src/io/io_utils_test.ts +++ b/src/io/io_utils_test.ts @@ -22,7 +22,7 @@ import {NamedTensor, NamedTensorMap} from '../tensor_types'; import {expectArraysEqual} from '../test_util'; import {expectArraysClose} from '../test_util'; -import {arrayBufferToBase64String, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength} from './io_utils'; +import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength, stringToArrayBuffer} from './io_utils'; import {WeightsManifestEntry} from './types'; describe('concatenateTypedArrays', () => { @@ -542,3 +542,27 @@ describe('basename', () => { expect(basename('foo/bar/baz//')).toEqual('baz'); }); }); + +describe('stringToArrayBuffer-arrayBufferToString', () => { + it('round-trip', () => { + const len = Math.floor((Math.random() * 200) + 10); + const arr = new Array(); + + // Generate some random unicode code points + for (let i = 0; i < len; i++) { + const cp = Math.floor((Math.random() * 500) + 32); + if (cp 
> 126 && cp < 161) { + continue; + } + arr.push(cp); + } + + // turn code points into a string + const str = String.fromCodePoint(...arr); + + const aBuff = stringToArrayBuffer(str); + const str2 = arrayBufferToString(aBuff); + + expect(str2).toEqual(str); + }); +}); From 4c20a6eabbd4f7552c14808ef6e08c4d711fec45 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 11:56:33 -0400 Subject: [PATCH 06/15] backend for encodeBase64, decodeBase64 --- src/backends/backend.ts | 11 +++++- src/backends/cpu/backend_cpu.ts | 12 +++++- src/backends/string_shared.ts | 59 +++++++++++++++++++++++++++++ src/backends/webgl/backend_webgl.ts | 12 +++++- 4 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 src/backends/string_shared.ts diff --git a/src/backends/backend.ts b/src/backends/backend.ts index 2b15d2e263..d1906047b1 100644 --- a/src/backends/backend.ts +++ b/src/backends/backend.ts @@ -17,7 +17,7 @@ import {Conv2DInfo, Conv3DInfo} from '../ops/conv_util'; import {Activation} from '../ops/fused_util'; -import {Backend, DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; +import {Backend, DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../tensor'; import {DataType, DataValues, PixelData, Rank, ShapeMap} from '../types'; export const EPSILON_FLOAT32 = 1e-7; @@ -623,4 +623,13 @@ export class KernelBackend implements TensorStorage, Backend, BackendTimer { dispose(): void { throw new Error('Not yet implemented'); } + + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + throw new Error('Not yet implemented'); + } + + decodeBase64(str: StringTensor|Tensor): T { + throw new Error('Not yet implemented'); + } } diff --git a/src/backends/cpu/backend_cpu.ts b/src/backends/cpu/backend_cpu.ts index b0fc21a092..2df51755df 100644 --- a/src/backends/cpu/backend_cpu.ts +++ b/src/backends/cpu/backend_cpu.ts @@ -33,7 +33,7 @@ import {buffer, scalar, tensor, tensor3d, tensor4d} 
from '../../ops/ops'; import * as scatter_nd_util from '../../ops/scatter_nd_util'; import * as selu_util from '../../ops/selu_util'; import {computeFlatOffset, getStridedSlicedInfo, isSliceContinous} from '../../ops/slice_util'; -import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor'; +import {DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D, TensorBuffer} from '../../tensor'; import {DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, ShapeMap, TypedArray, upcastType} from '../../types'; import * as util from '../../util'; import {getArrayFromDType, inferDtype, now, sizeFromShape} from '../../util'; @@ -42,6 +42,7 @@ import * as backend_util from '../backend_util'; import * as complex_util from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; +import {decodeBase64, encodeBase64} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -3183,6 +3184,15 @@ export class MathBackendCPU implements KernelBackend { dispose() {} + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + return encodeBase64(str, pad); + } + + decodeBase64(str: StringTensor|Tensor): T { + return decodeBase64(str); + } + floatPrecision(): 16|32 { return 32; } diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts new file mode 100644 index 0000000000..404b7dfb93 --- /dev/null +++ b/src/backends/string_shared.ts @@ -0,0 +1,59 @@ +/** + * @license + * Copyright 2018 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; +import {StringTensor, Tensor} from '../tensor'; + +/** Shared implementation of the encodeBase64 kernel across WebGL and CPU. */ +export function encodeBase64( + str: StringTensor|Tensor, pad = false): T { + const resultValues = new Array(str.size); + const values = str.dataSync(); + + for (let i = 0; i < values.length; ++i) { + // Convert from string to ArrayBuffer of UTF-8 multibyte sequence + // tslint:disable-next-line: max-line-length + // https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem + const aBuff = stringToArrayBuffer(values[i].toString()); + + // Encode to Base64 and make URL safe + const bVal = urlSafeBase64(arrayBufferToBase64String(aBuff)); + + // Remove padding + resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); + } + + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; +} + +/** Shared implementation of the decodeBase64 kernel across WebGL and CPU. 
*/ +export function decodeBase64(str: StringTensor| + Tensor): T { + const resultValues = new Array(str.size); + const values = str.dataSync(); + + for (let i = 0; i < values.length; ++i) { + // Undo URL safe and decode from Base64 to ArrayBuffer + const aBuff = + base64StringToArrayBuffer(urlUnsafeBase64(values[i].toString())); + + // Convert from ArrayBuffer of UTF-8 multibyte sequence to string + resultValues[i] = arrayBufferToString(aBuff); + } + + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; +} diff --git a/src/backends/webgl/backend_webgl.ts b/src/backends/webgl/backend_webgl.ts index 85e4272235..cac9ecc0b1 100644 --- a/src/backends/webgl/backend_webgl.ts +++ b/src/backends/webgl/backend_webgl.ts @@ -35,7 +35,7 @@ import * as segment_util from '../../ops/segment_util'; import {computeFlatOffset, getStridedSlicedInfo, isSliceContinous} from '../../ops/slice_util'; import {softmax} from '../../ops/softmax'; import {range, scalar, tensor} from '../../ops/tensor_ops'; -import {DataId, Scalar, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../../tensor'; +import {DataId, Scalar, StringTensor, Tensor, Tensor1D, Tensor2D, Tensor3D, Tensor4D, Tensor5D} from '../../tensor'; import {DataType, DataTypeMap, DataValues, NumericDataType, PixelData, Rank, RecursiveArray, ShapeMap, sumOutType, TypedArray, upcastType} from '../../types'; import * as util from '../../util'; import {getArrayFromDType, getTypedArrayFromDType, inferDtype, sizeFromShape} from '../../util'; @@ -44,6 +44,7 @@ import * as backend_util from '../backend_util'; import {mergeRealAndImagArrays} from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; +import {decodeBase64, encodeBase64} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -2158,6 +2159,15 @@ export class MathBackendWebGL implements KernelBackend { return split(x, 
sizeSplits, axis); } + encodeBase64(str: StringTensor|Tensor, pad = false): + T { + return encodeBase64(str, pad); + } + + decodeBase64(str: StringTensor|Tensor): T { + return decodeBase64(str); + } + scatterND( indices: Tensor, updates: Tensor, shape: ShapeMap[R]): Tensor { const {sliceRank, numUpdates, sliceSize, strides, outputSize} = From ec1d4083bd984b8ebe4707fbc09a8dd9014af269 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 13:59:42 -0400 Subject: [PATCH 07/15] encodeBase64, decodeBase64 ops --- src/ops/ops.ts | 1 + src/ops/string_ops.ts | 78 +++++++++++++++++++++++++++++++++++++++++++ src/tensor.ts | 12 +++++++ 3 files changed, 91 insertions(+) create mode 100644 src/ops/string_ops.ts diff --git a/src/ops/ops.ts b/src/ops/ops.ts index 7714ba320b..7b4b9252bf 100644 --- a/src/ops/ops.ts +++ b/src/ops/ops.ts @@ -46,6 +46,7 @@ export * from './sparse_to_dense'; export * from './gather_nd'; export * from './dropout'; export * from './signal_ops'; +export * from './string_ops'; export {op} from './operation'; diff --git a/src/ops/string_ops.ts b/src/ops/string_ops.ts new file mode 100644 index 0000000000..1bc21e6027 --- /dev/null +++ b/src/ops/string_ops.ts @@ -0,0 +1,78 @@ +/** + * @license + * Copyright 2018 Google LLC. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * ============================================================================= + */ +import {ENGINE} from '../engine'; +import {StringTensor, Tensor} from '../tensor'; +import {convertToTensor} from '../tensor_util_env'; + +import {op} from './operation'; + +/** + * Encodes the values of a `tf.Tensor` (of dtype `string`) to Base64. + * + * Given a String tensor, returns a new tensor with the values encoded into + * web-safe base64 format. + * + * Web-safe means that the encoder uses `-` and `_` instead of `+` and `/`: + * + * en.wikipedia.org/wiki/Base64 + * + * ```js + * const x = tf.tensor1d(['Hello world!'], 'string'); + * + * x.encodeBase64().print(); + * ``` + * @param str The input `tf.Tensor` of dtype `string` to encode. + * @param pad Whether to add padding (`=`) to the end of the encoded string. + */ +/** @doc {heading: 'Operations', subheading: 'String'} */ +function encodeBase64_( + str: StringTensor|Tensor, pad = false): T { + const $str = convertToTensor(str, 'str', 'encodeBase64', 'string'); + + const backwardsFunc = (dy: T) => ({$str: () => decodeBase64(dy)}); + + return ENGINE.runKernel( + backend => backend.encodeBase64($str, pad), {$str}, backwardsFunc); +} + +/** + * Decodes the values of a `tf.Tensor` (of dtype `string`) from Base64. + * + * Given a String tensor of Base64 encoded values, returns a new tensor with the + * decoded values. + * + * en.wikipedia.org/wiki/Base64 + * + * ```js + * const y = tf.scalar('SGVsbG8gd29ybGQh', 'string'); + * + * y.decodeBase64().print(); + * ``` + * @param str The input `tf.Tensor` of dtype `string` to decode. 
+ */ +/** @doc {heading: 'Operations', subheading: 'String'} */ +function decodeBase64_(str: StringTensor|Tensor): T { + const $str = convertToTensor(str, 'str', 'decodeBase64', 'string'); + + const backwardsFunc = (dy: T) => ({$str: () => encodeBase64(dy)}); + + return ENGINE.runKernel( + backend => backend.decodeBase64($str), {$str}, backwardsFunc); +} + +export const encodeBase64 = op({encodeBase64_}); +export const decodeBase64 = op({decodeBase64_}); diff --git a/src/tensor.ts b/src/tensor.ts index 676aa95b09..a49bb7bad6 100644 --- a/src/tensor.ts +++ b/src/tensor.ts @@ -367,6 +367,8 @@ export interface OpHandler { fft(x: Tensor): Tensor; ifft(x: Tensor): Tensor; rfft(x: Tensor): Tensor; irfft(x: Tensor): Tensor }; + encodeBase64(x: T, pad: boolean): T; + decodeBase64(x: T): T; } // For tracking tensor creation and disposal. @@ -1388,6 +1390,16 @@ export class Tensor { this.throwIfDisposed(); return opHandler.spectral.irfft(this); } + + encodeBase64(this: T, pad = false): T { + this.throwIfDisposed(); + return opHandler.encodeBase64(this, pad); + } + + decodeBase64(this: T): T { + this.throwIfDisposed(); + return opHandler.decodeBase64(this); + } } Object.defineProperty(Tensor, Symbol.hasInstance, { value: (instance: Tensor) => { From c934a4967ac3fb1223d75732e77d578fe6f76368 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Thu, 6 Jun 2019 13:59:48 -0400 Subject: [PATCH 08/15] test encodeBase64, decodeBase64 --- src/ops/string_ops_test.ts | 105 +++++++++++++++++++++++++++++++++++++ src/tests.ts | 1 + 2 files changed, 106 insertions(+) create mode 100644 src/ops/string_ops_test.ts diff --git a/src/ops/string_ops_test.ts b/src/ops/string_ops_test.ts new file mode 100644 index 0000000000..5da5213f21 --- /dev/null +++ b/src/ops/string_ops_test.ts @@ -0,0 +1,105 @@ +/** + * @license + * Copyright 2017 Google Inc. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ============================================================================= + */ + +import * as tf from '../index'; +import {ALL_ENVS, describeWithFlags} from '../jasmine_util'; +import {expectArraysEqual} from '../test_util'; + +const txtArr = [ + 'Hello TensorFlow.js!', '𝌆', 'Pre\u2014trained models with Base64 ops\u002e', + 'how about these? 🌍💻🍕', 'https://www.tensorflow.org/js', 'àβÇdéf', + '你好, 世界', `Build, train, & deploy +ML models in JS` +]; +const urlSafeB64 = [ + 'SGVsbG8gVGVuc29yRmxvdy5qcyE', '8J2Mhg', + 'UHJl4oCUdHJhaW5lZCBtb2RlbHMgd2l0aCBCYXNlNjQgb3BzLg', + 'aG93IGFib3V0IHRoZXNlPyDwn4yN8J-Su_CfjZU', + 'aHR0cHM6Ly93d3cudGVuc29yZmxvdy5vcmcvanM', 'w6DOssOHZMOpZg', + '5L2g5aW9LCDkuJbnlYw', 'QnVpbGQsIHRyYWluLCAmIGRlcGxveQpNTCBtb2RlbHMgaW4gSlM' +]; +const urlSafeB64Pad = [ + 'SGVsbG8gVGVuc29yRmxvdy5qcyE=', '8J2Mhg==', + 'UHJl4oCUdHJhaW5lZCBtb2RlbHMgd2l0aCBCYXNlNjQgb3BzLg==', + 'aG93IGFib3V0IHRoZXNlPyDwn4yN8J-Su_CfjZU=', + 'aHR0cHM6Ly93d3cudGVuc29yZmxvdy5vcmcvanM=', 'w6DOssOHZMOpZg==', + '5L2g5aW9LCDkuJbnlYw=', 'QnVpbGQsIHRyYWluLCAmIGRlcGxveQpNTCBtb2RlbHMgaW4gSlM=' +]; + +describeWithFlags('encodeBase64', ALL_ENVS, () => { + it('scalar', async () => { + const a = tf.scalar(txtArr[1], 'string'); + const r = tf.encodeBase64(a); + expect(r.shape).toEqual([]); + expectArraysEqual(await r.data(), urlSafeB64[1]); + }); + it('1D padded', async () => { + const a = tf.tensor1d([txtArr[2]], 'string'); + const r = tf.encodeBase64(a, true); + expect(r.shape).toEqual([1]); + expectArraysEqual(await r.data(), [urlSafeB64Pad[2]]); + }); + it('2D', 
async () => { + const a = tf.tensor2d(txtArr, [2, 4], 'string'); + const r = tf.encodeBase64(a, false); + expect(r.shape).toEqual([2, 4]); + expectArraysEqual(await r.data(), urlSafeB64); + }); + it('3D padded', async () => { + const a = tf.tensor3d(txtArr, [2, 2, 2], 'string'); + const r = tf.encodeBase64(a, true); + expect(r.shape).toEqual([2, 2, 2]); + expectArraysEqual(await r.data(), urlSafeB64Pad); + }); +}); + +describeWithFlags('decodeBase64', ALL_ENVS, () => { + it('scalar', async () => { + const a = tf.scalar(urlSafeB64[1], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([]); + expectArraysEqual(await r.data(), txtArr[1]); + }); + it('1D padded', async () => { + const a = tf.tensor1d([urlSafeB64Pad[2]], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([1]); + expectArraysEqual(await r.data(), [txtArr[2]]); + }); + it('2D', async () => { + const a = tf.tensor2d(urlSafeB64, [2, 4], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([2, 4]); + expectArraysEqual(await r.data(), txtArr); + }); + it('3D padded', async () => { + const a = tf.tensor3d(urlSafeB64Pad, [2, 2, 2], 'string'); + const r = tf.decodeBase64(a); + expect(r.shape).toEqual([2, 2, 2]); + expectArraysEqual(await r.data(), txtArr); + }); +}); + +describeWithFlags('encodeBase64-decodeBase64', ALL_ENVS, () => { + it('round-trip', async () => { + const s = [txtArr.join('')]; + const a = tf.tensor(s, [1], 'string'); + const b = tf.encodeBase64(a); + const c = tf.decodeBase64(b); + expectArraysEqual(await c.data(), s); + }); +}); diff --git a/src/tests.ts b/src/tests.ts index eba4c3b71b..6792238fef 100644 --- a/src/tests.ts +++ b/src/tests.ts @@ -84,6 +84,7 @@ import './ops/softmax_test'; import './ops/sparse_to_dense_test'; import './ops/spectral_ops_test'; import './ops/strided_slice_test'; +import './ops/string_ops_test'; import './ops/topk_test'; import './ops/transpose_test'; import './ops/unary_ops_test'; From 
4eefddd3e6d8c0e0aa5b75b0d01aced6621bfe86 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Sat, 8 Jun 2019 04:40:44 -0400 Subject: [PATCH 09/15] copyright year 2019 --- src/backends/string_shared.ts | 2 +- src/ops/string_ops.ts | 2 +- src/ops/string_ops_test.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index 404b7dfb93..f7efef933e 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2018 Google Inc. All Rights Reserved. + * Copyright 2019 Google Inc. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/src/ops/string_ops.ts b/src/ops/string_ops.ts index 1bc21e6027..c1c48ec711 100644 --- a/src/ops/string_ops.ts +++ b/src/ops/string_ops.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2018 Google LLC. All Rights Reserved. + * Copyright 2019 Google LLC. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at diff --git a/src/ops/string_ops_test.ts b/src/ops/string_ops_test.ts index 5da5213f21..5eecc442d1 100644 --- a/src/ops/string_ops_test.ts +++ b/src/ops/string_ops_test.ts @@ -1,6 +1,6 @@ /** * @license - * Copyright 2017 Google Inc. All Rights Reserved. + * Copyright 2019 Google Inc. All Rights Reserved. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at From c209006960e5efd1c25f5f8413959ae17d172daa Mon Sep 17 00:00:00 2001 From: va barbosa Date: Mon, 10 Jun 2019 21:41:37 -0400 Subject: [PATCH 10/15] unit test dataSync() error --- src/backends/string_shared.ts | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index f7efef933e..ee00e646dc 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -16,44 +16,50 @@ */ import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; +import * as ops from '../ops/ops'; import {StringTensor, Tensor} from '../tensor'; /** Shared implementation of the encodeBase64 kernel across WebGL and CPU. */ export function encodeBase64( str: StringTensor|Tensor, pad = false): T { - const resultValues = new Array(str.size); - const values = str.dataSync(); + const buffer = ops.buffer(str.shape, str.dtype); + const strBuffer = str.bufferSync(); + + for (let i = 0; i < buffer.size; ++i) { + const loc = buffer.indexToLoc(i); + const value = strBuffer.get(...loc).toString(); - for (let i = 0; i < values.length; ++i) { // Convert from string to ArrayBuffer of UTF-8 multibyte sequence // tslint:disable-next-line: max-line-length // https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem - const aBuff = stringToArrayBuffer(values[i].toString()); + const aBuff = stringToArrayBuffer(value); // Encode to Base64 and make URL safe const bVal = urlSafeBase64(arrayBufferToBase64String(aBuff)); // Remove padding - resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); + buffer.values[i] = pad ? 
bVal : bVal.replace(/=/g, ''); } - return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; + return buffer.toTensor() as T; } /** Shared implementation of the decodeBase64 kernel across WebGL and CPU. */ export function decodeBase64(str: StringTensor| Tensor): T { - const resultValues = new Array(str.size); - const values = str.dataSync(); + const buffer = ops.buffer(str.shape, str.dtype); + const strBuffer = str.bufferSync(); + + for (let i = 0; i < buffer.size; ++i) { + const loc = buffer.indexToLoc(i); + const value = strBuffer.get(...loc).toString(); - for (let i = 0; i < values.length; ++i) { // Undo URL safe and decode from Base64 to ArrayBuffer - const aBuff = - base64StringToArrayBuffer(urlUnsafeBase64(values[i].toString())); + const aBuff = base64StringToArrayBuffer(urlUnsafeBase64(value)); // Convert from ArrayBuffer of UTF-8 multibyte sequence to string - resultValues[i] = arrayBufferToString(aBuff); + buffer.values[i] = arrayBufferToString(aBuff); } - return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; + return buffer.toTensor() as T; } From 87b3d1fe07e004725e47307a2c86c63537b12e2c Mon Sep 17 00:00:00 2001 From: va barbosa Date: Mon, 10 Jun 2019 22:15:52 -0400 Subject: [PATCH 11/15] Revert "unit test dataSync() error" This reverts commit c209006960e5efd1c25f5f8413959ae17d172daa. --- src/backends/string_shared.ts | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index ee00e646dc..f7efef933e 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -16,50 +16,44 @@ */ import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; -import * as ops from '../ops/ops'; import {StringTensor, Tensor} from '../tensor'; /** Shared implementation of the encodeBase64 kernel across WebGL and CPU. 
*/ export function encodeBase64( str: StringTensor|Tensor, pad = false): T { - const buffer = ops.buffer(str.shape, str.dtype); - const strBuffer = str.bufferSync(); - - for (let i = 0; i < buffer.size; ++i) { - const loc = buffer.indexToLoc(i); - const value = strBuffer.get(...loc).toString(); + const resultValues = new Array(str.size); + const values = str.dataSync(); + for (let i = 0; i < values.length; ++i) { // Convert from string to ArrayBuffer of UTF-8 multibyte sequence // tslint:disable-next-line: max-line-length // https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem - const aBuff = stringToArrayBuffer(value); + const aBuff = stringToArrayBuffer(values[i].toString()); // Encode to Base64 and make URL safe const bVal = urlSafeBase64(arrayBufferToBase64String(aBuff)); // Remove padding - buffer.values[i] = pad ? bVal : bVal.replace(/=/g, ''); + resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); } - return buffer.toTensor() as T; + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; } /** Shared implementation of the decodeBase64 kernel across WebGL and CPU. 
*/ export function decodeBase64(str: StringTensor| Tensor): T { - const buffer = ops.buffer(str.shape, str.dtype); - const strBuffer = str.bufferSync(); - - for (let i = 0; i < buffer.size; ++i) { - const loc = buffer.indexToLoc(i); - const value = strBuffer.get(...loc).toString(); + const resultValues = new Array(str.size); + const values = str.dataSync(); + for (let i = 0; i < values.length; ++i) { // Undo URL safe and decode from Base64 to ArrayBuffer - const aBuff = base64StringToArrayBuffer(urlUnsafeBase64(value)); + const aBuff = + base64StringToArrayBuffer(urlUnsafeBase64(values[i].toString())); // Convert from ArrayBuffer of UTF-8 multibyte sequence to string - buffer.values[i] = arrayBufferToString(aBuff); + resultValues[i] = arrayBufferToString(aBuff); } - return buffer.toTensor() as T; + return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; } From 6bd606568b06346d38fa72e7154f59e836f19387 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Mon, 10 Jun 2019 22:42:13 -0400 Subject: [PATCH 12/15] replace dataSync with readSync --- src/backends/string_shared.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index f7efef933e..11c5670e8f 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -15,6 +15,7 @@ * ============================================================================= */ +import {ENGINE} from '../engine'; import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; import {StringTensor, Tensor} from '../tensor'; @@ -22,7 +23,7 @@ import {StringTensor, Tensor} from '../tensor'; export function encodeBase64( str: StringTensor|Tensor, pad = false): T { const resultValues = new Array(str.size); - const values = str.dataSync(); + const values = ENGINE.backend.readSync(str.dataId); for (let i = 0; i < values.length; ++i) { // 
Convert from string to ArrayBuffer of UTF-8 multibyte sequence @@ -44,7 +45,7 @@ export function encodeBase64( export function decodeBase64(str: StringTensor| Tensor): T { const resultValues = new Array(str.size); - const values = str.dataSync(); + const values = ENGINE.backend.readSync(str.dataId); for (let i = 0; i < values.length; ++i) { // Undo URL safe and decode from Base64 to ArrayBuffer From 1545ed7301ab9d736dc8aedda92e6fa55bfb70b9 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Tue, 11 Jun 2019 00:02:44 -0400 Subject: [PATCH 13/15] test-async-cpu --- src/backends/cpu/backend_cpu.ts | 8 +++++--- src/backends/string_shared.ts | 20 +++++++++----------- src/backends/webgl/backend_webgl.ts | 8 +++++--- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/backends/cpu/backend_cpu.ts b/src/backends/cpu/backend_cpu.ts index 2df51755df..6cb2c0ba04 100644 --- a/src/backends/cpu/backend_cpu.ts +++ b/src/backends/cpu/backend_cpu.ts @@ -42,7 +42,7 @@ import * as backend_util from '../backend_util'; import * as complex_util from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; -import {decodeBase64, encodeBase64} from '../string_shared'; +import {decodeBase64Impl, encodeBase64Impl} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -3186,11 +3186,13 @@ export class MathBackendCPU implements KernelBackend { encodeBase64(str: StringTensor|Tensor, pad = false): T { - return encodeBase64(str, pad); + const sVals = this.readSync(str.dataId) as TypedArray; + return encodeBase64Impl(sVals, str.shape, pad); } decodeBase64(str: StringTensor|Tensor): T { - return decodeBase64(str); + const sVals = this.readSync(str.dataId) as TypedArray; + return decodeBase64Impl(sVals, str.shape); } floatPrecision(): 16|32 { diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index 11c5670e8f..7930742e61 100644 --- 
a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -15,15 +15,14 @@ * ============================================================================= */ -import {ENGINE} from '../engine'; import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; import {StringTensor, Tensor} from '../tensor'; +import {TypedArray} from '../types'; /** Shared implementation of the encodeBase64 kernel across WebGL and CPU. */ -export function encodeBase64( - str: StringTensor|Tensor, pad = false): T { - const resultValues = new Array(str.size); - const values = ENGINE.backend.readSync(str.dataId); +export function encodeBase64Impl( + values: TypedArray|string[], shape: number[], pad = false): T { + const resultValues = new Array(values.length); for (let i = 0; i < values.length; ++i) { // Convert from string to ArrayBuffer of UTF-8 multibyte sequence @@ -38,14 +37,13 @@ export function encodeBase64( resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); } - return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; + return Tensor.make(shape, {values: resultValues}, 'string') as T; } /** Shared implementation of the decodeBase64 kernel across WebGL and CPU. 
*/ -export function decodeBase64(str: StringTensor| - Tensor): T { - const resultValues = new Array(str.size); - const values = ENGINE.backend.readSync(str.dataId); +export function decodeBase64Impl( + values: TypedArray|string[], shape: number[]): T { + const resultValues = new Array(values.length); for (let i = 0; i < values.length; ++i) { // Undo URL safe and decode from Base64 to ArrayBuffer @@ -56,5 +54,5 @@ export function decodeBase64(str: StringTensor| resultValues[i] = arrayBufferToString(aBuff); } - return Tensor.make(str.shape, {values: resultValues}, str.dtype) as T; + return Tensor.make(shape, {values: resultValues}, 'string') as T; } diff --git a/src/backends/webgl/backend_webgl.ts b/src/backends/webgl/backend_webgl.ts index cac9ecc0b1..581d3a5ca1 100644 --- a/src/backends/webgl/backend_webgl.ts +++ b/src/backends/webgl/backend_webgl.ts @@ -44,7 +44,7 @@ import * as backend_util from '../backend_util'; import {mergeRealAndImagArrays} from '../complex_util'; import {nonMaxSuppressionImpl} from '../non_max_suppression_impl'; import {split} from '../split_shared'; -import {decodeBase64, encodeBase64} from '../string_shared'; +import {decodeBase64Impl, encodeBase64Impl} from '../string_shared'; import {topkImpl} from '../topk_impl'; import {whereImpl} from '../where_impl'; @@ -2161,11 +2161,13 @@ export class MathBackendWebGL implements KernelBackend { encodeBase64(str: StringTensor|Tensor, pad = false): T { - return encodeBase64(str, pad); + const sVals = str.dataSync(); + return encodeBase64Impl(sVals, str.shape, pad); } decodeBase64(str: StringTensor|Tensor): T { - return decodeBase64(str); + const sVals = str.dataSync(); + return decodeBase64Impl(sVals, str.shape); } scatterND( From 722a7002d9511b59fa103fc34e21069ecd0718d0 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Mon, 1 Jul 2019 17:31:31 -0400 Subject: [PATCH 14/15] update after fetching upstream changes --- src/backends/string_shared.ts | 32 ++++++++++++++++---------------- 1 file changed, 16 
insertions(+), 16 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index 7930742e61..776bf05a78 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -15,26 +15,25 @@ * ============================================================================= */ -import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, stringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; +import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; import {StringTensor, Tensor} from '../tensor'; -import {TypedArray} from '../types'; +import {decodeString} from '../util'; /** Shared implementation of the encodeBase64 kernel across WebGL and CPU. */ export function encodeBase64Impl( - values: TypedArray|string[], shape: number[], pad = false): T { + values: Uint8Array[], shape: number[], pad = false): T { const resultValues = new Array(values.length); for (let i = 0; i < values.length; ++i) { - // Convert from string to ArrayBuffer of UTF-8 multibyte sequence - // tslint:disable-next-line: max-line-length - // https://developer.mozilla.org/en-US/docs/Web/API/WindowBase64/Base64_encoding_and_decoding#The_Unicode_Problem - const aBuff = stringToArrayBuffer(values[i].toString()); - - // Encode to Base64 and make URL safe - const bVal = urlSafeBase64(arrayBufferToBase64String(aBuff)); - - // Remove padding - resultValues[i] = pad ? bVal : bVal.replace(/=/g, ''); + const bStr = arrayBufferToBase64String(values[i].buffer); + const bStrUrl = urlSafeBase64(bStr); + + if (pad) { + resultValues[i] = bStrUrl; + } else { + // Remove padding + resultValues[i] = bStrUrl.replace(/=/g, ''); + } } return Tensor.make(shape, {values: resultValues}, 'string') as T; @@ -42,13 +41,14 @@ export function encodeBase64Impl( /** Shared implementation of the decodeBase64 kernel across WebGL and CPU. 
*/ export function decodeBase64Impl( - values: TypedArray|string[], shape: number[]): T { + values: Uint8Array[], shape: number[]): T { const resultValues = new Array(values.length); for (let i = 0; i < values.length; ++i) { // Undo URL safe and decode from Base64 to ArrayBuffer - const aBuff = - base64StringToArrayBuffer(urlUnsafeBase64(values[i].toString())); + const bStrUrl = decodeString(values[i]); + const bStr = urlUnsafeBase64(bStrUrl); + const aBuff = base64StringToArrayBuffer(bStr); // Convert from ArrayBuffer of UTF-8 multibyte sequence to string resultValues[i] = arrayBufferToString(aBuff); From ced75c2c1fd4b5269242e2c455a5fb044776dd52 Mon Sep 17 00:00:00 2001 From: va barbosa Date: Mon, 1 Jul 2019 18:23:25 -0400 Subject: [PATCH 15/15] update after fetching upstream changes - replace `arrayBufferToString` with `decodeString()` - remove unused `stringToArrayBuffer` --- src/backends/string_shared.ts | 5 +-- src/io/io_utils.ts | 81 ----------------------------------- src/io/io_utils_test.ts | 26 +---------- 3 files changed, 3 insertions(+), 109 deletions(-) diff --git a/src/backends/string_shared.ts b/src/backends/string_shared.ts index 776bf05a78..c0416b3fcf 100644 --- a/src/backends/string_shared.ts +++ b/src/backends/string_shared.ts @@ -15,7 +15,7 @@ * ============================================================================= */ -import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; +import {arrayBufferToBase64String, base64StringToArrayBuffer, urlSafeBase64, urlUnsafeBase64} from '../io/io_utils'; import {StringTensor, Tensor} from '../tensor'; import {decodeString} from '../util'; @@ -50,8 +50,7 @@ export function decodeBase64Impl( const bStr = urlUnsafeBase64(bStrUrl); const aBuff = base64StringToArrayBuffer(bStr); - // Convert from ArrayBuffer of UTF-8 multibyte sequence to string - resultValues[i] = arrayBufferToString(aBuff); + resultValues[i] = decodeString(new 
Uint8Array(aBuff)); } return Tensor.make(shape, {values: resultValues}, 'string') as T; diff --git a/src/io/io_utils.ts b/src/io/io_utils.ts index 028313cc49..4653619a1f 100644 --- a/src/io/io_utils.ts +++ b/src/io/io_utils.ts @@ -354,84 +354,3 @@ export function urlSafeBase64(str: string): string { export function urlUnsafeBase64(str: string): string { return str.replace(/-/g, '+').replace(/_/g, '/'); } - -/** - * Convert a string to an ArrayBuffer of UTF-8 multibyte sequence - * - * @param str A string to be converted - * @returns `ArrayBuffer` with the contents of `str` - */ -export function stringToArrayBuffer(str: string): ArrayBuffer { - // contains multibyte characters - if (/[\u0080-\uffff]/.test(str)) { - const arr = new Array(); - for (let i = 0; i < str.length; i++) { - // var cc = str.charCodeAt(i); - const cc = str.codePointAt(i); - if (cc < 0x80) { - // single byte - arr.push(cc); - } else { - // UTF-8 multibyte - if (cc < 0x800) { - // two bytes - arr.push(0xc0 | (cc >> 6)); - arr.push(0x80 | (cc & 0x3f)); - } else if (cc < 0x10000) { - // three bytes - arr.push(0xe0 | ((cc >> 12) & 0x0f)); - arr.push(0x80 | ((cc >> 6) & 0x3f)); - arr.push(0x80 | (cc & 0x3f)); - } else { - // four bytes - arr.push(0xf0 | ((cc >> 18) & 0x07)); - arr.push(0x80 | ((cc >> 12) & 0x3f)); - arr.push(0x80 | ((cc >> 6) & 0x3f)); - arr.push(0x80 | (cc & 0x3f)); - i++; - } - } - } - return (new Uint8Array(arr)).buffer; - } else { - const byteArray = new Uint8Array(str.length); - for (let i = str.length; i--;) { - byteArray[i] = str.charCodeAt(i); - } - return byteArray.buffer; - } -} - -/** - * Convert an ArrayBuffer of UTF-8 multibyte sequence to a string. - * - * @param buffer `ArrayBuffer` to be converted. - * @returns A string representation of `buffer`. 
- */ -export function arrayBufferToString(buffer: ArrayBuffer): string { - const byteArray = new Uint8Array(buffer); - const arr = new Array(); - - for (let i = 0, len = byteArray.length; i < len; ++i) { - const v = byteArray[i]; - if (v < 0x80) { - // one byte - arr.push(v); - } else if (v >= 0xc0 && v < 0xe0) { - // two bytes - arr.push(((0x1f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); - } else if (v < 0xf0) { - // three bytes - arr.push( - ((0x0f & byteArray[i++]) << 12) + ((0x3f & byteArray[i++]) << 6) + - (0x3f & byteArray[i])); - } else if (v >= 0xf0 && v < 0xf7) { - // four bytes - arr.push( - ((0x07 & byteArray[i++]) << 18) + ((0x3f & byteArray[i++]) << 12) + - ((0x3f & byteArray[i++]) << 6) + (0x3f & byteArray[i])); - } - } - - return String.fromCodePoint(...arr); -} diff --git a/src/io/io_utils_test.ts b/src/io/io_utils_test.ts index f2491dcf40..71a79e7273 100644 --- a/src/io/io_utils_test.ts +++ b/src/io/io_utils_test.ts @@ -23,7 +23,7 @@ import {expectArraysEqual} from '../test_util'; import {expectArraysClose} from '../test_util'; import {encodeString} from '../util'; -import {arrayBufferToBase64String, arrayBufferToString, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength, stringToArrayBuffer} from './io_utils'; +import {arrayBufferToBase64String, base64StringToArrayBuffer, basename, concatenateArrayBuffers, concatenateTypedArrays, stringByteLength} from './io_utils'; import {WeightsManifestEntry} from './types'; describe('concatenateTypedArrays', () => { @@ -609,27 +609,3 @@ describe('basename', () => { expect(basename('foo/bar/baz//')).toEqual('baz'); }); }); - -describe('stringToArrayBuffer-arrayBufferToString', () => { - it('round-trip', () => { - const len = Math.floor((Math.random() * 200) + 10); - const arr = new Array(); - - // Generate some random unicode code points - for (let i = 0; i < len; i++) { - const cp = Math.floor((Math.random() * 500) + 32); - if (cp > 126 && cp < 161) { - 
continue; - } - arr.push(cp); - } - - // turn code points into a string - const str = String.fromCodePoint(...arr); - - const aBuff = stringToArrayBuffer(str); - const str2 = arrayBufferToString(aBuff); - - expect(str2).toEqual(str); - }); -});