Add sliding, rolling functions with tests

This commit is contained in:
Jerry Kurian 2019-08-09 17:13:48 -04:00
parent c1ef5fec4b
commit fdcc5bafc6
4 changed files with 222 additions and 151 deletions

View File

@ -44,7 +44,7 @@ export interface RollingFlushOptions<T, R> {
export interface SlidingFlushOptions<T, R> { export interface SlidingFlushOptions<T, R> {
windowLength: number; windowLength: number;
flushMapper?: (flushed: Array<T>) => Array<R>; windowMapper?: (flushed: Array<T>) => Array<R>;
timeout?: number; timeout?: number;
} }

View File

@ -3,7 +3,6 @@ import test from "ava";
import { expect } from "chai"; import { expect } from "chai";
import { performance } from "perf_hooks"; import { performance } from "perf_hooks";
import { Readable } from "stream"; import { Readable } from "stream";
import { FlushStrategy } from "./definitions";
import { import {
fromArray, fromArray,
map, map,
@ -1404,7 +1403,36 @@ test.cb("parallel() parallel mapping", t => {
source.push(null); source.push(null);
}); });
test.cb("accumulator() buffering strategy clears buffer on condition", t => { test.cb("accumulator() rolling", t => {
t.plan(3);
let chunkIndex = 0;
interface TestObject {
ts: number;
key: string;
}
const source = new Readable({ objectMode: true });
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
const secondFlush = [{ ts: 2, key: "d" }, { ts: 3, key: "e" }];
const thirdFlush = [{ ts: 4, key: "f" }];
const flushes = [firstFlush, secondFlush, thirdFlush];
source
.pipe(accumulator(2, 999, "rolling"))
.on("data", (flush: TestObject[]) => {
t.deepEqual(flush, flushes[chunkIndex]);
chunkIndex++;
})
.on("error", (e: any) => t.end)
.on("end", () => {
t.end();
});
[...firstFlush, ...secondFlush, ...thirdFlush].forEach(item => {
source.push(item);
});
source.push(null);
});
test.cb("accumulator() rolling with key", t => {
t.plan(2); t.plan(2);
let chunkIndex = 0; let chunkIndex = 0;
interface TestObject { interface TestObject {
@ -1421,11 +1449,7 @@ test.cb("accumulator() buffering strategy clears buffer on condition", t => {
const secondFlush = [{ ts: 3, key: "e" }]; const secondFlush = [{ ts: 3, key: "e" }];
source source
.pipe( .pipe(accumulator(3, 999, "rolling", "ts"))
accumulator(FlushStrategy.sampling, {
condition: (event: TestObject) => event.ts > 2,
}),
)
.on("data", (flush: TestObject[]) => { .on("data", (flush: TestObject[]) => {
if (chunkIndex === 0) { if (chunkIndex === 0) {
chunkIndex++; chunkIndex++;
@ -1434,93 +1458,118 @@ test.cb("accumulator() buffering strategy clears buffer on condition", t => {
t.deepEqual(flush, secondFlush); t.deepEqual(flush, secondFlush);
} }
}) })
.on("error", e => t.end) .on("error", (e: any) => t.end)
.on("end", () => { .on("end", () => {
t.end(); t.end();
}); });
source.push([...firstFlush, ...secondFlush]); [...firstFlush, ...secondFlush].forEach(item => {
source.push(item);
});
source.push(null); source.push(null);
}); });
test.cb("accumulator() buffering strategy clears buffer on timeout", t => { test.cb("accumulator() sliding", t => {
t.plan(2); t.plan(5);
let chunkIndex = 0; let chunkIndex = 0;
interface TestObject { interface TestObject {
ts: number; ts: number;
key: string; key: string;
} }
const source = new Readable({ objectMode: true, read: () => {} }); const source = new Readable({ objectMode: true });
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }]; const input = [
const secondFlush = [ { ts: 0, key: "a" },
{ ts: 1, key: "b" },
{ ts: 2, key: "c" }, { ts: 2, key: "c" },
{ ts: 2, key: "d" }, { ts: 4, key: "d" },
{ ts: 3, key: "e" }, ];
const firstFlush = [{ ts: 0, key: "a" }];
const secondFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
const thirdFlush = [
{ ts: 0, key: "a" },
{ ts: 1, key: "b" },
{ ts: 2, key: "c" },
];
const fourthFlush = [
{ ts: 1, key: "b" },
{ ts: 2, key: "c" },
{ ts: 4, key: "d" },
];
const flushes = [
firstFlush,
secondFlush,
thirdFlush,
fourthFlush,
fourthFlush,
]; ];
source source
.pipe( .pipe(accumulator(3, 999, "sliding"))
accumulator(FlushStrategy.sampling, {
condition: (event: TestObject) => event.ts > 3,
timeout: 1000,
}),
)
.on("data", (flush: TestObject[]) => { .on("data", (flush: TestObject[]) => {
if (chunkIndex === 0) { t.deepEqual(flush, flushes[chunkIndex]);
chunkIndex++; chunkIndex++;
t.deepEqual(flush, firstFlush);
} else {
t.deepEqual(flush, secondFlush);
}
}) })
.on("error", e => t.end) .on("error", (e: any) => t.end)
.on("end", () => { .on("end", () => {
t.end(); t.end();
}); });
source.push(firstFlush); input.forEach(item => {
setTimeout(() => { source.push(item);
source.push(secondFlush); });
source.push(null); source.push(null);
}, 2000);
}); });
test.cb( test.cb("accumulator() sliding with key", t => {
"accumulator() buffering strategy clears buffer on condition or timeout", t.plan(7);
t => {
t.plan(3);
let chunkIndex = 0; let chunkIndex = 0;
interface TestObject { interface TestObject {
ts: number; ts: number;
key: string; key: string;
} }
const source = new Readable({ objectMode: true, read: () => {} }); const source = new Readable({ objectMode: true });
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }]; const input = [
const secondFlush = [{ ts: 2, key: "c" }, { ts: 2, key: "d" }]; { ts: 0, key: "a" },
const thirdFlush = [{ ts: 3, key: "e" }]; { ts: 1, key: "b" },
{ ts: 2, key: "c" },
{ ts: 3, key: "d" },
{ ts: 5, key: "f" },
{ ts: 6, key: "g" },
];
const firstFlush = [{ ts: 0, key: "a" }];
const secondFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
const thirdFlush = [
{ ts: 0, key: "a" },
{ ts: 1, key: "b" },
{ ts: 2, key: "c" },
];
const fourthFlush = [
{ ts: 1, key: "b" },
{ ts: 2, key: "c" },
{ ts: 3, key: "d" },
];
const fifthFlush = [{ ts: 3, key: "d" }, { ts: 5, key: "f" }];
const sixthFlush = [{ ts: 5, key: "f" }, { ts: 6, key: "g" }];
const flushes = [
firstFlush,
secondFlush,
thirdFlush,
fourthFlush,
fifthFlush,
sixthFlush,
sixthFlush,
];
source source
.pipe( .pipe(accumulator(3, 999, "sliding", "ts"))
accumulator(FlushStrategy.sampling, {
condition: (event: TestObject) => event.ts > 2,
timeout: 1000,
}),
)
.on("data", (flush: TestObject[]) => { .on("data", (flush: TestObject[]) => {
if (chunkIndex === 0) { t.deepEqual(flush, flushes[chunkIndex]);
chunkIndex++; chunkIndex++;
t.deepEqual(flush, firstFlush);
} else if (chunkIndex === 1) {
chunkIndex++;
t.deepEqual(flush, secondFlush);
} else {
t.deepEqual(flush, thirdFlush);
}
}) })
.on("error", e => t.end) .on("error", (e: any) => t.end)
.on("end", () => { .on("end", () => {
t.end(); t.end();
}); });
source.push(firstFlush); input.forEach(item => {
setTimeout(() => { source.push(item);
source.push([...secondFlush, ...thirdFlush]); });
source.push(null); source.push(null);
}, 2000); });
},
);

View File

@ -2,12 +2,7 @@ import { Transform, Readable, Writable, Duplex } from "stream";
import { performance } from "perf_hooks"; import { performance } from "perf_hooks";
import { ChildProcess } from "child_process"; import { ChildProcess } from "child_process";
import { StringDecoder } from "string_decoder"; import { StringDecoder } from "string_decoder";
import { import {
FlushStrategy,
AccumulatorOptions,
SamplingFlushOptions,
SamplingFlushResult,
TransformOptions, TransformOptions,
ThroughOptions, ThroughOptions,
WithEncoding, WithEncoding,
@ -606,75 +601,97 @@ export function parallelMap<T, R>(
}); });
} }
function samplingFlush<T, R>( function _accumulator<T>(
event: T, accumulateBy: (data: T, buffer: T[], stream: Transform) => void,
options: SamplingFlushOptions<T, R>,
buffer: Array<T>,
): SamplingFlushResult<T> {
let flush = null;
if (options.condition(event, buffer)) {
flush = buffer.slice(0);
buffer.length = 0;
}
buffer.push(event);
return { flushed: true, flush };
}
function executeSamplingStrategy<T, R>(
events: T[],
options: SamplingFlushOptions<T, R>,
buffer: Array<T>,
stream: Transform,
): void {
events.forEach(event => {
const sample = samplingFlush(event, options, buffer);
if (sample.flushed && sample.flush && options.flushMapper) {
stream.push(options.flushMapper(sample.flush));
} else if (sample.flushed && sample.flush) {
stream.push(sample.flush);
}
});
}
export function accumulator<T, R, S extends FlushStrategy>(
flushStrategy: S,
options: AccumulatorOptions<T, R, S>,
) { ) {
const buffer: Array<T> = []; const buffer: T[] = [];
let handle: NodeJS.Timer | null = null; return new Transform({
if (options.timeout) {
handle = setInterval(() => {
if (buffer.length > 0) {
transform.push(buffer);
buffer.length = 0;
}
}, options.timeout);
}
const transform = new Transform({
objectMode: true, objectMode: true,
async transform(data: T[] | T, encoding, callback) { async transform(data: any, encoding, callback) {
switch (flushStrategy) { accumulateBy(data, buffer, this);
case FlushStrategy.sampling: {
if (!Array.isArray(data)) data = [data];
executeSamplingStrategy(
data,
options as SamplingFlushOptions<T, R>,
buffer,
this,
);
callback(); callback();
break;
}
case FlushStrategy.sliding: {
break;
}
}
}, },
flush(callback) { flush(callback) {
handle && clearInterval(handle);
this.push(buffer); this.push(buffer);
callback(); callback();
}, },
}); });
return transform; }
function _slidingBy<T>(
windowLength: number,
rate: number,
key?: string,
): (event: T, buffer: T[], stream: Transform) => void {
return (event: T, buffer: T[], stream: Transform) => {
if (key) {
let index = 0;
while (
buffer.length > 0 &&
buffer[index][key] + windowLength <= event[key]
) {
index++;
}
buffer.splice(0, index);
} else if (buffer.length === windowLength) {
buffer.shift();
}
buffer.push(event);
stream.push(buffer);
};
}
function _rollingBy<T>(
windowLength: number,
rate: number,
key?: string,
): (event: T, buffer: T[], stream: Transform) => void {
return (event: T, buffer: T[], stream: Transform) => {
if (key) {
if (
buffer.length > 0 &&
buffer[0][key] + windowLength <= event[key]
) {
stream.push(buffer.slice(0));
buffer.length = 0;
}
} else if (buffer.length === windowLength) {
stream.push(buffer.slice(0));
buffer.length = 0;
}
buffer.push(event);
};
}
export function accumulator(
batchSize: number,
batchRate: number,
flushStrategy: "sliding" | "rolling",
keyBy?: string,
): Transform {
if (flushStrategy === "sliding") {
return sliding(batchSize, batchRate, keyBy);
} else if (flushStrategy === "rolling") {
return rolling(batchSize, batchRate, keyBy);
} else {
return batch(batchSize, batchRate);
}
}
export function sliding(
windowLength: number,
rate: number,
key?: string,
): Transform {
const slidingByFn = _slidingBy(windowLength, rate, key);
return _accumulator(slidingByFn);
}
export function rolling(
windowLength: number,
rate: number,
key?: string,
): Transform {
const rollingByFn = _rollingBy(windowLength, rate, key);
return _accumulator(rollingByFn);
} }

View File

@ -3,8 +3,6 @@ import { ChildProcess } from "child_process";
import * as baseFunctions from "./functions"; import * as baseFunctions from "./functions";
import { import {
AccumulatorOptions,
FlushStrategy,
ThroughOptions, ThroughOptions,
TransformOptions, TransformOptions,
WithEncoding, WithEncoding,
@ -248,9 +246,16 @@ export function parallelMap<T, R>(
return baseFunctions.parallelMap(mapper, parallel, sleepTime); return baseFunctions.parallelMap(mapper, parallel, sleepTime);
} }
export function accumulator<T, R, S extends FlushStrategy>( export function accumulator(
flushStrategy: S, batchSize: number,
options: AccumulatorOptions<T, R, S>, batchRate: number,
flushStrategy: "sliding" | "rolling",
keyBy?: string,
) { ) {
return baseFunctions.accumulator(flushStrategy, options); return baseFunctions.accumulator(
batchSize,
batchRate,
flushStrategy,
keyBy,
);
} }