Add sliding, rolling functions with tests
This commit is contained in:
parent
c1ef5fec4b
commit
fdcc5bafc6
@ -44,7 +44,7 @@ export interface RollingFlushOptions<T, R> {
|
|||||||
|
|
||||||
export interface SlidingFlushOptions<T, R> {
|
export interface SlidingFlushOptions<T, R> {
|
||||||
windowLength: number;
|
windowLength: number;
|
||||||
flushMapper?: (flushed: Array<T>) => Array<R>;
|
windowMapper?: (flushed: Array<T>) => Array<R>;
|
||||||
timeout?: number;
|
timeout?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ import test from "ava";
|
|||||||
import { expect } from "chai";
|
import { expect } from "chai";
|
||||||
import { performance } from "perf_hooks";
|
import { performance } from "perf_hooks";
|
||||||
import { Readable } from "stream";
|
import { Readable } from "stream";
|
||||||
import { FlushStrategy } from "./definitions";
|
|
||||||
import {
|
import {
|
||||||
fromArray,
|
fromArray,
|
||||||
map,
|
map,
|
||||||
@ -1404,7 +1403,36 @@ test.cb("parallel() parallel mapping", t => {
|
|||||||
source.push(null);
|
source.push(null);
|
||||||
});
|
});
|
||||||
|
|
||||||
test.cb("accumulator() buffering strategy clears buffer on condition", t => {
|
test.cb("accumulator() rolling", t => {
|
||||||
|
t.plan(3);
|
||||||
|
let chunkIndex = 0;
|
||||||
|
interface TestObject {
|
||||||
|
ts: number;
|
||||||
|
key: string;
|
||||||
|
}
|
||||||
|
const source = new Readable({ objectMode: true });
|
||||||
|
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
|
||||||
|
const secondFlush = [{ ts: 2, key: "d" }, { ts: 3, key: "e" }];
|
||||||
|
const thirdFlush = [{ ts: 4, key: "f" }];
|
||||||
|
const flushes = [firstFlush, secondFlush, thirdFlush];
|
||||||
|
|
||||||
|
source
|
||||||
|
.pipe(accumulator(2, 999, "rolling"))
|
||||||
|
.on("data", (flush: TestObject[]) => {
|
||||||
|
t.deepEqual(flush, flushes[chunkIndex]);
|
||||||
|
chunkIndex++;
|
||||||
|
})
|
||||||
|
.on("error", (e: any) => t.end)
|
||||||
|
.on("end", () => {
|
||||||
|
t.end();
|
||||||
|
});
|
||||||
|
[...firstFlush, ...secondFlush, ...thirdFlush].forEach(item => {
|
||||||
|
source.push(item);
|
||||||
|
});
|
||||||
|
source.push(null);
|
||||||
|
});
|
||||||
|
|
||||||
|
test.cb("accumulator() rolling with key", t => {
|
||||||
t.plan(2);
|
t.plan(2);
|
||||||
let chunkIndex = 0;
|
let chunkIndex = 0;
|
||||||
interface TestObject {
|
interface TestObject {
|
||||||
@ -1421,11 +1449,7 @@ test.cb("accumulator() buffering strategy clears buffer on condition", t => {
|
|||||||
const secondFlush = [{ ts: 3, key: "e" }];
|
const secondFlush = [{ ts: 3, key: "e" }];
|
||||||
|
|
||||||
source
|
source
|
||||||
.pipe(
|
.pipe(accumulator(3, 999, "rolling", "ts"))
|
||||||
accumulator(FlushStrategy.sampling, {
|
|
||||||
condition: (event: TestObject) => event.ts > 2,
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
.on("data", (flush: TestObject[]) => {
|
.on("data", (flush: TestObject[]) => {
|
||||||
if (chunkIndex === 0) {
|
if (chunkIndex === 0) {
|
||||||
chunkIndex++;
|
chunkIndex++;
|
||||||
@ -1434,93 +1458,118 @@ test.cb("accumulator() buffering strategy clears buffer on condition", t => {
|
|||||||
t.deepEqual(flush, secondFlush);
|
t.deepEqual(flush, secondFlush);
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.on("error", e => t.end)
|
.on("error", (e: any) => t.end)
|
||||||
.on("end", () => {
|
.on("end", () => {
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
source.push([...firstFlush, ...secondFlush]);
|
[...firstFlush, ...secondFlush].forEach(item => {
|
||||||
|
source.push(item);
|
||||||
|
});
|
||||||
source.push(null);
|
source.push(null);
|
||||||
});
|
});
|
||||||
|
|
||||||
test.cb("accumulator() buffering strategy clears buffer on timeout", t => {
|
test.cb("accumulator() sliding", t => {
|
||||||
t.plan(2);
|
t.plan(5);
|
||||||
let chunkIndex = 0;
|
let chunkIndex = 0;
|
||||||
interface TestObject {
|
interface TestObject {
|
||||||
ts: number;
|
ts: number;
|
||||||
key: string;
|
key: string;
|
||||||
}
|
}
|
||||||
const source = new Readable({ objectMode: true, read: () => {} });
|
const source = new Readable({ objectMode: true });
|
||||||
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
|
const input = [
|
||||||
const secondFlush = [
|
{ ts: 0, key: "a" },
|
||||||
|
{ ts: 1, key: "b" },
|
||||||
{ ts: 2, key: "c" },
|
{ ts: 2, key: "c" },
|
||||||
{ ts: 2, key: "d" },
|
{ ts: 4, key: "d" },
|
||||||
{ ts: 3, key: "e" },
|
];
|
||||||
|
const firstFlush = [{ ts: 0, key: "a" }];
|
||||||
|
const secondFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
|
||||||
|
const thirdFlush = [
|
||||||
|
{ ts: 0, key: "a" },
|
||||||
|
{ ts: 1, key: "b" },
|
||||||
|
{ ts: 2, key: "c" },
|
||||||
|
];
|
||||||
|
const fourthFlush = [
|
||||||
|
{ ts: 1, key: "b" },
|
||||||
|
{ ts: 2, key: "c" },
|
||||||
|
{ ts: 4, key: "d" },
|
||||||
|
];
|
||||||
|
|
||||||
|
const flushes = [
|
||||||
|
firstFlush,
|
||||||
|
secondFlush,
|
||||||
|
thirdFlush,
|
||||||
|
fourthFlush,
|
||||||
|
fourthFlush,
|
||||||
];
|
];
|
||||||
source
|
source
|
||||||
.pipe(
|
.pipe(accumulator(3, 999, "sliding"))
|
||||||
accumulator(FlushStrategy.sampling, {
|
|
||||||
condition: (event: TestObject) => event.ts > 3,
|
|
||||||
timeout: 1000,
|
|
||||||
}),
|
|
||||||
)
|
|
||||||
.on("data", (flush: TestObject[]) => {
|
.on("data", (flush: TestObject[]) => {
|
||||||
if (chunkIndex === 0) {
|
t.deepEqual(flush, flushes[chunkIndex]);
|
||||||
chunkIndex++;
|
chunkIndex++;
|
||||||
t.deepEqual(flush, firstFlush);
|
|
||||||
} else {
|
|
||||||
t.deepEqual(flush, secondFlush);
|
|
||||||
}
|
|
||||||
})
|
})
|
||||||
.on("error", e => t.end)
|
.on("error", (e: any) => t.end)
|
||||||
.on("end", () => {
|
.on("end", () => {
|
||||||
t.end();
|
t.end();
|
||||||
});
|
});
|
||||||
source.push(firstFlush);
|
input.forEach(item => {
|
||||||
setTimeout(() => {
|
source.push(item);
|
||||||
source.push(secondFlush);
|
});
|
||||||
source.push(null);
|
source.push(null);
|
||||||
}, 2000);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
test.cb(
|
test.cb("accumulator() sliding with key", t => {
|
||||||
"accumulator() buffering strategy clears buffer on condition or timeout",
|
t.plan(7);
|
||||||
t => {
|
let chunkIndex = 0;
|
||||||
t.plan(3);
|
interface TestObject {
|
||||||
let chunkIndex = 0;
|
ts: number;
|
||||||
interface TestObject {
|
key: string;
|
||||||
ts: number;
|
}
|
||||||
key: string;
|
const source = new Readable({ objectMode: true });
|
||||||
}
|
const input = [
|
||||||
const source = new Readable({ objectMode: true, read: () => {} });
|
{ ts: 0, key: "a" },
|
||||||
const firstFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
|
{ ts: 1, key: "b" },
|
||||||
const secondFlush = [{ ts: 2, key: "c" }, { ts: 2, key: "d" }];
|
{ ts: 2, key: "c" },
|
||||||
const thirdFlush = [{ ts: 3, key: "e" }];
|
{ ts: 3, key: "d" },
|
||||||
source
|
{ ts: 5, key: "f" },
|
||||||
.pipe(
|
{ ts: 6, key: "g" },
|
||||||
accumulator(FlushStrategy.sampling, {
|
];
|
||||||
condition: (event: TestObject) => event.ts > 2,
|
const firstFlush = [{ ts: 0, key: "a" }];
|
||||||
timeout: 1000,
|
const secondFlush = [{ ts: 0, key: "a" }, { ts: 1, key: "b" }];
|
||||||
}),
|
const thirdFlush = [
|
||||||
)
|
{ ts: 0, key: "a" },
|
||||||
.on("data", (flush: TestObject[]) => {
|
{ ts: 1, key: "b" },
|
||||||
if (chunkIndex === 0) {
|
{ ts: 2, key: "c" },
|
||||||
chunkIndex++;
|
];
|
||||||
t.deepEqual(flush, firstFlush);
|
const fourthFlush = [
|
||||||
} else if (chunkIndex === 1) {
|
{ ts: 1, key: "b" },
|
||||||
chunkIndex++;
|
{ ts: 2, key: "c" },
|
||||||
t.deepEqual(flush, secondFlush);
|
{ ts: 3, key: "d" },
|
||||||
} else {
|
];
|
||||||
t.deepEqual(flush, thirdFlush);
|
const fifthFlush = [{ ts: 3, key: "d" }, { ts: 5, key: "f" }];
|
||||||
}
|
const sixthFlush = [{ ts: 5, key: "f" }, { ts: 6, key: "g" }];
|
||||||
})
|
|
||||||
.on("error", e => t.end)
|
const flushes = [
|
||||||
.on("end", () => {
|
firstFlush,
|
||||||
t.end();
|
secondFlush,
|
||||||
});
|
thirdFlush,
|
||||||
source.push(firstFlush);
|
fourthFlush,
|
||||||
setTimeout(() => {
|
fifthFlush,
|
||||||
source.push([...secondFlush, ...thirdFlush]);
|
sixthFlush,
|
||||||
source.push(null);
|
sixthFlush,
|
||||||
}, 2000);
|
];
|
||||||
},
|
source
|
||||||
);
|
.pipe(accumulator(3, 999, "sliding", "ts"))
|
||||||
|
.on("data", (flush: TestObject[]) => {
|
||||||
|
t.deepEqual(flush, flushes[chunkIndex]);
|
||||||
|
chunkIndex++;
|
||||||
|
})
|
||||||
|
.on("error", (e: any) => t.end)
|
||||||
|
.on("end", () => {
|
||||||
|
t.end();
|
||||||
|
});
|
||||||
|
input.forEach(item => {
|
||||||
|
source.push(item);
|
||||||
|
});
|
||||||
|
source.push(null);
|
||||||
|
});
|
||||||
|
@ -2,12 +2,7 @@ import { Transform, Readable, Writable, Duplex } from "stream";
|
|||||||
import { performance } from "perf_hooks";
|
import { performance } from "perf_hooks";
|
||||||
import { ChildProcess } from "child_process";
|
import { ChildProcess } from "child_process";
|
||||||
import { StringDecoder } from "string_decoder";
|
import { StringDecoder } from "string_decoder";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
FlushStrategy,
|
|
||||||
AccumulatorOptions,
|
|
||||||
SamplingFlushOptions,
|
|
||||||
SamplingFlushResult,
|
|
||||||
TransformOptions,
|
TransformOptions,
|
||||||
ThroughOptions,
|
ThroughOptions,
|
||||||
WithEncoding,
|
WithEncoding,
|
||||||
@ -606,75 +601,97 @@ export function parallelMap<T, R>(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
function samplingFlush<T, R>(
|
function _accumulator<T>(
|
||||||
event: T,
|
accumulateBy: (data: T, buffer: T[], stream: Transform) => void,
|
||||||
options: SamplingFlushOptions<T, R>,
|
|
||||||
buffer: Array<T>,
|
|
||||||
): SamplingFlushResult<T> {
|
|
||||||
let flush = null;
|
|
||||||
if (options.condition(event, buffer)) {
|
|
||||||
flush = buffer.slice(0);
|
|
||||||
buffer.length = 0;
|
|
||||||
}
|
|
||||||
buffer.push(event);
|
|
||||||
return { flushed: true, flush };
|
|
||||||
}
|
|
||||||
|
|
||||||
function executeSamplingStrategy<T, R>(
|
|
||||||
events: T[],
|
|
||||||
options: SamplingFlushOptions<T, R>,
|
|
||||||
buffer: Array<T>,
|
|
||||||
stream: Transform,
|
|
||||||
): void {
|
|
||||||
events.forEach(event => {
|
|
||||||
const sample = samplingFlush(event, options, buffer);
|
|
||||||
if (sample.flushed && sample.flush && options.flushMapper) {
|
|
||||||
stream.push(options.flushMapper(sample.flush));
|
|
||||||
} else if (sample.flushed && sample.flush) {
|
|
||||||
stream.push(sample.flush);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
export function accumulator<T, R, S extends FlushStrategy>(
|
|
||||||
flushStrategy: S,
|
|
||||||
options: AccumulatorOptions<T, R, S>,
|
|
||||||
) {
|
) {
|
||||||
const buffer: Array<T> = [];
|
const buffer: T[] = [];
|
||||||
let handle: NodeJS.Timer | null = null;
|
return new Transform({
|
||||||
if (options.timeout) {
|
|
||||||
handle = setInterval(() => {
|
|
||||||
if (buffer.length > 0) {
|
|
||||||
transform.push(buffer);
|
|
||||||
buffer.length = 0;
|
|
||||||
}
|
|
||||||
}, options.timeout);
|
|
||||||
}
|
|
||||||
const transform = new Transform({
|
|
||||||
objectMode: true,
|
objectMode: true,
|
||||||
async transform(data: T[] | T, encoding, callback) {
|
async transform(data: any, encoding, callback) {
|
||||||
switch (flushStrategy) {
|
accumulateBy(data, buffer, this);
|
||||||
case FlushStrategy.sampling: {
|
callback();
|
||||||
if (!Array.isArray(data)) data = [data];
|
|
||||||
executeSamplingStrategy(
|
|
||||||
data,
|
|
||||||
options as SamplingFlushOptions<T, R>,
|
|
||||||
buffer,
|
|
||||||
this,
|
|
||||||
);
|
|
||||||
callback();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case FlushStrategy.sliding: {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
flush(callback) {
|
flush(callback) {
|
||||||
handle && clearInterval(handle);
|
|
||||||
this.push(buffer);
|
this.push(buffer);
|
||||||
callback();
|
callback();
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
return transform;
|
}
|
||||||
|
|
||||||
|
function _slidingBy<T>(
|
||||||
|
windowLength: number,
|
||||||
|
rate: number,
|
||||||
|
key?: string,
|
||||||
|
): (event: T, buffer: T[], stream: Transform) => void {
|
||||||
|
return (event: T, buffer: T[], stream: Transform) => {
|
||||||
|
if (key) {
|
||||||
|
let index = 0;
|
||||||
|
while (
|
||||||
|
buffer.length > 0 &&
|
||||||
|
buffer[index][key] + windowLength <= event[key]
|
||||||
|
) {
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
buffer.splice(0, index);
|
||||||
|
} else if (buffer.length === windowLength) {
|
||||||
|
buffer.shift();
|
||||||
|
}
|
||||||
|
buffer.push(event);
|
||||||
|
stream.push(buffer);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function _rollingBy<T>(
|
||||||
|
windowLength: number,
|
||||||
|
rate: number,
|
||||||
|
key?: string,
|
||||||
|
): (event: T, buffer: T[], stream: Transform) => void {
|
||||||
|
return (event: T, buffer: T[], stream: Transform) => {
|
||||||
|
if (key) {
|
||||||
|
if (
|
||||||
|
buffer.length > 0 &&
|
||||||
|
buffer[0][key] + windowLength <= event[key]
|
||||||
|
) {
|
||||||
|
stream.push(buffer.slice(0));
|
||||||
|
buffer.length = 0;
|
||||||
|
}
|
||||||
|
} else if (buffer.length === windowLength) {
|
||||||
|
stream.push(buffer.slice(0));
|
||||||
|
buffer.length = 0;
|
||||||
|
}
|
||||||
|
buffer.push(event);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function accumulator(
|
||||||
|
batchSize: number,
|
||||||
|
batchRate: number,
|
||||||
|
flushStrategy: "sliding" | "rolling",
|
||||||
|
keyBy?: string,
|
||||||
|
): Transform {
|
||||||
|
if (flushStrategy === "sliding") {
|
||||||
|
return sliding(batchSize, batchRate, keyBy);
|
||||||
|
} else if (flushStrategy === "rolling") {
|
||||||
|
return rolling(batchSize, batchRate, keyBy);
|
||||||
|
} else {
|
||||||
|
return batch(batchSize, batchRate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export function sliding(
|
||||||
|
windowLength: number,
|
||||||
|
rate: number,
|
||||||
|
key?: string,
|
||||||
|
): Transform {
|
||||||
|
const slidingByFn = _slidingBy(windowLength, rate, key);
|
||||||
|
return _accumulator(slidingByFn);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function rolling(
|
||||||
|
windowLength: number,
|
||||||
|
rate: number,
|
||||||
|
key?: string,
|
||||||
|
): Transform {
|
||||||
|
const rollingByFn = _rollingBy(windowLength, rate, key);
|
||||||
|
return _accumulator(rollingByFn);
|
||||||
}
|
}
|
||||||
|
@ -3,8 +3,6 @@ import { ChildProcess } from "child_process";
|
|||||||
import * as baseFunctions from "./functions";
|
import * as baseFunctions from "./functions";
|
||||||
|
|
||||||
import {
|
import {
|
||||||
AccumulatorOptions,
|
|
||||||
FlushStrategy,
|
|
||||||
ThroughOptions,
|
ThroughOptions,
|
||||||
TransformOptions,
|
TransformOptions,
|
||||||
WithEncoding,
|
WithEncoding,
|
||||||
@ -248,9 +246,16 @@ export function parallelMap<T, R>(
|
|||||||
return baseFunctions.parallelMap(mapper, parallel, sleepTime);
|
return baseFunctions.parallelMap(mapper, parallel, sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function accumulator<T, R, S extends FlushStrategy>(
|
export function accumulator(
|
||||||
flushStrategy: S,
|
batchSize: number,
|
||||||
options: AccumulatorOptions<T, R, S>,
|
batchRate: number,
|
||||||
|
flushStrategy: "sliding" | "rolling",
|
||||||
|
keyBy?: string,
|
||||||
) {
|
) {
|
||||||
return baseFunctions.accumulator(flushStrategy, options);
|
return baseFunctions.accumulator(
|
||||||
|
batchSize,
|
||||||
|
batchRate,
|
||||||
|
flushStrategy,
|
||||||
|
keyBy,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user