// strom/src/index.ts

import { Transform, Readable, Writable, Duplex } from "stream";
import { performance } from "perf_hooks";
import { ChildProcess } from "child_process";
import { StringDecoder } from "string_decoder";
export interface ThroughOptions {
objectMode?: boolean;
}
export interface TransformOptions {
readableObjectMode?: boolean;
writableObjectMode?: boolean;
}
export interface WithEncoding {
encoding: string;
}
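// Resolve after `time` milliseconds; non-positive delays resolve immediately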
async function sleep(time: number) {
return time > 0 ? new Promise(resolve => setTimeout(resolve, time)) : null;
}
/**
 * Convert an array into a Readable stream of its elements
* @param array Array of elements to stream
*/
export function fromArray(array: any[]): NodeJS.ReadableStream {
let cursor = 0;
return new Readable({
objectMode: true,
read() {
if (cursor < array.length) {
this.push(array[cursor]);
cursor++;
} else {
this.push(null);
}
},
});
}
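// A minimal usage sketch (illustrative, not part of the library):
//
//     fromArray([1, 2, 3]).on("data", console.log);
//     // prints 1, 2, 3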
/**
* Return a ReadWrite stream that maps streamed chunks
* @param mapper Mapper function, mapping each (chunk, encoding) to a new chunk (or a promise of such)
* @param options
* @param options.readableObjectMode Whether this stream should behave as a readable stream of objects
* @param options.writableObjectMode Whether this stream should behave as a writable stream of objects
*/
export function map<T, R>(
mapper: (chunk: T, encoding: string) => R,
options: TransformOptions = {
readableObjectMode: true,
writableObjectMode: true,
},
): NodeJS.ReadWriteStream {
return new Transform({
...options,
async transform(chunk: T, encoding, callback) {
let isPromise = false;
try {
const mapped = mapper(chunk, encoding);
isPromise = mapped instanceof Promise;
callback(undefined, await mapped);
} catch (err) {
if (isPromise) {
// Calling the callback asynchronously with an error wouldn't emit the error, so emit directly
this.emit("error", err);
callback();
} else {
callback(err);
}
}
},
});
}
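// Usage sketch: uppercase a stream of strings. Both object modes default to
// true, so plain values flow through unchanged:
//
//     fromArray(["a", "b"])
//         .pipe(map((s: string) => s.toUpperCase()))
//         .on("data", console.log);
//     // prints "A", "B"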
/**
* Return a ReadWrite stream that flat maps streamed chunks
* @param mapper Mapper function, mapping each (chunk, encoding) to an array of new chunks (or a promise of such)
* @param options
* @param options.readableObjectMode Whether this stream should behave as a readable stream of objects
* @param options.writableObjectMode Whether this stream should behave as a writable stream of objects
*/
export function flatMap<T, R>(
mapper:
| ((chunk: T, encoding: string) => R[])
| ((chunk: T, encoding: string) => Promise<R[]>),
options: TransformOptions = {
readableObjectMode: true,
writableObjectMode: true,
},
): NodeJS.ReadWriteStream {
return new Transform({
...options,
async transform(chunk: T, encoding, callback) {
let isPromise = false;
try {
const mapped = mapper(chunk, encoding);
isPromise = mapped instanceof Promise;
(await mapped).forEach(c => this.push(c));
callback();
} catch (err) {
if (isPromise) {
// Calling the callback asynchronously with an error wouldn't emit the error, so emit directly
this.emit("error", err);
callback();
} else {
callback(err);
}
}
},
});
}
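// Usage sketch: expand each chunk into several, here duplicating every value:
//
//     fromArray([1, 2])
//         .pipe(flatMap((n: number) => [n, n]))
//         .on("data", console.log);
//     // prints 1, 1, 2, 2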
/**
 * Return a ReadWrite stream that filters out streamed chunks for which the predicate does not hold
 * @param predicate Predicate with which to filter streamed chunks
 * @param options
 * @param options.objectMode Whether this stream should behave as a stream of objects
 */
export function filter<T>(
predicate:
| ((chunk: T, encoding: string) => boolean)
| ((chunk: T, encoding: string) => Promise<boolean>),
options: ThroughOptions = {
objectMode: true,
},
) {
return new Transform({
readableObjectMode: options.objectMode,
writableObjectMode: options.objectMode,
async transform(chunk: T, encoding, callback) {
let isPromise = false;
try {
const result = predicate(chunk, encoding);
isPromise = result instanceof Promise;
                if (await result) {
callback(undefined, chunk);
} else {
callback();
}
} catch (err) {
if (isPromise) {
// Calling the callback asynchronously with an error wouldn't emit the error, so emit directly
this.emit("error", err);
callback();
} else {
callback(err);
}
}
},
});
}
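// Usage sketch: keep only even numbers (the predicate may also return a promise):
//
//     fromArray([1, 2, 3, 4])
//         .pipe(filter((n: number) => n % 2 === 0))
//         .on("data", console.log);
//     // prints 2, 4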
/**
 * Return a ReadWrite stream that reduces streamed chunks down to a single value and yields that
 * value once the stream has ended
 * @param iteratee Reducer function to apply on each streamed chunk
 * @param initialValue Initial value
 * @param options
 * @param options.readableObjectMode Whether this stream should behave as a readable stream of objects
 * @param options.writableObjectMode Whether this stream should behave as a writable stream of objects
 */
export function reduce<T, R>(
iteratee:
| ((previousValue: R, chunk: T, encoding: string) => R)
| ((previousValue: R, chunk: T, encoding: string) => Promise<R>),
initialValue: R,
options: TransformOptions = {
readableObjectMode: true,
writableObjectMode: true,
},
) {
let value = initialValue;
return new Transform({
readableObjectMode: options.readableObjectMode,
writableObjectMode: options.writableObjectMode,
async transform(chunk: T, encoding, callback) {
let isPromise = false;
try {
const result = iteratee(value, chunk, encoding);
isPromise = result instanceof Promise;
value = await result;
callback();
} catch (err) {
if (isPromise) {
// Calling the callback asynchronously with an error wouldn't emit the error, so emit directly
this.emit("error", err);
callback();
} else {
callback(err);
}
}
},
flush(callback) {
// Best effort attempt at yielding the final value (will throw if e.g. yielding an object and
// downstream doesn't expect objects)
try {
callback(undefined, value);
} catch (err) {
try {
this.emit("error", err);
} catch {
// Best effort was made
}
}
},
});
}
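// Usage sketch: sum a stream of numbers; the single reduced value is emitted
// only when the input ends:
//
//     fromArray([1, 2, 3])
//         .pipe(reduce((sum: number, n: number) => sum + n, 0))
//         .on("data", console.log);
//     // prints 6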
/**
* Return a ReadWrite stream that splits streamed chunks using the given separator
* @param separator Separator to split by, defaulting to "\n"
* @param options
* @param options.encoding Encoding written chunks are assumed to use
*/
export function split(
separator: string | RegExp = "\n",
options: WithEncoding = { encoding: "utf8" },
): NodeJS.ReadWriteStream {
let buffered = "";
const decoder = new StringDecoder(options.encoding);
return new Transform({
readableObjectMode: true,
transform(chunk: Buffer, encoding, callback) {
const asString = decoder.write(chunk);
const splitted = asString.split(separator);
if (splitted.length > 1) {
splitted[0] = buffered.concat(splitted[0]);
buffered = "";
}
buffered += splitted[splitted.length - 1];
splitted.slice(0, -1).forEach((part: string) => this.push(part));
callback();
},
flush(callback) {
callback(undefined, buffered + decoder.end());
},
});
}
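// Usage sketch: line-split text arriving in arbitrary chunks; partial lines
// are buffered across chunk boundaries and reassembled:
//
//     fromArray(["line1\nli", "ne2\nline3"])
//         .pipe(split("\n"))
//         .on("data", console.log);
//     // prints "line1", "line2", "line3"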
/**
* Return a ReadWrite stream that joins streamed chunks using the given separator
* @param separator Separator to join with
* @param options
* @param options.encoding Encoding written chunks are assumed to use
*/
export function join(
separator: string,
options: WithEncoding = { encoding: "utf8" },
): NodeJS.ReadWriteStream {
let isFirstChunk = true;
const decoder = new StringDecoder(options.encoding);
return new Transform({
readableObjectMode: true,
async transform(chunk: Buffer, encoding, callback) {
const asString = decoder.write(chunk);
// Take care not to break up multi-byte characters spanning multiple chunks
if (asString !== "" || chunk.length === 0) {
if (!isFirstChunk) {
this.push(separator);
}
this.push(asString);
isFirstChunk = false;
}
callback();
},
});
}
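// Usage sketch: interpose a separator between chunks:
//
//     fromArray(["a", "b", "c"])
//         .pipe(join(","))
//         .pipe(process.stdout);
//     // writes "a,b,c"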
/**
* Return a ReadWrite stream that replaces occurrences of the given string or regular expression in
* the streamed chunks with the specified replacement string
* @param searchValue Search string to use
* @param replaceValue Replacement string to use
* @param options
* @param options.encoding Encoding written chunks are assumed to use
*/
export function replace(
searchValue: string | RegExp,
replaceValue: string,
options: WithEncoding = { encoding: "utf8" },
): NodeJS.ReadWriteStream {
const decoder = new StringDecoder(options.encoding);
return new Transform({
readableObjectMode: true,
transform(chunk: Buffer, encoding, callback) {
const asString = decoder.write(chunk);
// Take care not to break up multi-byte characters spanning multiple chunks
if (asString !== "" || chunk.length === 0) {
callback(
undefined,
asString.replace(searchValue, replaceValue),
);
} else {
callback();
}
},
});
}
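// Usage sketch: rewrite text flowing through the stream:
//
//     fromArray(["hello world"])
//         .pipe(replace("world", "there"))
//         .pipe(process.stdout);
//     // writes "hello there"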
/**
* Return a ReadWrite stream that parses the streamed chunks as JSON. Each streamed chunk
* must be a fully defined JSON string.
*/
export function parse(): NodeJS.ReadWriteStream {
const decoder = new StringDecoder("utf8"); // JSON must be utf8
return new Transform({
readableObjectMode: true,
writableObjectMode: true,
        transform(chunk: Buffer, encoding, callback) {
            try {
                const asString = decoder.write(chunk);
                // JSON.parse throws synchronously on malformed input, so the
                // catch below forwards parsing errors to the callback
                callback(undefined, JSON.parse(asString));
            } catch (err) {
                callback(err);
            }
},
});
}
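// Usage sketch: parse utf8 JSON chunks into objects (each chunk must be a
// complete JSON document):
//
//     fromArray([Buffer.from('{"a":1}')])
//         .pipe(parse())
//         .on("data", console.log);
//     // prints { a: 1 }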
type JsonPrimitive = string | number | boolean | null;
type JsonValue = JsonPrimitive | JsonValue[] | { [key: string]: JsonValue };
interface JsonStringifyOptions {
    pretty: boolean;
}
/**
 * Return a ReadWrite stream that stringifies the streamed chunks to JSON
 * @param options
 * @param options.pretty Whether to pretty-print the output with two-space indentation
 */
export function stringify(
    options: JsonStringifyOptions = { pretty: false },
): NodeJS.ReadWriteStream {
return new Transform({
readableObjectMode: true,
writableObjectMode: true,
transform(chunk: JsonValue, encoding, callback) {
callback(
undefined,
options.pretty
? JSON.stringify(chunk, null, 2)
: JSON.stringify(chunk),
);
},
});
}
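// Usage sketch: serialize a stream of objects into compact JSON strings:
//
//     fromArray([{ a: 1 }, { b: 2 }])
//         .pipe(stringify())
//         .on("data", console.log);
//     // prints '{"a":1}', '{"b":2}'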
/**
* Return a ReadWrite stream that collects streamed chunks into an array or buffer
* @param options
* @param options.objectMode Whether this stream should behave as a stream of objects
*/
export function collect(
options: ThroughOptions = { objectMode: false },
): NodeJS.ReadWriteStream {
const collected: any[] = [];
return new Transform({
readableObjectMode: options.objectMode,
writableObjectMode: options.objectMode,
transform(data, encoding, callback) {
collected.push(data);
callback();
},
flush(callback) {
this.push(
options.objectMode ? collected : Buffer.concat(collected),
);
callback();
},
});
}
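// Usage sketch: gather an object-mode stream into a single array, emitted on end:
//
//     fromArray([1, 2, 3])
//         .pipe(collect({ objectMode: true }))
//         .on("data", console.log);
//     // prints [ 1, 2, 3 ]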
/**
 * Return a Readable stream that concatenates the given readable streams, drained in order
* @param streams Readable streams to concatenate
*/
export function concat(
...streams: NodeJS.ReadableStream[]
): NodeJS.ReadableStream {
let isStarted = false;
let currentStreamIndex = 0;
const startCurrentStream = () => {
if (currentStreamIndex >= streams.length) {
wrapper.push(null);
} else {
streams[currentStreamIndex]
.on("data", chunk => {
if (!wrapper.push(chunk)) {
streams[currentStreamIndex].pause();
}
})
.on("error", err => wrapper.emit("error", err))
.on("end", () => {
currentStreamIndex++;
startCurrentStream();
});
}
};
const wrapper = new Readable({
objectMode: true,
read() {
if (!isStarted) {
isStarted = true;
startCurrentStream();
}
if (currentStreamIndex < streams.length) {
streams[currentStreamIndex].resume();
}
},
});
return wrapper;
}
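// Usage sketch: drain streams one after another, preserving source order:
//
//     concat(fromArray([1, 2]), fromArray([3]))
//         .on("data", console.log);
//     // prints 1, 2, 3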
/**
 * Return a Readable stream that merges the given readable streams, yielding chunks in arrival order
 * @param streams Readable streams to merge
 */
*/
export function merge(
...streams: NodeJS.ReadableStream[]
): NodeJS.ReadableStream {
let isStarted = false;
let streamEndedCount = 0;
return new Readable({
objectMode: true,
read() {
if (streamEndedCount >= streams.length) {
this.push(null);
} else if (!isStarted) {
isStarted = true;
streams.forEach(stream =>
stream
.on("data", chunk => {
if (!this.push(chunk)) {
streams.forEach(s => s.pause());
}
})
.on("error", err => this.emit("error", err))
.on("end", () => {
streamEndedCount++;
if (streamEndedCount === streams.length) {
this.push(null);
}
}),
);
} else {
streams.forEach(s => s.resume());
}
},
});
}
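// Usage sketch: combine two sources into one stream, emitting chunks in the
// order they arrive (interleaving is timing-dependent):
//
//     merge(fromArray([1, 3]), fromArray([2, 4]))
//         .on("data", console.log);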
/**
 * Return a Duplex stream built from a writable stream and a readable stream, where writing to the
 * writable side is assumed to eventually cause chunks to be emitted on the readable side
 * @param writable Writable stream forming the writable side of the duplex stream
 * @param readable Readable stream forming the readable side of the duplex stream
*/
export function duplex(writable: Writable, readable: Readable) {
const wrapper = new Duplex({
readableObjectMode: true,
writableObjectMode: true,
read() {
readable.resume();
},
write(chunk, encoding, callback) {
return writable.write(chunk, encoding, callback);
},
final(callback) {
writable.end(callback);
},
});
readable
.on("data", chunk => {
if (!wrapper.push(chunk)) {
readable.pause();
}
})
.on("error", err => wrapper.emit("error", err))
.on("end", () => wrapper.push(null));
writable.on("drain", () => wrapper.emit("drain"));
writable.on("error", err => wrapper.emit("error", err));
return wrapper;
}
/**
* Return a Duplex stream from a child process' stdin and stdout
* @param childProcess Child process from which to create duplex stream
*/
export function child(childProcess: ChildProcess) {
return duplex(childProcess.stdin, childProcess.stdout);
}
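// Usage sketch (hypothetical; assumes a `cat` binary is available on the host):
//
//     import { spawn } from "child_process";
//     fromArray(["hello\n"])
//         .pipe(child(spawn("cat")))
//         .pipe(process.stdout);
//     // writes "hello"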
/**
 * Return a Promise resolving to the last streamed chunk of the given readable stream, after it has
 * ended
 * @param readable Readable stream to wait on
*/
export function last<T>(readable: Readable): Promise<T | null> {
let lastChunk: T | null = null;
return new Promise((resolve, reject) => {
readable
.on("data", chunk => (lastChunk = chunk))
.on("end", () => resolve(lastChunk));
});
}
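// Usage sketch: await the final chunk of a stream (the cast is needed because
// fromArray is typed as NodeJS.ReadableStream rather than Readable):
//
//     last(fromArray([1, 2, 3]) as Readable).then(console.log);
//     // prints 3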
/**
 * Return a ReadWrite stream that buffers streamed chunks and emits them in arrays of the given
 * size (a final, possibly smaller batch is emitted when the stream ends)
 *
 * @param batchSize Number of chunks per batch
*/
export function batch(batchSize: number) {
const buffer: any[] = [];
return new Transform({
objectMode: true,
transform(chunk, encoding, callback) {
if (buffer.length === batchSize - 1) {
buffer.push(chunk);
callback(undefined, buffer.splice(0));
} else {
buffer.push(chunk);
callback();
}
},
        flush(callback) {
            // Emit any remaining partial batch; skip the push when the buffer is empty
            if (buffer.length > 0) {
                this.push(buffer.splice(0));
            }
            callback();
        },
});
}
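// Usage sketch: group chunks into arrays of two; the final partial batch is
// flushed when the input ends:
//
//     fromArray([1, 2, 3, 4, 5])
//         .pipe(batch(2))
//         .on("data", console.log);
//     // prints [ 1, 2 ], [ 3, 4 ], [ 5 ]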
/**
 * Return a ReadWrite stream that emits the elements of each streamed array as individual chunks
*/
export function unbatch() {
return new Transform({
objectMode: true,
transform(data, encoding, callback) {
for (const d of data) {
this.push(d);
}
callback();
},
});
}
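// Usage sketch: flatten batched arrays back into individual chunks:
//
//     fromArray([[1, 2], [3]])
//         .pipe(unbatch())
//         .on("data", console.log);
//     // prints 1, 2, 3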
/**
 * Return a ReadWrite stream that limits the rate at which chunks flow through
 * @param targetRate Desired rate in chunks per second
*/
export function rate(targetRate: number) {
const deltaMS = (1 / targetRate) * 1000;
let total = 0;
const start = performance.now();
return new Transform({
objectMode: true,
async transform(data, encoding, callback) {
const currentRate = (total / (performance.now() - start)) * 1000;
if (targetRate && currentRate > targetRate) {
await sleep(deltaMS);
}
total += 1;
callback(undefined, data);
},
});
}
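// Usage sketch: throttle an object stream to roughly two chunks per second:
//
//     fromArray([1, 2, 3, 4])
//         .pipe(rate(2))
//         .on("data", console.log);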
/**
 * Return a ReadWrite stream that applies the given function to each streamed chunk, running at
 * most `parallel` invocations in flight at once
 * @param parallel Maximum number of parallel invocations
 * @param func Function to execute on each chunk
*/
export function parallelMap<T, R>(parallel: number, func: (data: T) => R) {
let inflight = 0;
return new Transform({
objectMode: true,
async transform(data, encoding, callback) {
while (parallel <= inflight) {
await sleep(5);
}
inflight += 1;
callback();
try {
const res = await func(data);
this.push(res);
} catch (e) {
                this.emit("error", e);
} finally {
inflight -= 1;
}
},
async flush(callback) {
while (inflight > 0) {
await sleep(5);
}
callback();
},
});
}
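// Usage sketch (`fetchUser` is a hypothetical async function supplied by the
// caller): run at most two lookups concurrently. Results are pushed as they
// complete, so output order may differ from input order:
//
//     fromArray([1, 2, 3, 4])
//         .pipe(parallelMap(2, (id: number) => fetchUser(id)))
//         .on("data", console.log);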