Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use streaming when creating log symbols file. #2858

Merged
merged 2 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 2 additions & 109 deletions extensions/ql-vscode/src/codeql-cli/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import { dirname, join, delimiter } from "path";
import * as sarif from "sarif";
import { SemVer } from "semver";
import { Readable } from "stream";
import { StringDecoder } from "string_decoder";
import tk from "tree-kill";
import { promisify } from "util";
import { CancellationToken, Disposable, Uri } from "vscode";
Expand All @@ -31,6 +30,7 @@ import { CompilationMessage } from "../query-server/legacy-messages";
import { sarifParser } from "../common/sarif-parser";
import { App } from "../common/app";
import { QueryLanguage } from "../common/query-language";
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";

/**
* The version of the SARIF format that we are using.
Expand Down Expand Up @@ -1649,120 +1649,13 @@ export async function runCodeQlCliCommand(
}
}

/**
* Buffer to hold state used when splitting a text stream into lines.
*/
class SplitBuffer {
private readonly decoder = new StringDecoder("utf8");
private readonly maxSeparatorLength: number;
private buffer = "";
private searchIndex = 0;

constructor(private readonly separators: readonly string[]) {
this.maxSeparatorLength = separators
.map((s) => s.length)
.reduce((a, b) => Math.max(a, b), 0);
}

/**
* Append new text data to the buffer.
* @param chunk The chunk of data to append.
*/
public addChunk(chunk: Buffer): void {
this.buffer += this.decoder.write(chunk);
}

/**
* Signal that the end of the input stream has been reached.
*/
public end(): void {
this.buffer += this.decoder.end();
this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
}

/**
* A version of startsWith that isn't overriden by a broken version of ms-python.
*
* The definition comes from
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
* which is CC0/public domain
*
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
*/
private static startsWith(
s: string,
searchString: string,
position: number,
): boolean {
const pos = position > 0 ? position | 0 : 0;
return s.substring(pos, pos + searchString.length) === searchString;
}

/**
* Extract the next full line from the buffer, if one is available.
* @returns The text of the next available full line (without the separator), or `undefined` if no
* line is available.
*/
public getNextLine(): string | undefined {
while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
for (const separator of this.separators) {
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
const line = this.buffer.slice(0, this.searchIndex);
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
this.searchIndex = 0;
return line;
}
}
this.searchIndex++;
}

return undefined;
}
}

/**
* Splits a text stream into lines based on a list of valid line separators.
* @param stream The text stream to split. This stream will be fully consumed.
* @param separators The list of strings that act as line separators.
* @returns A sequence of lines (not including separators).
*/
async function* splitStreamAtSeparators(
stream: Readable,
separators: string[],
): AsyncGenerator<string, void, unknown> {
const buffer = new SplitBuffer(separators);
for await (const chunk of stream) {
buffer.addChunk(chunk);
let line: string | undefined;
do {
line = buffer.getNextLine();
if (line !== undefined) {
yield line;
}
} while (line !== undefined);
}
buffer.end();
let line: string | undefined;
do {
line = buffer.getNextLine();
if (line !== undefined) {
yield line;
}
} while (line !== undefined);
}

/**
* Standard line endings for splitting human-readable text.
*/
const lineEndings = ["\r\n", "\r", "\n"];

/**
* Log a text stream to a `Logger` interface.
* @param stream The stream to log.
* @param logger The logger that will consume the stream output.
*/
async function logStream(stream: Readable, logger: BaseLogger): Promise<void> {
for await (const line of splitStreamAtSeparators(stream, lineEndings)) {
for await (const line of splitStreamAtSeparators(stream, LINE_ENDINGS)) {
// Await the result of log here in order to ensure the logs are written in the correct order.
await logger.log(line);
}
Expand Down
125 changes: 125 additions & 0 deletions extensions/ql-vscode/src/common/split-stream.ts
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anything in this file that has changed when you extracted it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, other than the casing of LINE_ENDINGS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, and the bug I discovered when you made me write a unit test:)

Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import { Readable } from "stream";
import { StringDecoder } from "string_decoder";

/**
* Buffer to hold state used when splitting a text stream into lines.
*/
export class SplitBuffer {
private readonly decoder = new StringDecoder("utf8");
private readonly maxSeparatorLength: number;
private buffer = "";
private searchIndex = 0;
private ended = false;

constructor(private readonly separators: readonly string[]) {
this.maxSeparatorLength = separators
.map((s) => s.length)
.reduce((a, b) => Math.max(a, b), 0);
}

/**
* Append new text data to the buffer.
* @param chunk The chunk of data to append.
*/
public addChunk(chunk: Buffer): void {
this.buffer += this.decoder.write(chunk);
}

/**
* Signal that the end of the input stream has been reached.
*/
public end(): void {
this.buffer += this.decoder.end();
this.ended = true;
}

/**
* A version of startsWith that isn't overriden by a broken version of ms-python.
*
* The definition comes from
* https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/startsWith
* which is CC0/public domain
*
* See https://github.com/github/vscode-codeql/issues/802 for more context as to why we need it.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the upstream issues have been fixed. Maybe we can remove this workaround. (Not needed for this PR.)

*/
private static startsWith(
s: string,
searchString: string,
position: number,
): boolean {
const pos = position > 0 ? position | 0 : 0;
return s.substring(pos, pos + searchString.length) === searchString;
}

/**
* Extract the next full line from the buffer, if one is available.
* @returns The text of the next available full line (without the separator), or `undefined` if no
* line is available.
*/
public getNextLine(): string | undefined {
// If we haven't received all of the input yet, don't search too close to the end of the buffer,
// or we could match a separator that's split across two chunks. For example, we could see "\r"
// at the end of the buffer and match that, even though we were about to receive a "\n" right
// after it.
const maxSearchIndex = this.ended
? this.buffer.length - 1
: this.buffer.length - this.maxSeparatorLength;
while (this.searchIndex <= maxSearchIndex) {
for (const separator of this.separators) {
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
const line = this.buffer.slice(0, this.searchIndex);
this.buffer = this.buffer.slice(this.searchIndex + separator.length);
this.searchIndex = 0;
return line;
}
}
this.searchIndex++;
}

if (this.ended && this.buffer.length > 0) {
// If we still have some text left in the buffer, return it as the last line.
const line = this.buffer;
this.buffer = "";
this.searchIndex = 0;
return line;
} else {
return undefined;
}
}
}

/**
* Splits a text stream into lines based on a list of valid line separators.
* @param stream The text stream to split. This stream will be fully consumed.
* @param separators The list of strings that act as line separators.
* @returns A sequence of lines (not including separators).
*/
export async function* splitStreamAtSeparators(
stream: Readable,
separators: string[],
): AsyncGenerator<string, void, unknown> {
const buffer = new SplitBuffer(separators);
for await (const chunk of stream) {
buffer.addChunk(chunk);
let line: string | undefined;
do {
line = buffer.getNextLine();
if (line !== undefined) {
yield line;
}
} while (line !== undefined);
}
buffer.end();
let line: string | undefined;
do {
line = buffer.getNextLine();
if (line !== undefined) {
yield line;
}
} while (line !== undefined);
}

/**
* Standard line endings for splitting human-readable text.
*/
export const LINE_ENDINGS = ["\r\n", "\r", "\n"];
98 changes: 52 additions & 46 deletions extensions/ql-vscode/src/log-insights/summary-parser.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { writeFile, promises } from "fs-extra";
import { createReadStream, writeFile } from "fs-extra";
import { LINE_ENDINGS, splitStreamAtSeparators } from "../common/split-stream";

/**
* Location information for a single pipeline invocation in the RA.
Expand Down Expand Up @@ -64,59 +65,64 @@ export async function generateSummarySymbolsFile(
async function generateSummarySymbols(
summaryPath: string,
): Promise<SummarySymbols> {
const summary = await promises.readFile(summaryPath, {
const stream = createReadStream(summaryPath, {
encoding: "utf-8",
});
const symbols: SummarySymbols = {
predicates: {},
};
try {
const lines = splitStreamAtSeparators(stream, LINE_ENDINGS);

const lines = summary.split(/\r?\n/);
let lineNumber = 0;
while (lineNumber < lines.length) {
const startLineNumber = lineNumber;
lineNumber++;
const startLine = lines[startLineNumber];
const nonRecursiveMatch = startLine.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
let predicateName: string | undefined = undefined;
let iteration = 0;
if (nonRecursiveMatch) {
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = startLine.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch?.groups) {
predicateName = recursiveMatch.groups.predicateName;
iteration = parseInt(recursiveMatch.groups.iteration);
}
}
const symbols: SummarySymbols = {
predicates: {},
};

if (predicateName !== undefined) {
const raStartLine = lineNumber;
let raEndLine: number | undefined = undefined;
while (lineNumber < lines.length && raEndLine === undefined) {
const raLine = lines[lineNumber];
const returnMatch = raLine.match(RETURN_REGEXP);
if (returnMatch) {
raEndLine = lineNumber;
let lineNumber = 0;
let raStartLine = 0;
let iteration = 0;
let predicateName: string | undefined = undefined;
let startLine = 0;
for await (const line of lines) {
if (predicateName === undefined) {
// Looking for the start of the predicate.
const nonRecursiveMatch = line.match(NON_RECURSIVE_TUPLE_COUNT_REGEXP);
if (nonRecursiveMatch) {
iteration = 0;
predicateName = nonRecursiveMatch.groups!.predicateName;
} else {
const recursiveMatch = line.match(RECURSIVE_TUPLE_COUNT_REGEXP);
if (recursiveMatch?.groups) {
predicateName = recursiveMatch.groups.predicateName;
iteration = parseInt(recursiveMatch.groups.iteration);
}
}
lineNumber++;
}
if (raEndLine !== undefined) {
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {},
if (predicateName !== undefined) {
startLine = lineNumber;
raStartLine = lineNumber + 1;
}
} else {
const returnMatch = line.match(RETURN_REGEXP);
if (returnMatch) {
let symbol = symbols.predicates[predicateName];
if (symbol === undefined) {
symbol = {
iterations: {},
};
symbols.predicates[predicateName] = symbol;
}
symbol.iterations[iteration] = {
startLine,
raStartLine,
raEndLine: lineNumber,
};
symbols.predicates[predicateName] = symbol;

predicateName = undefined;
}
symbol.iterations[iteration] = {
startLine: lineNumber,
raStartLine,
raEndLine,
};
}

lineNumber++;
}
}

return symbols;
return symbols;
} finally {
stream.close();
}
}
Loading