Add basic csv view inside a table loaded through duckdb

This commit is contained in:
2025-04-24 18:47:53 +09:30
parent 5c42ba0dd4
commit 42e9470400
16 changed files with 999 additions and 517 deletions

127
src/app/duckdb.service.ts Normal file

@@ -0,0 +1,127 @@
import { Injectable } from '@angular/core';
import * as duckdb from '@duckdb/duckdb-wasm';
export interface Column {
name: string;
type: string;
}
export interface RowsResponse {
rows: any[];
totalRows: bigint;
}
const whitespaceRegex = /\s+/g;
const sanitisedFileName = (file: File) =>
file.name.toLowerCase().replaceAll("'", '').replaceAll(whitespaceRegex, '');
//https://www.npmjs.com/package/@duckdb/duckdb-wasm
@Injectable({
providedIn: 'root',
})
export class DuckdbService {
private db!: duckdb.AsyncDuckDB;
constructor() {
this.init();
}
async init() {
const JSDELIVR_BUNDLES = duckdb.getJsDelivrBundles();
// Select a bundle based on browser checks
const bundle = await duckdb.selectBundle(JSDELIVR_BUNDLES);
const worker_url = URL.createObjectURL(
new Blob([`importScripts("${bundle.mainWorker!}");`], {
type: 'text/javascript',
}),
);
// Instantiate the asynchronous version of DuckDB-wasm
const worker = new Worker(worker_url);
const logger = new duckdb.ConsoleLogger();
this.db = new duckdb.AsyncDuckDB(logger, worker);
await this.db.instantiate(bundle.mainModule, bundle.pthreadWorker);
URL.revokeObjectURL(worker_url);
}
// TODO: Consider adding this as a table in the db for performance improvements (see the sketch after addFile below)
async addFile(file: File) {
// Technically only the queries need the lowercased file name, but we keep registration consistent with them
await this.db.registerFileText(sanitisedFileName(file), await file.text());
}
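// A possible sketch for the TODO above (illustrative only, untested): materialise the
// registered CSV into a real DuckDB table so repeated queries scan columnar storage
// instead of re-parsing the CSV text. The addFileAsTable name and "_tbl" suffix are
// made up for this sketch.
async addFileAsTable(file: File) {
await this.addFile(file);
const conn = await this.db.connect();
try {
await conn.query(
`CREATE OR REPLACE TABLE "${sanitisedFileName(file)}_tbl" AS SELECT * FROM '${sanitisedFileName(file)}'`,
);
} finally {
conn.close();
}
}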
// We don't guard against SQL injection: everything runs on the user's machine, lives only in this browser session, and cannot touch the filesystem
async getColumns(file: File): Promise<Column[]> {
const conn = await this.db.connect();
try {
const response = await conn.query(
`DESCRIBE SELECT * FROM '${sanitisedFileName(file)}'`,
);
const cols: Column[] = [];
// DESCRIBE returns one row per column of the source file
const numCols = response.numRows;
for (let i = 0; i < numCols; i++) {
const jsonData = response.get(i)?.toJSON()!;
cols.push({
name: jsonData['column_name'],
type: jsonData['column_type'],
});
}
return cols;
} finally {
conn.close();
}
}
async getDistinctValuesForColumn(
file: File,
column: string,
limit = 2000,
): Promise<string[]> {
if (!limit || limit < 0) {
throw new Error('Limit must be a positive number');
}
const conn = await this.db.connect();
try {
const response = await conn.query(
`SELECT DISTINCT "${column}" FROM '${sanitisedFileName(file)}' ORDER BY "${column}" LIMIT ${limit}`,
);
const values: string[] = [];
for (let i = 0; i < response.numRows; i++) {
values.push(String(response.get(i)?.toJSON()[column]));
}
return values;
} finally {
conn.close();
}
}
async getRows(
file: File,
start: number,
numRows: number,
columns: Column[],
filters: unknown[],
aggregations: unknown[],
): Promise<RowsResponse> {
const conn = await this.db.connect();
try {
const totalRowResponse = await conn.query(
`SELECT COUNT(1) totalRows FROM '${sanitisedFileName(file)}'`,
);
const { totalRows } = totalRowResponse.get(0)?.toJSON()!;
const response = await conn.query(
`SELECT ${columns.map((column) => `"${column.name}"`).join(', ')} FROM '${sanitisedFileName(file)}' LIMIT ${numRows} OFFSET ${start}`,
);
const rows = [];
for (let i = 0; i < response.numRows; i++) {
rows.push(response.get(i)?.toJSON()!);
}
return { rows, totalRows };
} catch (err) {
console.error(err);
return { rows: [], totalRows: 0n };
} finally {
conn.close();
}
}
}
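For reference, a minimal sketch of how a component might drive this service, assuming a hypothetical CsvTableComponent; the selector, template and field names are illustrative, not taken from the rest of this commit. It registers the file, reads its schema, then fetches the first page of rows.

import { Component } from '@angular/core';
import { Column, DuckdbService } from './duckdb.service';

@Component({
  selector: 'app-csv-table',
  template: '',
})
export class CsvTableComponent {
  columns: Column[] = [];
  rows: any[] = [];
  totalRows = 0n;

  constructor(private readonly duckdb: DuckdbService) {}

  async onFileSelected(file: File) {
    // Register the CSV text with DuckDB, then query it for schema and data
    await this.duckdb.addFile(file);
    this.columns = await this.duckdb.getColumns(file);
    const page = await this.duckdb.getRows(file, 0, 50, this.columns, [], []);
    this.rows = page.rows;
    this.totalRows = page.totalRows;
  }
}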