Reworked support for processing

This commit is contained in:
Endeavorance 2025-04-02 11:45:08 -04:00
parent c1166680a8
commit 6a3157762a
14 changed files with 318 additions and 473 deletions

View file

@ -1,17 +1,18 @@
import type { BunFile } from "bun";
import { normalize, basename, extname, dirname } from "node:path";
import { normalize, extname } from "node:path";
import YAML from "yaml";
import TOML from "smol-toml";
import { FileError } from "./errors";
import EMDY from "@endeavorance/emdy";
type UnknownRecord = Record<string, unknown>;
export type MuseFileType = "md" | "yaml" | "json" | "toml";
export interface MuseFileContent {
type: MuseFileType;
text: string;
export interface MuseEntry {
_raw: string;
filePath: string;
data: Record<string, unknown>;
meta: Record<string, unknown>;
}
function extensionToFiletype(ext: string): MuseFileType {
function parseFileType(ext: string): MuseFileType {
switch (ext) {
case "md":
return "md";
@ -26,143 +27,102 @@ function extensionToFiletype(ext: string): MuseFileType {
}
}
const FRONTMATTER_REGEX = /^---[\s\S]*?---/gm;
/**
* Attempt to parse YAML frontmatter from a mixed yaml/md doc
* @param content The raw markdown content
* @returns Any successfully parsed frontmatter
*/
function extractFrontmatter(content: string) {
// If it does not start with `---`, it is invalid for frontmatter
if (content.trim().indexOf("---") !== 0) {
return {};
function parseYAMLEntries(text: string): UnknownRecord[] {
const parsedDocs = YAML.parseAllDocuments(text);
if (parsedDocs.some((doc) => doc.toJS() === null)) {
throw new Error("Encountered NULL resource");
}
if (FRONTMATTER_REGEX.test(content)) {
const frontmatterString = content.match(FRONTMATTER_REGEX)?.[0] ?? "";
const cleanFrontmatter = frontmatterString.replaceAll("---", "").trim();
return YAML.parse(cleanFrontmatter);
const errors = parsedDocs.flatMap((doc) => doc.errors);
if (errors.length > 0) {
throw new Error(
`Error parsing YAML resource: ${errors.map((e) => e.message).join(", ")}`,
);
}
return {};
const collection: UnknownRecord[] = parsedDocs.map((doc) => doc.toJS());
return collection;
}
/**
* Given a string of a markdown document, extract the markdown content
* @param content The raw markdown content
* @returns The markdown content without frontmatter
*/
function extractMarkdown(content: string): string {
if (content.trim().indexOf("---") !== 0) {
return content;
/**
 * Parse a JSON document into a list of entry records.
 *
 * A top-level array yields one entry per element; a top-level object is
 * wrapped in a single-element array.
 *
 * @param text Raw JSON source text
 * @returns One record per entry found in the document
 * @throws SyntaxError (raised by `JSON.parse`) when `text` is not valid JSON
 * @throws Error when the top-level value is neither an object nor an array
 *   of objects
 */
function parseJSONEntries(text: string): Record<string, unknown>[] {
  // `typeof null` is "object", so null has to be excluded explicitly.
  const isRecord = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  const parsed: unknown = JSON.parse(text);

  // Accept an array only when every element is itself an object; otherwise
  // fall through to the shared error below.
  if (Array.isArray(parsed) && parsed.every(isRecord)) {
    return parsed;
  }

  if (!Array.isArray(parsed) && isRecord(parsed)) {
    return [parsed];
  }

  throw new Error("JSON resource must be an object or an array of objects");
}
export class MuseFile {
protected _path: string;
protected _file: BunFile;
protected _fileType: MuseFileType;
function parseTOMLEntries(text: string): UnknownRecord[] {
const parsed = TOML.parse(text);
constructor(filePath: string) {
this._path = normalize(filePath);
this._file = Bun.file(this._path);
this._fileType = extensionToFiletype(this.extension.slice(1));
if (Array.isArray(parsed)) {
return parsed;
}
get dirname(): string {
return dirname(this._path);
if (typeof parsed === "object") {
return [parsed];
}
get extension(): string {
return extname(this._path);
}
get basename(): string {
return basename(this._path);
}
get filename(): string {
return basename(this._path, this.extension);
}
get path(): string {
return this._path;
}
public async readJSON(): Promise<MuseFileContent> {
try {
const text = await this._file.text();
return {
type: "json",
text: text,
data: JSON.parse(text),
};
} catch (error) {
throw new FileError(`Failed to read JSON file: ${error}`, this._path);
}
}
public async readYAML(): Promise<MuseFileContent> {
try {
const text = await this._file.text();
return {
type: "yaml",
text,
data: YAML.parse(text),
};
} catch (error) {
throw new FileError(`Failed to read YAML file: ${error}`, this._path);
}
}
public async readTOML(): Promise<MuseFileContent> {
try {
const text = await this._file.text();
return {
type: "toml",
text,
data: TOML.parse(text),
};
} catch (error) {
throw new FileError(`Failed to read TOML file: ${error}`, this._path);
}
}
public async readMarkdown(): Promise<MuseFileContent> {
try {
const text = await this._file.text();
const frontmatter = extractFrontmatter(text);
const markdown = extractMarkdown(text);
return {
type: "md",
text: markdown,
data: {
...frontmatter,
},
};
} catch (error) {
throw new FileError(`Failed to read Markdown file: ${error}`, this._path);
}
}
public async read(): Promise<MuseFileContent> {
switch (this._fileType) {
case "json":
return this.readJSON();
case "yaml":
return this.readYAML();
case "toml":
return this.readTOML();
case "md":
return this.readMarkdown();
default:
throw new FileError(
`No reader for file type ${this._fileType}`,
this._path,
);
}
}
throw new Error("TOML resource must be an object or an array of objects");
}
/** Optional settings accepted by `parseMuseFile`. */
interface ParseMuseFileOptions {
/**
 * Key forwarded to `EMDY.parse` when the file is markdown. `parseMuseFile`
 * falls back to "content" when this is omitted.
 * NOTE(review): presumably the property under which the markdown body is
 * stored in the parsed data — confirm against the EMDY documentation.
 */
markdownKey?: string;
}
/**
 * Read a file and parse it into one or more entries.
 *
 * The file type is derived from the extension of the normalized path.
 * Markdown files always produce exactly one entry; YAML, JSON and TOML
 * files produce one entry per record returned by their respective parser.
 * Every entry carries the full raw text of the file and the normalized path.
 *
 * @param rawFilePath Path to the file; normalized before use
 * @param options Optional settings; `markdownKey` is forwarded to
 *   `EMDY.parse` for markdown files and defaults to "content"
 * @returns The entries parsed from the file
 * @throws Error when the resolved file type has no parser here; errors
 *   raised while reading or parsing the file propagate unchanged
 */
export async function parseMuseFile(
  rawFilePath: string,
  { markdownKey = "content" }: ParseMuseFileOptions = {},
): Promise<MuseEntry[]> {
  const filePath = normalize(rawFilePath);
  // Resolve the type before reading so an unsupported extension fails
  // without touching the file contents.
  const fileType = parseFileType(extname(filePath).slice(1));
  const rawFileContent = await Bun.file(filePath).text();

  // Build each entry through this factory so every entry owns its own
  // `meta` object. Spreading one shared template would hand the same
  // object to all entries, and a write through one would leak into the rest.
  const toEntry = (data: Record<string, unknown>): MuseEntry => ({
    _raw: rawFileContent,
    filePath,
    meta: {},
    data,
  });

  switch (fileType) {
    case "md":
      return [toEntry(EMDY.parse(rawFileContent, markdownKey))];
    case "yaml":
      return parseYAMLEntries(rawFileContent).map((data) => toEntry(data));
    case "json":
      return parseJSONEntries(rawFileContent).map((data) => toEntry(data));
    case "toml":
      return parseTOMLEntries(rawFileContent).map((data) => toEntry(data));
    default:
      throw new Error(`Unsupported file type: ${fileType}`);
  }
}