Support for URL-based resources

This commit is contained in:
Endeavorance 2025-04-04 13:50:43 -04:00
parent ae1b9e262e
commit 5b90a8e1b3
6 changed files with 342 additions and 135 deletions

254
src/core/sources.ts Normal file
View file

@ -0,0 +1,254 @@
import EMDY from "@endeavorance/emdy";
import TOML from "smol-toml";
import YAML from "yaml";
import { loadFileContent } from "./files";
import type { UnknownRecord } from "./types";
import { Glob } from "bun";
/**
 * Kinds of source an entry can be loaded from.
 *
 * - `"file"`: the source location is scanned as a glob pattern.
 * - `"url"`: the source location is fetched over the network.
 */
export type SourceType = "file" | "url";
/**
 * Describes where entries come from.
 */
export interface MuseSource {
// For "file" sources this string is handed to `Glob`; for "url" sources it is
// handed to `fetch`.
location: string;
// Selects which loader handles `location`.
type: SourceType;
}
/**
 * Options shared by the source loaders in this module.
 */
interface SourceOptions {
// Directory the file-source glob scan is resolved against.
cwd: string;
// Forwarded to the Markdown parser as its second argument; the loaders fall
// back to "content" when it is not provided.
// NOTE(review): presumably the key under which the Markdown body is stored — confirm against EMDY.
contentKey: string;
// Paths to skip. Compared by exact string equality against the absolute paths
// returned by the glob scan.
ignore: string[];
}
/**
 * A single parsed record together with information about where it came from.
 *
 * One file or URL can yield several entries; each of them carries the same
 * `_raw` text and `location`.
 */
export interface MuseEntry<MetaShape = UnknownRecord> {
// Full, unparsed text of the file or HTTP response the entry was parsed from.
_raw: string;
// Resolved file path (file sources) or URL (url sources) of the entry.
location: string;
// The parsed record itself.
data: Record<string, unknown>;
// Extra metadata; the loaders in this module initialise it to an empty object.
meta: MetaShape;
// The source this entry was loaded from.
source: MuseSource;
}
/**
 * Normalise a parsed value into a list of records.
 *
 * - A plain (non-null, non-array) object becomes a one-element list.
 * - An array is returned as-is when every element is a plain object
 *   (an empty array is therefore valid and yields no records).
 *
 * @param val - The value produced by a format parser.
 * @returns The records contained in `val`.
 * @throws Error when `val` is a primitive, `null`, or an array containing
 *   anything other than plain objects (primitives, `null`, nested arrays).
 */
function formatEntries(val: unknown): UnknownRecord[] {
  // `typeof null` and `typeof []` are both "object", so exclude them explicitly.
  const isRecord = (candidate: unknown): candidate is UnknownRecord =>
    typeof candidate === "object" &&
    candidate !== null &&
    !Array.isArray(candidate);

  if (Array.isArray(val)) {
    // Arrays must be handled completely here: letting an invalid array fall
    // through to the single-object case would wrap it as one bogus record.
    if (val.every(isRecord)) {
      return val;
    }
  } else if (isRecord(val)) {
    return [val];
  }

  // String() (unlike template interpolation) never throws, even for symbols.
  throw new Error(
    `Invalid data format. Entry files must define an object or array of objects, but found "${String(val)}"`,
  );
}
/**
 * Parse YAML text (possibly containing several `---`-separated documents)
 * into records.
 *
 * Each document is validated with `formatEntries`, so a document may hold a
 * single object or an array of objects, matching the JSON and TOML parsers.
 *
 * @param text - Raw YAML text.
 * @returns The records from every document, in document order.
 * @throws Error when any document has parse errors, converts to `null`, or
 *   does not hold an object / array of objects.
 */
function parseYAMLEntries(text: string): UnknownRecord[] {
  const parsedDocs = YAML.parseAllDocuments(text);

  // Report syntax errors first: checking for NULL documents beforehand could
  // mask the real parse error behind a generic "NULL resource" message.
  const errors = parsedDocs.flatMap((doc) => doc.errors);
  if (errors.length > 0) {
    throw new Error(
      `Error parsing YAML resource: ${errors.map((e) => e.message).join(", ")}`,
    );
  }

  // Convert each document exactly once.
  const values: unknown[] = parsedDocs.map((doc) => doc.toJS());
  if (values.some((value) => value === null)) {
    throw new Error("Encountered NULL resource");
  }

  return values.flatMap((value) => formatEntries(value));
}
/**
 * Decode JSON text and normalise the result into a list of records.
 *
 * @param text - Raw JSON text.
 * @returns The records described by the JSON value.
 * @throws Whatever `JSON.parse` or `formatEntries` throws for invalid input.
 */
function parseJSONEntries(text: string): UnknownRecord[] {
  const decoded: unknown = JSON.parse(text);
  return formatEntries(decoded);
}
/**
 * Decode TOML text and normalise the result into a list of records.
 *
 * @param text - Raw TOML text.
 * @returns The records described by the TOML document.
 * @throws Whatever `TOML.parse` or `formatEntries` throws for invalid input.
 */
function parseTOMLEntries(text: string): UnknownRecord[] {
  const table = TOML.parse(text);
  return formatEntries(table);
}
/**
 * Parse Markdown text with EMDY and normalise the result into a list of
 * records.
 *
 * @param text - Raw Markdown text.
 * @param contentKey - Passed through to `EMDY.parse` as its second argument.
 * @returns The records produced from the document.
 * @throws Whatever `EMDY.parse` or `formatEntries` throws for invalid input.
 */
function parseMarkdownEntry(text: string, contentKey: string): UnknownRecord[] {
  const document = EMDY.parse(text, contentKey);
  return formatEntries(document);
}
/**
 * Load a single file and parse it into one or more entries.
 *
 * The parser is chosen from the file type reported by `loadFileContent`
 * (`md`, `yaml`, `json` or `toml`).
 *
 * @param rawFilePath - Path of the file to load.
 * @param options - Loader options; only `contentKey` is used here and it
 *   defaults to `"content"`.
 * @returns One entry per record found in the file.
 * @throws Error when the file type is not one of the supported formats, or
 *   when the selected parser rejects the content.
 */
export async function parseMuseFile(
  rawFilePath: string,
  { contentKey = "content" }: SourceOptions,
): Promise<MuseEntry[]> {
  const { content, filePath, fileType } = await loadFileContent(rawFilePath);

  let records: UnknownRecord[];
  switch (fileType) {
    case "md":
      records = parseMarkdownEntry(content, contentKey);
      break;
    case "yaml":
      records = parseYAMLEntries(content);
      break;
    case "json":
      records = parseJSONEntries(content);
      break;
    case "toml":
      records = parseTOMLEntries(content);
      break;
    default:
      throw new Error(`Unsupported file type: ${fileType}`);
  }

  // Build `source` and `meta` inside the callback so every entry owns its own
  // objects; sharing one instance would let a change to one entry's metadata
  // leak into every other entry from the same file.
  return records.map(
    (data): MuseEntry => ({
      _raw: content,
      source: {
        location: filePath,
        type: "file",
      },
      location: filePath,
      meta: {},
      data,
    }),
  );
}
/**
 * Load every entry from the files matched by a file source.
 *
 * `source.location` is used as a glob pattern and scanned relative to
 * `options.cwd`; matches are absolute paths, symlinks are followed and only
 * files are returned. Paths listed verbatim in `options.ignore` are skipped.
 * Files are parsed one after another, in scan order.
 *
 * @param source - The file source to scan.
 * @param options - Loader options.
 * @returns The entries of all matched files, concatenated in scan order.
 */
async function loadFromFileSource(
  source: MuseSource,
  options: SourceOptions,
): Promise<MuseEntry[]> {
  const scanner = new Glob(source.location);
  const matches = [
    ...scanner.scanSync({
      cwd: options.cwd,
      absolute: true,
      followSymlinks: true,
      onlyFiles: true,
    }),
  ];

  const isIgnored = (candidate: string): boolean =>
    options.ignore.includes(candidate);
  const wanted = matches.filter((candidate) => !isIgnored(candidate));

  const collected: MuseEntry[] = [];
  for (const match of wanted) {
    const parsed = await parseMuseFile(match, options);
    for (const entry of parsed) {
      collected.push(entry);
    }
  }

  return collected;
}
/**
 * Extract the file extension from the last path segment of a URL.
 *
 * Query string and fragment are ignored. The extension is lower-cased so it
 * can be compared directly against lowercase format names.
 *
 * @param url - An absolute URL.
 * @returns The extension without the leading dot, or `null` when the last
 *   path segment has no dot or ends with one (e.g. `/readme`, `/dir/`, `/file.`).
 * @throws TypeError when `url` cannot be parsed by the `URL` constructor.
 */
function getFileExtensionFromURL(url: string): string | null {
  const { pathname } = new URL(url);
  const filename = pathname.slice(pathname.lastIndexOf("/") + 1);

  const dotIndex = filename.lastIndexOf(".");
  if (dotIndex === -1) {
    return null;
  }

  const extension = filename.slice(dotIndex + 1).toLowerCase();
  // A trailing dot yields an empty extension; report it as "no extension" so
  // callers chaining with `??` fall through to their next option.
  return extension === "" ? null : extension;
}
/**
 * Map a MIME type (or a full `Content-Type` header value) to the name of the
 * entry format used to parse it.
 *
 * Parameters such as `; charset=utf-8` are ignored and matching is
 * case-insensitive.
 *
 * @param mimeType - A media type, optionally followed by parameters.
 * @returns `"md"`, `"yaml"`, `"json"` or `"toml"`, or `null` when the media
 *   type is not recognised.
 */
function mimeTypeToFileType(mimeType: string): string | null {
  // Keep only the "type/subtype" part in front of any parameters.
  const paramStart = mimeType.indexOf(";");
  const essence = (paramStart === -1 ? mimeType : mimeType.slice(0, paramStart))
    .trim()
    .toLowerCase();

  switch (essence) {
    case "text/markdown":
    case "text/x-markdown":
      return "md";
    case "text/yaml":
    case "text/x-yaml":
    case "application/yaml":
    case "application/x-yaml":
      return "yaml";
    case "application/json":
      return "json";
    case "application/toml":
      return "toml";
    default:
      return null;
  }
}
/**
 * Fetch a URL source and parse the response body into entries.
 *
 * The format is chosen from the response's `Content-Type` header (ignoring
 * parameters such as `; charset=utf-8`), falling back to the file extension
 * in the URL path. When neither identifies a supported format, the body is
 * parsed as Markdown as a last resort.
 *
 * @param source - The URL source; `source.location` is the URL to fetch.
 * @param options - Loader options; only `contentKey` is used here and it
 *   defaults to `"content"`.
 * @returns One entry per record found in the response body.
 * @throws Error when the response status is not OK, when the selected parser
 *   rejects the body, or when no format matched and the Markdown fallback
 *   fails.
 */
async function loadFromURLSource(
  source: MuseSource,
  options: SourceOptions,
): Promise<MuseEntry[]> {
  const { contentKey = "content" } = options;
  const response = await fetch(source.location);
  if (!response.ok) {
    throw new Error(`Failed to fetch URL: ${source.location}`);
  }
  const content = await response.text();
  const mimeType = response.headers.get("Content-Type") || "unknown";

  // Servers usually append parameters ("application/json; charset=utf-8") and
  // may vary the casing; compare on the bare, lower-cased media type.
  const paramStart = mimeType.indexOf(";");
  const essence = (paramStart === -1 ? mimeType : mimeType.slice(0, paramStart))
    .trim()
    .toLowerCase();

  // Common alternative spellings of extensions for supported formats.
  const extensionAliases: Record<string, string> = {
    yml: "yaml",
    markdown: "md",
  };
  const fileTypeFromExtension = (): string | null => {
    const extension = getFileExtensionFromURL(source.location)?.toLowerCase();
    if (extension === undefined || extension === "") {
      return null;
    }
    return extensionAliases[extension] ?? extension;
  };

  const parseType =
    mimeTypeToFileType(essence) ?? fileTypeFromExtension() ?? "unknown";

  let records: UnknownRecord[];
  switch (parseType) {
    case "md":
      records = parseMarkdownEntry(content, contentKey);
      break;
    case "yaml":
      records = parseYAMLEntries(content);
      break;
    case "json":
      records = parseJSONEntries(content);
      break;
    case "toml":
      records = parseTOMLEntries(content);
      break;
    default:
      // No recognised format: attempt Markdown (the only format tried here)
      // and report the unsupported MIME type if that fails as well.
      try {
        records = parseMarkdownEntry(content, contentKey);
      } catch {
        throw new Error(`Unsupported MIME type from URL source: ${mimeType}`);
      }
  }

  // Build `source` and `meta` inside the callback so every entry owns its own
  // objects instead of sharing one instance across the whole response.
  return records.map(
    (data): MuseEntry => ({
      _raw: content,
      source: {
        location: source.location,
        type: "url",
      },
      location: source.location,
      meta: {},
      data,
    }),
  );
}
/**
 * Load all entries from a source, delegating to the loader that matches the
 * source's type.
 *
 * @param source - The source to load.
 * @param options - Loader options, passed through unchanged.
 * @returns The entries produced by the selected loader.
 * @throws Error when `source.type` is neither `"file"` nor `"url"`.
 */
export async function loadFromSource(
  source: MuseSource,
  options: SourceOptions,
): Promise<MuseEntry[]> {
  const { type } = source;

  if (type === "file") {
    return loadFromFileSource(source, options);
  }

  if (type === "url") {
    return loadFromURLSource(source, options);
  }

  throw new Error(`Unsupported source type: ${type}`);
}