I have a blog built with mdsvex, which uses import.meta.glob
to build an index of all the blog posts.
It looks something like this.
const modules = import.meta.glob("../blog/*/*.{md,svx,svelte.md}");

const postPromises = [];
for (const [path, resolver] of Object.entries(modules)) {
  // e.g. "../blog/my-post/+page.md" -> "my-post"
  const slug = path.replace("../blog/", "").replace("/+page.md", "");
  const promise = resolver().then((post) => ({
    slug,
    ...post.metadata,
  }));
  postPromises.push(promise);
}

const all_posts = await Promise.all(postPromises);
Doing this on the server (in a server.js file) is a good idea, but I think we can render the index even faster by extracting the metadata ourselves with a script that runs during the build, or even during development.
Here is how I do it. Create a script at the root of the project (or anywhere you like); let's call it extract-metadata.js. This is an AI-generated script, so go through it carefully before using it in your project.
// extract-metadata.js generated by an AI
import fs from "fs";
import path from "path";
import matter from "gray-matter";

const POSTS_DIR = "src/routes/blog";
const OUTPUT_FILE = "src/lib/data/blog.json";

async function extractMetadata() {
  try {
    // Get all directories in the blog folder
    const postDirs = fs
      .readdirSync(POSTS_DIR, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name)
      .sort();

    const metadata = [];
    for (const postDir of postDirs) {
      const markdownPath = path.join(POSTS_DIR, postDir, "+page.md");

      // Check if +page.md exists in this directory
      if (!fs.existsSync(markdownPath)) {
        console.warn(`⚠️ No +page.md found in ${postDir}`);
        continue;
      }

      const content = fs.readFileSync(markdownPath, "utf8");
      const { data, excerpt } = matter(content, { excerpt: true });

      // Extract metadata
      const meta = {
        slug: postDir, // Use directory name as slug
        route: `/blog/${postDir}`, // Add the full route path
        ...data, // Include any other frontmatter fields
      };
      metadata.push(meta);
    }

    // Skip posts explicitly marked as unpublished
    const published = metadata.filter((item) => item.published !== false);

    // Sort by date (newest first)
    published.sort((a, b) => new Date(b.date) - new Date(a.date));

    // Write to output file
    const outputDir = path.dirname(OUTPUT_FILE);
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }
    fs.writeFileSync(OUTPUT_FILE, JSON.stringify(published, null, 2));

    console.log(`✅ Extracted metadata from ${published.length} markdown files`);
    console.log(`📄 Output written to ${OUTPUT_FILE}`);
  } catch (error) {
    console.error("❌ Error extracting metadata:", error);
    process.exit(1);
  }
}

extractMetadata();
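For reference, this is the kind of frontmatter the script reads from the top of each +page.md. The field names below are just an example of what I assume you might have; gray-matter copies whatever keys you define, and the script only relies on date (for sorting) and published (for filtering):

---
title: "My first post"
date: "2024-10-20"
published: true
description: "A short description."
---

The post content goes here.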
You may have to install the gray-matter dependency using npm i gray-matter. Make sure the script runs successfully (node extract-metadata.js) and generates the file src/lib/data/blog.json. Open that file and check that it contains an array with the metadata of all your posts.
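If everything worked, blog.json should look roughly like this (hypothetical content; the exact fields depend on your frontmatter):

[
  {
    "slug": "my-first-post",
    "route": "/blog/my-first-post",
    "title": "My first post",
    "date": "2024-10-20",
    "published": true
  },
  {
    "slug": "an-older-post",
    "route": "/blog/an-older-post",
    "title": "An older post",
    "date": "2024-09-01"
  }
]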
If it does, you can modify your package.json as shown below (the // comments are only annotations; drop them in your actual package.json, which must be valid JSON).
// package.json
{
  "name": "webjeda",
  "version": "1.0.0",
  "scripts": {
    "dev": "node extract-metadata.js && vite dev", // here
    "build": "vite build",
    "preview": "vite preview"
  },
  "devDependencies": {
    "@sveltejs/kit": "2.7.1",
    "@sveltejs/vite-plugin-svelte": "4.0.0-next.8",
    "mdsvex": "^0.12.3",
    "svelte": "5.0.0-next.266",
    "svelte-youtube-embed": "^0.3.0",
    "typescript": "^5.6.3",
    "vite": "^5.4.9"
  },
  "type": "module",
  "dependencies": {
    "gray-matter": "^4.0.3"
  }
}
This will regenerate the file every time you start development mode (which I guess is often), so you can be sure all your posts are indexed properly.
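If you also want the index regenerated for production builds instead of relying on the committed JSON, you can run the script before vite build too. This is an optional tweak, not part of the setup above:

"scripts": {
  "dev": "node extract-metadata.js && vite dev",
  "build": "node extract-metadata.js && vite build",
  "preview": "vite preview"
}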
Now this JSON file can be imported directly anywhere you want your blog index.
<!-- src/routes/blog/+page.svelte -->
<script>
  import posts from "$lib/data/blog.json";
</script>

<ul>
  {#each posts as post}
    <li>
      <a href={post.route}>
        {post.title}
      </a>
    </li>
  {/each}
</ul>
If your blog index has to be regenerated automatically, you can also set up a cron job using GitHub Actions.
# .github/workflows/fetch-metadata.yml
name: Extract Markdown Metadata

on:
  schedule:
    - cron: "0 0 * * *" # Every day at midnight UTC
  workflow_dispatch: # Allow manual triggering

jobs:
  extract-metadata:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "18"
          cache: "npm"

      - name: Install dependencies
        run: npm ci

      - name: Extract metadata from markdown files
        run: node extract-metadata.js

      # Commit and push the JSON file
      - name: Commit and Push Changes
        run: |
          git config --local user.name "GitHub Action"
          git config --local user.email "action@github.com"

          # Stash any changes first
          git stash save "temp-changes"

          # Pull latest changes from master
          git pull origin master

          # Apply stashed changes back
          git stash pop

          # Add the file
          git add src/lib/data

          # Check and commit if there are changes
          if [[ `git diff --cached --stat` != '' ]]; then
            git commit -m "Auto-update cron generated files"
            git push origin master
          else
            echo "No changes to commit"
          fi
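One thing to check: depending on your repository settings, the default GITHUB_TOKEN can be read-only, in which case the push step will fail. If that happens, you can grant the workflow write access to the repository contents at the top of the file (adjust to your own setup):

# .github/workflows/fetch-metadata.yml
permissions:
  contents: write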
This isn't just for blogs; the same approach can be used to generate sitemaps as well.
And if you're fetching data from an API that doesn't change often, this method can help keep costs down if you're otherwise invoking a lot of serverless functions just to return the same data.
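As a rough sketch of the sitemap idea, the same blog.json can feed a tiny generator script. This is a hypothetical example, not part of my setup; the domain and output path are placeholders you'd replace with your own:

// generate-sitemap.js (hypothetical sketch)
import fs from "fs";

const SITE = "https://example.com"; // placeholder domain
const posts = JSON.parse(fs.readFileSync("src/lib/data/blog.json", "utf8"));

const urls = posts
  .map((post) => `  <url><loc>${SITE}${post.route}</loc></url>`)
  .join("\n");

const sitemap = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${urls}
</urlset>
`;

// static/ is served as-is by SvelteKit, so the file ends up at /sitemap.xml
fs.writeFileSync("static/sitemap.xml", sitemap);
console.log(`Wrote ${posts.length} URLs to static/sitemap.xml`);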