fix: resolve crawl cannot get docs (#5344)

This commit is contained in:
dreamer6680
2025-07-30 15:38:30 +08:00
committed by GitHub
parent 061547a983
commit 6c37776de1
29 changed files with 358 additions and 421 deletions
+32 -39
View File
@@ -1,71 +1,64 @@
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import fg from 'fast-glob';
import matter from 'gray-matter';
import { i18n } from '@/lib/i18n';
export const revalidate = false;
// 将文件路径转换为URL路径
// 黑名单路径(不带语言前缀)
const blacklist = ['use-cases/index', 'protocol/index', 'api/index'];
// 将文件路径转换为 URL 路径(包括文件名)
function filePathToUrl(filePath: string, defaultLanguage: string): string {
// 移除 ./content/docs/ 前缀
let urlPath = filePath.replace('./content/docs/', '');
// 确定基础路径
let relativePath = filePath.replace('./content/docs/', '');
const basePath = defaultLanguage === 'zh-CN' ? '/docs' : '/en/docs';
// 如果是英文文件,移除 .en 后缀
if (defaultLanguage !== 'zh-CN' && urlPath.endsWith('.en.mdx')) {
urlPath = urlPath.replace('.en.mdx', '');
} else if (urlPath.endsWith('.mdx')) {
urlPath = urlPath.replace('.mdx', '');
if (defaultLanguage !== 'zh-CN' && relativePath.endsWith('.en.mdx')) {
relativePath = relativePath.replace(/\.en\.mdx$/, '');
} else if (relativePath.endsWith('.mdx')) {
relativePath = relativePath.replace(/\.mdx$/, '');
}
// 处理 index 文件
if (urlPath.endsWith('/index')) {
urlPath = urlPath.replace('/index', '');
}
// 拼接完整路径
return `${basePath}/${urlPath}`.replace(/\/\/+/g, '/');
return `${basePath}/${relativePath}`.replace(/\/\/+/g, '/');
}
/**
 * Tells whether a generated docs URL is on the blacklist.
 *
 * A URL is blacklisted when it ends with `/docs/<entry>` or `/en/docs/<entry>`
 * for any entry of the module-level `blacklist` array.
 *
 * @param url URL path to check.
 * @returns `true` when the URL matches a blacklist entry, otherwise `false`.
 */
function isBlacklisted(url: string): boolean {
  for (const entry of blacklist) {
    const defaultSuffix = `/docs/${entry}`;
    const englishSuffix = `/en/docs/${entry}`;
    if (url.endsWith(defaultSuffix) || url.endsWith(englishSuffix)) {
      return true;
    }
  }
  return false;
}
export async function GET(request: Request) {
const defaultLanguage = i18n.defaultLanguage;
// 检查请求路径是否为 /en/robots
const requestUrl = new URL(request.url);
const isEnRobotsRoute = requestUrl.pathname === '/en/robots';
let globPattern;
if (isEnRobotsRoute) {
// 如果是 /en/robots 路由,只选择 .en.mdx 文件
globPattern = ['./content/docs/**/*.en.mdx'];
} else if (defaultLanguage === 'zh-CN') {
// 中文环境下的普通路由
globPattern = ['./content/docs/**/*.mdx'];
} else {
// 英文环境下的普通路由
globPattern = ['./content/docs/**/*.en.mdx'];
}
const files = await fg(globPattern);
const files = await fg(globPattern, { caseSensitiveMatch: true });
const urls = await Promise.all(
files.map(async (file: string) => {
const urlPath = filePathToUrl(file, defaultLanguage);
return `${urlPath}`;
})
);
// 转换文件路径为 URL,并过滤黑名单
const urls = files
.map((file) => filePathToUrl(file, defaultLanguage))
.filter((url) => !isBlacklisted(url));
// 按URL排序
urls.sort((a, b) => a.localeCompare(b));
// 生成HTML链接列表
const html = `
<html>
<head>
<title>FastGPT Documentation Links</title>
<title>FastGPT 文档目录</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
h1 { color: #333; }
@@ -78,7 +71,7 @@ export async function GET(request: Request) {
<body>
<h1>Documentation Links</h1>
<ul>
${urls.map(url => `<li><a href="${url}">${url}</a></li>`).join('')}
${urls.map((url) => `<li><a href="${url}">${url}</a></li>`).join('')}
</ul>
</body>
</html>
@@ -86,7 +79,7 @@ export async function GET(request: Request) {
return new Response(html, {
headers: {
'Content-Type': 'text/html',
},
'Content-Type': 'text/html'
}
});
}
}
+86
View File
@@ -0,0 +1,86 @@
import type { NextRequest } from 'next/server';
import { NextResponse } from 'next/server';
import fs from 'fs/promises';
import path from 'path';
// Absolute path of the docs content directory: `content/docs` resolved against the process's current working directory.
const docsRoot = path.resolve(process.cwd(), 'content/docs');
/**
 * Decides whether a `meta.json` page entry should be skipped because it cannot
 * name a page.
 *
 * An entry is rejected when it is empty or not a string, contains a markdown
 * link (`[text](target)`), starts with `http://` or `https://`, contains any
 * parenthesis, or is wrapped in `---` markers (optionally surrounded by
 * whitespace).
 *
 * @param str Raw entry taken from the `pages` array.
 * @returns `true` when the entry must be skipped, `false` when it may name a page.
 */
function isInvalidPage(str: string): boolean {
  // Non-strings and the empty string can never name a page.
  const isNonEmptyString = typeof str === 'string' && str.length > 0;
  if (!isNonEmptyString) {
    return true;
  }

  // None of these patterns carries the `g` flag, so `test` is stateless.
  const rejectPatterns = [
    /\[.*?\]\(.*?\)/, // markdown link
    /^https?:\/\//, // absolute http(s) URL
    /[()]/, // any parenthesis
    /^\s*---[\s\S]*---\s*$/ // text wrapped in `---` markers
  ];

  return rejectPatterns.some((pattern) => pattern.test(str));
}
/**
 * Strips a leading `...` from a page entry.
 *
 * @param str Page entry, optionally prefixed with `...`.
 * @returns The entry without its first three characters when it starts with
 *   `...`; otherwise the entry unchanged.
 */
function getPageName(str: string): string {
  const prefix = '...';
  if (!str.startsWith(prefix)) {
    return str;
  }
  return str.slice(prefix.length);
}
/**
 * Depth-first search for the first entry of a directory's `meta.json` that
 * resolves to an existing `.mdx` file.
 *
 * Entries of `meta.pages` are visited in order. Entries rejected by
 * `isInvalidPage` are skipped, and a leading `...` is stripped by
 * `getPageName`. For each remaining entry, `<entry>.mdx` is tried first; if
 * that file is not accessible and `<entry>` is a directory, the search
 * recurses into it.
 *
 * @param dirRelPath Directory to search, relative to `docsRoot`.
 * @returns The page path relative to `docsRoot`, without the `.mdx` extension
 *   and joined with the platform separator, or `null` when no page is found or
 *   `meta.json` is missing, unparsable, or has no `pages` array.
 */
async function findFirstValidPage(dirRelPath: string): Promise<string | null> {
  const absDir = path.join(docsRoot, dirRelPath);
  const metaPath = path.join(absDir, 'meta.json');

  let pages: unknown;
  try {
    const meta: unknown = JSON.parse(await fs.readFile(metaPath, 'utf-8'));
    if (typeof meta !== 'object' || meta === null) return null;
    pages = (meta as { pages?: unknown }).pages;
  } catch {
    // Missing or malformed meta.json: this directory offers no candidates.
    return null;
  }
  if (!Array.isArray(pages)) return null;

  for (const page of pages) {
    if (isInvalidPage(page)) continue;

    const pageName = getPageName(page);
    const pagePath = path.join(dirRelPath, pageName);
    const candidateDir = path.join(docsRoot, pagePath);

    try {
      await fs.access(candidateDir + '.mdx');
      return pagePath;
    } catch {
      // No such .mdx file; the entry may still be a directory.
    }

    // A bare `...` (empty name after stripping) or `.` resolves back to the
    // directory being searched; recursing into it would never terminate.
    if (path.resolve(candidateDir) === path.resolve(absDir)) continue;

    try {
      const stat = await fs.stat(candidateDir);
      if (stat.isDirectory()) {
        const nested = await findFirstValidPage(pagePath);
        if (nested) return nested;
      }
    } catch {
      // Best-effort lookup: an entry that is neither a file nor a directory
      // is skipped and the next entry is tried.
    }
  }

  return null;
}
/**
 * Route handler that maps a docs URL path to a concrete page path.
 *
 * Reads the `path` query parameter, which must start with `/docs`. After the
 * `/docs` prefix and surrounding slashes are removed:
 * - if `<path>.mdx` exists under `docsRoot`, responds with the JSON string
 *   `/docs/<path>`;
 * - otherwise responds with `/docs/<first page>` as located by
 *   `findFirstValidPage`, using forward slashes;
 * - otherwise responds with status 404.
 *
 * Responds with status 400 when `path` is missing, does not start with
 * `/docs`, or resolves to a location outside `docsRoot`.
 *
 * @param req Incoming request; only its URL is read.
 * @returns A JSON response carrying the resolved path or an `error` object.
 */
export async function GET(req: NextRequest) {
  const url = new URL(req.url);
  const rawPath = url.searchParams.get('path');
  if (!rawPath || !rawPath.startsWith('/docs')) {
    return NextResponse.json({ error: 'Invalid path' }, { status: 400 });
  }

  // Strip the /docs prefix, then any leading or trailing slash.
  const relPath = rawPath.replace(/^\/docs\/?/, '').replace(/^\/|\/$/g, '');

  // The query parameter is untrusted: refuse `..` segments that would let the
  // lookup probe files or read meta.json outside the docs directory.
  const fromRoot = path.relative(docsRoot, path.resolve(docsRoot, relPath));
  const escapesRoot =
    fromRoot === '..' || fromRoot.startsWith('..' + path.sep) || path.isAbsolute(fromRoot);
  if (escapesRoot) {
    return NextResponse.json({ error: 'Invalid path' }, { status: 400 });
  }

  try {
    // Prefer an exact match: <relPath>.mdx.
    const maybeFile = path.join(docsRoot, relPath + '.mdx');
    await fs.access(maybeFile);
    return NextResponse.json('/docs/' + relPath);
  } catch {
    // No exact match: fall back to the first valid page under this path.
    const found = await findFirstValidPage(relPath);
    if (found) {
      // `found` uses the platform separator; URLs need forward slashes.
      return NextResponse.json('/docs/' + found.replace(/\\/g, '/'));
    }
    return NextResponse.json({ error: 'No valid mdx page found' }, { status: 404 });
  }
}