mirror of
https://github.com/labring/FastGPT.git
synced 2026-06-15 01:08:07 +08:00
fix: resole crawl cannot get docs (#5344)
This commit is contained in:
@@ -1,71 +1,64 @@
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import fg from 'fast-glob';
|
||||
import matter from 'gray-matter';
|
||||
import { i18n } from '@/lib/i18n';
|
||||
|
||||
export const revalidate = false;
|
||||
|
||||
// 将文件路径转换为URL路径
|
||||
// 黑名单路径(不带语言前缀)
|
||||
const blacklist = ['use-cases/index', 'protocol/index', 'api/index'];
|
||||
|
||||
// 将文件路径转换为 URL 路径(包括文件名)
|
||||
function filePathToUrl(filePath: string, defaultLanguage: string): string {
|
||||
// 移除 ./content/docs/ 前缀
|
||||
let urlPath = filePath.replace('./content/docs/', '');
|
||||
|
||||
// 确定基础路径
|
||||
let relativePath = filePath.replace('./content/docs/', '');
|
||||
|
||||
const basePath = defaultLanguage === 'zh-CN' ? '/docs' : '/en/docs';
|
||||
|
||||
// 如果是英文文件,移除 .en 后缀
|
||||
if (defaultLanguage !== 'zh-CN' && urlPath.endsWith('.en.mdx')) {
|
||||
urlPath = urlPath.replace('.en.mdx', '');
|
||||
} else if (urlPath.endsWith('.mdx')) {
|
||||
urlPath = urlPath.replace('.mdx', '');
|
||||
|
||||
if (defaultLanguage !== 'zh-CN' && relativePath.endsWith('.en.mdx')) {
|
||||
relativePath = relativePath.replace(/\.en\.mdx$/, '');
|
||||
} else if (relativePath.endsWith('.mdx')) {
|
||||
relativePath = relativePath.replace(/\.mdx$/, '');
|
||||
}
|
||||
|
||||
// 处理 index 文件
|
||||
if (urlPath.endsWith('/index')) {
|
||||
urlPath = urlPath.replace('/index', '');
|
||||
}
|
||||
|
||||
// 拼接完整路径
|
||||
return `${basePath}/${urlPath}`.replace(/\/\/+/g, '/');
|
||||
|
||||
return `${basePath}/${relativePath}`.replace(/\/\/+/g, '/');
|
||||
}
|
||||
|
||||
/**
 * Tells whether a generated docs URL is on the blacklist.
 *
 * A URL counts as blacklisted when it ends with a blacklist entry placed under
 * either the `/docs/` prefix or the `/en/docs/` prefix.
 *
 * @param url - URL path to check.
 * @returns `true` when the URL matches any blacklist entry, otherwise `false`.
 */
function isBlacklisted(url: string): boolean {
  for (const entry of blacklist) {
    const defaultSuffix = `/docs/${entry}`;
    const englishSuffix = `/en/docs/${entry}`;
    if (url.endsWith(defaultSuffix) || url.endsWith(englishSuffix)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
export async function GET(request: Request) {
|
||||
const defaultLanguage = i18n.defaultLanguage;
|
||||
|
||||
// 检查请求路径是否为 /en/robots
|
||||
|
||||
const requestUrl = new URL(request.url);
|
||||
const isEnRobotsRoute = requestUrl.pathname === '/en/robots';
|
||||
|
||||
let globPattern;
|
||||
|
||||
if (isEnRobotsRoute) {
|
||||
// 如果是 /en/robots 路由,只选择 .en.mdx 文件
|
||||
globPattern = ['./content/docs/**/*.en.mdx'];
|
||||
} else if (defaultLanguage === 'zh-CN') {
|
||||
// 中文环境下的普通路由
|
||||
globPattern = ['./content/docs/**/*.mdx'];
|
||||
} else {
|
||||
// 英文环境下的普通路由
|
||||
globPattern = ['./content/docs/**/*.en.mdx'];
|
||||
}
|
||||
|
||||
const files = await fg(globPattern);
|
||||
const files = await fg(globPattern, { caseSensitiveMatch: true });
|
||||
|
||||
const urls = await Promise.all(
|
||||
files.map(async (file: string) => {
|
||||
const urlPath = filePathToUrl(file, defaultLanguage);
|
||||
return `${urlPath}`;
|
||||
})
|
||||
);
|
||||
// 转换文件路径为 URL,并过滤黑名单
|
||||
const urls = files
|
||||
.map((file) => filePathToUrl(file, defaultLanguage))
|
||||
.filter((url) => !isBlacklisted(url));
|
||||
|
||||
// 按URL排序
|
||||
urls.sort((a, b) => a.localeCompare(b));
|
||||
|
||||
// 生成HTML链接列表
|
||||
const html = `
|
||||
<html>
|
||||
<head>
|
||||
<title>FastGPT Documentation Links</title>
|
||||
<title>FastGPT 文档目录</title>
|
||||
<style>
|
||||
body { font-family: Arial, sans-serif; margin: 20px; }
|
||||
h1 { color: #333; }
|
||||
@@ -78,7 +71,7 @@ export async function GET(request: Request) {
|
||||
<body>
|
||||
<h1>Documentation Links</h1>
|
||||
<ul>
|
||||
${urls.map(url => `<li><a href="${url}">${url}</a></li>`).join('')}
|
||||
${urls.map((url) => `<li><a href="${url}">${url}</a></li>`).join('')}
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
@@ -86,7 +79,7 @@ export async function GET(request: Request) {
|
||||
|
||||
return new Response(html, {
|
||||
headers: {
|
||||
'Content-Type': 'text/html',
|
||||
},
|
||||
'Content-Type': 'text/html'
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
import type { NextRequest } from 'next/server';
|
||||
import { NextResponse } from 'next/server';
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
|
||||
const docsRoot = path.resolve(process.cwd(), 'content/docs');
|
||||
|
||||
/**
 * Decides whether a `pages` entry should be skipped when looking for a page.
 *
 * An entry is rejected when it is not a non-empty string, or when it matches one of:
 * - a Markdown-style link `[text](target)`
 * - a string starting with `http://` or `https://`
 * - any string containing `(` or `)`
 * - a string that starts and ends with `---` (surrounding whitespace ignored)
 *
 * @param str - Raw entry value to inspect.
 * @returns `true` when the entry must be skipped, `false` when it may name a page.
 */
function isInvalidPage(str: string): boolean {
  // The runtime type check is kept because entries come from parsed JSON.
  const isNonEmptyString = typeof str === 'string' && str.length > 0;
  if (!isNonEmptyString) {
    return true;
  }

  const rejectPatterns: RegExp[] = [
    /\[.*?\]\(.*?\)/,
    /^https?:\/\//,
    /[()]/,
    /^\s*---[\s\S]*---\s*$/
  ];

  return rejectPatterns.some((pattern) => pattern.test(str));
}
|
||||
|
||||
/**
 * Strips a leading `...` marker from a `pages` entry.
 *
 * @param str - Entry text, with or without the `...` prefix.
 * @returns The text after the first three characters when the entry starts with
 *   `...`; otherwise the entry unchanged.
 */
function getPageName(str: string): string {
  const prefix = '...';
  if (!str.startsWith(prefix)) {
    return str;
  }
  return str.slice(prefix.length);
}
|
||||
|
||||
/**
 * Finds the first entry in a directory's `meta.json` that leads to an existing
 * `.mdx` file, descending into sub-directories when an entry names a directory.
 *
 * Entries are tried in the order they appear in `meta.pages`. Entries rejected by
 * `isInvalidPage` are skipped, and a leading `...` is removed via `getPageName`.
 *
 * @param dirRelPath - Directory to scan, relative to `docsRoot`.
 * @returns The matching page path relative to `docsRoot`, without the `.mdx`
 *   extension and using the platform path separator; `null` when `meta.json`
 *   cannot be read or parsed, has no `pages` array, or no entry leads to a page.
 */
async function findFirstValidPage(dirRelPath: string): Promise<string | null> {
  const absDir = path.join(docsRoot, dirRelPath);
  const metaPath = path.join(absDir, 'meta.json');

  try {
    const metaRaw = await fs.readFile(metaPath, 'utf-8');
    const meta = JSON.parse(metaRaw);
    if (!Array.isArray(meta.pages)) return null;

    for (const page of meta.pages) {
      if (isInvalidPage(page)) continue;

      const pageName = getPageName(page);
      const pagePath = path.join(dirRelPath, pageName);

      const candidateDir = path.join(docsRoot, pagePath);
      const candidateFile = candidateDir + '.mdx';

      try {
        await fs.access(candidateFile);
        return pagePath;
      } catch {
        // No such .mdx file: the entry may name a directory instead.
        try {
          const stat = await fs.stat(candidateDir);

          // A bare "..." entry (or ".") yields an empty/self page name, so the
          // candidate is the directory already being scanned. Recursing into it
          // would re-read the same meta.json forever, so skip it.
          const pointsAtSelf = path.resolve(candidateDir) === path.resolve(absDir);

          if (stat.isDirectory() && !pointsAtSelf) {
            const recursiveResult = await findFirstValidPage(pagePath);
            if (recursiveResult) return recursiveResult;
          }
        } catch {
          // Entry is neither a file nor a directory: move on to the next entry.
        }
      }
    }
  } catch {
    // Missing or malformed meta.json is treated as "no page found" (best effort).
  }

  return null;
}
|
||||
|
||||
/**
 * Resolves a docs path (from the `path` query parameter) to a concrete page path.
 *
 * Responses:
 * - 400 `{ error: 'Invalid path' }` when `path` is missing, does not start with
 *   `/docs`, or resolves to a location outside `docsRoot`.
 * - 200 with the JSON string `/docs/<relPath>` when `<relPath>.mdx` exists.
 * - 200 with the JSON string `/docs/<page>` for the first page found by
 *   `findFirstValidPage` when the direct file does not exist.
 * - 404 `{ error: 'No valid mdx page found' }` otherwise.
 *
 * @param req - Incoming request; only its URL's `path` query parameter is read.
 */
export async function GET(req: NextRequest) {
  const url = new URL(req.url);
  const rawPath = url.searchParams.get('path');

  if (!rawPath || !rawPath.startsWith('/docs')) {
    return NextResponse.json({ error: 'Invalid path' }, { status: 400 });
  }

  // Strip the /docs prefix and trim one leading/trailing slash.
  const relPath = rawPath.replace(/^\/docs\/?/, '').replace(/^\/|\/$/g, '');

  // `path` is caller-controlled: refuse anything (e.g. "../" segments) that would
  // resolve outside the docs directory, so the filesystem cannot be probed.
  const resolvedTarget = path.join(docsRoot, relPath);
  const staysInsideDocs =
    resolvedTarget === docsRoot || resolvedTarget.startsWith(docsRoot + path.sep);
  if (!staysInsideDocs) {
    return NextResponse.json({ error: 'Invalid path' }, { status: 400 });
  }

  try {
    // First check whether the path maps directly to an .mdx file.
    const maybeFile = path.join(docsRoot, relPath + '.mdx');
    await fs.access(maybeFile);
    // It exists: return the full path (with the /docs prefix).
    return NextResponse.json('/docs/' + relPath);
  } catch {
    // It does not exist: search recursively for the first valid page.
    const found = await findFirstValidPage(relPath);
    if (found) {
      // Return the full path with the /docs prefix, using forward slashes.
      return NextResponse.json('/docs/' + found.replace(/\\/g, '/'));
    } else {
      return NextResponse.json({ error: 'No valid mdx page found' }, { status: 404 });
    }
  }
}
|
||||
Reference in New Issue
Block a user