feat: Text check before synchronization (#689)

* fix: icon

* fix: web selector

* fix: web selector

* perf: link sync

* dev doc

* chmod doc

* perf: git intro

* 466 intro

* intro img

* add json editor (#5)

* team limit

* websync limit

* json editor

* text editor

* perf: search test

* change cq value type

* doc

* intro img

---------

Co-authored-by: heheer <71265218+newfish-cmyk@users.noreply.github.com>
This commit is contained in:
Archer
2024-01-04 23:19:24 +08:00
committed by GitHub
parent c2abbb579f
commit 828829011a
64 changed files with 1789 additions and 1489 deletions

View File

@@ -41,6 +41,7 @@ export type FileItemType = {
type: DatasetCollectionTypeEnum.file | DatasetCollectionTypeEnum.link;
fileId?: string;
rawLink?: string;
metadata?: Record<string, any>;
};
export interface Props extends BoxProps {
@@ -232,7 +233,7 @@ const FileSelect = ({
// link fetch
const onUrlFetch = useCallback(
(e: UrlFetchResponse) => {
const result: FileItemType[] = e.map<FileItemType>(({ url, content }) => {
const result: FileItemType[] = e.map<FileItemType>(({ url, content, selector }) => {
const { chunks, tokens } = splitText2Chunks({
text: content,
chunkLen,
@@ -250,7 +251,10 @@ const FileSelect = ({
chunks: chunks.map((chunk) => ({
q: chunk,
a: ''
}))
})),
metadata: {
webPageSelector: selector
}
};
});
onPushFiles(result);

View File

@@ -156,19 +156,24 @@ const Provider = ({
return formatModelPrice2Read(totalTokens * inputPrice);
}, [inputPrice, mode, outputPrice, totalTokens]);
/* start upload data */
/*
start upload data
1. create training bill
2. create collection
3. upload chunks
*/
const { mutate: onclickUpload, isLoading: uploading } = useRequest({
mutationFn: async (props?: { prompt?: string }) => {
const { prompt } = props || {};
let totalInsertion = 0;
for await (const file of files) {
const chunks = file.chunks;
// create training bill
const billId = await postCreateTrainingBill({
name: t('dataset.collections.Create Training Data', { filename: file.filename }),
vectorModel,
agentModel
});
// create a file collection
const collectionId = await postDatasetCollection({
datasetId,
@@ -181,10 +186,12 @@ const Provider = ({
trainingType: collectionTrainingType,
qaPrompt: mode === TrainingModeEnum.qa ? prompt : '',
rawTextLength: file.rawText.length,
hashRawText: hashStr(file.rawText)
hashRawText: hashStr(file.rawText),
metadata: file.metadata
});
// upload data
// upload chunks
const chunks = file.chunks;
const { insertLen } = await chunksUpload({
collectionId,
billId,