feat: the ratio can now be configured for more models (close #53)

This commit is contained in:
JustSong
2023-05-11 20:59:35 +08:00
parent 241ade2fae
commit d9db16e999
6 changed files with 95 additions and 74 deletions

View File

@@ -118,24 +118,22 @@ func relayHelper(c *gin.Context) error {
defer func() {
if consumeQuota {
quota := 0
usingGPT4 := strings.HasPrefix(textRequest.Model, "gpt-4")
completionRatio := 1
if usingGPT4 {
completionRatio = 2
}
if isStream {
var text string
var promptText string
for _, message := range textRequest.Messages {
text += fmt.Sprintf("%s: %s\n", message.Role, message.Content)
promptText += fmt.Sprintf("%s: %s\n", message.Role, message.Content)
}
text += fmt.Sprintf("%s: %s\n", "assistant", streamResponseText)
quota = countToken(text) + 3
completionText := fmt.Sprintf("%s: %s\n", "assistant", streamResponseText)
quota = countToken(promptText) + countToken(completionText)*completionRatio + 3
} else {
quota = textResponse.Usage.TotalTokens
}
ratio := common.RatioGPT3dot5
if strings.HasPrefix(textRequest.Model, "gpt-4-32k") {
ratio = common.RatioGPT4_32k
} else if strings.HasPrefix(textRequest.Model, "gpt-4") {
ratio = common.RatioGPT4
} else {
ratio = common.RatioGPT3dot5
quota = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens*completionRatio
}
ratio := common.GetModelRatio(textRequest.Model)
quota = int(float64(quota) * ratio)
err := model.DecreaseTokenQuota(tokenId, quota)
if err != nil {