Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

The following is the default configuration for the module. It consists of six sections, each of which is discussed in detail below.

Code Block
{
    "glossaryConfig": {
        "scope_id": null,
        "filename": null,
        "active": false
    },
    "textPostProcessingConfig": [],
    "agentConfig": {
        "historyTokenLimit": 400,
        "languageModelName": "AZURE_GPT_4_0613",
        "proposedLanguagesForTranslation": ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Dutch", "Russian", "Swedish"],
        "supportedDocEndings": [".docx", ".xlsx", ".pptx", ".pdf"]
    },
    "agentPromptConfig": {
        "systemPrompt": "You are a friendly person that helps our customers to translated uploaded documents.\nTherefore, you must find out which uploaded documents they want to translate and to what language before starting the translation.\nIf you cannot find the information from the conversation, nicely ask them to provide it or to upload a document.\nYou have access to tools that can help you with these tasks, but use only one at the time elsewise it is complicated for the user.\nNote that at the moment, only pdf, word, powerpoint and excel documents can be translated, thus the files must end with an .pdf, .docx, .xlsx or .pptx file extension.\nMention this if the user uploads other files."
    },
    "translatorPromptConfig": {
        "systemPromptInstruction": "You are a helpful AI designed to to translate text to a specified language.\nDo it even if the target language is the same as the source language.\nMake sure the translated text contains the same amount of carriage returns '\\n' as the original text block.\nTry to keep the translated text as close to the original as possible and having approximately the same lenght.",
        "inputPromptInstructionTemplate": "List of text blocks to be translated to ${language}:\n\n<INPUT>\n${input_block}\n</INPUT>",
        "outputPromptInstructionTemplate": "<OUTPUT>\n${output_block}\n</OUTPUT>"
    },
    "translatorUserConfig": {
        "languageModelName": "AZURE_GPT_4_0613",
        "maxTokensPerTranlationRequest": 1000,
        "maxTokenPerMinute": 40000,
        "allowedInputLanguages": [
            "Afrikaans", "Albanian", "Arabic", "Aragonese", "Armenian", "Azeri", "Bashkir", "Basque", "Belarusian", "Bengali", "Bislama",
            "Bosnian", "Breton", "Bulgarian", "Burmese", "Catalan", "Chamorro", "Chechen", "Chinese", "Cornish", "Corsican", "Croatian",
            "Czech", "Danish", "Dutch", "English", "Esperanto", "Estonian", "Ewe", "Faroese", "Fijian", "Finnish", "French", "Galician",
            "Georgian", "German", "Greek", "Greenlandic", "Guaran\u00ed", "Haitian Creole", "Hausa", "Hebrew", "Hindi", "Hungarian", "Icelandic",
            "Ido", "Indonesian", "Interlingua", "Interlingue", "Inuktitut", "Irish", "Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer",
            "Korean", "Kurdish", "Kyrgyz", "Lao", "Latin", "Latvian", "Limburgish", "Lingala", "Lithuanian", "Luxembourgish", "Macedonian", "Malagasy",
            "Malay", "Malayalam", "Maltese", "Manx", "Maori", "Marathi", "Marshallese", "Mongolian", "Navajo", "Nepali", "Northern Sami", "Norwegian",
            "Norwegian Bokm\u00e5l", "Norwegian Nynorsk", "Occitan", "Ojibwe", "Old Church Slavonic", "Ossetian", "Pashto", "Persian", "Polish",
            "Portuguese", "Punjabi", "Quechua", "Romanian", "Romansch", "Russian", "Samoan", "Sanskrit", "Sardinian", "Scottish Gaelic", "Serbian",
            "Serbo-Croatian", "Sichuan Yi", "Sindhi", "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili", "Swedish", "Tagalog",
            "Tahitian", "Tajik", "Tamil", "Tatar", "Telugu", "Thai", "Tibetan", "Tongan", "Tswana", "Turkish", "Turkmen", "Ukrainian", "Urdu",
            "Uyghur", "Uzbek", "Vietnamese", "Volap\u00fck", "Walloon", "Welsh", "West Frisian", "Yiddish", "Yoruba", "Zhuang", "Zulu"
        ]
    }
}

Parameter Description

GlossaryConfig

The glossary is expected to be in an .xlsx file within the knowledge base.

Parameter

Description

Default Value

scope_id

The scope id within the knowledge base

""

filename

The filename of the .xlsx file

""

active

If the glossary is used or not

False

Code Block
{
  "scope_id": "",
  "filename": "",
  "active": false
}

PostProcessorConfig
The post processors are a list of text processors that are applied to translations

Parameter

Description

Default Value

name

Name of the processor

active

If the processor is used or not

False

applied_to_languages

The languages that will be processed by this processor.

[]

At the moment there are two valid processors: "Replace sharp s with ss" and "American to British".

Code Block
[{
    "name": "Replace sharp s with ss",
    "active": true,
    "applied_to_languages": ["German"]
},
{
    "name": "American to British",
    "active": true,
    "applied_to_languages": ["English"]
}]

AgentConfig

Parameter

Description

Default

historyTokenLimit

The number of tokens used from the history when calling the LLM.

400

languageModelName

The name of the language model to use for the agent.

"AZURE_GPT_4_TURBO_2024_0409"

proposedLanguagesForTranslation

The languages that the agent proposes to translate to.

["English", "Spanish", "French", "German", "Italian", "Portuguese", "Dutch", "Russian", "Swedish"]

supportedDocEndings

The supported document endings for translation.

[".pdf", ".docx", ".xlsx", ".pptx"]

Example

Code Block
languagejson
{
    "historyTokenLimit": 400,
    "languageModelName": "AZURE_GPT_4_TURBO_2024_0409",
    "proposedLanguagesForTranslation": ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Dutch", "Russian", "Swedish"],
    "supportedDocEndings": [".pdf", ".docx", ".xlsx", ".pptx"]
}

AgentPromptConfig
❗Only adjust prompts if you are fully familiar with the code logic. Small changes can break the module or reduce the output quality.

Parameter

Description

systemPrompt

System prompt for document translation agent.

Default Value

Code Block
You are a friendly person that helps our customers to translate uploaded documents.
Therefore, you must find out which uploaded documents they want to translate and to what
language before starting the translation. If you cannot find the information from
the conversation, nicely ask them to provide it or to upload a document.
You have access to tools that can help you with these tasks, but use only one at a time
else it is complicated for the user.
Note that at the moment, only PDF, Word, PowerPoint and Excel documents can be translated, thus the files must
end with a .pdf, .docx, .xlsx or .pptx file extension. Mention this if the user uploads other files.

...

TranslatorPromptConfig
❗Only adjust prompts if you are fully familiar with the code logic. Small changes can break the module or reduce the output quality.

Parameter

Description

Default Value

systemPromptInstruction

System prompt instruction for the document translation module.

See below

inputPromptInstructionTemplate

Input prompt for translating an array of text blocks with word formatting tags to a specified language.

See below

outputPromptInstructionTemplate

Output prompt instruction template.

See below

availableLanguagesForTranslationAsIso6391Codes

Available languages for translation as ISO 639-1 codes.

["en", "es", "fr", "de"]

Default Values

  • systemPromptInstruction:

    Code Block
    You are a helpful AI designed to translate text to a specified language.
    Do it even if the target language is the same as the source language.
    Make sure the translated text contains the same amount of carriage returns
    '\n' as the original text block and keep the number of characters per line
    approximately the same.
  • inputPromptInstructionTemplate:

    Code Block
    List of text blocks to be translated to ${language}:
    
    <INPUT>
    ${input_block}
    </INPUT>
  • outputPromptInstructionTemplate:

    Code Block
    <OUTPUT>
    ${output_block}
    </OUTPUT>

...

Code Block
languagejson
{
    "systemPromptInstruction": "You are a helpful AI designed to translate text to a specified language. Do it even if the target language is the same as the source language. Make sure the translated text contains the same amount of carriage returns '\\n' as the original text block and keep the number of characters per line approximately the same.",
    "inputPromptInstructionTemplate": "List of text blocks to be translated to ${language}:\n\n<INPUT>\n${input_block}\n</INPUT>",
    "outputPromptInstructionTemplate": "<OUTPUT>\n${output_block}\n</OUTPUT>",
    "availableLanguagesForTranslationAsIso6391Codes": ["en",
"es", "fr", "de"]
}

TranslatorUserConfig

Parameter

Description

Default Value

languageModelName

The model that will be used to translate between languages.

"AZURE_GPT_4_0613"

maxTokensPerTranlationRequest

The maximum number of tokens that will be translated at once. If the model cannot handle more than this many tokens in a single request then it will be split into multiple requests.

1000

maxTokenPerMinute

The maximum number of tokens available for translation tasks per minute.

40000

prependKeywordTranlatedDocument

The keyword that will be prepended to the translated document name.

"translated_"

allowedInputLanguages

Languages that can be recognized in order to use the correspondingly configured few-shot examples, glossary for translation and postprocessing of text.

["Afrikaans", ..., "Zulu"]

Full Example

Code Block
languagejson
{
    "languageModelName": "AZURE_GPT_4_0613",
    "maxTokensPerTranlationRequest": 1000,
    "maxTokenPerMinute": 40000,
    "prependKeywordTranlatedDocument": "translated_",
    "allowedInputLanguages": ["German", "Italian", "Spanish", "Russian"]
}