Question
How can I check the list of characters that are used for recognition in a specific language?
Answer
Some languages contain language-specific symbols. To see exactly which characters may appear in the recognition output for a given language, use the following code:
// Builds a report of the character sets defined for one predefined recognition
// language and shows it in a message box.
System.Text.StringBuilder supportedCharacters = new System.Text.StringBuilder();
string languageName = "French";
// NOTE(review): presumably Find returns null for an unknown language name, which
// would make the .TextLanguage access throw - confirm against the engine docs.
FREngine.ITextLanguage textLanguage = engineLoader.Engine.PredefinedLanguages.Find(languageName).TextLanguage;
// Report the alphabet, prefix and suffix letter sets of every base language
// that belongs to the text language.
foreach (FREngine.IBaseLanguage baseLanguage in textLanguage.BaseLanguages) {
    supportedCharacters.AppendFormat("Alphabet letters: {0}\n", baseLanguage.LetterSet[FREngine.BaseLanguageLetterSetEnum.BLLS_Alphabet]);
    supportedCharacters.AppendFormat("Prefixes: {0}\n", baseLanguage.LetterSet[FREngine.BaseLanguageLetterSetEnum.BLLS_Prefixes]);
    supportedCharacters.AppendFormat("Suffixes: {0}\n", baseLanguage.LetterSet[FREngine.BaseLanguageLetterSetEnum.BLLS_Suffixes]);
}
// Only the inter-word punctuators are printed on this line. The text-language
// prefix and suffix sets (TLLS_Prefixes, TLLS_Suffixes) are deliberately not
// passed: the format string has a single {0} placeholder, so AppendFormat
// would silently ignore any additional arguments.
supportedCharacters.AppendFormat("Punctuation marks: {0}\n", textLanguage.LetterSet[FREngine.TextLanguageLetterSetEnum.TLLS_InterwordPunctuators]);
MessageBox.Show(supportedCharacters.ToString());
Sample output for English (with languageName set to "English"):
Alphabet letters: '-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz’
Prefixes: "'(-.[{©—‘’“•■□▲△►▻▼▽◄◅◊◎◦★☆♦✓❖
Suffixes: !"')*,-.:;?]}©®—’”™
Punctuation marks: !"#$%&'()*+,-./:;<=>?[]_{}£¥§©«°»—’“”„•€■□▲△►▻▼▽◄◅◊◎◦★☆♦✓❖
Sample output for French (note the additional symbols: ÀÂÆÇÈÉÊËÎÏÔÙÛÜàâæçèéêëîïôùûüÿŒœŸ):
Alphabet letters: '-.ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzÀÂÆÇÈÉÊËÎÏÔÙÛÜàâæçèéêëîïôùûüÿŒœŸ’
Prefixes: "'(-.[{©«—‘“•■□▲△►▻▼▽◄◅◊◎◦★☆♦✓❖
Suffixes: !"')*,-.:;?]}©®»—’”™
Punctuation marks: !"#$%&'()*+,-./:;<=>?[]_{}£¥§©«°»—’“”„•€■□▲△►▻▼▽◄◅◊◎◦★☆♦✓❖
Comments
0 comments
Please sign in to leave a comment.