DVBCore  17.9.0
Open Source DVB Engine
 All Data Structures Files Functions Typedefs Macros Pages
Macros | Functions
stbuni.c File Reference

Contains Unicode string handling functions for STB usage. More...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <ctype.h>
#include <techtype.h>
#include <dbgfuncs.h>
#include "asciimap.h"
#include "stbheap.h"
#include "stbuni.h"
#include "stbhuffman.h"

Macros

#define UTF16_HEADER_VALUE   0x11
 
#define UTF8_HEADER_VALUE   0x15
 
#define UNICODE_HEADER_POS   0
 
#define LENGTH_LOOP_LIMIT   INVALID_UNICODE_CHAR
 
#define STRINGS_EQUAL   0
 
#define FIRST_STRING_GREATER   1
 
#define SECOND_STRING_GREATER   -1
 
#define MAX_NUMBER_DIGITS   11
 
#define UNI_PERCENT_CHAR   0x0025
 
#define UNI_SMALL_D_CHAR   0x0064
 
#define UNI_SMALL_H_CHAR   0x0068
 
#define UNI_SMALL_U_CHAR   0x0075
 
#define UNI_SMALL_L_CHAR   0x006C
 
#define UNI_SMALL_S_CHAR   0x0073
 
#define UNI_SMALL_X_CHAR   0x0078
 
#define UNI_LARGE_X_CHAR   0x0058
 
#define UNI_ZERO_CHAR   0x0030
 
#define UNI_NINE_CHAR   0x0039
 
#define UNI_SUR_HIGH_START   0xD800
 
#define UNI_SUR_HIGH_END   0xDBFF
 
#define UNI_SUR_LOW_START   0xDC00
 
#define UNI_SUR_LOW_END   0xDFFF
 
#define UNI_REPLACEMENT_CHAR   0x0000FFFD
 
#define UNI_SMALL_L_SMALL_D_CHARS   ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_D_CHAR)
 
#define UNI_SMALL_L_SMALL_U_CHARS   ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_U_CHAR)
 
#define UNI_SMALL_L_SMALL_X_CHARS   ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_X_CHAR)
 
#define UNI_SMALL_L_LARGE_X_CHARS   ((UNI_SMALL_L_CHAR << 16) | UNI_LARGE_X_CHAR)
 
#define UNI_SMALL_H_SMALL_D_CHARS   ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_D_CHAR)
 
#define UNI_SMALL_H_SMALL_U_CHARS   ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_U_CHAR)
 
#define UNI_SMALL_H_SMALL_X_CHARS   ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_X_CHAR)
 
#define UNI_SMALL_H_LARGE_X_CHARS   ((UNI_SMALL_H_CHAR << 16) | UNI_LARGE_X_CHAR)
 
#define MAX_DECODE_BUFFER_SIZE   255 /* Buffer size used for decoding compressed strings */
 
#define MAX_NUM_FORMAT_SPEC_STR_SIZE   6 /* max size of numeric format specifier string e.g. "%011ld" */
 
#define MAX_NUM_WIDTH_DIGITS
 

Functions

U32BIT STB_UnicodeStringLen (U8BIT *string_ptr)
 Determines the length, in characters, of the given unicode string by searching for NULL. Count ignores the unicode header value. More...
 
BOOLEAN STB_IsUnicodeStringReversed (U8BIT *string_ptr)
 Checks to see if the supplied string is unicode and if it is reversed (arabic) More...
 
BOOLEAN STB_IsUnicodeString (U8BIT *string_ptr)
 Tests for unicode string. More...
 
BOOLEAN STB_IsNormalString (U8BIT *string_ptr)
 Tests for normal ascii string. More...
 
U32BIT STB_GetNumBytesInString (U8BIT *string_ptr)
 Determines the no of bytes of the given string. More...
 
U8BIT * STB_SetUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id, U16BIT code)
 Takes a string and changes the requested location to a new value. This request may involve appending to the string in which case the string is extended (always extended by one character, independent of char_id). More...
 
U8BIT * STB_DeleteUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id)
 Takes a string and removes the requested location, shuffling any following data down (thus removing gap) More...
 
U32BIT STB_GetUnicodeStringChar (U8BIT *string_ptr, U16BIT char_id)
 Retrieves the unicode value pointed to by char_id within the given string. If an invalid request occurs (ie char_id is beyond string limit) then 0 is returned. More...
 
U8BIT * STB_ConcatUnicodeStrings (U8BIT *string1_ptr, U8BIT *string2_ptr)
 Appends the contents of string2_ptr to string1_ptr and returns a pointer to the newly created string. More...
 
U8BIT * STB_UnicodeStringTokenise (U8BIT *string, U8BIT **save_ptr)
 Divides the (space separated) string up into individual words and returns them one per call. More...
 
U8BIT * STB_UnicodeStrStr (U8BIT *str1, U8BIT *str2, BOOLEAN ignore_case)
 Finds the first occurrence of str2 in str1 and returns a pointer to the substring (as per strstr) More...
 
S8BIT STB_CompareUnicodeStrings (U8BIT *string1_ptr, U8BIT *string2_ptr, BOOLEAN exact_match, BOOLEAN ignore_case)
 Compares the contents of the two given unicode strings and returns the status (as per strcmp) More...
 
U8BIT * STB_ConvertStringToUnicode (U8BIT *string, BOOLEAN *reverse_dir, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
 Converts the specified DVB coded string into a unicode string, counting the number of characters and checking for right-to-left characters as it goes. More...
 
U8BIT * STB_ConvertStringToUTF8 (U8BIT *string, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
 Converts the given DVB coded string into a UTF-8 unicode string. The returned string will be preceded by the DVB byte, 0x15, indicating the string is UTF-8 format. The returned string should be freed using STB_ReleaseUnicodeString. More...
 
void STB_ReleaseUnicodeString (U8BIT *string)
 Releases the specified unicode string, freeing associated heap resources. More...
 
U8BIT * STB_ConvertUTF16toUTF8 (U8BIT *src, U32BIT *outlen)
 Converts the given string from UTF-16 to UTF-8 and returns a new string. The returned string should be freed using STB_ReleaseUnicodeString. More...
 
BOOLEAN STB_IsStringEmpty (U8BIT *string_ptr)
 Checks for a string of only spaces. More...
 
void STB_SetDefaultAsciiTable (U8BIT table)
 Sets default ascii table to be used, if not overridden by the table index at the start of a string. More...
 
S8BIT STB_CompareStringsIgnoreCase (U8BIT *string1_ptr, U8BIT *string2_ptr)
 Compares the contents of the two given ASCII strings and returns the status (as per strcmp) but ignores case. More...
 
U8BIT * STB_FormatUnicodeString (BOOLEAN strip_DVB_cntrl_char, BOOLEAN *reverse_dir, const U8BIT *const format_ptr,...)
 Unicode version of sprintf. More...
 
U8BIT * STB_UnicodeInsertString (U8BIT *src_str, U16BIT insert_pos, U8BIT *insert_str, BOOLEAN replace_char)
 Creates a new string by inserting one string into another at a given position, with the option of replacing the char at the given position. Strings can be passed as DVB or unicode, but output will always be unicode and the resulting string must be freed. More...
 
U8BIT * STB_UnicodeStripControlChars (U8BIT *string_ptr)
 Strips the DVB control characters from a string that's already in UTF-8 or UTF-16 format. The control chars that are stripped are DVB emphasis on/off and DVB CR/LF. The input string isn't changed and the returned string must be freed by calling STB_ReleaseUnicodeString. More...
 

Detailed Description

Contains Unicode string handling functions for STB usage.

Date
31/05/2001

Macro Definition Documentation

#define MAX_NUM_WIDTH_DIGITS
Value:
3 /* max number of digits to specify number width
in numeric format specifier e.g. "011" in "%011ld" */

Function Documentation

S8BIT STB_CompareStringsIgnoreCase ( U8BIT *  string1_ptr,
U8BIT *  string2_ptr 
)

Compares the contents of the two given ASCII strings and returns the status (as per strcmp) but ignores case.

Parameters
string1_ptr - Pointer to the 'master' string
string2_ptr - Pointer to the 'slave' string
Returns
Result of the comparison. 0 if equal, +ve if string1_ptr > string2_ptr, -ve if string1_ptr < string2_ptr.
S8BIT STB_CompareUnicodeStrings ( U8BIT *  string1_ptr,
U8BIT *  string2_ptr,
BOOLEAN  exact_match,
BOOLEAN  ignore_case 
)

Compares the contents of the two given unicode strings and returns the status (as per strcmp)

Parameters
string1_ptr - Pointer to the 'master' string
string2_ptr - Pointer to the 'slave' string
exact_match - If TRUE, and the strings are the same up to the end of one of them, the lengths of the strings must also be the same for the strings to be equal
ignore_case - If TRUE, case is ignored when comparing chars, if appropriate.
Returns
Result of the comparison. 0 if equal, +ve if string1_ptr > string2_ptr, -ve if string1_ptr < string2_ptr.
U8BIT* STB_ConcatUnicodeStrings ( U8BIT *  string1_ptr,
U8BIT *  string2_ptr 
)

Appends the contents of string2_ptr to string1_ptr and returns a pointer to the newly created string.

Parameters
string1_ptr - Pointer to the first string, and hence first part of concatenated string.
string2_ptr - Pointer to the second string and hence last part of concatenated string.
Returns
Pointer to the concatenated string or NULL if failed
U8BIT* STB_ConvertStringToUnicode ( U8BIT *  string,
BOOLEAN *  reverse_dir,
U16BIT *  nchar,
BOOLEAN  strip_DVB_cntrl_char,
U32BIT  lang_code 
)

Converts the specified DVB coded string into a unicode string, counting the number of characters and checking for right-to-left characters as it goes.

Parameters
string- pointer to the string to be converted
reverse_dir- reverse print direction (passed by ref and set by this function)
nchar- number of characters (passed by ref and set by this function)
strip_DVB_cntrl_char- True if all DVB control chars are to be removed
Returns
A unicode string or NULL. NULL indicates error or NULL string pointer.
U8BIT* STB_ConvertStringToUTF8 ( U8BIT *  string,
U16BIT *  nchar,
BOOLEAN  strip_DVB_cntrl_char,
U32BIT  lang_code 
)

Converts the given DVB coded string into a UTF-8 unicode string. The returned string will be preceded by the DVB byte, 0x15, indicating the string is UTF-8 format. The returned string should be freed using STB_ReleaseUnicodeString.


Parameters
string- DVB string to be converted
nchar- number of characters, not bytes, in the returned string
strip_DVB_cntrl_char- TRUE if DVB control character codes aren't to be included in the converted string
lang_code- language code of the string, which may affect the ETSI defined character code table used when doing the conversion. If the code is 0 then the default table will be used.
Returns
UTF-8 format string
U8BIT* STB_ConvertUTF16toUTF8 ( U8BIT *  src,
U32BIT *  outlen 
)

Converts the given string from UTF-16 to UTF-8 and returns a new string. The returned string should be freed using STB_ReleaseUnicodeString.


Parameters
src- UTF-16 string to be converted
outlen- number of bytes in the returned string
Returns
UTF-8 format string
U8BIT* STB_DeleteUnicodeStringChar ( U8BIT *  string_ptr,
U16BIT  char_id 
)

Takes a string and removes the requested location, shuffling any following data down (thus removing gap)

Parameters
string_ptr - Pointer to the string to be updated
char_id - Character to be removed
Returns
Pointer to the updated string or unchanged string if invalid request made.
U8BIT* STB_FormatUnicodeString ( BOOLEAN  strip_DVB_cntrl_char,
BOOLEAN *  reverse_dir,
const U8BIT *const  format_ptr,
  ... 
)

Unicode version of sprintf.

Parameters
strip_DVB_cntrl_char - True if all DVB control chars are to be removed
reverse_dir - reverse print direction (out)
format_ptr - Takes a format string of any format, and recognises the following tokens: hu (U8BIT), u (U16BIT), lu (U32BIT), hd (S8BIT), d (S16BIT), ld (S32BIT), hx (U8BIT), x (U16BIT), lx (U32BIT), s (U8BIT*) for 8-bit Ascii or Unicode strings, %% (to output a % character). For the decimal and hexadecimal number tokens there is also limited support for number/precision flags up to a maximum width of 11 digits (e.g. %1 to %11 to pad the number with leading spaces, %01 to %011 to pad the number with leading zeroes); the maximum width of the number/precision flag is 3 characters.
... - Parameters to be substituted into the format string.
Returns
NULL if there is any error, otherwise a newly allocated unicode string. Note: The string must be freed with STB_ReleaseUnicodeString.
U32BIT STB_GetNumBytesInString ( U8BIT *  string_ptr)

Determines the no of bytes of the given string.

Parameters
string_ptr- Pointer to the U8BIT string.
Returns
Returns no of bytes of string including headers and terminators.
U32BIT STB_GetUnicodeStringChar ( U8BIT *  string_ptr,
U16BIT  char_id 
)

Retrieves the unicode value pointed to by char_id within the given string. If an invalid request occurs (ie char_id is beyond string limit) then 0 is returned.

Parameters
string_ptr - Pointer to the string to be searched
char_id - Character to be retrieved
Returns
unicode value contained at the requested string position (0xFFFF if invalid position or string).
BOOLEAN STB_IsNormalString ( U8BIT *  string_ptr)

Tests for normal ascii string.

Parameters
U8BIT* string_ptr - string to test if normal ascii
Returns
BOOLEAN is_normal
BOOLEAN STB_IsStringEmpty ( U8BIT *  string_ptr)

Checks for a string of only spaces.

Parameters
string_ptr- pointer to the string to be checked.
Returns
TRUE if empty, FALSE otherwise.
BOOLEAN STB_IsUnicodeString ( U8BIT *  string_ptr)

Tests for unicode string.

Parameters
U8BIT* string_ptr - string to test if unicode
Returns
BOOLEAN is_unicode
BOOLEAN STB_IsUnicodeStringReversed ( U8BIT *  string_ptr)

Checks to see if the supplied string is unicode and if it is reversed (arabic)

Parameters
U8BIT* string_ptr - string to test if unicode
Returns
BOOLEAN is_reversed TRUE if reversed
void STB_ReleaseUnicodeString ( U8BIT *  string)

Releases the specified unicode string, freeing associated heap resources.

Parameters
string- pointer to the unicode string to be released. NB - This will have been supplied by a previous call to STB_ConvertStringToUnicode()!
void STB_SetDefaultAsciiTable ( U8BIT  table)

Sets default ascii table to be used, if not overridden by the table index at the start of a string.

Parameters
table- index of the table to be used (0 to 15), as defined in ETSI 300 468, Annex A, table A.3
U8BIT* STB_SetUnicodeStringChar ( U8BIT *  string_ptr,
U16BIT  char_id,
U16BIT  code 
)

Takes a string and changes the requested location to a new value. This request may involve appending to the string in which case the string is extended (always extended by one character, independent of char_id).

Parameters
string_ptr - Pointer to the string to be updated
char_id - Character to be changed
code - Unicode value to be inserted at position indicated by char_id
Returns
Pointer to the updated string or unchanged string if invalid request made.
U8BIT* STB_UnicodeInsertString ( U8BIT *  src_str,
U16BIT  insert_pos,
U8BIT *  insert_str,
BOOLEAN  replace_char 
)

Creates a new string by inserting one string into another at a given position, with the option of replacing the char at the given position. Strings can be passed as DVB or unicode, but output will always be unicode and the resulting string must be freed.


Parameters
src_str- string into which the insertion will be made
insert_pos- position in the source string to make the insertion, it will be after this position
insert_str- string to be inserted
replace_char- TRUE if the char at the insertion point is to be replaced by the insertion string
Returns
new string with text inserted
U32BIT STB_UnicodeStringLen ( U8BIT *  string_ptr)

Determines the length, in characters, of the given unicode string by searching for NULL. Count ignores the unicode header value.

Parameters
string_ptr- Pointer to the U8BIT string.
Returns
Returns length of string
U8BIT* STB_UnicodeStringTokenise ( U8BIT *  string,
U8BIT **  save_ptr 
)

Divides the (space separated) string up into individual words and returns them one per call.

Parameters
string - The string to be tokenised (will be modified by this function)
save_ptr - The caller's variable in which the current pointer can be saved
Returns
Pointer to the next token in the string, or NULL if no more
U8BIT* STB_UnicodeStripControlChars ( U8BIT *  string_ptr)

Strips the DVB control characters from a string that's already in UTF-8 or UTF-16 format. The control chars that are stripped are DVB emphasis on/off and DVB CR/LF. The input string isn't changed and the returned string must be freed by calling STB_ReleaseUnicodeString.

Parameters
string_ptr - UTF-8 or UTF-16 string from which the control chars are to be stripped
Returns
new string in the same format as the input string
U8BIT* STB_UnicodeStrStr ( U8BIT *  str1,
U8BIT *  str2,
BOOLEAN  ignore_case 
)

Finds the first occurrence of str2 in str1 and returns a pointer to the substring (as per strstr)

Parameters
str1 - String being searched
str2 - String being searched for
ignore_case - If TRUE, ignores case when comparing characters
Returns
Pointer to the first occurrence of substring in str1, or NULL if not found