DVBCore  20.3.0
DVBCore Documentation
stbuni.c
Go to the documentation of this file.
1 /*******************************************************************************
2  * Copyright © 2014 The DTVKit Open Software Foundation Ltd (www.dtvkit.org)
3  * Copyright © 2004 Ocean Blue Software Ltd
4  *
5  * This file is part of a DTVKit Software Component
6  * You are permitted to copy, modify or distribute this file subject to the terms
7  * of the DTVKit 1.0 Licence which can be found in licence.txt or at www.dtvkit.org
8  *
9  * THIS CODE AND INFORMATION ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND,
10  * EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE IMPLIED WARRANTIES
11  * OF MERCHANTABILITY AND/OR FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  * If you or your organisation is not a member of DTVKit then you have access
14  * to this source code outside of the terms of the licence agreement
15  * and you are expected to delete this and any associated files immediately.
16  * Further information on DTVKit, membership and terms can be found at www.dtvkit.org
17  *******************************************************************************/
24 //---includes for this file-------------------------------------------------------------------------
25 // compiler library header files
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <stdarg.h>
30 #include <ctype.h>
31 
32 // third party header files
33 
34 // Ocean Blue Software header files
35 #include <techtype.h>
36 #include <dbgfuncs.h>
37 
38 #include "asciimap.h"
39 #include "stbheap.h"
40 #include "stbuni.h"
41 
42 #include "stbhuffman.h"
43 
44 //---constant definitions for this file-------------------------------------------------------------
45 #define UTF16_HEADER_VALUE 0x11 // Unicode header value
46 #define UCS2_HEADER_VALUE 0x14 // UCS-2BE header value
47 #define UTF8_HEADER_VALUE 0x15 // UTF-8 header value
48 #define UNICODE_HEADER_POS 0 // Unicode header, start position in string
49 #define LENGTH_LOOP_LIMIT INVALID_UNICODE_CHAR // Catch for non null terminated strings
50 // REM: LENGTH_LOOP_LIMIT must not exceed INVALID_UNICODE_CHAR. STB_GetNumBytesInString treats a length equal to INVALID_UNICODE_CHAR as "no terminator found".
51 
52 #define STRINGS_EQUAL 0 // Return value when compared strings are the same
53 #define FIRST_STRING_GREATER 1 // Return value when 1st string is greater than 2nd
54 #define SECOND_STRING_GREATER -1 // Return value when 2nd string is greater than 1st
55 
56 #define MAX_NUMBER_DIGITS 11 // Maximum number digits in a S32BIT number (e.g. -2147483648 )
57 
58 
59 /* Unicode tokens for format strings */
60 #define UNI_PERCENT_CHAR 0x0025
61 #define UNI_SMALL_D_CHAR 0x0064
62 #define UNI_SMALL_H_CHAR 0x0068
63 #define UNI_SMALL_U_CHAR 0x0075
64 #define UNI_SMALL_L_CHAR 0x006C
65 #define UNI_SMALL_S_CHAR 0x0073
66 #define UNI_SMALL_X_CHAR 0x0078
67 #define UNI_LARGE_X_CHAR 0x0058
68 #define UNI_ZERO_CHAR 0x0030
69 #define UNI_NINE_CHAR 0x0039
70 
71 #define UNI_SUR_HIGH_START 0xD800
72 #define UNI_SUR_HIGH_END 0xDBFF
73 #define UNI_SUR_LOW_START 0xDC00
74 #define UNI_SUR_LOW_END 0xDFFF
75 #define UNI_REPLACEMENT_CHAR 0x0000FFFD
76 
77 /* Pairs of unicode tokens for format strings */
78 #define UNI_SMALL_L_SMALL_D_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_D_CHAR)
79 #define UNI_SMALL_L_SMALL_U_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_U_CHAR)
80 #define UNI_SMALL_L_SMALL_X_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_SMALL_X_CHAR)
81 #define UNI_SMALL_L_LARGE_X_CHARS ((UNI_SMALL_L_CHAR << 16) | UNI_LARGE_X_CHAR)
82 #define UNI_SMALL_H_SMALL_D_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_D_CHAR)
83 #define UNI_SMALL_H_SMALL_U_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_U_CHAR)
84 #define UNI_SMALL_H_SMALL_X_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_SMALL_X_CHAR)
85 #define UNI_SMALL_H_LARGE_X_CHARS ((UNI_SMALL_H_CHAR << 16) | UNI_LARGE_X_CHAR)
86 
87 #define MAX_DECODE_BUFFER_SIZE 255 /* Buffer size used for decoding compressed strings */
88 #define MAX_NUM_FORMAT_SPEC_STR_SIZE 6 /* max size of numeric format specifier string e.g. "%011ld" */
89 #define MAX_NUM_WIDTH_DIGITS 3 /* max number of digits to specify number width
90  in numeric format specifier e.g. "011" in "%011ld" */
91 
92 //---local typedefs, structs, enumerations for this file--------------------------------------------
93 typedef struct
94 {
95  U32BIT lang_code;
96  U8BIT table_id;
98 
99 //---local (static) variable declarations for this file---------------------------------------------
100 // (internal variables declared static to make them local)
101 
102 /* This table is used to set the default 8859 char table that should be used for a particular
103  * language code. The entries here are exceptions if the default table, which is 0, can't be
104  * used and the broadcasters aren't encoding the strings correctly such that the table to be
105  * used isn't being specified at the start of each string. So if a language isn't defined here
106  * it doesn't mean it isn't supported! */
107 static const S_LANG_CODE_ENTRY lang_code_lookup_table[] =
108 {
109  {(U32BIT)(('r' << 16) | ('u' << 8) | 'm'), 2}, /* Romanian */
110  {(U32BIT)(('r' << 16) | ('o' << 8) | 'n'), 2} /* Romanian */
111 };
112 
113 // Default Latin table - defined as being table 0 in ETSI 300 468.
114 static U8BIT default_ascii_table = 0;
115 
116 //---local function prototypes for this file--------------------------------------------------------
117 // (internal functions declared static to make them local)
118 
119 static BOOLEAN CheckUnicodeCharForReverseDirection(U16BIT unicode);
120 static void MakeUnicode( U8BIT **addr_string_ptr, BOOLEAN *new_string,
121  U16BIT *length_ptr, BOOLEAN *reverse_dir, BOOLEAN strip_DVB_cntrl_char);
122 static U8BIT* OutputUTF8(U8BIT *buffer, U32BIT char_code);
123 static U32BIT ReadUTF8(U8BIT **buffer);
124 static U32BIT CharToLower(U32BIT char_code);
125 
126 //--------------------------------------------------------------------------------------------------
127 // global function definitions
128 //--------------------------------------------------------------------------------------------------
141 U32BIT STB_UnicodeStringLen(U8BIT *string_ptr)
142 {
143  U32BIT string_length; // Length of string (U16BIT counts)
144  U32BIT string_offset; // Counter for offset into string (U8BIT counts)
145  U32BIT char_code;
146  U8BIT *ptr;
147 
148  FUNCTION_START(STB_UnicodeStringLen);
149 
150  ASSERT(string_ptr != NULL);
151 
152  // Reset string length to zero
153  string_length = 0;
154 
155  // only process if this is a valid Unicode string
156  if (string_ptr != NULL)
157  {
158  if ((string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE) || (string_ptr[UNICODE_HEADER_POS] == UCS2_HEADER_VALUE))
159  {
160  // look beyond the header
161  string_offset = 1;
162 
163  do
164  {
165  // if we have found a unicode NULL then exit loop else increment window to look at
166  // next U16BIT value
167  if (string_ptr[string_offset] == '\0' && string_ptr[string_offset + 1] == '\0')
168  {
169  break;
170  }
171 
172  string_offset += 2;
173  string_length++;
174  }
175  while (string_length < LENGTH_LOOP_LIMIT);
176  }
177  else if (string_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE)
178  {
179  ptr = string_ptr + 1;
180 
181  while ((char_code = ReadUTF8(&ptr)) != 0)
182  {
183  string_length++;
184  }
185  }
186  }
187 
188  FUNCTION_FINISH(STB_UnicodeStringLen);
189 
190  return(string_length);
191 }
192 
204 BOOLEAN STB_IsUnicodeStringReversed(U8BIT *string_ptr)
205 {
206  BOOLEAN is_reversed; // TRUE if reversed char found
207  U32BIT num_bytes; // Number of bytes in string
208  U32BIT offset_count; // Offset counter
209  U16BIT unicode; // Unicode value
210 
211  FUNCTION_START(STB_IsUnicodeStringReversed);
212 
213  is_reversed = FALSE;
214 
215  if ((string_ptr != NULL) && (string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE))
216  {
217  num_bytes = STB_UnicodeStringLen(string_ptr);
218  string_ptr++;
219  num_bytes *= 2;
220  offset_count = 0;
221 
222  while ((offset_count < num_bytes) && (is_reversed == FALSE))
223  {
224  // Convert next two bytes to a 16 bit unicode value
225  unicode = ((*(string_ptr + offset_count)) << 8) | ((*(string_ptr + offset_count + 1)));
226 
227  is_reversed = CheckUnicodeCharForReverseDirection(unicode );
228 
229  offset_count += 2;
230  }
231  }
232 
233  FUNCTION_FINISH(STB_IsUnicodeStringReversed);
234  return(is_reversed);
235 }
236 
248 BOOLEAN STB_IsUnicodeString(U8BIT *string_ptr)
249 {
250  BOOLEAN is_unicode;
251 
252  FUNCTION_START(STB_IsUnicodeString);
253 
254  if ((string_ptr != NULL) &&
255  ((string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE) ||
256  (string_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE)))
257  {
258  is_unicode = TRUE;
259  }
260  else
261  {
262  is_unicode = FALSE;
263  }
264 
265  FUNCTION_FINISH(STB_IsUnicodeString);
266 
267  return(is_unicode);
268 }
269 
281 BOOLEAN STB_IsNormalString(U8BIT *string_ptr)
282 {
283  BOOLEAN is_normal;
284 
285  FUNCTION_START(STB_IsNormalString);
286 
287  is_normal = FALSE;
288  if (string_ptr != NULL)
289  {
290  if ((*string_ptr == 0) || (*string_ptr >= 0x20))
291  {
292  is_normal = TRUE;
293  }
294  }
295 
296  FUNCTION_FINISH(STB_IsNormalString);
297  return(is_normal);
298 }
299 
311 U32BIT STB_GetNumBytesInString(U8BIT *string_ptr)
312 {
313  U32BIT num_chars;
314  U32BIT num_bytes;
315  U8BIT byte_val;
316  BOOLEAN finished;
317  BOOLEAN prev_char_null;
318 
319  FUNCTION_START(STB_GetNumBytesInString);
320 
321  num_bytes = 0;
322 
323  if (string_ptr != NULL)
324  {
325  byte_val = string_ptr[0];
326  if (byte_val >= 0x20)
327  {
328  // normal ascii codes plus null terminator
329  num_bytes = strlen((char *)string_ptr) + 1;
330  }
331  else if (byte_val == 0)
332  {
333  // empty ascii string contains null char only
334  num_bytes = 1;
335  }
336  else if ((byte_val >= 1) && (byte_val <= 11))
337  {
338  // one byte header followed by normal ascii codes and null terminator
339  num_bytes = strlen((char *)(string_ptr + 1)) + 2;
340  }
341  else if (byte_val == 0x10)
342  {
343  // three byte header followed by normal ascii codes and null terminator
344  num_bytes = strlen((char *)(string_ptr + 3)) + 4;
345  }
346  else if ((byte_val == UTF16_HEADER_VALUE) || (byte_val == UCS2_HEADER_VALUE))
347  {
348  // unicode - one byte header followed by 16-bit character codes and 16-bit terminator
349  num_chars = STB_UnicodeStringLen(string_ptr);
350  if (num_chars != INVALID_UNICODE_CHAR)
351  {
352  num_bytes = STB_UTF16_LEN_TO_BYTES_IN_STRING(num_chars);
353  }
354  }
355  else if (byte_val == UTF8_HEADER_VALUE)
356  {
357  /* UTF-8 encoded string, 1 byte header followed by 1 byte terminator */
358  num_bytes = strlen((char *)(string_ptr + 1)) + 2;
359  }
360  else if (byte_val == 0x1f)
361  {
362  /* Compressed UTF-8 string.
363  * This count relies on the fact that when the string is read it's terminated with a
364  * two null chars as without this there's no way to determine the length other than
365  * by decompressing it! */
366  finished = FALSE;
367  prev_char_null = FALSE;
368 
369  for (num_bytes = 0; !finished; )
370  {
371  if (string_ptr[num_bytes] == 0)
372  {
373  if (prev_char_null)
374  {
375  /* Double 0 char found, end of string */
376  finished = TRUE;
377  }
378  else
379  {
380  prev_char_null = TRUE;
381  num_bytes++;
382  }
383  }
384  else
385  {
386  prev_char_null = FALSE;
387  num_bytes++;
388  }
389  }
390  }
391  }
392  FUNCTION_FINISH(STB_GetNumBytesInString);
393  return(num_bytes);
394 }
395 
412 U8BIT* STB_SetUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id, U16BIT code)
413 {
414  U32BIT string_length; // Length of string passed in
415  U32BIT byte_offset; // Byte offset rather than unicode character offset
416 
417  FUNCTION_START(STB_SetUnicodeStringChar);
418 
419  ASSERT(string_ptr != NULL);
420  ASSERT((string_ptr[0] == UTF16_HEADER_VALUE) || (string_ptr[0] == UTF8_HEADER_VALUE));
421 
422  // Only process if this is a valid pointer and a valid unicode string
423  if (string_ptr != NULL && string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
424  {
425  string_length = STB_UnicodeStringLen(string_ptr);
426 
427  // check if we need to re-alloc string to append the code (expand by one)
428  if (char_id >= string_length)
429  {
430  // Realloc enough memory to include string, null terminator(2) and unicode header(1)
431  string_ptr = STB_ResizeMemory(string_ptr, STB_UTF16_LEN_TO_BYTES_IN_STRING(string_length));
432 
433  // re-align to end of string
434  char_id = (U16BIT) string_length;
435  } // if
436 
437  // If we still have a valid string then insert new character
438  if (string_ptr != NULL)
439  {
440  // Actual byte offset of required unicode character (take header into account)
441  byte_offset = (char_id * 2) + 1;
442 
443  string_ptr[byte_offset] = (U8BIT) (code >> 8);
444  byte_offset++;
445 
446  string_ptr[byte_offset] = (U8BIT) (code & 0x00FF);
447  byte_offset++;
448 
449  // if this is an append then terminate string with null
450  if (char_id == string_length)
451  {
452  string_ptr[byte_offset++] = '\0';
453  byte_offset++;
454 
455  string_ptr[byte_offset] = '\0';
456  } // if
457  } // if
458  } // if
459 
460  FUNCTION_FINISH(STB_SetUnicodeStringChar);
461  return(string_ptr);
462 } // STB_SetUnicodeStringChar
463 
477 U8BIT* STB_DeleteUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id)
478 {
479  U32BIT string_length; // Length of string passed in
480  U32BIT byte_offset; // Byte offset rather than unicode character offset
481 
482  FUNCTION_START(STB_DeleteUnicodeStringChar);
483 
484  ASSERT(string_ptr != NULL);
485  ASSERT((string_ptr[0] == UTF16_HEADER_VALUE) || (string_ptr[0] == UTF8_HEADER_VALUE));
486 
487  // Only process if this is a valid pointer and a valid unicode string
488  if (string_ptr != NULL && string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
489  {
490  string_length = STB_UnicodeStringLen(string_ptr);
491 
492  // check if we are asking to delete a valid character
493  if (char_id < string_length)
494  {
495  // Actual byte offset of required unicode character (take header into account)
496  byte_offset = (char_id * 2) + 1;
497 
498  // Compress string by removing requested data
499  memcpy(&string_ptr[byte_offset], &string_ptr[byte_offset + 2],
500  ((string_length - char_id) * 2));
501 
502  // re-size, allowing for unicode header and NULL
503  string_ptr = STB_ResizeMemory(string_ptr, (string_length * 2) + 1);
504  } // if
505  } // if
506 
507  FUNCTION_FINISH(STB_DeleteUnicodeStringChar);
508  return(string_ptr);
509 } // STB_DeleteUnicodeStringChar
510 
526 U32BIT STB_GetUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id)
527 {
528  U32BIT string_length; // Length of string passed in
529  U32BIT byte_offset; // Byte offset rather than unicode character offset
530  U32BIT return_code; // Value found at location, to be returned
531 
532  FUNCTION_START(STB_GetUnicodeStringChar);
533 
534  ASSERT(string_ptr != NULL);
535  ASSERT((string_ptr[0] == UTF16_HEADER_VALUE) || (string_ptr[0] == UTF8_HEADER_VALUE));
536 
537  return_code = INVALID_UNICODE_CHAR;
538 
539  // Only process if this is a valid pointer and a valid unicode string
540  if (string_ptr != NULL)
541  {
542  if (string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
543  {
544  string_length = STB_UnicodeStringLen(string_ptr);
545 
546  if (char_id <= string_length)
547  {
548  // Actual byte offset of required unicode character (take header into account)
549  byte_offset = (char_id * 2) + 1;
550 
551  return_code = (U16BIT) (string_ptr[byte_offset] << 8);
552  byte_offset++;
553 
554  return_code |= (U16BIT) string_ptr[byte_offset];
555  }
556  }
557  else if (string_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE)
558  {
559  /* Can't work out the byte offset of the nth char, so have to iterate through */
560  string_ptr++;
561  while (char_id > 0)
562  {
563  if ((*string_ptr & 0x80) == 0)
564  {
565  string_ptr++;
566  char_id--;
567  }
568  else if ((*string_ptr & 0xE0) == 0xC0)
569  {
570  string_ptr += 2;
571  char_id--;
572  }
573  else if ((*string_ptr & 0xF0) == 0xE0)
574  {
575  string_ptr += 3;
576  char_id--;
577  }
578  else if ((*string_ptr & 0xF8) == 0xF0)
579  {
580  string_ptr += 4;
581  char_id--;
582  }
583  else
584  {
585  /* Invalid UTF-8 string */
586  char_id = 0;
587  string_ptr = NULL;
588  }
589  }
590 
591  if (string_ptr != NULL)
592  {
593  return_code = ReadUTF8(&string_ptr);
594  }
595  }
596  }
597 
598  FUNCTION_FINISH(STB_GetUnicodeStringChar);
599 
600  return(return_code);
601 }
602 
618 U8BIT* STB_ConcatUnicodeStrings(U8BIT *string1_ptr, U8BIT *string2_ptr)
619 {
620  U16BIT char_count;
621  U8BIT *unicode_str1;
622  U8BIT *unicode_str2;
623  U8BIT *concatinated_string_ptr; // Holder for the concatinated string
624  U32BIT new_string_byte_count; // Concatinated string length (in bytes)
625  U32BIT num_bytes1;
626  U32BIT num_bytes2;
627 
628  FUNCTION_START(STB_ConcatUnicodeStrings);
629 
630  ASSERT(string1_ptr != NULL);
631  ASSERT(string2_ptr != NULL);
632 
633  concatinated_string_ptr = NULL;
634 
635  // Only process if strings are valid pointers and both have valid unicode strings
636  if ((string1_ptr != NULL) && (string2_ptr != NULL))
637  {
638  unicode_str1 = NULL;
639  unicode_str2 = NULL;
640 
641  if (((string1_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE) &&
642  (string2_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)) ||
643  ((string1_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE) &&
644  (string2_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE)))
645  {
646  /* Both strings are the same, so don't need to be converted */
647  num_bytes1 = STB_GetNumBytesInString(string1_ptr);
648  num_bytes2 = STB_GetNumBytesInString(string2_ptr);
649  }
650  else
651  {
652  /* Convert both strings before concatenating */
653  unicode_str1 = STB_ConvertStringToUTF8(string1_ptr, &char_count, FALSE, 0);
654  string1_ptr = unicode_str1;
655  num_bytes1 = STB_GetNumBytesInString(string1_ptr);
656 
657  unicode_str2 = STB_ConvertStringToUTF8(string2_ptr, &char_count, FALSE, 0);
658  string2_ptr = unicode_str2;
659  num_bytes2 = STB_GetNumBytesInString(string2_ptr);
660  }
661 
662  if ((string1_ptr != NULL) && (string2_ptr != NULL))
663  {
664  if (string1_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
665  {
666  /* Length of concat string is sum of lengths, which include header and null bytes for each,
667  * minus 3 as we only need one header and terminator on the new string */
668  new_string_byte_count = num_bytes1 + num_bytes2 - 3;
669  }
670  else
671  {
672  /* Length of concat string is sum of lengths, which include header and null byte for each,
673  * minus 2 as we only need one header and null on the new string */
674  new_string_byte_count = num_bytes1 + num_bytes2 - 2;
675  }
676 
677  concatinated_string_ptr = (U8BIT *)STB_GetMemory(new_string_byte_count);
678  if (concatinated_string_ptr != NULL)
679  {
680  if (string1_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
681  {
682  /* Concat UTF-16 strings */
683  /* Copy string1, minus its null terminator */
684  memcpy(&concatinated_string_ptr[0], string1_ptr, num_bytes1 - 2);
685 
686  /* Concat string2 starting from the first actual char, and including its null */
687  memcpy((char *)&concatinated_string_ptr[num_bytes1 - 2], &string2_ptr[1], num_bytes2 - 1);
688  }
689  else
690  {
691  /* Concat UTF-8 strings */
692  /* Copy string1, minus its null terminator */
693  memcpy(&concatinated_string_ptr[0], string1_ptr, num_bytes1 - 1);
694 
695  /* Concat string2 starting from the first actual char, and including its null */
696  memcpy((char *)&concatinated_string_ptr[num_bytes1 - 1], &string2_ptr[1], num_bytes2 - 1);
697  }
698  }
699  }
700 
701  STB_ReleaseUnicodeString(unicode_str1);
702  STB_ReleaseUnicodeString(unicode_str2);
703  }
704 
705  FUNCTION_FINISH(STB_ConcatUnicodeStrings);
706 
707  return(concatinated_string_ptr);
708 }
709 
723 U8BIT* STB_UnicodeStringTokenise(U8BIT *string, U8BIT **save_ptr)
724 {
725  U8BIT *return_ptr = NULL;
726  U8BIT *next_char_ptr;
727  U32BIT char_code;
728 
729  FUNCTION_START(STB_UnicodeStringTokenise);
730 
731  ASSERT(save_ptr != NULL);
732 
733  // Pick up the remembered pointer to the start of the next string
734  // if this is a repeat call
735  if ((string == NULL) && (*save_ptr != NULL))
736  {
737  string = *save_ptr;
738  // Replace 2nd NULL terminator from previously found word with header for new word
739  *string = UTF8_HEADER_VALUE;
740  }
741 
742  if ((string != NULL) && (string[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE))
743  {
744  return_ptr = string;
745  next_char_ptr = string + 1;
746  *save_ptr = NULL;
747 
748  while ((char_code = ReadUTF8(&next_char_ptr)) != 0)
749  {
750  if (char_code == 0x20)
751  {
752  /* Convert the separator (space) to a terminating NULL */
753  *string = 0x00;
754 
755  /* Skip any further whitespace */
756  while ((char_code = ReadUTF8(&next_char_ptr)) == 0x20)
757  {
758  string = next_char_ptr;
759  }
760 
761  *save_ptr = string;
762  break;
763  }
764 
765  string = next_char_ptr;
766  }
767  }
768 
769  FUNCTION_FINISH(STB_UnicodeStringTokenise);
770 
771  return return_ptr;
772 }
773 
788 U8BIT* STB_UnicodeStrStr(U8BIT *str1, U8BIT *str2, BOOLEAN ignore_case)
789 {
790  U8BIT *subs_ptr = NULL;
791  U32BIT str1_len;
792  U32BIT str2_len;
793  U32BIT str1_index;
794  U32BIT str1_cmp_index;
795  U32BIT str2_cmp_index;
796  U8BIT *str1_ptr;
797  U8BIT *next_str1_ptr;
798  U8BIT *str1_cmp_ptr;
799  U8BIT *str2_cmp_ptr;
800  U32BIT str1_char, str2_char;
801 
802  FUNCTION_START(STB_UnicodeStrStr);
803 
804  // Only process if strings are valid pointers and both have valid unicode strings
805  if ((str1 != NULL) && (str2 != NULL))
806  {
807  if ((str1[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE) &&
808  (str2[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE))
809  {
810  /* Search procedure for UTF-16 strings */
811  str1_len = STB_UnicodeStringLen(str1);
812  str2_len = STB_UnicodeStringLen(str2);
813 
814  str1_index = 0;
815 
816  while (str1_index < str1_len)
817  {
818  str1_cmp_index = str1_index;
819  str2_cmp_index = 0;
820 
821  while ((str1_cmp_index < str1_len) && (str2_cmp_index < str2_len))
822  {
823  str1_char = STB_GetUnicodeStringChar(str1, str1_cmp_index);
824  str2_char = STB_GetUnicodeStringChar(str2, str2_cmp_index);
825 
826  if (ignore_case)
827  {
828  str1_char = CharToLower(str1_char);
829  str2_char = CharToLower(str2_char);
830  }
831 
832  if (str1_char != str2_char)
833  {
834  break;
835  }
836 
837  str1_cmp_index++;
838  str2_cmp_index++;
839  }
840 
841  if (str2_cmp_index >= str2_len)
842  {
843  subs_ptr = &str1[1 + ((str1_cmp_index - 1) * 2)];
844  break;
845  }
846  str1_index++;
847  }
848  }
849  else if ((str1[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE) &&
850  (str2[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE))
851  {
852  /* Search procedure for UTF-8 strings */
853  str1_ptr = &str1[1];
854  next_str1_ptr = str1_ptr;
855  str1_char = ReadUTF8(&next_str1_ptr);
856 
857  while (str1_char != 0)
858  {
859  str1_cmp_ptr = str1_ptr;
860  str2_cmp_ptr = &str2[1];
861 
862  do
863  {
864  str1_char = ReadUTF8(&str1_cmp_ptr);
865  str2_char = ReadUTF8(&str2_cmp_ptr);
866 
867  if (ignore_case)
868  {
869  str1_char = CharToLower(str1_char);
870  str2_char = CharToLower(str2_char);
871  }
872  }
873  while ((str1_char != 0) && (str2_char != 0) && (str1_char == str2_char));
874 
875  if (str2_char == 0)
876  {
877  /* Matched to the end of the search string */
878  subs_ptr = str1_ptr;
879  break;
880  }
881 
882  str1_ptr = next_str1_ptr;
883  str1_char = ReadUTF8(&next_str1_ptr);
884  }
885  }
886  }
887 
888  FUNCTION_FINISH(STB_UnicodeStrStr);
889 
890  return(subs_ptr);
891 }
892 
910 S8BIT STB_CompareUnicodeStrings(U8BIT *string1_ptr, U8BIT *string2_ptr, BOOLEAN exact_match, BOOLEAN ignore_case)
911 {
912  U32BIT string1_length; // Length of 1st string passed in
913  U32BIT string2_length; // Length of 2nd string passed in
914  U32BIT min_string_length; // Smallest string length from the two
915  U32BIT unicode_count; // Counter value pointing at current unicode character
916  U32BIT byte_offset; // Counter value pointing at current byte offset
917  U32BIT current_string1_code; // Current unicode value from string1
918  U32BIT current_string2_code; // Current unicode value from string2
919  S8BIT difference = STRINGS_EQUAL; // Return value indicating difference between strings
920  U8BIT *ptr1;
921  U8BIT *ptr2;
922 
923  FUNCTION_START(STB_CompareUnicodeStrings);
924 
925  ASSERT(string1_ptr != NULL);
926  ASSERT(string2_ptr != NULL);
927 
928  // Only process if strings are valid pointers and both have valid unicode strings
929  if ((string1_ptr != NULL) && (string2_ptr != NULL))
930  {
931  if ((string1_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE) &&
932  (string2_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE))
933  {
934  string1_length = STB_UnicodeStringLen(string1_ptr);
935  string2_length = STB_UnicodeStringLen(string2_ptr);
936 
937  // Determine the shortest string
938  if (string1_length < string2_length)
939  {
940  min_string_length = string1_length;
941  }
942  else
943  {
944  min_string_length = string2_length;
945  }
946 
947  difference = STRINGS_EQUAL;
948  unicode_count = 0;
949  byte_offset = 1;
950 
951  // Loop through the strings
952  while (unicode_count <= min_string_length && difference == STRINGS_EQUAL)
953  {
954  current_string1_code = (U16BIT) (string1_ptr[byte_offset] << 8);
955  current_string2_code = (U16BIT) (string2_ptr[byte_offset] << 8);
956  byte_offset++;
957  current_string1_code |= (U16BIT) string1_ptr[byte_offset];
958  current_string2_code |= (U16BIT) string2_ptr[byte_offset];
959  byte_offset++;
960 
961  if (ignore_case)
962  {
963  current_string1_code = CharToLower(current_string1_code);
964  current_string2_code = CharToLower(current_string2_code);
965  }
966 
967  if (current_string1_code > current_string2_code)
968  {
969  difference = FIRST_STRING_GREATER;
970  }
971  else if (current_string1_code < current_string2_code)
972  {
973  difference = SECOND_STRING_GREATER;
974  }
975 
976  unicode_count++;
977  }
978  }
979  else if ((string1_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE) &&
980  (string2_ptr[UNICODE_HEADER_POS] == UTF8_HEADER_VALUE))
981  {
982  ptr1 = &string1_ptr[1];
983  ptr2 = &string2_ptr[1];
984  current_string1_code = ReadUTF8(&ptr1);
985  current_string2_code = ReadUTF8(&ptr2);
986 
987  if (ignore_case)
988  {
989  current_string1_code = CharToLower(current_string1_code);
990  current_string2_code = CharToLower(current_string2_code);
991  }
992 
993  while ((current_string1_code != 0) && (current_string2_code != 0) &&
994  (difference == STRINGS_EQUAL))
995  {
996  if (current_string1_code > current_string2_code)
997  {
998  difference = FIRST_STRING_GREATER;
999  }
1000  else if (current_string1_code < current_string2_code)
1001  {
1002  difference = SECOND_STRING_GREATER;
1003  }
1004  else
1005  {
1006  current_string1_code = ReadUTF8(&ptr1);
1007  current_string2_code = ReadUTF8(&ptr2);
1008 
1009  if (ignore_case)
1010  {
1011  current_string1_code = CharToLower(current_string1_code);
1012  current_string2_code = CharToLower(current_string2_code);
1013  }
1014  }
1015  }
1016 
1017  if (exact_match && (difference == STRINGS_EQUAL))
1018  {
1019  /* Match will be adjusted according to string lengths */
1020  string1_length = STB_UnicodeStringLen(string1_ptr);
1021  string2_length = STB_UnicodeStringLen(string2_ptr);
1022  }
1023  }
1024  else
1025  {
1026  /* no useful way to report utf8/16 error - compare as C strings */
1027  difference = strcmp((char *) string1_ptr, (char *) string2_ptr);
1028  exact_match = FALSE;
1029  }
1030 
1031  // final check, if loop exits with strings set the same, but strings not same length
1032  // then modify difference as one string continues and hence is greater than the other
1033  if (exact_match == TRUE)
1034  {
1035  if ((difference == STRINGS_EQUAL) && (string1_length != string2_length))
1036  {
1037  if (string1_length < string2_length)
1038  {
1039  difference = SECOND_STRING_GREATER;
1040  }
1041  else
1042  {
1043  difference = FIRST_STRING_GREATER;
1044  }
1045  }
1046  }
1047  }
1048 
1049  FUNCTION_FINISH(STB_CompareUnicodeStrings);
1050 
1051  return(difference);
1052 }
1053 
1069 U8BIT* STB_ConvertStringToUnicode(U8BIT *string, BOOLEAN *reverse_dir, U16BIT *nchar,
1070  BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
1071 {
1072  U8BIT *char_ptr;
1073  U8BIT byte_val;
1074  BOOLEAN convert_reqd;
1075  U8BIT table;
1076  U8BIT *op_buff;
1077  U8BIT *op_ptr;
1078  U16BIT num_ip_char;
1079  U16BIT num_op_char;
1080  BOOLEAN reverse;
1081  BOOLEAN utf8;
1082  U8BIT *decoded_utf8;
1083  U16BIT unicode;
1084  U16BIT nbytes;
1085  U16BIT i;
1086  U16BIT prev_char;
1087  U8BIT table_id;
1088  U8BIT encoding_type_id;
1089  U16BIT actual_size;
1090 
1091  FUNCTION_START(STB_ConvertStringToUnicode);
1092 
1093  ASSERT(reverse_dir != NULL);
1094  ASSERT(nchar != NULL);
1095 
1096  // setup pointer to characters in string
1097  char_ptr = string;
1098 
1099  table = 0;
1100  num_op_char = 0;
1101  reverse = FALSE;
1102  utf8 = FALSE;
1103  decoded_utf8 = NULL;
1104 
1105  if (string != NULL)
1106  {
1107  /* Use the default table if there's no lang code or the code isn't found */
1108  table_id = default_ascii_table;
1109 
1110  if (lang_code != 0)
1111  {
1112  /* Lookup the lang code to find the table id to be used */
1113  for (i = 0; i < (sizeof(lang_code_lookup_table) / sizeof(S_LANG_CODE_ENTRY)); i++)
1114  {
1115  if (lang_code_lookup_table[i].lang_code == lang_code)
1116  {
1117  table_id = lang_code_lookup_table[i].table_id;
1118  break;
1119  }
1120  }
1121  }
1122 
1123  // determine type of character coding in string - first byte indicates coding (see DVB SI spec
1124  // for details)
1125  byte_val = *char_ptr;
1126  if ((byte_val >= 0x20) || (byte_val == '\0'))
1127  {
1128  /* Use the default ascii table */
1129  convert_reqd = TRUE;
1130  table = table_id;
1131  num_ip_char = (U16BIT) strlen((char *)char_ptr);
1132  }
1133  else if ((byte_val >= 1) && (byte_val <= 11))
1134  {
1135  // one of the Latin tables specified in the DVB SI specification from ISO 8859.
1136  // 1 => 8859-5, 2 => 8859-6, 3 => 8859-7, 4 => 8859-8, 5 => 8859-9
1137  // 6 => 8859-10, 7 => 8859-11, 8 => 8859-12, 9 => 8859-13, 10 => 8859-14, 11 => 8859-15
1138  convert_reqd = TRUE;
1139  table = byte_val + 4;
1140  char_ptr++;
1141  num_ip_char = (U16BIT) strlen((char *)char_ptr);
1142  }
1143  else if (byte_val == 0x10)
1144  {
1145  // next 2 bytes indicate the table from ISO 8859
1146  convert_reqd = TRUE;
1147  table = ((*(char_ptr + 1)) << 8) | *(char_ptr + 2);
1148  char_ptr += 3;
1149  num_ip_char = (U16BIT) strlen((char *)char_ptr);
1150  }
1151  else if (byte_val == 0x11)
1152  {
1153  // already unicode, no conversion required.
1154  convert_reqd = FALSE;
1155  char_ptr++;
1156  // count number of characters in the string
1157  num_ip_char = 0;
1158  do
1159  {
1160  // read next unicode character
1161  unicode = ((*char_ptr) << 8) | *(char_ptr + 1);
1162  char_ptr += 2;
1163  if (unicode != '\0')
1164  {
1165  num_ip_char++;
1166  }
1167  }
1168  while (unicode != '\0');
1169  char_ptr = string + 1;
1170  }
1171  else if ((byte_val == UTF8_HEADER_VALUE) || (byte_val == 0x1F))
1172  {
1173  if (byte_val == UTF8_HEADER_VALUE)
1174  {
1175  /* This is a normal uncompressed UTF-8 string */
1176  char_ptr++;
1177  }
1178  else
1179  {
1180  /* This is a compressed UTF-8 string with the next byte defining the table
1181  * to be used for decoding */
1182  char_ptr++;
1183  encoding_type_id = *char_ptr;
1184  char_ptr++;
1185 
1186  if ((encoding_type_id == 1) || (encoding_type_id == 2))
1187  {
1188  /* It's not possible to tell how long the string is without decoding it first,
1189  * so a sufficiently large buffer is allocated to decode into */
1190  decoded_utf8 = (U8BIT *)STB_GetMemory(MAX_DECODE_BUFFER_SIZE);
1191  if (decoded_utf8 != NULL)
1192  {
1193  actual_size = STB_HuffmanDecompress(encoding_type_id, char_ptr, decoded_utf8,
1194  MAX_DECODE_BUFFER_SIZE);
1195 
1196  if (actual_size > 0)
1197  {
1198  /* The length is now known, the buffer can be resized */
1199  decoded_utf8 = (U8BIT *)STB_ResizeMemory(decoded_utf8, actual_size);
1200  char_ptr = decoded_utf8;
1201  }
1202  else
1203  {
1204  /* Decompression failed */
1205  *nchar = 0;
1206  *reverse_dir = FALSE;
1207  STB_FreeMemory(decoded_utf8);
1208  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1209  return(NULL);
1210  }
1211  }
1212  else
1213  {
1214  /* Can't decode the string */
1215  *nchar = 0;
1216  *reverse_dir = FALSE;
1217  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1218  return(NULL);
1219  }
1220  }
1221  else
1222  {
1223  /* Don't know how to decode the string */
1224  *nchar = 0;
1225  *reverse_dir = FALSE;
1226  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1227  return(NULL);
1228  }
1229  }
1230 
1231  /* UTF-8 string */
1232  convert_reqd = TRUE;
1233  utf8 = TRUE;
1234 
1235  /* Find the length of the string in chars.
1236  * As conversion is to 16-bit unicode, only char codes upto 0xFFFF are supported,
1237  * which means only chars upto 3-byte UTF-8 can be converted */
1238  num_ip_char = 0;
1239  do
1240  {
1241  if ((*char_ptr & 0x80) == 0)
1242  {
1243  char_ptr++;
1244  }
1245  else if ((*char_ptr & 0xE0) == 0xC0)
1246  {
1247  char_ptr += 2;
1248  }
1249  else if ((*char_ptr & 0xF0) == 0xE0)
1250  {
1251  char_ptr += 3;
1252  }
1253  else
1254  {
1255  /* Invalid string */
1256  if (decoded_utf8 != NULL)
1257  {
1258  STB_FreeMemory(decoded_utf8);
1259  }
1260 
1261  *nchar = 0;
1262  *reverse_dir = FALSE;
1263  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1264  return(NULL);
1265  }
1266  num_ip_char++;
1267  }
1268  while (*char_ptr != '\0');
1269 
1270  if (decoded_utf8 != NULL)
1271  {
1272  char_ptr = decoded_utf8;
1273  }
1274  else
1275  {
1276  char_ptr = string + 1;
1277  }
1278  }
1279  else
1280  {
1281  // invalid string - exit
1282  *nchar = 0;
1283  *reverse_dir = FALSE;
1284  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1285  return(NULL);
1286  }
1287 
1288 
1289  // Now create output string
1290  nbytes = STB_UTF16_LEN_TO_BYTES_IN_STRING(num_ip_char); // +3 for 0x11 code at the beginning and 0x0000 terminator
1291  op_buff = (U8BIT *)STB_GetMemory(nbytes);
1292  if (op_buff != NULL)
1293  {
1294  prev_char = 0;
1295  op_ptr = op_buff;
1296 
1297  // put in code indicating unicode string
1298  *op_ptr = 0x11;
1299  op_ptr++;
1300 
1301  // copy over each character, converting if necessary
1302  for (i = 0; i < num_ip_char; i++)
1303  {
1304  unicode = 0;
1305  if (convert_reqd == FALSE)
1306  {
1307  // already unicode - read next character
1308  unicode = ((*char_ptr) << 8) | *(char_ptr + 1);
1309  char_ptr += 2;
1310  }
1311  else
1312  {
1313  if (utf8)
1314  {
1315  byte_val = *char_ptr;
1316  if ((byte_val & 0x80) == 0)
1317  {
1318  unicode = byte_val;
1319  char_ptr++;
1320  }
1321  else if ((byte_val & 0xE0) == 0xC0)
1322  {
1323  unicode = byte_val & 0x1F;
1324  unicode <<= 6;
1325  unicode += (*(char_ptr + 1) & 0x3F);
1326  char_ptr += 2;
1327  }
1328  else if ((byte_val & 0xF0) == 0xE0)
1329  {
1330  unicode = byte_val & 0x0F;
1331  unicode <<= 6;
1332  unicode += (*(char_ptr + 1) & 0x3F);
1333  unicode <<= 6;
1334  unicode += (*(char_ptr + 2) & 0x3F);
1335  char_ptr += 3;
1336  }
1337  if ((unicode >= CHAR_TABLE_START_ASCII_CODE) && (unicode <= 0xff))
1338  {
1339  /* Map this range of chars to ISO-6937 using table 0 */
1340  unicode = utf8_char_map[unicode - CHAR_TABLE_START_ASCII_CODE];
1341  }
1342  }
1343  else
1344  {
1345  // ascii codes - convert character to unicode
1346  byte_val = *char_ptr;
1347  char_ptr++;
1348  if (byte_val < CHAR_TABLE_START_ASCII_CODE)
1349  {
1350  unicode = (U16BIT)byte_val;
1351  }
1352  else
1353  {
1354  if (table < MAX_CHAR_MAP_TABLES)
1355  {
1356  unicode = char_map[table][byte_val - CHAR_TABLE_START_ASCII_CODE];
1357  }
1358  }
1359  }
1360  }
1361 
1362  // check for control codes
1363  if (((unicode > 0x0000) && (unicode <= 0x001f)) ||
1364  ((unicode >= 0x0080) && (unicode <= 0x009f)) ||
1365  ((unicode >= 0xe080) && (unicode <= 0xe09f))
1366  )
1367  {
1368  switch (unicode)
1369  {
1370  // only accept LF and convert to 0xe08a
1371  // DVB - CR/LF.
1372  case 0x008a:
1373  case 0xe08a:
1374  {
1375  if (strip_DVB_cntrl_char == FALSE)
1376  {
1377  unicode = 0xe08a;
1378  }
1379  else
1380  {
1381  unicode = 0;
1382  }
1383  break;
1384  }
1385  // DVB - character emphasis ON
1386  case 0x0086:
1387  case 0xe086:
1388  {
1389  if (strip_DVB_cntrl_char == FALSE)
1390  {
1391  unicode = 0xe086;
1392  }
1393  else
1394  {
1395  unicode = 0;
1396  }
1397  break;
1398  }
1399  // DVB - character emphasis OFF
1400  case 0x0087:
1401  case 0xe087:
1402  {
1403  if (strip_DVB_cntrl_char == FALSE)
1404  {
1405  unicode = 0xe087;
1406  }
1407  else
1408  {
1409  unicode = 0;
1410  }
1411  break;
1412  }
1413  // convert all other control codes to 0
1414  default:
1415  {
1416  unicode = 0;
1417  break;
1418  }
1419  } // e.o. switch
1420  } // e.o. control codes
1421 
1422  // if valid character copy to output string and check for reverse direction
1423  if (unicode != 0)
1424  {
1425  /* Check for diacritic character. In DVB strings the diacritic precedes the char
1426  * it combines with, but in Unicode a diacritic follows the char, so the char order needs
1427  * to be swapped. Only a single combining diacritic is valid in DVB so only need
1428  * to check previous char to see if a swap is required. */
1429  if (prev_char != 0)
1430  {
1431  /* Don't want to swap if this char is also a diacritic */
1432  if ((unicode < 0x300) || (unicode > 0x36f))
1433  {
1434  /* Swap chars - replace the last output char with the current char */
1435  op_ptr -= 2;
1436  *op_ptr = (U8BIT)((unicode >> 8) & 0xff);
1437  op_ptr++;
1438  *op_ptr = (U8BIT)(unicode & 0xff);
1439  op_ptr++;
1440 
1441  /* Set the char to be output to the previous (diacritic) char */
1442  unicode = prev_char;
1443  prev_char = 0;
1444  }
1445  }
1446  else
1447  {
1448  if ((unicode >= 0x300) && (unicode <= 0x36f))
1449  {
1450  /* This char is a diacritic */
1451  prev_char = unicode;
1452  }
1453  }
1454 
1455  *op_ptr = (U8BIT)((unicode >> 8) & 0xff);
1456  op_ptr++;
1457  *op_ptr = (U8BIT)(unicode & 0xff);
1458  op_ptr++;
1459 
1460  num_op_char++;
1461 
1462 #if 0
1463  if (reverse == FALSE)
1464  {
1465  reverse = CheckUnicodeCharForReverseDirection(unicode);
1466  }
1467 #endif
1468  }
1469  }
1470 
1471  // add null terminator
1472  *op_ptr = 0x00;
1473  *(op_ptr + 1) = 0x00;
1474  }
1475 
1476  if (decoded_utf8 != NULL)
1477  {
1478  STB_FreeMemory(decoded_utf8);
1479  }
1480  }
1481  else
1482  {
1483  op_buff = NULL;
1484  }
1485 
1486  /* return num chars and reverse */
1487  *nchar = num_op_char;
1488  *reverse_dir = reverse;
1489 
1490  FUNCTION_FINISH(STB_ConvertStringToUnicode);
1491  return(op_buff);
1492 }
1493 
1494 /*!**************************************************************************
1495  * @brief Converts the given DVB coded string into a UTF-8 unicode string.
1496  * The returned string will be preceded by the DVB byte, 0x15, indicating
1497  * the string is UTF-8 format.
1498  * The returned string should be freed using STB_ReleaseUnicodeString.
1499  * @param string - DVB string to be converted
1500  * @param nchar - number of characters, not bytes, in the returned string
1501  * @param strip_DVB_cntrl_char - TRUE if DVB control character codes aren't
1502  * to be included in the converted string
1503  * @param lang_code - language code of the string, which may affect the ETSI defined
1504  * character code table used when doing the conversion.
1505  * If the code is 0 then the default table will be used.
1506  * @return UTF-8 format string
1507  ****************************************************************************/
1508 U8BIT* STB_ConvertStringToUTF8(U8BIT *string, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char,
1509  U32BIT lang_code)
1510 {
1511  U8BIT *char_ptr;
1512  U8BIT byte_val;
1513  U8BIT table;
1514  U8BIT *op_buff;
1515  U8BIT *op_ptr;
1516  U16BIT num_ip_char;
1517  U16BIT num_op_char;
1518  BOOLEAN utf8;
1519  BOOLEAN utf16;
1520  U8BIT *decoded_utf8;
1521  U32BIT unicode;
1522  U16BIT nbytes;
1523  U16BIT i;
1524  U32BIT prev_char;
1525  U32BIT temp_char;
1526  U8BIT table_id;
1527  U8BIT encoding_type_id;
1528  U16BIT actual_size;
1529 
1530  FUNCTION_START(STB_ConvertStringToUTF8);
1531 
1532  ASSERT(nchar != NULL);
1533 
1534  char_ptr = string;
1535 
1536  table = 0;
1537  num_op_char = 0;
1538  nbytes = 0;
1539  utf8 = FALSE;
1540  utf16 = FALSE;
1541  decoded_utf8 = NULL;
1542 
1543  if (string != NULL)
1544  {
1545  /* Use the default table if there's no lang code or the code isn't found */
1546  table_id = default_ascii_table;
1547 
1548  if (lang_code != 0)
1549  {
1550  /* Lookup the lang code to find the table id to be used */
1551  for (i = 0; i < (sizeof(lang_code_lookup_table) / sizeof(S_LANG_CODE_ENTRY)); i++)
1552  {
1553  if (lang_code_lookup_table[i].lang_code == lang_code)
1554  {
1555  table_id = lang_code_lookup_table[i].table_id;
1556  break;
1557  }
1558  }
1559  }
1560 
1561  /* Determine type of character coding in string -
1562  * first byte indicates coding (see DVB SI spec for details) */
1563  byte_val = *char_ptr;
1564  if ((byte_val >= 0x20) || (byte_val == '\0'))
1565  {
1566  /* Use the default ascii table */
1567  table = table_id;
1568  num_ip_char = (U16BIT)strlen((char *)char_ptr);
1569  }
1570  else if ((byte_val >= 1) && (byte_val <= 11))
1571  {
1572  // one of the Latin tables specified in the DVB SI specification from ISO 8859.
1573  // 1 => 8859-5, 2 => 8859-6, 3 => 8859-7, 4 => 8859-8, 5 => 8859-9
1574  // 6 => 8859-10, 7 => 8859-11, 8 => 8859-12, 9 => 8859-13, 10 => 8859-14, 11 => 8859-15
1575  table = byte_val + 4;
1576  char_ptr++;
1577  num_ip_char = (U16BIT)strlen((char *)char_ptr);
1578  }
1579  else if (byte_val == 0x10)
1580  {
1581  // next 2 bytes indicate the table from ISO 8859
1582  table = ((*(char_ptr + 1)) << 8) | *(char_ptr + 2);
1583  char_ptr += 3;
1584  num_ip_char = (U16BIT)strlen((char *)char_ptr);
1585  }
1586  else if ((byte_val == 0x11) || (byte_val == UCS2_HEADER_VALUE))
1587  {
1588  utf16 = TRUE;
1589  char_ptr++;
1590 
1591  /* Count number of characters in the string */
1592  num_ip_char = 0;
1593  do
1594  {
1595  unicode = ((*char_ptr) << 8) | *(char_ptr + 1);
1596  char_ptr += 2;
1597  if (unicode != '\0')
1598  {
1599  num_ip_char++;
1600  }
1601  }
1602  while (unicode != '\0');
1603  char_ptr = string + 1;
1604  }
1605  else if ((byte_val == UTF8_HEADER_VALUE) || (byte_val == 0x1F))
1606  {
1607  /* The string is already UTF-8 */
1608  utf8 = TRUE;
1609  char_ptr++;
1610 
1611  if (byte_val == 0x1F)
1612  {
1613  /* This is a compressed UTF-8 string with the next byte defining the table
1614  * to be used for decoding */
1615  encoding_type_id = *char_ptr;
1616  char_ptr++;
1617 
1618  if ((encoding_type_id == 1) || (encoding_type_id == 2))
1619  {
1620  /* It's not possible to tell how long the string is without decoding it first,
1621  * so a sufficiently large buffer is allocated to decode into */
1622  decoded_utf8 = (U8BIT *)STB_GetMemory(MAX_DECODE_BUFFER_SIZE);
1623  if (decoded_utf8 != NULL)
1624  {
1625  actual_size = STB_HuffmanDecompress(encoding_type_id, char_ptr, decoded_utf8,
1626  MAX_DECODE_BUFFER_SIZE);
1627 
1628  if (actual_size > 0)
1629  {
1630  /* The length is now known, the buffer can be resized */
1631  decoded_utf8 = (U8BIT *)STB_ResizeMemory(decoded_utf8, actual_size);
1632  char_ptr = decoded_utf8;
1633  }
1634  else
1635  {
1636  /* Decompression failed */
1637  *nchar = 0;
1638  STB_FreeMemory(decoded_utf8);
1639  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1640  return(NULL);
1641  }
1642  }
1643  else
1644  {
1645  /* Can't decode the string */
1646  *nchar = 0;
1647  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1648  return(NULL);
1649  }
1650  }
1651  else
1652  {
1653  /* Don't know how to decode the string */
1654  *nchar = 0;
1655  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1656  return(NULL);
1657  }
1658  }
1659 
1660  /* Find the length of the string in chars */
1661  num_ip_char = 0;
1662  do
1663  {
1664  if ((*char_ptr & 0x80) == 0)
1665  {
1666  char_ptr++;
1667  }
1668  else if ((*char_ptr & 0xE0) == 0xC0)
1669  {
1670  char_ptr += 2;
1671  }
1672  else if ((*char_ptr & 0xF0) == 0xE0)
1673  {
1674  char_ptr += 3;
1675  }
1676  else if ((*char_ptr & 0xF8) == 0xF0)
1677  {
1678  char_ptr += 4;
1679  }
1680  else
1681  {
1682  /* Invalid byte sequence */
1683  if (decoded_utf8 != NULL)
1684  {
1685  STB_FreeMemory(decoded_utf8);
1686  }
1687 
1688  *nchar = 0;
1689  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1690  return(NULL);
1691  }
1692  num_ip_char++;
1693  }
1694  while (*char_ptr != '\0');
1695 
1696  if (decoded_utf8 != NULL)
1697  {
1698  char_ptr = decoded_utf8;
1699  }
1700  else
1701  {
1702  char_ptr = string + 1;
1703  }
1704  }
1705  else
1706  {
1707  // invalid string - exit
1708  *nchar = 0;
1709  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1710  return(NULL);
1711  }
1712 
1713 
1714  /* Calculate the number of bytes required for the conversion.
1715  * Each character could result in 4 bytes being generated, which is the worst case */
1716  nbytes = num_ip_char * 4;
1717 
1718  /* Two additional bytes required for 0x15 at the start and 0x00 at the end */
1719  nbytes += 2;
1720 
1721  /* Now create output string */
1722  op_buff = (U8BIT *)STB_GetMemory(nbytes);
1723  if (op_buff != NULL)
1724  {
1725  prev_char = 0;
1726  op_ptr = op_buff;
1727 
1728  /* Prepend with code indicating it's a UTF-8 string */
1729  *op_ptr = UTF8_HEADER_VALUE;
1730  op_ptr++;
1731 
1732  /* Copy over each character, converting if necessary */
1733  for (i = 0; i < num_ip_char; i++)
1734  {
1735  unicode = 0;
1736  if (utf16)
1737  {
1738  unicode = ((*char_ptr) << 8) | *(char_ptr + 1);
1739  char_ptr += 2;
1740  }
1741  else if (utf8)
1742  {
1743  byte_val = *char_ptr;
1744  if ((byte_val & 0x80) == 0)
1745  {
1746  unicode = byte_val;
1747  char_ptr++;
1748  }
1749  else if ((byte_val & 0xE0) == 0xC0)
1750  {
1751  unicode = byte_val & 0x1F;
1752  unicode <<= 6;
1753  unicode += (*(char_ptr + 1) & 0x3F);
1754  char_ptr += 2;
1755  }
1756  else if ((byte_val & 0xF0) == 0xE0)
1757  {
1758  unicode = byte_val & 0x0F;
1759  unicode <<= 6;
1760  unicode += (*(char_ptr + 1) & 0x3F);
1761  unicode <<= 6;
1762  unicode += (*(char_ptr + 2) & 0x3F);
1763  char_ptr += 3;
1764  }
1765  else if ((byte_val & 0xF8) == 0xF0)
1766  {
1767  unicode = byte_val & 0x07;
1768  unicode <<= 6;
1769  unicode += (*(char_ptr + 1) & 0x3F);
1770  unicode <<= 6;
1771  unicode += (*(char_ptr + 2) & 0x3F);
1772  unicode <<= 6;
1773  unicode += (*(char_ptr + 3) & 0x3F);
1774  char_ptr += 4;
1775  }
1776 
1777  if ((unicode >= CHAR_TABLE_START_ASCII_CODE) && (unicode <= 0xff))
1778  {
1779  /* Map this range of chars to ISO-6937 using table 0 */
1780  unicode = utf8_char_map[unicode - CHAR_TABLE_START_ASCII_CODE];
1781  }
1782  }
1783  else
1784  {
1785  /* ASCII codes - convert character to unicode */
1786  byte_val = *char_ptr;
1787  char_ptr++;
1788  if (byte_val < CHAR_TABLE_START_ASCII_CODE)
1789  {
1790  unicode = (U32BIT)byte_val;
1791  }
1792  else
1793  {
1794  if (table < MAX_CHAR_MAP_TABLES)
1795  {
1796  unicode = char_map[table][byte_val - CHAR_TABLE_START_ASCII_CODE];
1797  }
1798  }
1799  }
1800 
1801  /* Check for control codes */
1802  if (((unicode > 0x0000) && (unicode <= 0x001f)) ||
1803  ((unicode >= 0x0080) && (unicode <= 0x009f)) ||
1804  ((unicode >= 0xe080) && (unicode <= 0xe09f)))
1805  {
1806  switch (unicode)
1807  {
1808  // only accept LF and convert to 0xe08a
1809  // DVB - CR/LF.
1810  case 0x008a:
1811  case 0xe08a:
1812  {
1813  if (strip_DVB_cntrl_char == FALSE)
1814  {
1815  unicode = 0xe08a;
1816  }
1817  else
1818  {
1819  unicode = 0;
1820  }
1821  break;
1822  }
1823  // DVB - character emphasis ON
1824  case 0x0086:
1825  case 0xe086:
1826  {
1827  if (strip_DVB_cntrl_char == FALSE)
1828  {
1829  unicode = 0xe086;
1830  }
1831  else
1832  {
1833  unicode = 0;
1834  }
1835  break;
1836  }
1837  // DVB - character emphasis OFF
1838  case 0x0087:
1839  case 0xe087:
1840  {
1841  if (strip_DVB_cntrl_char == FALSE)
1842  {
1843  unicode = 0xe087;
1844  }
1845  else
1846  {
1847  unicode = 0;
1848  }
1849  break;
1850  }
1851  // convert all other control codes to 0
1852  default:
1853  {
1854  unicode = 0;
1855  break;
1856  }
1857  }
1858  }
1859 
1860  /* If valid character copy to output string */
1861  if (unicode != 0)
1862  {
1863  /* Check for diacritic character. In DVB strings the diacritic precedes the char
1864  * it combines with, but in Unicode a diacritic follows the char, so the char order needs
1865  * to be swapped. Only a single combining diacritic is valid in DVB so only need
1866  * to check previous char to see if a swap is required. */
1867  if (prev_char != 0)
1868  {
1869  if ((unicode >= 0x300) && (unicode <= 0x36f))
1870  {
1871  /* This char is also a diacritic which means there's two diacritics together,
1872  * so output the previous diacritic and save this one to see if it needs to
1873  * combine with the next char */
1874  temp_char = unicode;
1875  unicode = prev_char;
1876  prev_char = temp_char;
1877  }
1878  else
1879  {
1880  /* Diacritic preceding a normal char, so output the normal char first */
1881  op_ptr = OutputUTF8(op_ptr, unicode);
1882  num_op_char++;
1883 
1884  /* Set the char to be output to the previous (diacritic) char */
1885  unicode = prev_char;
1886  prev_char = 0;
1887  }
1888  }
1889  else
1890  {
1891  if ((unicode >= 0x300) && (unicode <= 0x36f))
1892  {
1893  /* This char is a diacritic */
1894  prev_char = unicode;
1895  unicode = 0;
1896  }
1897  }
1898 
1899  if (unicode != 0)
1900  {
1901  op_ptr = OutputUTF8(op_ptr, unicode);
1902  num_op_char++;
1903  }
1904  }
1905  }
1906 
1907  if (prev_char != 0)
1908  {
1909  /* Last char was a diacritic so output it */
1910  op_ptr = OutputUTF8(op_ptr, prev_char);
1911  num_op_char++;
1912  }
1913 
1914  /* Add null terminator */
1915  *op_ptr++ = 0x00;
1916 
1917  if ((op_ptr - op_buff) < nbytes)
1918  {
1919  nbytes = op_ptr - op_buff;
1920  op_buff = STB_ResizeMemory(op_buff, nbytes);
1921  }
1922  }
1923 
1924  if (decoded_utf8 != NULL)
1925  {
1926  STB_FreeMemory(decoded_utf8);
1927  }
1928  }
1929  else
1930  {
1931  op_buff = NULL;
1932  }
1933 
1934  /* return num chars and reverse */
1935  *nchar = num_op_char;
1936 
1937  FUNCTION_FINISH(STB_ConvertStringToUTF8);
1938 
1939  return(op_buff);
1940 }
1941 
1955 void STB_ReleaseUnicodeString(U8BIT *string)
1956 {
1957  FUNCTION_START(STB_ReleaseUnicodeString);
1958 
1959  if (string != NULL)
1960  {
1961  STB_FreeMemory(string);
1962  }
1963 
1964  FUNCTION_FINISH(STB_ReleaseUnicodeString);
1965 }
1966 
1967 /*!**************************************************************************
1968  * @brief Creates the given string from UTF-16 to UTF-8 and returns a new string.
1969  * The returned string should be freed using STB_ReleaseUnicodeString.
1970  * @param src - UTF-16 string to be converted
1971  * @param outlen - number of bytes in the returned string
1972  * @return UTF-8 format string
1973  ****************************************************************************/
1974 U8BIT* STB_ConvertUTF16toUTF8( U8BIT *src, U32BIT *outlen )
1975 {
1976  const U32BIT byteMask = 0xBF;
1977  const U32BIT byteMark = 0x80;
1978  const int halfShift = 10;
1979  const U32BIT halfBase = 0x0010000UL;
1980  const U8BIT firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
1981  U8BIT *result, *pch;
1982  U16BIT bytesToWrite;
1983  U32BIT ch1, ch2, length;
1984 
1985  FUNCTION_START(STB_ConvertUTF16toUTF8);
1986 
1987  if ((src == NULL) || (src[UNICODE_HEADER_POS] != UTF16_HEADER_VALUE))
1988  {
1989  DBGPRINT("ERR");
1990  result = NULL;
1991  *outlen = 0;
1992  }
1993  else
1994  {
1995  length = STB_UnicodeStringLen(src);
1996  if (length == 0)
1997  {
1998  DBGPRINT("ERR");
1999  result = NULL;
2000  *outlen = 0;
2001  }
2002  else
2003  {
2004  src++; // skip UTF16_HEADER_VALUE
2005 
2006  /* Allocate memory for the UTF-8 string allowing 4 bytes per char + 0x15 and null */
2007  result = STB_GetMemory(length * 4 + 2);
2008  if (result == NULL)
2009  {
2010  DBGPRINT("ERR getting 3 * %d bytes", length);
2011  *outlen = 0;
2012  }
2013  else
2014  {
2015  *result = UTF8_HEADER_VALUE;
2016 
2017  pch = result + 1;
2018  while (length--)
2019  {
2020  ch1 = *src++;
2021  ch1 <<= 8;
2022  ch1 += *src++;
2023  /* If we have a surrogate pair, convert to UTF32 first. */
2024  if (ch1 >= UNI_SUR_HIGH_START && ch1 <= UNI_SUR_HIGH_END)
2025  {
2026  if (length)
2027  {
2028  ch2 = (*src << 8) + *(src + 1);
2029  /* If it's a low surrogate, convert to UTF32. */
2030  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
2031  {
2032  ch1 = ((ch1 - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase;
2033  src += 2;
2034  }
2035  else
2036  {
2037  DBGPRINT("ERR");
2038  break;
2039  }
2040  }
2041  else
2042  {
2043  DBGPRINT("ERR");
2044  break;
2045  }
2046  }
2047  else if (ch1 >= UNI_SUR_LOW_START && ch1 <= UNI_SUR_LOW_END)
2048  {
2049  /* UTF-16 surrogate values are illegal in UTF-32 */
2050  DBGPRINT("ERR");
2051  break;
2052  }
2053  /* Figure out how many bytes the result will require */
2054  if (ch1 < (U32BIT)0x80)
2055  {
2056  bytesToWrite = 1;
2057  }
2058  else if (ch1 < (U32BIT)0x800)
2059  {
2060  bytesToWrite = 2;
2061  }
2062  else if (ch1 < (U32BIT)0x10000)
2063  {
2064  bytesToWrite = 3;
2065  }
2066  else if (ch1 < (U32BIT)0x110000)
2067  {
2068  bytesToWrite = 4;
2069  }
2070  else
2071  {
2072  bytesToWrite = 3;
2073  ch1 = UNI_REPLACEMENT_CHAR;
2074  }
2075  pch += bytesToWrite;
2076  switch (bytesToWrite) /* note: everything falls through. */
2077  {
2078  case 4: *--pch = (U8BIT)((ch1 | byteMark) & byteMask); ch1 >>= 6;
2079  case 3: *--pch = (U8BIT)((ch1 | byteMark) & byteMask); ch1 >>= 6;
2080  case 2: *--pch = (U8BIT)((ch1 | byteMark) & byteMask); ch1 >>= 6;
2081  case 1: *--pch = (U8BIT) (ch1 | firstByteMark[bytesToWrite]);
2082  }
2083  pch += bytesToWrite;
2084  }
2085 
2086  /* Append null terminator */
2087  *pch = 0;
2088  pch++;
2089 
2090  *outlen = pch - result;
2091 
2092  result = STB_ResizeMemory(result, *outlen);
2093  }
2094  }
2095  }
2096 
2097  FUNCTION_FINISH(STB_ConvertUTF16toUTF8);
2098 
2099  return result;
2100 }
2101 
2113 BOOLEAN STB_IsStringEmpty(U8BIT *string_ptr)
2114 {
2115  BOOLEAN retval;
2116  U8BIT *ucode_string;
2117  BOOLEAN rev_string;
2118  U16BIT num_char;
2119  U16BIT i;
2120  U16BIT char_code;
2121  U8BIT *byte_ptr;
2122 
2123  FUNCTION_START(STB_IsStringEmpty);
2124 
2125  retval = TRUE;
2126  ucode_string = STB_ConvertStringToUnicode(string_ptr, &rev_string, &num_char, TRUE, 0);
2127 
2128  if (ucode_string != NULL)
2129  {
2130  byte_ptr = &ucode_string[1];
2131  for (i = 0; i < num_char; i++)
2132  {
2133  char_code = (U16BIT)((*byte_ptr) << 8);
2134  byte_ptr++;
2135  char_code |= (U16BIT)(*byte_ptr);
2136  byte_ptr++;
2137 
2138  if ((char_code != 0x0020) && (char_code != 0x00a0))
2139  {
2140  retval = FALSE;
2141  break;
2142  }
2143  }
2144  STB_ReleaseUnicodeString(ucode_string);
2145  }
2146 
2147  FUNCTION_FINISH(STB_IsStringEmpty);
2148  return(retval);
2149 }
2150 
2157 void STB_SetDefaultAsciiTable(U8BIT table)
2158 {
2159  FUNCTION_START(STB_SetDefaultAsciiTable);
2160 
2161  if (table < MAX_CHAR_MAP_TABLES)
2162  {
2163  default_ascii_table = table;
2164  }
2165 
2166  FUNCTION_FINISH(STB_SetDefaultAsciiTable);
2167 }
2168 
2183 S8BIT STB_CompareStringsIgnoreCase(U8BIT *string1_ptr, U8BIT *string2_ptr)
2184 {
2185  U32BIT str1_len, str2_len;
2186  U32BIT min_str_len;
2187  U32BIT index;
2188  U8BIT char1, char2;
2189  S8BIT difference = STRINGS_EQUAL; // Return value indicating difference between strings
2190 
2191  FUNCTION_START(STB_CompareStringsIgnoreCase);
2192 
2193  if (string1_ptr != NULL)
2194  {
2195  str1_len = strlen((char *)string1_ptr);
2196  }
2197  else
2198  {
2199  str1_len = 0;
2200  }
2201  if (string2_ptr != NULL)
2202  {
2203  str2_len = strlen((char *)string2_ptr);
2204  }
2205  else
2206  {
2207  str2_len = 0;
2208  }
2209 
2210  if (str2_len < str1_len)
2211  {
2212  min_str_len = str2_len;
2213  }
2214  else
2215  {
2216  min_str_len = str1_len;
2217  }
2218 
2219  for (index = 0; (index < min_str_len) && (difference == STRINGS_EQUAL); index++)
2220  {
2221  char1 = *string1_ptr;
2222  char2 = *string2_ptr;
2223 
2224  if ((char1 >= 'A') && (char1 <= 'Z'))
2225  {
2226  char1 = 'a' + (char1 - 'A');
2227  }
2228 
2229  if ((char2 >= 'A') && (char2 <= 'Z'))
2230  {
2231  char2 = 'a' + (char2 - 'A');
2232  }
2233 
2234  if (char1 > char2)
2235  {
2236  difference = FIRST_STRING_GREATER;
2237  }
2238  else if (char1 < char2)
2239  {
2240  difference = SECOND_STRING_GREATER;
2241  }
2242 
2243  string1_ptr++;
2244  string2_ptr++;
2245  }
2246 
2247  if ((difference == STRINGS_EQUAL) && (str1_len != str2_len))
2248  {
2249  if (str1_len < str2_len)
2250  {
2251  difference = SECOND_STRING_GREATER;
2252  }
2253  else
2254  {
2255  difference = FIRST_STRING_GREATER;
2256  }
2257  }
2258 
2259  FUNCTION_FINISH(STB_CompareStringsIgnoreCase);
2260 
2261  return(difference);
2262 }
2263 
2292 U8BIT* STB_FormatUnicodeString( BOOLEAN strip_DVB_cntrl_char,
2293  BOOLEAN *reverse_dir,
2294  const U8BIT *const format_ptr,
2295  ... )
2296 {
2297  typedef struct format_param
2298  {
2299  U16BIT format_index; /* Number of bytes including DVB header */
2300  U16BIT format_adj; /* Number of bytes to adjucst for format tokens */
2301  U8BIT *param;
2302  U16BIT param_size; /* Number of Unicode characters */
2303  BOOLEAN free_param; /* String has been reallocated so must be freed */
2304  struct format_param *next_ptr;
2305  } E_FORMAT_PARAM;
2306 
2307  va_list args_ptr;
2308  U8BIT *uni_string_ptr;
2309  U8BIT *uni_format_ptr;
2310  BOOLEAN reverse;
2311  U16BIT format_length;
2312  U32BIT nchar; /* Number of Characters */
2313  U16BIT total_length; /* Number of U16BIT characters not bytes */
2314  BOOLEAN free_uni_format_ptr;
2315  BOOLEAN error_exit;
2316  U16BIT format_loop;
2317  U16BIT uni_cur_char;
2318  E_FORMAT_PARAM *uni_format_param_ptr;
2319  E_FORMAT_PARAM *uni_head_param_list_ptr;
2320  E_FORMAT_PARAM *uni_last_param_list_ptr;
2321  U8BIT *tmp_str_ptr;
2322  U16BIT cur_format_index;
2323  U8BIT u8_number;
2324  U16BIT u16_number;
2325  U32BIT u32_number;
2326  S8BIT s8_number;
2327  S16BIT s16_number;
2328  S32BIT s32_number;
2329  U8BIT ascii_string[MAX_NUMBER_DIGITS + 1];
2330  U16BIT uni_next_char;
2331  U16BIT length_adj;
2332  U8BIT *tmp_format_ptr;
2333  U8BIT num_form_spec_str[MAX_NUM_FORMAT_SPEC_STR_SIZE];
2334  U8BIT num_form_spec_str_index;
2335 
2336  FUNCTION_START(STB_FormatUnicodeString);
2337 
2338  ASSERT(reverse_dir != NULL);
2339 
2340  /* Initialise the argument pointer */
2341  va_start(args_ptr, format_ptr);
2342  *reverse_dir = FALSE;
2343  uni_string_ptr = NULL;
2344  uni_head_param_list_ptr = NULL;
2345  uni_format_param_ptr = NULL;
2346  uni_last_param_list_ptr = NULL;
2347  error_exit = FALSE;
2348 
2349  uni_format_ptr = (U8BIT *)format_ptr;
2350 
2351  MakeUnicode( &uni_format_ptr, &free_uni_format_ptr, &format_length, &reverse,
2352  strip_DVB_cntrl_char);
2353 
2354  total_length = format_length;
2355  *reverse_dir |= reverse;
2356 
2357  /* If the format string is valid */
2358  if (uni_format_ptr != NULL)
2359  {
2360  /* Scan format loop for tokens.
2361  * Stop if there is no space for 2 tokens i.e %<x>. */
2362  for (format_loop = 0;
2363  (format_loop < (format_length - 1)) && (error_exit == FALSE);
2364  format_loop++)
2365  {
2366  /* Allow for the unicode header */
2367  uni_cur_char = STB_GetUnicodeStringChar(uni_format_ptr, format_loop);
2368 
2369  /* Look for the '%' skipping unicode header */
2370  if (uni_cur_char == UNI_PERCENT_CHAR)
2371  {
2372  cur_format_index = format_loop + 1;
2373 
2374  /* Allow for the unicode header */
2375  uni_cur_char = STB_GetUnicodeStringChar(uni_format_ptr, cur_format_index);
2376 
2377  /* Check for next token element */
2378  /* Note not a switch because unicode chars are not constants. */
2379  if (uni_cur_char == UNI_SMALL_S_CHAR)
2380  {
2381  uni_format_param_ptr = (E_FORMAT_PARAM *)STB_GetMemory(
2382  sizeof(E_FORMAT_PARAM));
2383  if (uni_format_param_ptr != NULL)
2384  {
2385  /* First allocation? */
2386  if (uni_head_param_list_ptr == NULL)
2387  {
2388  uni_head_param_list_ptr = uni_format_param_ptr;
2389  uni_last_param_list_ptr = uni_format_param_ptr;
2390  }
2391  else
2392  {
2393  uni_last_param_list_ptr->next_ptr = uni_format_param_ptr;
2394  uni_last_param_list_ptr = uni_format_param_ptr;
2395  }
2396  uni_format_param_ptr->format_index = (2 * format_loop) + 1; /* start of '%' */
2397  uni_format_param_ptr->format_adj = 2 * 2; /* Conver U16BIT to U8BIT */
2398  uni_format_param_ptr->param = (U8BIT *) va_arg(args_ptr, char *);
2399  MakeUnicode( &(uni_format_param_ptr->param),
2400  &(uni_format_param_ptr->free_param), &(uni_format_param_ptr->param_size),
2401  &reverse, strip_DVB_cntrl_char);
2402  uni_format_param_ptr->next_ptr = NULL;
2403  total_length = ((total_length - 2) +
2404  (uni_format_param_ptr->param_size));
2405  uni_format_param_ptr = uni_format_param_ptr->next_ptr;
2406  format_loop = cur_format_index; /* Skip parsed tokens */
2407  *reverse_dir |= reverse;
2408  }
2409  else
2410  {
2411  error_exit = TRUE;
2412  }
2413  } /* end UNI_SMALL_S_CHAR */
2414  else /* handle numeric format specifiers */
2415  {
2416  /* build up numeric format specifier on the fly */
2417  length_adj = 0;
2418  memset((void *)num_form_spec_str, 0, sizeof(num_form_spec_str));
2419  num_form_spec_str[0] = '%';
2420  num_form_spec_str_index = 1;
2421 
2422  /* handle zero padding and number width specifiers */
2423  while (((U8BIT)uni_cur_char >= '0' && (U8BIT)uni_cur_char <= '9') && (num_form_spec_str_index <= MAX_NUM_WIDTH_DIGITS))
2424  {
2425  /* place the next character into the numeric format spec. string */
2426  num_form_spec_str[num_form_spec_str_index] = (U8BIT)uni_cur_char;
2427  num_form_spec_str_index++;
2428 
2429  /* if we have'nt exceeded the maximum number of digits or reached the end of the
2430  string read in the next character */
2431  if ((atoi((char *)&num_form_spec_str[1]) <= MAX_NUMBER_DIGITS)
2432  &&
2433  ((cur_format_index + 1) <= format_length))
2434  {
2435  uni_cur_char = STB_GetUnicodeStringChar(uni_format_ptr, (cur_format_index + 1));
2436  cur_format_index++;
2437  }
2438  /* otherwise force an error that gets us out of this loop and forces an exit from the main loop */
2439  else
2440  {
2441  uni_cur_char = 0xffff;
2442  }
2443  }
2444 
2445  switch (uni_cur_char)
2446  {
2447  /* handle %%*/
2448  case UNI_PERCENT_CHAR:
2449  {
2450  /* must be "%%" dont allow width specifiers here e.g. "%02%" etc */
2451  if (num_form_spec_str_index == 1)
2452  {
2453  length_adj = 2;
2454  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, "%%");
2455  }
2456  break;
2457  }
2458 
2459  /* handle %u %d %x %X */
2460  case UNI_SMALL_U_CHAR:
2461  case UNI_SMALL_D_CHAR:
2462  case UNI_SMALL_X_CHAR:
2463  case UNI_LARGE_X_CHAR:
2464  {
2465  /* finish building the format spec string and then sprintf into the buffer */
2466  length_adj = num_form_spec_str_index + 1;
2467  num_form_spec_str[num_form_spec_str_index] = (U8BIT)uni_cur_char;
2468  if (uni_cur_char == UNI_SMALL_D_CHAR)
2469  {
2470  s16_number = (S16BIT) va_arg(args_ptr, int);
2471  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, s16_number );
2472  }
2473  else
2474  {
2475  u16_number = (U16BIT) va_arg(args_ptr, int);
2476  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, u16_number );
2477  }
2478  break;
2479  }
2480 
2481  /* handle "%hu" "%hd" "%hx" "%hX" "%lu" "%ld" "%lx" "%lX" */
2482  case UNI_SMALL_H_CHAR:
2483  case UNI_SMALL_L_CHAR:
2484  {
2485  /* there needs to be at least one more character in the string for the h or l to modify */
2486  if ((cur_format_index + 1) <= format_length)
2487  {
2488  /* read it, and build up the rest of the format specifier string */
2489  uni_next_char = STB_GetUnicodeStringChar(uni_format_ptr, (cur_format_index + 1));
2490  cur_format_index++;
2491  length_adj = num_form_spec_str_index + 2;
2492  num_form_spec_str[num_form_spec_str_index] = (U8BIT)uni_cur_char;
2493  num_form_spec_str_index++;
2494  num_form_spec_str[num_form_spec_str_index] = (U8BIT)uni_next_char;
2495 
2496  /* now process the pair of unicode characters together to read the
2497  correct size and sprintf into local buffer ready for conversion to unicode */
2498  switch ((U32BIT)((uni_cur_char << 16) | uni_next_char))
2499  {
2500  /* "%lu", "%lx", "%lX" */
2501  case UNI_SMALL_L_SMALL_U_CHARS:
2502  case UNI_SMALL_L_SMALL_X_CHARS:
2503  case UNI_SMALL_L_LARGE_X_CHARS:
2504  {
2505  u32_number = (U32BIT) va_arg(args_ptr, int);
2506  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, u32_number );
2507  break;
2508  }
2509 
2510  /* "%hu", "%hx", "%hX" */
2511  case UNI_SMALL_H_SMALL_U_CHARS:
2512  case UNI_SMALL_H_SMALL_X_CHARS:
2513  case UNI_SMALL_H_LARGE_X_CHARS:
2514  {
2515  u8_number = (U8BIT) va_arg(args_ptr, int);
2516  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, u8_number );
2517  break;
2518  }
2519 
2520  /* "%ld" */
2521  case UNI_SMALL_L_SMALL_D_CHARS:
2522  {
2523  s32_number = (S32BIT) va_arg(args_ptr, int);
2524  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, s32_number );
2525  break;
2526  }
2527 
2528  /* "%hd" */
2529  case UNI_SMALL_H_SMALL_D_CHARS:
2530  {
2531  s8_number = (S8BIT) va_arg(args_ptr, int);
2532  snprintf((char *)ascii_string, MAX_NUMBER_DIGITS + 1, (char *)num_form_spec_str, s8_number );
2533  break;
2534  }
2535 
2536  /* unsupported format specifier %h? or %l? */
2537  default:
2538  {
2539  error_exit = TRUE;
2540  break;
2541  }
2542  } /* end switch on uni_next_char */
2543  } /* end if */
2544 
2545  break;
2546  } /* end case UNI_SMALL_L_CHAR, UNI_SMALL_H_CHAR */
2547 
2548  /* unsupported format specifier "%?" */
2549  default:
2550  {
2551  error_exit = TRUE;
2552  break;
2553  }
2554  } /* end Switch on uni_cur_char */
2555 
2556  /* Correct tokens found to consume */
2557  if (length_adj != 0)
2558  {
2559  uni_format_param_ptr = (E_FORMAT_PARAM *)STB_GetMemory(
2560  sizeof(E_FORMAT_PARAM));
2561  if (uni_format_param_ptr != NULL)
2562  {
2563  /* First allocation? */
2564  if (uni_head_param_list_ptr == NULL)
2565  {
2566  uni_head_param_list_ptr = uni_format_param_ptr;
2567  uni_last_param_list_ptr = uni_format_param_ptr;
2568  }
2569  else
2570  {
2571  uni_last_param_list_ptr->next_ptr = uni_format_param_ptr;
2572  uni_last_param_list_ptr = uni_format_param_ptr;
2573  }
2574  uni_format_param_ptr->format_index = (2 * format_loop) + 1; /* start of '%' */
2575  uni_format_param_ptr->format_adj = 2 * length_adj;
2576  uni_format_param_ptr->param = ascii_string;
2577  MakeUnicode( &(uni_format_param_ptr->param),
2578  &(uni_format_param_ptr->free_param), &(uni_format_param_ptr->param_size),
2579  &reverse, FALSE);
2580  uni_format_param_ptr->next_ptr = NULL;
2581  total_length = ((total_length - length_adj) +
2582  (uni_format_param_ptr->param_size));
2583  uni_format_param_ptr = uni_format_param_ptr->next_ptr;
2584  *reverse_dir |= reverse;
2585  }
2586  else
2587  {
2588  error_exit = TRUE;
2589  }
2590  }
2591  format_loop = cur_format_index; /* Skip parsed tokens */
2592  } /* End else check for numeric format specifiers */
2593  } /* End if '%' found */
2594  } /* End for to loop over the format string */
2595 
2596  if (error_exit == FALSE)
2597  {
2598  /* Allocate memory for new string plus space for null terminator and header */
2599  uni_string_ptr = (U8BIT *)STB_GetMemory(sizeof(U8BIT) * STB_UTF16_LEN_TO_BYTES_IN_STRING(total_length));
2600  tmp_str_ptr = uni_string_ptr;
2601  if (uni_string_ptr != NULL)
2602  {
2603  /* No need to add header as it will be added from the format string */
2604  uni_format_param_ptr = uni_head_param_list_ptr;
2605  tmp_format_ptr = uni_format_ptr;
2606  while (tmp_str_ptr < (uni_string_ptr + STB_UTF16_LEN_TO_BYTES_IN_STRING(total_length)))
2607  {
2608  if (uni_format_param_ptr != NULL)
2609  {
2610  /* Need to copy some of the format string over? */
2611  if ((tmp_format_ptr - uni_format_ptr) <
2612  uni_format_param_ptr->format_index)
2613  {
2614  nchar = ((uni_format_param_ptr->format_index) - (tmp_format_ptr - uni_format_ptr));
2615  memcpy((void *)tmp_str_ptr, (void *)tmp_format_ptr, nchar);
2616  tmp_str_ptr += nchar;
2617  tmp_format_ptr += nchar;
2618  }
2619  /* copy the parameter string skipping the unicode header */
2620  if (uni_format_param_ptr->param != NULL)
2621  {
2622  memcpy((void *)tmp_str_ptr,
2623  (void *)((uni_format_param_ptr->param) + 1),
2624  2 * (uni_format_param_ptr->param_size));
2625  }
2626  tmp_str_ptr += (2 * (uni_format_param_ptr->param_size));
2627  tmp_format_ptr += uni_format_param_ptr->format_adj;
2628  uni_format_param_ptr = uni_format_param_ptr->next_ptr;
2629  }
2630  else
2631  {
2632  /* Copy the end of format string */
2633  nchar = (STB_UTF16_LEN_TO_BYTES_IN_STRING(format_length) - (tmp_format_ptr - uni_format_ptr));
2634  memcpy((void *)tmp_str_ptr, (void *)tmp_format_ptr, nchar);
2635  tmp_str_ptr += nchar;
2636  tmp_format_ptr += nchar;
2637  } /* end of no more params to process */
2638  } /* loop to copy over string */
2639  /* No need to add null terminator as got from format string */
2640  }
2641  } /* End if no error */
2642  } /* End if Format string is valid */
2643  else
2644  {
2645  uni_string_ptr = NULL;
2646  }
2647 
2648  /* When constructing the string need to check for reverse_dir */
2649  /* Clean up and free resources */
2650  if (free_uni_format_ptr == TRUE)
2651  {
2652  STB_ReleaseUnicodeString(uni_format_ptr);
2653  }
2654 
2655  while (uni_head_param_list_ptr != NULL)
2656  {
2657  uni_format_param_ptr = uni_head_param_list_ptr;
2658  uni_head_param_list_ptr = uni_format_param_ptr->next_ptr;
2659  if (uni_format_param_ptr->free_param == TRUE)
2660  {
2661  STB_FreeMemory(uni_format_param_ptr->param);
2662  }
2663  STB_FreeMemory(uni_format_param_ptr);
2664  }
2665 
2666  va_end(args_ptr);
2667 
2668  if (uni_string_ptr != NULL)
2669  {
2670  /* Convert the resulting string to UTF-8 */
2671  tmp_str_ptr = STB_ConvertUTF16toUTF8(uni_string_ptr, &nchar);
2672  STB_ReleaseUnicodeString(uni_string_ptr);
2673  uni_string_ptr = tmp_str_ptr;
2674  }
2675 
2676  FUNCTION_FINISH(STB_FormatUnicodeString);
2677 
2678  return(uni_string_ptr);
2679 }
2680 
2681 /*!**************************************************************************
2682  * @brief Creates a new string by inserting one string into another at a given position,
2683  * with the option of replacing the char at the given position. Strings can be
2684  * passed as DVB or unicode, but output will always be unicode and the resulting
2685  * string must be freed.
2686  * @param src_str - string into which the insertion will be made
2687  * @param insert_pos - position in the source string to make the insertion, it will be after this position
2688  * @param insert_str - string to be inserted
2689  * @param replace_char - TRUE if the char at the insertion point is to be replaced by the insertion string
2690  * @return new string with text inserted
2691  ****************************************************************************/
2692 U8BIT* STB_UnicodeInsertString(U8BIT *src_str, U16BIT insert_pos, U8BIT *insert_str, BOOLEAN replace_char)
2693 {
2694  U8BIT *uni_src;
2695  U8BIT *uni_insert;
2696  U8BIT *new_str;
2697  U8BIT *str_ptr;
2698  BOOLEAN reverse;
2699  U16BIT num_chars;
2700  U16BIT num_bytes;
2701 
2702  FUNCTION_START(STB_UnicodeInsertString);
2703 
2704  new_str = NULL;
2705 
2706  if ((src_str != NULL) && (insert_str != NULL))
2707  {
2708  if (STB_IsUnicodeString(src_str))
2709  {
2710  uni_src = src_str;
2711  num_bytes = STB_GetNumBytesInString(uni_src);
2712  }
2713  else
2714  {
2715  uni_src = STB_ConvertStringToUnicode(src_str, &reverse, &num_chars, FALSE, 0);
2716  num_bytes = STB_UTF16_LEN_TO_BYTES_IN_STRING(num_chars);
2717  }
2718 
2719  if (replace_char)
2720  {
2721  num_bytes -= 2;
2722  }
2723 
2724  if (STB_IsUnicodeString(insert_str))
2725  {
2726  uni_insert = insert_str;
2727  }
2728  else
2729  {
2730  uni_insert = STB_ConvertStringToUnicode(insert_str, &reverse, &num_chars, FALSE, 0);
2731  }
2732 
2733  if ((uni_src != NULL) && (uni_insert != NULL))
2734  {
2735  /* Subtract 3 for the unicode header byte, 0x11, and the trailing null bytes */
2736  num_bytes += (STB_GetNumBytesInString(uni_insert) - 3);
2737 
2738  /* Allocate memory for the new string and copy the data into it */
2739  new_str = STB_GetMemory(num_bytes);
2740  if (new_str != NULL)
2741  {
2742  /* Copy the unicode header byte and all chars upto the insertion point */
2743  num_bytes = (insert_pos * 2) + 1;
2744  memcpy(new_str, uni_src, num_bytes);
2745  str_ptr = new_str + num_bytes;
2746 
2747  /* Copy the string being inserted, without the unicode header byte */
2748  num_bytes = STB_GetNumBytesInString(uni_insert) - 3;
2749  memcpy(str_ptr, uni_insert + 1, num_bytes);
2750  str_ptr += num_bytes;
2751 
2752  if (replace_char)
2753  {
2754  /* Char is being replaced, so skip it when copying the remainder of the source string */
2755  insert_pos++;
2756  }
2757 
2758  /* Copy the remainder of the source string */
2759  num_bytes = STB_GetNumBytesInString(uni_src) - ((insert_pos * 2) + 1);
2760  memcpy(str_ptr, uni_src + (insert_pos * 2) + 1, num_bytes);
2761  }
2762  }
2763 
2764  if ((uni_insert != NULL) && (uni_insert != insert_str))
2765  {
2766  STB_ReleaseUnicodeString(uni_insert);
2767  }
2768 
2769  if ((uni_src != NULL) && (uni_src != src_str))
2770  {
2771  STB_ReleaseUnicodeString(uni_src);
2772  }
2773  }
2774 
2775  FUNCTION_FINISH(STB_UnicodeInsertString);
2776 
2777  return(new_str);
2778 }
2779 
2788 U8BIT* STB_UnicodeStripControlChars(U8BIT *string_ptr)
2789 {
2790  U8BIT *out_string;
2791  U8BIT *out_ptr;
2792  U8BIT *str_ptr;
2793  U8BIT *end_ptr;
2794  U16BIT num_bytes;
2795  U16BIT unicode;
2796 
2797  FUNCTION_START(STB_UnicodeStripControlChars);
2798 
2799  out_string = NULL;
2800 
2801  if (STB_IsUnicodeString(string_ptr))
2802  {
2803  /* Allocate the return string */
2804  num_bytes = STB_GetNumBytesInString(string_ptr);
2805  if (num_bytes != 0)
2806  {
2807  out_string = STB_GetMemory(num_bytes);
2808  if (out_string != NULL)
2809  {
2810  out_ptr = out_string;
2811  str_ptr = string_ptr;
2812  end_ptr = string_ptr + num_bytes;
2813 
2814  *out_ptr = *str_ptr;
2815  out_ptr++;
2816  str_ptr++;
2817 
2818  if (string_ptr[UNICODE_HEADER_POS] == UTF16_HEADER_VALUE)
2819  {
2820  while (str_ptr < end_ptr)
2821  {
2822  unicode = ((*str_ptr) << 8) | *(str_ptr + 1);
2823  str_ptr += 2;
2824 
2825  if ((unicode != 0x008a) && (unicode != 0xe08a) && (unicode != 0x0086) &&
2826  (unicode != 0xe086) && (unicode != 0x0087) && (unicode != 0xe087))
2827  {
2828  *out_ptr = (U8BIT)((unicode >> 8) & 0xff);
2829  out_ptr++;
2830  *out_ptr = (U8BIT)(unicode & 0xff);
2831  out_ptr++;
2832  }
2833  }
2834  }
2835  else
2836  {
2837  while (str_ptr < end_ptr)
2838  {
2839  unicode = ReadUTF8(&str_ptr);
2840 
2841  if ((unicode != 0x008a) && (unicode != 0xe08a) && (unicode != 0x0086) &&
2842  (unicode != 0xe086) && (unicode != 0x0087) && (unicode != 0xe087))
2843  {
2844  out_ptr = OutputUTF8(out_ptr, unicode);
2845  }
2846  }
2847  }
2848  }
2849  }
2850  }
2851 
2852  FUNCTION_FINISH(STB_UnicodeStripControlChars);
2853 
2854  return(out_string);
2855 }
2856 
2857 
2858 //--------------------------------------------------------------------------------------------------
2859 // local function definitions
2860 //--------------------------------------------------------------------------------------------------
2861 
2862 
2875 static BOOLEAN CheckUnicodeCharForReverseDirection(U16BIT unicode)
2876 {
2877  BOOLEAN retval;
2878 
2879  FUNCTION_START(CheckUnicodeCharForReverseDirection);
2880 
2881  retval = FALSE;
2882 
2883  if (((unicode >= 0x0600) && (unicode <= 0x06ff)) ||
2884  ((unicode >= 0xfb50) && (unicode <= 0xfdff)) ||
2885  ((unicode >= 0xfe70) && (unicode <= 0xfeff))
2886  )
2887  {
2888  retval = TRUE;
2889  }
2890 
2891  FUNCTION_FINISH(CheckUnicodeCharForReverseDirection);
2892  return(retval);
2893 }
2894 
2910 static void MakeUnicode( U8BIT **addr_string_ptr, BOOLEAN *new_string, U16BIT *length_ptr,
2911  BOOLEAN *reverse_dir, BOOLEAN strip_DVB_cntrl_char)
2912 {
2913  FUNCTION_START(MakeUnicode);
2914 
2915  *new_string = FALSE;
2916  if (*addr_string_ptr[UNICODE_HEADER_POS] != UTF16_HEADER_VALUE)
2917  {
2918  *addr_string_ptr = STB_ConvertStringToUnicode(*addr_string_ptr, reverse_dir, length_ptr,
2919  strip_DVB_cntrl_char, 0);
2920  if (*addr_string_ptr == NULL)
2921  {
2922  *new_string = FALSE;
2923  }
2924  else
2925  {
2926  *new_string = TRUE;
2927  }
2928  }
2929  else
2930  {
2931  *length_ptr = (U16BIT) STB_UnicodeStringLen( *addr_string_ptr );
2932  }
2933 
2934  FUNCTION_FINISH(MakeUnicode);
2935 }
2936 
2937 /*!**************************************************************************
2938  * @brief Generates the UTF-8 byte sequence for the given character
2939  * @param buffer - buffer to output the UTF-8 byte sequence to
2940  * @param char_code - character to be output
2941  * @return pointer to the first byte following the output sequence
2942  ****************************************************************************/
2943 static U8BIT* OutputUTF8(U8BIT *buffer, U32BIT char_code)
2944 {
2945  U8BIT num_bytes;
2946  const U8BIT firstByteMark[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
2947 
2948  FUNCTION_START(OutputUTF8);
2949 
2950  if (char_code < 0x80)
2951  {
2952  num_bytes = 0;
2953  }
2954  else if (char_code < 0x0800)
2955  {
2956  num_bytes = 1;
2957  }
2958  else if (char_code < 0x00010000)
2959  {
2960  num_bytes = 2;
2961  }
2962  else if (char_code < 0x00110000)
2963  {
2964  num_bytes = 3;
2965  }
2966  else
2967  {
2968  num_bytes = 2;
2969  char_code = UNI_REPLACEMENT_CHAR;
2970  }
2971 
2972  buffer += num_bytes;
2973 
2974  switch (num_bytes) /* Note: everything falls through. */
2975  {
2976  case 3:
2977  *buffer = (U8BIT)((char_code | 0x80) & 0xBF);
2978  buffer--;
2979  char_code >>= 6;
2980  case 2:
2981  *buffer = (U8BIT)((char_code | 0x80) & 0xBF);
2982  buffer--;
2983  char_code >>= 6;
2984  case 1:
2985  *buffer = (U8BIT)((char_code | 0x80) & 0xBF);
2986  buffer--;
2987  char_code >>= 6;
2988  case 0:
2989  *buffer = (U8BIT)(char_code | firstByteMark[num_bytes]);
2990  break;
2991  }
2992 
2993  FUNCTION_FINISH(OutputUTF8);
2994 
2995  /* Return pointer to the next byte to be written */
2996  return(buffer + num_bytes + 1);
2997 }
2998 
2999 /*!**************************************************************************
3000  * @brief Returns the character code from a UTF-8 byte sequence
3001  * @param buffer - address of pointer to the character in the UTF-8 byte sequence,
3002  * this value is updated to point to the next char
3003  * @return character code, or INVALID_UNICODE_CHAR
3004  ****************************************************************************/
3005 static U32BIT ReadUTF8(U8BIT **buffer)
3006 {
3007  U8BIT byte_val;
3008  U32BIT char_code;
3009 
3010  FUNCTION_START(ReadUTF8);
3011 
3012  byte_val = (*buffer)[0];
3013  char_code = INVALID_UNICODE_CHAR;
3014 
3015  if ((byte_val & 0x80) == 0)
3016  {
3017  char_code = byte_val;
3018  *buffer += 1;
3019  }
3020  else if ((byte_val & 0xE0) == 0xC0)
3021  {
3022  char_code = byte_val & 0x1F;
3023  char_code <<= 6;
3024  char_code += (((*buffer)[1]) & 0x3F);
3025  *buffer += 2;
3026  }
3027  else if ((byte_val & 0xF0) == 0xE0)
3028  {
3029  char_code = byte_val & 0x0F;
3030  char_code <<= 6;
3031  char_code += (((*buffer)[1]) & 0x3F);
3032  char_code <<= 6;
3033  char_code += (((*buffer)[2]) & 0x3F);
3034  *buffer += 3;
3035  }
3036  else if ((byte_val & 0xF8) == 0xF0)
3037  {
3038  char_code = byte_val & 0x07;
3039  char_code <<= 6;
3040  char_code += (((*buffer)[1]) & 0x3F);
3041  char_code <<= 6;
3042  char_code += (((*buffer)[2]) & 0x3F);
3043  char_code <<= 6;
3044  char_code += (((*buffer)[3]) & 0x3F);
3045  *buffer += 4;
3046  }
3047 
3048  FUNCTION_FINISH(ReadUTF8);
3049 
3050  return(char_code);
3051 }
3052 
3053 /*!**************************************************************************
3054  * @brief Converts the given character to its lower case equivalent
3055  * @param char_code - character to be converted
3056  * @return Converted character code, or original code if no conversion is required or appropriate
3057  ****************************************************************************/
3058 static U32BIT CharToLower(U32BIT char_code)
3059 {
3060  FUNCTION_START(CharToLower);
3061 
3062  if (((char_code >= 'A') && (char_code <= 'Z')) ||
3063  ((char_code >= UC_LATIN_CAPITAL_LETTER_A_WITH_GRAVE) &&
3064  (char_code <= UC_LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS)) ||
3065  ((char_code >= UC_LATIN_CAPITAL_LETTER_O_WITH_STROKE) &&
3066  (char_code <= UC_LATIN_CAPITAL_LETTER_THORN)))
3067  {
3068  char_code += 0x20;
3069  }
3070  else if ((char_code >= UC_LATIN_CAPITAL_LETTER_A_WITH_MACRON) &&
3071  (char_code <= UC_LATIN_CAPITAL_LETTER_Z_WITH_CARON))
3072  {
3073  /* Map individual chars through a mapping table */
3074  char_code = lowercase_chars_0x0100[char_code - UC_LATIN_CAPITAL_LETTER_A_WITH_MACRON];
3075  }
3076  else if (char_code == UC_GREEK_CAPITAL_LETTER_ALPHA_WITH_TONOS)
3077  {
3078  char_code = UC_GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
3079  }
3080  else if ((char_code >= UC_GREEK_CAPITAL_LETTER_EPSILON_WITH_TONOS) &&
3081  (char_code <= UC_GREEK_CAPITAL_LETTER_IOTA_WITH_TONOS))
3082  {
3083  char_code += 0x25;
3084  }
3085  else if (char_code == UC_GREEK_CAPITAL_LETTER_OMICRON_WITH_TONOS)
3086  {
3087  char_code = UC_GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
3088  }
3089  else if (char_code == UC_GREEK_CAPITAL_LETTER_UPSILON_WITH_TONOS)
3090  {
3091  char_code = UC_GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
3092  }
3093  else if (char_code == UC_GREEK_CAPITAL_LETTER_OMEGA_WITH_TONOS)
3094  {
3095  char_code = UC_GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
3096  }
3097  else if ((char_code >= UC_GREEK_CAPITAL_LETTER_ALPHA) &&
3098  (char_code <= UC_GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA))
3099  {
3100  char_code += 0x20;
3101  }
3102  else if ((char_code >= UC_CYRILLIC_CAPITAL_LETTER_IO) && (char_code <= UC_CYRILLIC_CAPITAL_LETTER_DZHE))
3103  {
3104  char_code += 0x50;
3105  }
3106  else if ((char_code >= UC_CYRILLIC_CAPITAL_LETTER_A) && (char_code <= UC_CYRILLIC_CAPITAL_LETTER_YA))
3107  {
3108  char_code += 0x20;
3109  }
3110  else if ((char_code >= UC_LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW) &&
3111  (char_code <= UC_LATIN_CAPITAL_LETTER_Y_WITH_LOOP))
3112  {
3113  char_code = lowercase_chars_0x1E00[char_code - UC_LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW];
3114  }
3115 
3116  FUNCTION_FINISH(CharToLower);
3117 
3118  return(char_code);
3119 }
3120 
3121 //**************************************************************************************************
3122 // End of File
3123 //**************************************************************************************************
Contains character map tables for converting single byte ascii codes above 0xa0 to unicode codes...
void * STB_GetMemory(U32BIT bytes)
Attempts to allocate memory from the heap.
Definition: stbheap.c:221
U8BIT * STB_SetUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id, U16BIT code)
Takes a string and changes the requested location to a new value. This request may involve appending ...
Definition: stbuni.c:412
void STB_SetDefaultAsciiTable(U8BIT table)
Sets default ascii table to be used, if not overridden by the table index at the start of a string...
Definition: stbuni.c:2157
U8BIT * STB_ConvertStringToUTF8(U8BIT *string, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
Converts the given DVB coded string into a UTF-8 unicode string. The returned string will be preceded...
Definition: stbuni.c:1508
U8BIT * STB_ConvertUTF16toUTF8(U8BIT *src, U32BIT *outlen)
Creates the given string from UTF-16 to UTF-8 and returns a new string. The returned string should be...
Definition: stbuni.c:1974
S8BIT STB_CompareUnicodeStrings(U8BIT *string1_ptr, U8BIT *string2_ptr, BOOLEAN exact_match, BOOLEAN ignore_case)
Compares the contents of the two given unicode strings and returns the status (as per strcmp) ...
Definition: stbuni.c:910
U8BIT * STB_ConvertStringToUnicode(U8BIT *string, BOOLEAN *reverse_dir, U16BIT *nchar, BOOLEAN strip_DVB_cntrl_char, U32BIT lang_code)
Converts the specified DVB coded string into a unicode string, counting the number of characters and ...
Definition: stbuni.c:1069
void * STB_ResizeMemory(void *ptr, U32BIT new_num_bytes)
Re-allocates a given memory area to the new size, ensuring data contained within the original memory ...
Definition: stbheap.c:550
U8BIT * STB_FormatUnicodeString(BOOLEAN strip_DVB_cntrl_char, BOOLEAN *reverse_dir, const U8BIT *const format_ptr,...)
Unicode version of sprintf.
Definition: stbuni.c:2292
S8BIT STB_CompareStringsIgnoreCase(U8BIT *string1_ptr, U8BIT *string2_ptr)
Compares the contents of the two given ASCII strings and returns the status (as per strcmp) but ignor...
Definition: stbuni.c:2183
void STB_FreeMemory(void *addr)
Releases previously allocated heap memory.
Definition: stbheap.c:336
BOOLEAN STB_IsStringEmpty(U8BIT *string_ptr)
Checks for a string of only spaces.
Definition: stbuni.c:2113
Debug functions header file.
void STB_ReleaseUnicodeString(U8BIT *string)
Releases the specified unicode string, freeing associated heap resources.
Definition: stbuni.c:1955
U8BIT * STB_UnicodeStripControlChars(U8BIT *string_ptr)
Strips the DVB control characters from a string that's already in UTF-8 or UTF-16 format...
Definition: stbuni.c:2788
U32BIT STB_UnicodeStringLen(U8BIT *string_ptr)
Determines the length, in characters, of the given unicode string by searching for NULL...
Definition: stbuni.c:141
BOOLEAN STB_IsNormalString(U8BIT *string_ptr)
Tests for normal ascii string.
Definition: stbuni.c:281
Header for STB unicode string handling routines.
U8BIT * STB_DeleteUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id)
Takes a string and removes the requested location, shuffling any following data down (thus removing g...
Definition: stbuni.c:477
U8BIT * STB_ConcatUnicodeStrings(U8BIT *string1_ptr, U8BIT *string2_ptr)
Appends the contents of string2_ptr to string1_ptr and returns a pointer to the newly created string...
Definition: stbuni.c:618
System Wide Global Technical Data Type Definitions.
U16BIT STB_HuffmanDecompress(U8BIT encoding_type, U8BIT *input, U8BIT *output, U16BIT output_size)
Decompresses the input buffer according to the BBC's Huffman algorithm as defined in the DTG D-Book 6...
Definition: stbhuffman.c:69
Header file - Function prototypes for heap memory.
U32BIT STB_GetUnicodeStringChar(U8BIT *string_ptr, U16BIT char_id)
Retrieves the unicode value pointed to by char_id within the given string. If an invalid request occu...
Definition: stbuni.c:526
U8BIT * STB_UnicodeStrStr(U8BIT *str1, U8BIT *str2, BOOLEAN ignore_case)
Finds the first occurence of str2 in str1 and returns a pointer to the substring (as per strstr) ...
Definition: stbuni.c:788
BOOLEAN STB_IsUnicodeString(U8BIT *string_ptr)
Tests for unicode string.
Definition: stbuni.c:248
U32BIT STB_GetNumBytesInString(U8BIT *string_ptr)
Determines the no of bytes of the given string.
Definition: stbuni.c:311
BOOLEAN STB_IsUnicodeStringReversed(U8BIT *string_ptr)
Checks to see if the supplied string is unicode and if it is reversed (arabic)
Definition: stbuni.c:204
#define DBGPRINT(...)
Definition: dbgfuncs.h:74
U8BIT * STB_UnicodeStringTokenise(U8BIT *string, U8BIT **save_ptr)
Divides the (space separated) string up into individual words and returns them one per call...
Definition: stbuni.c:723
STB midware Huffman decompression routines defined by the BBC.
U8BIT * STB_UnicodeInsertString(U8BIT *src_str, U16BIT insert_pos, U8BIT *insert_str, BOOLEAN replace_char)
Creates a new string by inserting one string into another at a given position, with the option of rep...
Definition: stbuni.c:2692