using Unity.Collections.LowLevel.Unsafe;

namespace Unity.Collections
{
    /// <summary>
    /// Provides methods for copying, appending, and comparing Unicode text held in raw buffers.
    /// </summary>
    [GenerateTestsForBurstCompatibility]
    public static unsafe class UTF8ArrayUnsafeUtility
    {
        /// <summary>
        /// Maps the result of a <see cref="Unicode"/> conversion onto the coarser <see cref="CopyError"/> enum.
        /// </summary>
        /// <remarks>Every conversion error, whatever its kind, is reported as <see cref="CopyError.Truncation"/>.</remarks>
        /// <param name="error">The conversion result to translate.</param>
        /// <returns><see cref="CopyError.None"/> if <paramref name="error"/> is <see cref="ConversionError.None"/>. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        private static CopyError ToCopyError(ConversionError error)
        {
            return error == ConversionError.None ? CopyError.None : CopyError.Truncation;
        }

        /// <summary>
        /// Copies a buffer of UCS-2 text. The copy is encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2.</remarks>
        /// <param name="dest">The destination buffer for writing UTF-8.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer for reading UCS-2.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte* dest, out int destLength, int destUTF8MaxLengthInBytes, char* src, int srcLength)
        {
            return ToCopyError(Unicode.Utf16ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes));
        }

        /// <summary>
        /// Copies a buffer of UCS-2 text. The copy is encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2.</remarks>
        /// <param name="dest">The destination buffer for writing UTF-8.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer for reading UCS-2.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte* dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, char* src, int srcLength)
        {
            var error = Unicode.Utf16ToUtf8(src, srcLength, dest, out var written, destUTF8MaxLengthInBytes);
            destLength = (ushort)written;
            return ToCopyError(error);
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte* dest, out int destLength, int destUTF8MaxLengthInBytes, byte* src, int srcLength)
        {
            return ToCopyError(Unicode.Utf8ToUtf8(src, srcLength, dest, out destLength, destUTF8MaxLengthInBytes));
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Outputs the number of bytes written to the destination.</param>
        /// <param name="destUTF8MaxLengthInBytes">The max number of bytes that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(byte* dest, out ushort destLength, ushort destUTF8MaxLengthInBytes, byte* src, ushort srcLength)
        {
            var error = Unicode.Utf8ToUtf8(src, srcLength, dest, out var written, destUTF8MaxLengthInBytes);
            destLength = (ushort)written;
            return ToCopyError(error);
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text. The copy is encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer for writing UCS-2.</param>
        /// <param name="destLength">Outputs the number of chars written to the destination.</param>
        /// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer for reading UTF-8.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(char* dest, out int destLength, int destUCS2MaxLengthInChars, byte* src, int srcLength)
        {
            return ToCopyError(Unicode.Utf8ToUtf16(src, srcLength, dest, out destLength, destUCS2MaxLengthInChars));
        }

        /// <summary>
        /// Copies a buffer of UTF-8 text. The copy is encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer for writing UCS-2.</param>
        /// <param name="destLength">Outputs the number of chars written to the destination.</param>
        /// <param name="destUCS2MaxLengthInChars">The max number of chars that will be written to the destination buffer.</param>
        /// <param name="src">The source buffer for reading UTF-8.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the copy fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Copy(char* dest, out ushort destLength, ushort destUCS2MaxLengthInChars, byte* src, ushort srcLength)
        {
            var error = Unicode.Utf8ToUtf16(src, srcLength, dest, out var written, destUCS2MaxLengthInChars);
            destLength = (ushort)written;
            return ToCopyError(error);
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer.
        /// </summary>
        /// <remarks>
        /// Assumes the source data is valid UTF-8.
        ///
        /// No data will be copied if the destination has insufficient capacity for the full append, *i.e.* if `srcLength > (destCapacity - destLength)`.
        /// </remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be assigned the new length *after* the append.</param>
        /// <param name="destCapacity">The destination buffer capacity in bytes.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="FormatError.None"/> if the append fully completes. Otherwise, returns <see cref="FormatError.Overflow"/>.</returns>
        public static FormatError AppendUTF8Bytes(byte* dest, ref int destLength, int destCapacity, byte* src, int srcLength)
        {
            // Compare against the remaining space rather than summing the lengths:
            // `destLength + srcLength` can wrap around for a very large srcLength,
            // which would let an oversized copy slip past the capacity check.
            if (srcLength > destCapacity - destLength)
                return FormatError.Overflow;

            UnsafeUtility.MemCpy(dest + destLength, src, srcLength);
            destLength += srcLength;
            return FormatError.None;
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be increased by the number of bytes appended.</param>
        /// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(byte* dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, byte* src, ushort srcLength)
        {
            // Write after the existing content, limited to the space that is still free.
            var error = Unicode.Utf8ToUtf8(src, srcLength, dest + destLength, out var appended, destUTF8MaxLengthInBytes - destLength);
            destLength += (ushort)appended;
            return ToCopyError(error);
        }

        /// <summary>
        /// Appends UCS-2 text to a buffer, encoded as UTF-8.
        /// </summary>
        /// <remarks>Assumes the source data is valid UCS-2.</remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in bytes *before* the append. Will be increased by the number of bytes appended.</param>
        /// <param name="destUTF8MaxLengthInBytes">The destination buffer's length in bytes. Data will not be appended past this length.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of chars to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(byte* dest, ref ushort destLength, ushort destUTF8MaxLengthInBytes, char* src, int srcLength)
        {
            // Write after the existing content, limited to the space that is still free.
            var error = Unicode.Utf16ToUtf8(src, srcLength, dest + destLength, out var appended, destUTF8MaxLengthInBytes - destLength);
            destLength += (ushort)appended;
            return ToCopyError(error);
        }

        /// <summary>
        /// Appends UTF-8 text to a buffer, encoded as UCS-2.
        /// </summary>
        /// <remarks>Assumes the source data is valid UTF-8.</remarks>
        /// <param name="dest">The destination buffer.</param>
        /// <param name="destLength">Reference to the destination buffer's length in chars *before* the append. Will be increased by the number of chars appended.</param>
        /// <param name="destUCS2MaxLengthInChars">The destination buffer's length in chars. Data will not be appended past this length.</param>
        /// <param name="src">The source buffer.</param>
        /// <param name="srcLength">The number of bytes to read from the source.</param>
        /// <returns><see cref="CopyError.None"/> if the append fully completes. Otherwise, returns <see cref="CopyError.Truncation"/>.</returns>
        public static CopyError Append(char* dest, ref ushort destLength, ushort destUCS2MaxLengthInChars, byte* src, ushort srcLength)
        {
            // Write after the existing content, limited to the space that is still free.
            var error = Unicode.Utf8ToUtf16(src, srcLength, dest + destLength, out var appended, destUCS2MaxLengthInChars - destLength);
            destLength += (ushort)appended;
            return ToCopyError(error);
        }

        /// <summary>
        /// The outcome of comparing one decoded code point from each of two strings.
        /// </summary>
        internal struct Comparison
        {
            /// <summary>True if the string comparison should stop at this pair of code points.</summary>
            public bool terminates;

            /// <summary>The difference between the two code point values (A minus B); zero if they are equal.</summary>
            public int result;

            /// <summary>
            /// Compares a pair of decoded code points.
            /// </summary>
            /// <remarks>
            /// A code point whose decode reported an error is treated as value 0. The comparison terminates
            /// when the two values differ, or when both are 0.
            /// </remarks>
            /// <param name="runeA">The code point decoded from the first string.</param>
            /// <param name="errorA">The result of decoding <paramref name="runeA"/>.</param>
            /// <param name="runeB">The code point decoded from the second string.</param>
            /// <param name="errorB">The result of decoding <paramref name="runeB"/>.</param>
            public Comparison(Unicode.Rune runeA, ConversionError errorA, Unicode.Rune runeB, ConversionError errorB)
            {
                // A failed decode counts as the terminating value 0.
                if (errorA != ConversionError.None)
                    runeA.value = 0;
                if (errorB != ConversionError.None)
                    runeB.value = 0;

                if (runeA.value != runeB.value)
                {
                    result = runeA.value - runeB.value;
                    terminates = true;
                }
                else
                {
                    result = 0;
                    terminates = (runeA.value == 0 && runeB.value == 0);
                }
            }
        }

        /// <summary>Compares two UTF-8 buffers for relative equality.</summary>
        /// <remarks>The comparison stops at the first differing code point, or once both buffers yield code point 0 or a decode error.</remarks>
        /// <param name="utf8BufferA">The first buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesA">The length in bytes of the first UTF-8 buffer.</param>
        /// <param name="utf8BufferB">The second buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesB">The length in bytes of the second UTF-8 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the first UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the second UTF-8 buffer.
        /// </returns>
        public static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, byte* utf8BufferB, int utf8LengthInBytesB)
        {
            int byteIndexA = 0;
            int byteIndexB = 0;
            while (true)
            {
                var utf8ErrorA = Unicode.Utf8ToUcs(out var utf8RuneA, utf8BufferA, ref byteIndexA, utf8LengthInBytesA);
                var utf8ErrorB = Unicode.Utf8ToUcs(out var utf8RuneB, utf8BufferB, ref byteIndexB, utf8LengthInBytesB);
                var comparison = new Comparison(utf8RuneA, utf8ErrorA, utf8RuneB, utf8ErrorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares a UTF-8 buffer and a buffer of code points for relative equality.</summary>
        /// <param name="utf8BufferA">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytesA">The length in bytes of the UTF-8 buffer.</param>
        /// <param name="runeBufferB">The buffer of code points.</param>
        /// <param name="lengthInRunesB">The length in runes of the code point buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the code point buffer.
        /// </returns>
        internal static int StrCmp(byte* utf8BufferA, int utf8LengthInBytesA, Unicode.Rune* runeBufferB, int lengthInRunesB)
        {
            int byteIndexA = 0;
            int runeIndexB = 0;
            while (true)
            {
                var utf8ErrorA = Unicode.Utf8ToUcs(out var utf8RuneA, utf8BufferA, ref byteIndexA, utf8LengthInBytesA);
                var errorB = Unicode.UcsToUcs(out var runeB, runeBufferB, ref runeIndexB, lengthInRunesB);
                var comparison = new Comparison(utf8RuneA, utf8ErrorA, runeB, errorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares two UTF-16 buffers for relative equality.</summary>
        /// <remarks>The comparison stops at the first differing code point, or once both buffers yield code point 0 or a decode error.</remarks>
        /// <param name="utf16BufferA">The first buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInCharsA">The length in chars of the first UTF-16 buffer.</param>
        /// <param name="utf16BufferB">The second buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInCharsB">The length in chars of the second UTF-16 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in the first UTF-16 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in the second UTF-16 buffer.
        /// </returns>
        public static int StrCmp(char* utf16BufferA, int utf16LengthInCharsA, char* utf16BufferB, int utf16LengthInCharsB)
        {
            int charIndexA = 0;
            int charIndexB = 0;
            while (true)
            {
                var utf16ErrorA = Unicode.Utf16ToUcs(out var utf16RuneA, utf16BufferA, ref charIndexA, utf16LengthInCharsA);
                var utf16ErrorB = Unicode.Utf16ToUcs(out var utf16RuneB, utf16BufferB, ref charIndexB, utf16LengthInCharsB);
                var comparison = new Comparison(utf16RuneA, utf16ErrorA, utf16RuneB, utf16ErrorB);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Returns true if two UTF-8 buffers have the same length and content.</summary>
        /// <remarks>
        /// The buffers are compared byte for byte, so content following an embedded NUL or a malformed
        /// sequence still takes part in the comparison.
        /// </remarks>
        /// <param name="aBytes">The first buffer of UTF-8 text.</param>
        /// <param name="aLength">The length in bytes of the first buffer.</param>
        /// <param name="bBytes">The second buffer of UTF-8 text.</param>
        /// <param name="bLength">The length in bytes of the second buffer.</param>
        /// <returns>True if the content of both strings is identical.</returns>
        public static bool EqualsUTF8Bytes(byte* aBytes, int aLength, byte* bBytes, int bLength)
        {
            if (aLength != bLength)
                return false;

            // Nothing to compare; also avoids handing a non-positive size to MemCmp.
            if (aLength <= 0)
                return true;

            // Decoding via StrCmp stops at the first code point 0 or decode error on both sides,
            // which would report buffers as equal even when later bytes differ.
            return UnsafeUtility.MemCmp(aBytes, bBytes, aLength) == 0;
        }

        /// <summary>Compares a UTF-8 buffer and a UTF-16 buffer for relative equality.</summary>
        /// <remarks>The comparison stops at the first differing code point, or once both buffers yield code point 0 or a decode error.</remarks>
        /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
        /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in UTF-8 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in UTF-16 buffer.
        /// </returns>
        public static int StrCmp(byte* utf8Buffer, int utf8LengthInBytes, char* utf16Buffer, int utf16LengthInChars)
        {
            int byteIndex = 0;
            int charIndex = 0;
            while (true)
            {
                var utf8Error = Unicode.Utf8ToUcs(out var utf8Rune, utf8Buffer, ref byteIndex, utf8LengthInBytes);
                var utf16Error = Unicode.Utf16ToUcs(out var utf16Rune, utf16Buffer, ref charIndex, utf16LengthInChars);
                var comparison = new Comparison(utf8Rune, utf8Error, utf16Rune, utf16Error);
                if (comparison.terminates)
                    return comparison.result;
            }
        }

        /// <summary>Compares a UTF-16 buffer and a UTF-8 buffer for relative equality.</summary>
        /// <param name="utf16Buffer">The buffer of UTF-16 text.</param>
        /// <param name="utf16LengthInChars">The length in chars of the UTF-16 buffer.</param>
        /// <param name="utf8Buffer">The buffer of UTF-8 text.</param>
        /// <param name="utf8LengthInBytes">The length in bytes of the UTF-8 buffer.</param>
        /// <returns>
        /// Less than zero if first different code point is less in UTF-16 buffer.
        /// Zero if the strings are identical.
        /// More than zero if first different code point is less in UTF-8 buffer.
        /// </returns>
        public static int StrCmp(char* utf16Buffer, int utf16LengthInChars, byte* utf8Buffer, int utf8LengthInBytes)
        {
            // Same comparison with the operands swapped, so the sign is flipped.
            return -StrCmp(utf8Buffer, utf8LengthInBytes, utf16Buffer, utf16LengthInChars);
        }
    }
}