class ICU::BreakIterator

Overview

This class defines methods for finding the location of boundaries (line, sentence and word) in text.

Usage

str = "หน้าแรก"
bi = ICU::BreakIterator.new(str, ICU::BreakIterator::Type::Word)
bi.each { |s| p s }
# => "หน้า"
# => "แรก"
bi.to_a # => ["หน้า", "แรก"]

See also

Included Modules

Defined in:

icu/break_iterator.cr

Constant Summary

DONE = -1
LOCALES = begin locales = (0...LibICU.ubrk_count_available).map do |i| String.new(LibICU.ubrk_get_available(i)) end Set(String).new(locales) end

Constructors

Instance Method Summary

Constructor Detail

def self.new(break_type : Type, locale : String? = nil) #

Creates a new BreakIterator without any text; assign it later with `text=`

bi = ICU::BreakIterator.new(ICU::BreakIterator::Type::Word)
bi.text = "abc def ghi"
bi.to_a # => ["abc", " ", "def", " ", "ghi"]

[View source]
def self.new(text : String, break_type : Type, locale : String? = nil) #

Creates a new BreakIterator for the given text

str = "Some text. More text."
bi = ICU::BreakIterator.new(str, ICU::BreakIterator::Type::Sentence)
bi.to_a # => ["Some text. ", "More text."]

[View source]

Instance Method Detail

def each(&) #

Iterates over the segments of text delimited by the boundaries

str = "abc def ghi"
bi = ICU::BreakIterator.new(str, ICU::BreakIterator::Type::Word)
bi.to_a # => ["abc", " ", "def", " ", "ghi"]
bi.each { |s| p s }
# => "abc"
# => " "
# => "def"
# => " "
# => "ghi"

[View source]
def each_bound(&) #

Iterates over the indices of the text boundaries

str = "abc def"
bi = ICU::BreakIterator.new(str, ICU::BreakIterator::Type::Word)
bi.each_bound { |i| p i }
# => 0
# => 3
# => 4
# => 7

[View source]
def finalize #

[View source]
def text : ICU::UChars? #

[View source]
def text=(text : String) #

Changes the text being iterated on


[View source]
def to_unsafe : LibICU::UBreakIterator #

[View source]