From: Chuck Hagenbuch Date: Mon, 3 Nov 2008 19:30:02 +0000 (-0500) Subject: no need for the ruby numerizer implementation now X-Git-Url: https://git.internetallee.de/?a=commitdiff_plain;h=187131a5fcb4de62996ebf8a708f2d9a2c61f11b;p=horde.git no need for the ruby numerizer implementation now --- diff --git a/framework/Horde_Date_Parser/chronic/lib/numerizer/numerizer.rb b/framework/Horde_Date_Parser/chronic/lib/numerizer/numerizer.rb deleted file mode 100644 index 8b02e6260..000000000 --- a/framework/Horde_Date_Parser/chronic/lib/numerizer/numerizer.rb +++ /dev/null @@ -1,103 +0,0 @@ -require 'strscan' - -class Numerizer - - DIRECT_NUMS = [ - ['eleven', '11'], - ['twelve', '12'], - ['thirteen', '13'], - ['fourteen', '14'], - ['fifteen', '15'], - ['sixteen', '16'], - ['seventeen', '17'], - ['eighteen', '18'], - ['nineteen', '19'], - ['ninteen', '19'], # Common mis-spelling - ['zero', '0'], - ['one', '1'], - ['two', '2'], - ['three', '3'], - ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty - ['five', '5'], - ['six(\W|$)', '6\1'], - ['seven(\W|$)', '7\1'], - ['eight(\W|$)', '8\1'], - ['nine(\W|$)', '9\1'], - ['ten', '10'], - ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1 - ] - - TEN_PREFIXES = [ ['twenty', 20], - ['thirty', 30], - ['fourty', 40], - ['fifty', 50], - ['sixty', 60], - ['seventy', 70], - ['eighty', 80], - ['ninety', 90] - ] - - BIG_PREFIXES = [ ['hundred', 100], - ['thousand', 1000], - ['million', 1_000_000], - ['billion', 1_000_000_000], - ['trillion', 1_000_000_000_000], - ] - -class << self - def numerize(string) - string = string.dup - - # preprocess - string.gsub!(/ +|([^\d])-([^d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction - string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end - - # easy/direct replacements - - DIRECT_NUMS.each do |dn| - string.gsub!(/#{dn[0]}/i, dn[1]) - end - - # ten, twenty, etc. - - TEN_PREFIXES.each do |tp| - string.gsub!(/(?:#{tp[0]})( *\d(?=[^\d]|$))*/i) { (tp[1] + $1.to_i).to_s } - end - - # hundreds, thousands, millions, etc. - - BIG_PREFIXES.each do |bp| - string.gsub!(/(\d*) *#{bp[0]}/i) { (bp[1] * $1.to_i).to_s} - andition(string) - #combine_numbers(string) # Should to be more efficient way to do this - end - - # fractional addition - # I'm not combining this with the previous block as using float addition complicates the strings - # (with extraneous .0's and such ) - string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s } - - string - end - -private - def andition(string) - sc = StringScanner.new(string) - while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i)) - if sc[2] =~ /and/ || sc[1].size > sc[3].size - string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[3].to_i).to_s - sc.reset - end - end - end - -# def combine_numbers(string) -# sc = StringScanner.new(string) -# while(sc.scan_until(/(\d+)(?: | and |-)(\d+)(?=[^\w]|$)/i)) -# string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[2].to_i).to_s -# sc.reset -# end -# end - -end -end \ No newline at end of file diff --git a/framework/Horde_Date_Parser/chronic/test/test_Numerizer.rb b/framework/Horde_Date_Parser/chronic/test/test_Numerizer.rb deleted file mode 100644 index f70c51ad8..000000000 --- a/framework/Horde_Date_Parser/chronic/test/test_Numerizer.rb +++ /dev/null @@ -1,48 +0,0 @@ -require 'test/unit' -require 'chronic' - -class ParseNumbersTest < Test::Unit::TestCase - - def test_straight_parsing - strings = { 1 => 'one', - 5 => 'five', - 10 => 'ten', - 11 => 'eleven', - 12 => 'twelve', - 13 => 'thirteen', - 14 => 'fourteen', - 15 => 'fifteen', - 16 => 'sixteen', - 17 => 'seventeen', - 18 => 'eighteen', - 19 => 'nineteen', - 20 => 'twenty', - 27 => 'twenty seven', - 31 => 'thirty-one', - 59 => 'fifty nine', - 100 => 'a hundred', - 100 => 'one hundred', - 150 => 'one hundred and fifty', - # 150 => 'one fifty', - 200 => 'two-hundred', - 500 => '5 hundred', - 999 => 'nine hundred and ninety nine', - 1_000 => 'one thousand', - 1_200 => 'twelve hundred', - 1_200 => 'one thousand two hundred', - 17_000 => 'seventeen thousand', - 21_473 => 'twentyone-thousand-four-hundred-and-seventy-three', - 74_002 => 'seventy four thousand and two', - 99_999 => 'ninety nine thousand nine hundred ninety nine', - 100_000 => '100 thousand', - 250_000 => 'two hundred fifty thousand', - 1_000_000 => 'one million', - 1_250_007 => 'one million two hundred fifty thousand and seven', - 1_000_000_000 => 'one billion', - 1_000_000_001 => 'one billion and one' } - - strings.keys.sort.each do |key| - assert_equal key, Numerizer.numerize(strings[key]).to_i - end - end -end \ No newline at end of file