--- /dev/null
+<?php
+/**
+ * Entry point for turning number words inside a string into digits.
+ *
+ * The actual work is done by a locale object; this class only selects and
+ * instantiates the right one.
+ */
+class Horde_Support_Numerizer
+{
+    /**
+     * Numerizes a string with the locale implementation selected by $args.
+     *
+     * @param string $string  The text to convert.
+     * @param array $args     Options; see factory().
+     *
+     * @return string  The result of the selected locale object's numerize().
+     */
+    public static function numerize($string, $args = array())
+    {
+        return self::factory($args)->numerize($string);
+    }
+
+    /**
+     * Creates the numerizer object for a locale.
+     *
+     * The 'locale' entry of $args is normalized ("de_DE" => "De_De") and
+     * looked up as Horde_Support_Numerizer_Locale_<Locale>. If that class
+     * does not exist, the part before the first underscore ("De") is tried.
+     * If nothing matches, or no locale (or "base") was given, the base
+     * implementation is returned.
+     *
+     * @param array $args  Options. 'locale' selects the implementation; the
+     *                     whole array is passed on to the constructor.
+     *
+     * @return Horde_Support_Numerizer_Locale_Base  A numerizer instance.
+     */
+    public static function factory($args = array())
+    {
+        $locale = isset($args['locale']) ? $args['locale'] : null;
+
+        if (is_string($locale) && $locale && strtolower($locale) != 'base') {
+            $locale = str_replace(' ', '_', ucwords(str_replace('_', ' ', strtolower($locale))));
+
+            // explode() returns a temporary, which must not be handed to a
+            // by-reference function such as array_shift(); index it instead.
+            $parts = explode('_', $locale);
+
+            foreach (array_unique(array($locale, $parts[0])) as $suffix) {
+                // The locale may come from the caller's input and ends up in
+                // a class name that is handed to the autoloader, so only
+                // accept characters that can appear in such a name.
+                if (!preg_match('/^[A-Za-z0-9_]+\z/', $suffix)) {
+                    continue;
+                }
+
+                $class = 'Horde_Support_Numerizer_Locale_' . $suffix;
+                if (class_exists($class)) {
+                    return new $class($args);
+                }
+            }
+        }
+
+        return new Horde_Support_Numerizer_Locale_Base($args);
+    }
+
+}
--- /dev/null
+<?php
+/**
+ * Default (English) numerizer: rewrites number words in a string as digits,
+ * e.g. "one hundred and fifty" becomes "150".
+ *
+ * The conversion is a sequence of regular expression passes over the whole
+ * string, so words that merely contain a number word can be altered too.
+ */
+class Horde_Support_Numerizer_Locale_Base
+{
+    /**
+     * Words that map directly to a number.
+     *
+     * Keys are regular expression fragments, values are preg replacement
+     * strings. The entries are applied in order, so the teens come before
+     * the single digits they contain.
+     *
+     * @var array
+     */
+    public $DIRECT_NUMS = array(
+        'eleven' => '11',
+        'twelve' => '12',
+        'thirteen' => '13',
+        'fourteen' => '14',
+        'fifteen' => '15',
+        'sixteen' => '16',
+        'seventeen' => '17',
+        'eighteen' => '18',
+        'nineteen' => '19',
+        'ninteen' => '19', // Common mis-spelling
+        'zero' => '0',
+        'one' => '1',
+        'two' => '2',
+        'three' => '3',
+        'four(\W|$)' => '4$1', // The trailing group keeps "four" from matching inside "fourty"
+        'five' => '5',
+        'six(\W|$)' => '6$1',
+        'seven(\W|$)' => '7$1',
+        'eight(\W|$)' => '8$1',
+        'nine(\W|$)' => '9$1',
+        'ten' => '10',
+        // The article "a" counts as 1 only when another word follows
+        // ("a hundred"); a trailing "a" or "a.m." is left alone.
+        '\ba(?=\s+\w)' => '1',
+    );
+
+    /**
+     * Multiples of ten: regular expression fragment => value.
+     *
+     * @var array
+     */
+    public $TEN_PREFIXES = array(
+        'twenty' => 20,
+        'thirty' => 30,
+        'forty' => 40,
+        'fourty' => 40, // Common mis-spelling
+        'fifty' => 50,
+        'sixty' => 60,
+        'seventy' => 70,
+        'eighty' => 80,
+        'ninety' => 90,
+    );
+
+    /**
+     * Multipliers: regular expression fragment => value, smallest first.
+     *
+     * @var array
+     */
+    public $BIG_PREFIXES = array(
+        'hundred' => 100,
+        'thousand' => 1000,
+        'million' => 1000000,
+        'billion' => 1000000000,
+        'trillion' => 1000000000000,
+    );
+
+    /**
+     * Replaces the number words in a string with digits.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The text with number words replaced.
+     */
+    public function numerize($string)
+    {
+        // Preprocess.
+        $string = $this->_splitHyphenatedWords($string);
+        $string = $this->_hideAHalf($string);
+
+        $string = $this->_directReplacements($string);
+        $string = $this->_replaceTenPrefixes($string);
+        $string = $this->_replaceBigPrefixes($string);
+        $string = $this->_fractionalAddition($string);
+
+        return $string;
+    }
+
+    /**
+     * Collapses runs of spaces and turns a hyphen between two non-digits
+     * into a space ("thirty-one" => "thirty one"). Hyphens next to a digit
+     * are kept, so dates such as 2006-08-20 survive.
+     *
+     * Will mutilate hyphenated words, but that shouldn't matter for date
+     * extraction.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _splitHyphenatedWords($string)
+    {
+        return preg_replace('/ +|([^\d])-([^\d])/', '$1 $2', $string);
+    }
+
+    /**
+     * Replaces "a half" with the placeholder "haAlf" so that the "a" is not
+     * turned into a 1; _fractionalAddition() picks the placeholder up later.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _hideAHalf($string)
+    {
+        // Case-insensitive like every other pass in this class.
+        return str_ireplace('a half', 'haAlf', $string);
+    }
+
+    /**
+     * Applies the easy/direct replacements from $DIRECT_NUMS, in order and
+     * case-insensitively.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _directReplacements($string)
+    {
+        foreach ($this->DIRECT_NUMS as $dn => $dn_replacement) {
+            $string = preg_replace('/' . $dn . '/i', $dn_replacement, $string);
+        }
+        return $string;
+    }
+
+    /**
+     * Replaces twenty, thirty, etc. with their value, adding a directly
+     * following single digit ("twenty 7" => "27").
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _replaceTenPrefixes($string)
+    {
+        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
+            $string = preg_replace_callback(
+                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
+                function ($m) use ($tp_replacement) {
+                    // $m[1] is only present when a digit followed the word;
+                    // the (int) cast skips the leading spaces it may carry.
+                    return (string)($tp_replacement + (isset($m[1]) ? (int)$m[1] : 0));
+                },
+                $string
+            );
+        }
+        return $string;
+    }
+
+    /**
+     * Replaces hundreds, thousands, millions, etc.: the digits in front of
+     * the word are multiplied by its value, then adjacent numbers are summed
+     * by _andition() before the next larger unit is processed.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _replaceBigPrefixes($string)
+    {
+        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
+            $string = preg_replace_callback(
+                '/(\d*) *' . $bp . '/i',
+                function ($m) use ($bp_replacement) {
+                    // A unit without a number in front ("hundred") means one
+                    // of it, not zero.
+                    $factor = ($m[1] === '') ? 1 : (int)$m[1];
+                    return (string)($bp_replacement * $factor);
+                },
+                $string
+            );
+            $string = $this->_andition($string);
+        }
+        return $string;
+    }
+
+    /**
+     * Sums two adjacent numbers that belong together.
+     *
+     * Numbers joined by " and " are always added. Numbers separated by a
+     * single space are only added when the first has more digits than the
+     * second ("400 73" => "473"), which leaves pairs such as "20 03" alone.
+     * Processing stops at the first pair that does not qualify.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _andition($string)
+    {
+        while (preg_match('/(\d+)( | and )(\d+)(?=[^\w]|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) {
+            list($left, $leftOffset) = $sc[1];
+            list($right, $rightOffset) = $sc[3];
+
+            // The separator is either a single space or " and " in any case.
+            $joinedByAnd = ($sc[2][0] !== ' ');
+            if (!$joinedByAnd && strlen($left) <= strlen($right)) {
+                break;
+            }
+
+            $string = substr($string, 0, $leftOffset)
+                . ((int)$left + (int)$right)
+                . substr($string, $rightOffset + strlen($right));
+        }
+        return $string;
+    }
+
+    /**
+     * Adds 0.5 to a number that is followed by the "haAlf" placeholder
+     * created by _hideAHalf() ("3 and haAlf" => "3.5").
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _fractionalAddition($string)
+    {
+        return preg_replace_callback(
+            '/(\d+)(?: | and |-)*haAlf/i',
+            function ($m) {
+                return (string)((float)$m[1] + 0.5);
+            },
+            $string
+        );
+    }
+
+}
--- /dev/null
+<?php
+/**
+ * German numerizer: rewrites German number words in a string as digits,
+ * e.g. "einundzwanzigtausend" becomes "21000".
+ *
+ * Rules:
+ *
+ * - there are irregular words for 11 and 12, like in English
+ * - numbers below one million are written together
+ *   (1 M = "eine Million", 100 = "einhundert")
+ * - "a" is declinable ("one" = "eins", "a" = "ein/eine")
+ * - numbers below 100 are flipped compared to English and joined with
+ *   "und" (21 = "twenty-one" = "einundzwanzig")
+ */
+class Horde_Support_Numerizer_Locale_De extends Horde_Support_Numerizer_Locale_Base
+{
+    /**
+     * Words that map directly to a number: regular expression fragment =>
+     * value. Applied in order, so the teens come before the single digits
+     * they contain.
+     *
+     * @var array
+     */
+    public $DIRECT_NUMS = array(
+        'dreizehn' => 13,
+        'vierzehn' => 14,
+        'fünfzehn' => 15,
+        'sechzehn' => 16,
+        'siebzehn' => 17,
+        'achtzehn' => 18,
+        'neunzehn' => 19,
+        'eins' => 1,
+        'zwei' => 2,
+        'zwo' => 2,
+        'drei' => 3,
+        'vier' => 4,
+        'fünf' => 5,
+        'sechs' => 6,
+        'sieben' => 7,
+        'acht' => 8,
+        'neun' => 9,
+        'zehn' => 10,
+        'elf' => 11,
+        'zwölf' => 12,
+        'eine?' => 1,
+    );
+
+    /**
+     * Multiples of ten: regular expression fragment => value.
+     *
+     * @var array
+     */
+    public $TEN_PREFIXES = array(
+        'zwanzig' => 20,
+        'dreißig' => 30,
+        'vierzig' => 40,
+        'fünfzig' => 50,
+        'sechzig' => 60,
+        'siebzig' => 70,
+        'achtzig' => 80,
+        'neunzig' => 90,
+    );
+
+    /**
+     * Multipliers: regular expression fragment => value, smallest first.
+     * The larger units also accept their plural ending ("Millionen").
+     *
+     * @var array
+     */
+    public $BIG_PREFIXES = array(
+        'hundert' => 100,
+        'tausend' => 1000,
+        'million(?:en)?' => 1000000,
+        'milliarden?' => 1000000000,
+        'billion(?:en)?' => 1000000000000,
+    );
+
+    /**
+     * Replaces the German number words in a string with digits.
+     *
+     * The tens are replaced before the single digits so that "dreißig" is
+     * not seen as "drei" + rest. Unit/tens pairs are summed before the big
+     * units are applied, because the pair as a whole is what gets
+     * multiplied ("einundzwanzigtausend" = 21 * 1000).
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The text with number words replaced.
+     */
+    public function numerize($string)
+    {
+        $string = $this->_replaceTenPrefixes($string);
+        $string = $this->_directReplacements($string);
+        $string = $this->_andition($string);
+        $string = $this->_replaceBigPrefixes($string);
+        $string = $this->_fractionalAddition($string);
+
+        return $string;
+    }
+
+    /**
+     * Replaces zwanzig, dreißig, etc. with their value, adding a directly
+     * following single digit.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _replaceTenPrefixes($string)
+    {
+        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
+            $string = preg_replace_callback(
+                '/(?:' . $tp . ')( *\d(?=[^\d]|$))*/i',
+                function ($m) use ($tp_replacement) {
+                    // $m[1] is only present when a digit followed the word.
+                    return (string)($tp_replacement + (isset($m[1]) ? (int)$m[1] : 0));
+                },
+                $string
+            );
+        }
+        return $string;
+    }
+
+    /**
+     * Replaces hundreds, thousands, millions, etc.
+     *
+     * The digits in front of the word are the factor (1 if there are none,
+     * as in "hunderttausend"). A number that follows the word, optionally
+     * joined by "und", is added when it is smaller than the unit itself
+     * ("4hundert73" => "473", "74tausendund2" => "74002"). Because the units
+     * are processed smallest first, the sum of a smaller unit is complete
+     * before it is used as factor or remainder of a larger one.
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _replaceBigPrefixes($string)
+    {
+        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
+            $string = preg_replace_callback(
+                '/(\d*) *' . $bp . '((?: *und)? *(\d+))?/i',
+                function ($m) use ($bp_replacement) {
+                    $factor = (int)$m[1];
+                    if (!$factor) {
+                        $factor = 1;
+                    }
+                    $value = $bp_replacement * $factor;
+
+                    // Nothing followed the unit.
+                    if (!isset($m[3]) || $m[3] === '') {
+                        return (string)$value;
+                    }
+
+                    // A following number that is not smaller than the unit
+                    // cannot be its remainder; put it back untouched.
+                    if ((int)$m[3] >= $bp_replacement) {
+                        return $value . $m[2];
+                    }
+
+                    return (string)($value + (int)$m[3]);
+                },
+                $string
+            );
+        }
+        return $string;
+    }
+
+    /**
+     * Sums a single digit and a multiple of ten that are joined by "und",
+     * i.e. the flipped German form of the numbers 21 to 99
+     * ("7und20" => "27").
+     *
+     * @param string $string  The text to convert.
+     *
+     * @return string  The converted text.
+     */
+    protected function _andition($string)
+    {
+        return preg_replace_callback(
+            '/(?<!\d)([1-9])und([2-9]0)(?!\d)/i',
+            function ($m) {
+                return (string)((int)$m[1] + (int)$m[2]);
+            },
+            $string
+        );
+    }
+
+}
* Initial Horde_Support_Stub object
* Initial Horde_Support_Timer object
* Initial Horde_Support_Uuid object
+ * Initial Horde_Support_Numerizer objects
</notes>
<contents>
<dir name="/">
<dir name="lib">
<dir name="Horde">
<dir name="Support">
+ <dir name="Numerizer">
+ <dir name="Locale">
+ <file name="Base.php" role="php" />
+ <file name="De.php" role="php" />
+ </dir> <!-- /lib/Horde/Support/Numerizer/Locale -->
+ </dir> <!-- /lib/Horde/Support/Numerizer -->
<file name="Array.php" role="php" />
<file name="ConsistentHash.php" role="php" />
<file name="Inflector.php" role="php" />
+ <file name="Numerizer.php" role="php" />
<file name="Stub.php" role="php" />
<file name="Timer.php" role="php" />
<file name="Uuid.php" role="php" />
<install name="lib/Horde/Support/Array.php" as="Horde/Support/Array.php" />
<install name="lib/Horde/Support/ConsistentHash.php" as="Horde/Support/ConsistentHash.php" />
<install name="lib/Horde/Support/Inflector.php" as="Horde/Support/Inflector.php" />
+ <install name="lib/Horde/Support/Numerizer/Locale/Base.php" as="Horde/Support/Numerizer/Locale/Base.php" />
+ <install name="lib/Horde/Support/Numerizer/Locale/De.php" as="Horde/Support/Numerizer/Locale/De.php" />
+ <install name="lib/Horde/Support/Numerizer.php" as="Horde/Support/Numerizer.php" />
<install name="lib/Horde/Support/Stub.php" as="Horde/Support/Stub.php" />
<install name="lib/Horde/Support/Timer.php" as="Horde/Support/Timer.php" />
<install name="lib/Horde/Support/Uuid.php" as="Horde/Support/Uuid.php" />
--- /dev/null
+<?php
+/**
+ * @category Horde
+ * @package Horde_Support
+ * @subpackage UnitTests
+ */
+
+/**
+ * Tests for the default (English) numerizer.
+ *
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+class Horde_Support_Numerizer_Locale_BaseTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Checks that number words are converted to the expected integer.
+     */
+    public function testStraightParsing()
+    {
+        $numerizer = Horde_Support_Numerizer::factory();
+
+        // Listed as (expected, input) pairs rather than expected => input:
+        // 100 and 1200 are each expected for two different inputs, and as
+        // array keys the earlier entry would be overwritten and never run.
+        $strings = array(
+            array(1, 'one'),
+            array(5, 'five'),
+            array(10, 'ten'),
+            array(11, 'eleven'),
+            array(12, 'twelve'),
+            array(13, 'thirteen'),
+            array(14, 'fourteen'),
+            array(15, 'fifteen'),
+            array(16, 'sixteen'),
+            array(17, 'seventeen'),
+            array(18, 'eighteen'),
+            array(19, 'nineteen'),
+            array(20, 'twenty'),
+            array(27, 'twenty seven'),
+            array(31, 'thirty-one'),
+            array(59, 'fifty nine'),
+            array(100, 'a hundred'),
+            array(100, 'one hundred'),
+            array(150, 'one hundred and fifty'),
+            // array(150, 'one fifty'),
+            array(200, 'two-hundred'),
+            array(500, '5 hundred'),
+            array(999, 'nine hundred and ninety nine'),
+            array(1000, 'one thousand'),
+            array(1200, 'twelve hundred'),
+            array(1200, 'one thousand two hundred'),
+            array(17000, 'seventeen thousand'),
+            array(21473, 'twentyone-thousand-four-hundred-and-seventy-three'),
+            array(74002, 'seventy four thousand and two'),
+            array(99999, 'ninety nine thousand nine hundred ninety nine'),
+            array(100000, '100 thousand'),
+            array(250000, 'two hundred fifty thousand'),
+            array(1000000, 'one million'),
+            array(1250007, 'one million two hundred fifty thousand and seven'),
+            array(1000000000, 'one billion'),
+            array(1000000001, 'one billion and one'),
+        );
+
+        foreach ($strings as $pair) {
+            $this->assertEquals($pair[0], (int)$numerizer->numerize($pair[1]));
+        }
+    }
+
+    /**
+     * Checks that strings which already consist of digits, such as
+     * timestamps, come back unchanged.
+     */
+    public function testLeavesDatesAlone()
+    {
+        $numerizer = Horde_Support_Numerizer::factory();
+
+        $this->assertEquals('2006-08-20 03:00', $numerizer->numerize('2006-08-20 03:00'));
+        $this->assertEquals('2006-08-20 15:30:30', $numerizer->numerize('2006-08-20 15:30:30'));
+    }
+
+}
--- /dev/null
+<?php
+/**
+ * @category Horde
+ * @package Horde_Support
+ * @subpackage UnitTests
+ */
+
+/**
+ * Tests for the German numerizer.
+ *
+ * @category   Horde
+ * @package    Horde_Support
+ * @subpackage UnitTests
+ */
+class Horde_Support_Numerizer_Locale_DeTest extends PHPUnit_Framework_TestCase
+{
+    /**
+     * Checks that each German number word is converted to exactly the
+     * digit string of its expected value.
+     */
+    public function testStraightParsing()
+    {
+        // German input => expected value, checked in this order.
+        $expectations = array(
+            'eins' => 1,
+            'fünf' => 5,
+            'zehn' => 10,
+            'elf' => 11,
+            'zwölf' => 12,
+            'dreizehn' => 13,
+            'vierzehn' => 14,
+            'fünfzehn' => 15,
+            'sechzehn' => 16,
+            'siebzehn' => 17,
+            'achtzehn' => 18,
+            'neunzehn' => 19,
+            'zwanzig' => 20,
+            'siebenundzwanzig' => 27,
+            'einunddreißig' => 31,
+            'neunundfünfzig' => 59,
+            'einhundert' => 100,
+            'ein hundert' => 100,
+            'hundertundfünfzig' => 150,
+            'einhundertundfünfzig' => 150,
+            'zweihundert' => 200,
+            'fünfhundert' => 500,
+            'neunhundertneunundneunzig' => 999,
+            'eintausend' => 1000,
+            'zwölfhundert' => 1200,
+            'eintausendzweihundert' => 1200,
+            'siebzehntausend' => 17000,
+            'einundzwanzigtausendvierhundertdreiundsiebzig' => 21473,
+            'vierundsiebzigtausendzwei' => 74002,
+            'vierundsiebzigtausendundzwei' => 74002,
+            'neunundneunzigtausendneunhundertneunundneunzig' => 99999,
+            'hunderttausend' => 100000,
+            'einhunderttausend' => 100000,
+            'zweihundertfünfzigtausend' => 250000,
+            'eine million' => 1000000,
+            'eine million zweihundertfünfzigtausendundsieben' => 1250007,
+            'eine milliarde' => 1000000000,
+            'eine milliarde und eins' => 1000000001,
+        );
+
+        $german = Horde_Support_Numerizer::factory(array('locale' => 'de'));
+
+        foreach ($expectations as $words => $value) {
+            $this->assertEquals((string)$value, $german->numerize($words));
+        }
+    }
+
+}