Finish Portuguese date parser.
authorJan Schneider <jan@horde.org>
Sat, 10 Jul 2010 08:42:54 +0000 (10:42 +0200)
committerJan Schneider <jan@horde.org>
Sat, 10 Jul 2010 08:42:54 +0000 (10:42 +0200)
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Grabber.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Ordinal.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Pointer.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Repeater.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Scalar.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Separator.php
framework/Date_Parser/lib/Horde/Date/Parser/Locale/Pt/Timezone.php
framework/Support/lib/Horde/Support/Numerizer/Locale/Pt.php [new file with mode: 0644]
framework/Support/package.xml

index 111caf3..099db78 100644 (file)
@@ -1,6 +1,750 @@
-<?php
+<?php 
 /**
  */
+
 class Horde_Date_Parser_Locale_Pt extends Horde_Date_Parser_Locale_Base
 {
+    public $definitions = array();
+    public $args = array();
+    public $now;
+
+    /**
+     * Stores the parser arguments for later use.
+     *
+     * The array is merged into the per-call options in parse(), consulted for
+     * a 'locale' key in componentFactory(), and forwarded to the numerizer in
+     * numericizeNumbers().
+     *
+     * @param array $args  Parser arguments.
+     */
+    public function __construct($args)
+    {
+        $this->args = $args;
+    }
+
+    /**
+     * Parses a string containing a natural language date or time.
+     *
+     * The effective options are the defaults below, overridden by the
+     * constructor arguments, overridden by $specifiedOptions.
+     *
+     * Options:
+     *   - 'context': 'past', 'future' or 'none' (defaults to 'future').
+     *     If your string represents a birthday, you can set the context to
+     *     'past' and if an ambiguous string is given, it will be assumed to be
+     *     in the past. Specify 'future' or omit to set a future context.
+     *   - 'now': Horde_Date (defaults to new Horde_Date(time())).
+     *     All computations are based off of this time instead of time().
+     *   - 'return': 'result', 'span' or 'date' (defaults to 'date').
+     *     By default the parser guesses a single point in time for the given
+     *     date or time. Use 'span' to get the entire time span back, or
+     *     'result' to get the whole result object including the tokens (for
+     *     retrieving the text that was or was not tagged, for example).
+     *   - 'ambiguousTimeRange': integer or 'none' (defaults to 6, i.e. 6am-6pm).
+     *     If an integer is given, ambiguous times (like 5:00) are assumed to
+     *     be within the range of that time in the AM to that time in the PM.
+     *     For example, with 7 the parser looks for the time between 7am and
+     *     7pm, so 5:00 is taken to mean 5:00pm. With 'none' no assumption is
+     *     made and the first matching instance of that time is used.
+     *
+     * @param string $text             Text to parse.
+     * @param array  $specifiedOptions Per-call options; every key must be one
+     *                                 of the option names listed above.
+     *
+     * @return mixed  A Horde_Date_Parser_Result for 'result', its span for
+     *                'span', the value of Horde_Date_Parser_Result::guess()
+     *                for 'date'. Any other 'return' value falls through the
+     *                switch and yields null.
+     *
+     * @throws InvalidArgumentException  If $specifiedOptions contains an
+     *                                   unknown key or 'context' is not one of
+     *                                   'past', 'future', 'none'.
+     */
+    
+    public function parse($text, $specifiedOptions = array())
+    {
+        // get options and set defaults if necessary
+        $defaultOptions = array(
+            'context' => 'future',
+            'now' => new Horde_Date(time()),
+            'return' => 'date',
+            'ambiguousTimeRange' => 6,
+        );
+        $options = array_merge($defaultOptions, $this->args, $specifiedOptions);
+
+        // ensure the specified options are valid
+        foreach (array_keys($specifiedOptions) as $key) {
+            if (!isset($defaultOptions[$key])) {
+                throw new InvalidArgumentException("$key is not a valid option key");
+            }
+        }
+
+        if (!in_array($options['context'], array('past', 'future', 'none'))) {
+            throw new InvalidArgumentException("Invalid value " . $options['context'] . " for 'context' specified. Valid values are 'past', 'future', and 'none'");
+        }
+
+        // store now for later =)
+        $this->now = $options['now'];
+
+
+               $text = $this->normalize_special_characters($text);
+
+        // put the text into a normal format to ease scanning
+        $text = $this->preNormalize($text);
+
+        // get base tokens for each word
+        $tokens = $this->preTokenize($text);
+
+        // scan the tokens with each token scanner
+        // (the Repeater scanner is the only one that receives the options)
+        foreach (array('Repeater') as $tokenizer) {
+            $tokenizer = $this->componentFactory($tokenizer);
+            $tokens = $tokenizer->scan($tokens, $options);
+        }
+
+        foreach (array('Grabber', 'Pointer', 'Scalar', 'Ordinal', 'Separator', 'Timezone') as $tokenizer) {
+            $tokenizer = $this->componentFactory($tokenizer);
+            $tokens = $tokenizer->scan($tokens);
+        }
+
+        // strip any non-tagged tokens
+        $taggedTokens = array_values(array_filter($tokens, create_function('$t', 'return $t->tagged();')));
+
+        // Remove tokens we know we don't want - for example, if the first token
+        // is a separator, drop it.
+        $taggedTokens = $this->postTokenize($taggedTokens);
+
+        // do the heavy lifting
+        $span = $this->tokensToSpan($taggedTokens, $options);
+
+        // generate the result and return it, the span, or a guessed time within the span
+        // (the result keeps the full token list, including untagged tokens)
+        $result = new Horde_Date_Parser_Result($span, $tokens);
+        switch ($options['return']) {
+        case 'result':
+            return $result;
+        case 'span':
+            return $result->span;
+        case 'date':
+            return $result->guess();
+        }
+    }
+
+    /**
+     * Instantiates a tokenizer component, preferring a locale-specific class.
+     *
+     * Lookup order, using the 'locale' constructor argument:
+     *   1. Horde_Date_Parser_Locale_<Locale>_<Component> (e.g. Pt_Br)
+     *   2. Horde_Date_Parser_Locale_<Language>_<Component> (e.g. Pt), only if
+     *      the locale has a region part
+     *   3. Horde_Date_Parser_Locale_Base_<Component>
+     *
+     * @param string $component  Component name, e.g. 'Repeater' or 'Grabber'.
+     * @param mixed  $args       Passed unchanged to the component constructor.
+     *
+     * @return object  The component instance.
+     */
+    public function componentFactory($component, $args = null)
+    {
+        $locale = isset($this->args['locale']) ? $this->args['locale'] : null;
+
+        if ($locale && strtolower($locale) != 'base') {
+            // Normalise e.g. "pt_br" to "Pt_Br" to match the class naming.
+            $locale = str_replace(' ', '_', ucwords(str_replace('_', ' ', strtolower($locale))));
+            $class = 'Horde_Date_Parser_Locale_' . $locale . '_' . $component;
+            if (class_exists($class)) {
+                return new $class($args);
+            }
+
+            // array_shift() takes its argument by reference, so it must not be
+            // fed the temporary returned by explode(); index the array instead.
+            $parts = explode('_', $locale);
+            $language = $parts[0];
+            if ($language != $locale) {
+                $class = 'Horde_Date_Parser_Locale_' . $language . '_' . $component;
+                if (class_exists($class)) {
+                    return new $class($args);
+                }
+            }
+        }
+
+        $class = 'Horde_Date_Parser_Locale_Base_' . $component;
+        return new $class($args);
+    }
+
+    /**
+     * Replaces special characters with non-special equivalents.
+     *
+     * Typographic quotes are mapped to plain quotes, accented Latin letters
+     * to their unaccented base letters, and a few single-byte symbols to HTML
+     * entities.
+     *
+     * Source: http://pt2.php.net/manual/en/function.chr.php#93291
+     *
+     * @param string $str  Text to normalise.
+     *
+     * @return string  The normalised text.
+     */
+    public function normalize_special_characters( $str )
+    {
+        // Quotes cleanup. str_replace() is used instead of ereg_replace(),
+        // which was deprecated in PHP 5.3 and removed in PHP 7.0. The complete
+        // literal is replaced rather than chr(ord(...)), which only keeps the
+        // first byte of a multi-byte character.
+        $str = str_replace(
+            array("`", "´", "„", "“", "”"),
+            array("'", "'", ",", "\"", "\""),
+            $str
+        );
+
+        $unwanted_array = array(    'Š'=>'S', 'š'=>'s', 'Ž'=>'Z', 'ž'=>'z', 'À'=>'A', 'Á'=>'A', 'Â'=>'A', 'Ã'=>'A', 'Ä'=>'A', 'Å'=>'A', 'Æ'=>'A', 'Ç'=>'C', 'È'=>'E', 'É'=>'E',
+                                    'Ê'=>'E', 'Ë'=>'E', 'Ì'=>'I', 'Í'=>'I', 'Î'=>'I', 'Ï'=>'I', 'Ñ'=>'N', 'Ò'=>'O', 'Ó'=>'O', 'Ô'=>'O', 'Õ'=>'O', 'Ö'=>'O', 'Ø'=>'O', 'Ù'=>'U',
+                                    'Ú'=>'U', 'Û'=>'U', 'Ü'=>'U', 'Ý'=>'Y', 'Þ'=>'B', 'ß'=>'Ss', 'à'=>'a', 'á'=>'a', 'â'=>'a', 'ã'=>'a', 'ä'=>'a', 'å'=>'a', 'æ'=>'a', 'ç'=>'c',
+                                    'è'=>'e', 'é'=>'e', 'ê'=>'e', 'ë'=>'e', 'ì'=>'i', 'í'=>'i', 'î'=>'i', 'ï'=>'i', 'ð'=>'o', 'ñ'=>'n', 'ò'=>'o', 'ó'=>'o', 'ô'=>'o', 'õ'=>'o',
+                                    // 'ü' replaces a duplicated 'ý' entry so that it mirrors the upper-case 'Ü' mapping.
+                                    'ö'=>'o', 'ø'=>'o', 'ù'=>'u', 'ú'=>'u', 'û'=>'u', 'ü'=>'u', 'ý'=>'y', 'þ'=>'b', 'ÿ'=>'y' );
+        $str = strtr( $str, $unwanted_array );
+
+        // Bullets, dashes, and trademarks (single-byte codes, in order:
+        // bullet, en dash, em dash, trademark, copyright, registered).
+        // NOTE(review): these are byte-level replacements; in UTF-8 input the
+        // same byte values can occur inside multi-byte sequences - confirm the
+        // expected input encoding.
+        $str = str_replace(
+            array(chr(149), chr(150), chr(151), chr(153), chr(169), chr(174)),
+            array("&#8226;", "&ndash;", "&mdash;", "&#8482;", "&copy;", "&reg;"),
+            $str
+        );
+
+        return $str;
+    }
+
+
+    /**
+     * Cleans up the input text before tokenizing.
+     *
+     * In order: lower-cases the text, strips quotes and dots, pads slashes,
+     * dashes, commas and at-signs with spaces, rewrites a set of Portuguese
+     * idioms to the canonical keywords used further down the pipeline, and
+     * finally converts number words to digits via numericizeNumbers().
+     * numericizeOrdinals() is not called from here.
+     *
+     * @param string $text  Text to normalise.
+     *
+     * @return string  The normalised text.
+     */
+    public function preNormalize($text)
+    {
+        $text = strtolower($text);
+        // $text = $this->numericizeNumbers($text);
+        $text = preg_replace('/[\'"\.]/', '', $text);
+        $text = preg_replace('/([\/\-\,\@])/', ' $1 ', $text);
+        $text = preg_replace('/\bhoje\b/', 'this day', $text);
+        // $text = preg_replace('/^amanh[aã]$/', 'next day', $text);
+        // NOTE(review): the next two patterns are anchored with ^...$, so they
+        // only fire when the whole input is exactly that word, whereas 'hoje'
+        // above uses \b word boundaries - confirm this is intended.
+        $text = preg_replace('/^amanha$/', 'next day', $text);
+
+               $text = preg_replace('/^ontem$/', 'last day', $text);
+        $text = preg_replace('/\bmeio\s+dia\b/', '12:00', $text);
+        $text = preg_replace('/\bmeia\s+noite\b/', '24:00', $text);
+        $text = preg_replace('/\b(antes|anterior)\b/', 'past', $text);
+        $text = preg_replace('/\b(agora|j[aá])\b/', 'this second', $text);
+        $text = preg_replace('/\b[uú]ltimo\b/', 'last', $text);
+        $text = preg_replace('/\b(?:de|na|durante\s+a|logo(?:\s[aà]|de))\s+(manh[aã]|madrugada)\b/', 'morning', $text);
+        $text = preg_replace('/\b(?:de|[àa]|durante\s+a|logo(?:\s[aà]|de))\s+tarde\b/', 'afternoon', $text);
+        $text = preg_replace('/\b((?:de|[àa]|durante\s+a|logo(?:\s[aà]))\s+noite|(?:ao)\s+anoitecer)\b/', 'this night', $text);
+        $text = preg_replace('/\b(horas?|h|hrs?)\b/', 'oclock', $text);
+        $text = preg_replace('/\b(depois|ap[oó]s)\b/', 'future', $text);
+        // Number words are converted last, after the idiom rewrites above.
+        $text = $this->numericizeNumbers($text);
+//        $text = preg_replace('/\bdebug\b/', 'filho da mae', $text);
+        return $text;
+    }
+
+    /**
+     * Convert number words to numbers (three => 3)
+     *
+     * Delegates to Horde_Support_Numerizer::numerize(), forwarding the
+     * constructor arguments as its second parameter.
+     *
+     * @param string $text  Text to convert.
+     *
+     * @return mixed  Whatever Horde_Support_Numerizer::numerize() returns.
+     */
+    public function numericizeNumbers($text)
+    {
+               return Horde_Support_Numerizer::numerize($text, $this->args);
+               // return $text;
+       }
+
+    /**
+     * Convert ordinal words to numeric ordinals (third => 3rd)
+     *
+     * Every pattern is anchored with ^...$, so a replacement only happens
+     * when $text as a whole is one of the known ordinals (first to
+     * nineteenth, masculine or feminine). Any other text is returned
+     * unchanged.
+     *
+     * @param string $text  Text to convert.
+     *
+     * @return string  The numeric ordinal, or the unchanged text.
+     */
+    public function numericizeOrdinals($text)
+    {
+        // Compound ordinals come first, then the simple ones. preg_replace()
+        // applies array patterns in order, like the sequential calls it
+        // replaces.
+        $ordinals = array(
+            '/^d[eé]cim[oa]\s+primeir[oa]$/' => '11º',
+            '/^d[eé]cim[oa]\s+segund[oa]$/' => '12º',
+            '/^d[eé]cim[oa]\s+terceir[oa]$/' => '13º',
+            '/^d[eé]cim[oa]\s+quart[oa]$/' => '14º',
+            '/^d[eé]cim[oa]\s+quint[oa]$/' => '15º',
+            '/^d[eé]cim[oa]\s+sext[oa]$/' => '16º',
+            '/^d[eé]cim[oa]\s+s[eé]tim[oa]$/' => '17º',
+            '/^d[eé]cim[oa]\s+oit[aá]v[oa]$/' => '18º',
+            // A stray "^" before "non" used to make this pattern unmatchable.
+            '/^d[eé]cim[oa]\s+non[oa]$/' => '19º',
+            '/^primeir[oa]$/' => '1º',
+            '/^segund[oa]$/' => '2º',
+            '/^terceir[oa]$/' => '3º',
+            '/^quart[oa]$/' => '4º',
+            '/^quint[oa]$/' => '5º',
+            '/^sext[oa]$/' => '6º',
+            '/^s[eé]tim[oa]$/' => '7º',
+            '/^oit[aá]v[oa]$/' => '8º',
+            '/^non[oa]$/' => '9º',
+            '/^d[eé]cim[oa]$/' => '10º',
+            // and so on....
+        );
+
+        return preg_replace(array_keys($ordinals), array_values($ordinals), $text);
+    }
+
+    /**
+     * Breaks the text into whitespace-separated words and wraps each word in
+     * a token object.
+     *
+     * @param string $text  Text to tokenize
+     *
+     * @return array  List of Horde_Date_Parser_Token objects, one per word,
+     *                in input order.
+     */
+    public function preTokenize($text)
+    {
+        $wordTokens = array();
+        foreach (preg_split('/\s+/', $text) as $word) {
+            $wordTokens[] = new Horde_Date_Parser_Token($word);
+        }
+        return $wordTokens;
+    }
+
+    /**
+     * Drops tokens that cannot take part in any definition.
+     *
+     * Currently a single rule: a leading token that carries nothing but
+     * separator tags is removed before tokensToSpan() sees the list.
+     *
+     * @param array $tokens Array of tagged tokens.
+     *
+     * @return array  Filtered tagged tokens.
+     */
+    public function postTokenize($tokens)
+    {
+        if (count($tokens) == 0) {
+            return $tokens;
+        }
+
+        // Work on a copy so the real first token keeps its tags.
+        $probe = clone($tokens[0]);
+        $separatorTags = array(
+            'separator_at',
+            'separator_comma',
+            'separator_in',
+            'separator_slash_or_dash',
+        );
+        foreach ($separatorTags as $tagName) {
+            $probe->untag($tagName);
+        }
+
+        // Nothing left after removing the separator tags: pure separator.
+        if (!$probe->tagged()) {
+            array_shift($tokens);
+        }
+
+        return $tokens;
+    }
+
+    /**
+     * Lazily builds the handler table used by tokensToSpan().
+     *
+     * $this->definitions maps a definition name ('time', 'date', 'anchor',
+     * 'arrow', 'narrow') to a list of Horde_Date_Parser_Handler objects, each
+     * created from a token pattern and the name of the method on this class
+     * that handles a match. The table is built once; later calls return
+     * immediately.
+     *
+     * NOTE(review): pattern entries appear to use a leading ':' for token tags
+     * and a bare name for a nested definition, with a trailing '?' marking an
+     * optional entry - confirm against Horde_Date_Parser_Handler.
+     */
+    public function initDefinitions()
+    {
+        if ($this->definitions) { return; }
+
+        $this->definitions = array(
+            // These handlers carry no handler method (null).
+            'time' => array(
+                new Horde_Date_Parser_Handler(array(':repeater_time', ':repeater_day_portion?'), null),
+                new Horde_Date_Parser_Handler(array(':repeater_day_portion?', ':repeater_time' ), null),
+                new Horde_Date_Parser_Handler(array(':separator_at?', ':repeater_time' ), null),
+                new Horde_Date_Parser_Handler(array(':repeater_time', ':separator_at?', ':repeater_day_portion?'), null),
+                               
+            ),
+
+            // NOTE(review): three patterns below use ':separator_as?' while the
+            // rest of the table uses ':separator_at?' - confirm that a
+            // 'separator_as' tag exists.
+            // NOTE(review): the ':repeater_day_name' pattern uses ':time?' (with
+            // a colon) where every other pattern uses 'time?', and names
+            // 'handle_rdn', which this class does not define - confirm it is
+            // inherited.
+            'date' => array(
+                new Horde_Date_Parser_Handler(array(':repeater_day_name', ':repeater_month_name', ':scalar_day', ':repeater_time', ':timezone', ':scalar_year'), 'handle_rdn_rmn_sd_t_tz_sy'),
+                new Horde_Date_Parser_Handler(array(':repeater_month_name', ':scalar_day', ':scalar_year'), 'handle_rmn_sd_sy'),
+                new Horde_Date_Parser_Handler(array(':repeater_month_name', ':scalar_day', ':scalar_year', ':separator_as?', 'time?'), 'handle_rmn_sd_sy'),
+                new Horde_Date_Parser_Handler(array(':repeater_month_name', ':scalar_day', ':separator_as?', 'time?'), 'handle_rmn_sd'),
+                new Horde_Date_Parser_Handler(array(':repeater_month_name', ':ordinal_day', ':separator_as?', 'time?'), 'handle_rmn_od'),
+                new Horde_Date_Parser_Handler(array(':repeater_month_name', ':scalar_year'), 'handle_rmn_sy'),
+                new Horde_Date_Parser_Handler(array(':scalar_day', ':repeater_month_name', ':scalar_year', ':separator_at?', 'time?'), 'handle_sd_rmn_sy'),
+                new Horde_Date_Parser_Handler(array(':scalar_month', ':separator_slash_or_dash', ':scalar_day', ':separator_slash_or_dash', ':scalar_year', ':separator_at?', 'time?'), 'handle_sm_sd_sy'),
+                new Horde_Date_Parser_Handler(array(':scalar_day', ':separator_slash_or_dash', ':scalar_month', ':separator_slash_or_dash', ':scalar_year', ':separator_at?', 'time?'), 'handle_sd_sm_sy'),
+                new Horde_Date_Parser_Handler(array(':scalar_year', ':separator_slash_or_dash', ':scalar_month', ':separator_slash_or_dash', ':scalar_day', ':separator_at?', 'time?'), 'handle_sy_sm_sd'),
+                new Horde_Date_Parser_Handler(array(':scalar_month', ':separator_slash_or_dash', ':scalar_year'), 'handle_sm_sy'),
+                new Horde_Date_Parser_Handler(array(':scalar_day', ':separator_at?', ':repeater_month_name', ':separator_at?', ':scalar_year', ':separator_at?', 'time?'), 'handle_sd_rmn_sy'),
+                new Horde_Date_Parser_Handler(array(':repeater_day_name',  ':separator_at?', ':time?'), 'handle_rdn'),
+                               new Horde_Date_Parser_Handler(array(':scalar_day',  ':separator_at?', ':scalar_month', ':separator_at?', ':scalar_year?', 'time?'), 'handle_sd_sm_sy'),
+                               new Horde_Date_Parser_Handler(array(':scalar_day',  ':separator_at?', ':repeater_month_name', ':separator_at?', ':scalar_year', ':separator_at?', 'time?'), 'handle_sd_rmn_sy'), 
+                new Horde_Date_Parser_Handler(array(':scalar_day',  ':separator_at?', ':repeater_month_name', ':separator_at?', 'time?'), 'handle_sd_rmn'),
+            ),
+
+            // tonight at 7pm
+            'anchor' => array(
+                new Horde_Date_Parser_Handler(array(':grabber?', ':repeater', ':separator_at?', ':repeater?', ':repeater?'), 'handle_r'),
+                new Horde_Date_Parser_Handler(array(':grabber?', ':repeater', ':repeater', ':separator_at?', ':repeater?', ':repeater?'), 'handle_r'),
+                new Horde_Date_Parser_Handler(array(':repeater', ':grabber', ':repeater'), 'handle_r_g_r'),
+            ),
+
+            // 3 weeks from now, in 2 months
+            'arrow' => array(
+                new Horde_Date_Parser_Handler(array(':scalar', ':repeater', ':pointer'), 'handle_s_r_p'),
+                new Horde_Date_Parser_Handler(array(':pointer', ':scalar', ':repeater'), 'handle_p_s_r'),
+                new Horde_Date_Parser_Handler(array(':scalar', ':repeater', ':pointer', 'anchor'), 'handle_s_r_p_a'),
+            ),
+
+            // 3rd week in march
+            'narrow' => array(
+                new Horde_Date_Parser_Handler(array(':ordinal', ':repeater', ':separator_in', ':repeater'), 'handle_o_r_s_r'),
+                new Horde_Date_Parser_Handler(array(':ordinal', ':repeater', ':grabber', ':repeater'), 'handle_o_r_g_r'),
+            ),
+        );
+    }
+
+    /**
+     * Turns the tagged tokens into a span by dispatching to the first
+     * matching handler.
+     *
+     * Definition groups are tried in the order 'date', 'anchor', 'arrow',
+     * 'narrow'; within a group the handlers are tried in table order. For the
+     * first three groups, separator tokens are filtered out before the handler
+     * method is called; 'narrow' handlers receive the tokens unfiltered.
+     *
+     * @param array $tokens   Tagged tokens.
+     * @param array $options  Parser options, passed through to the handler.
+     *
+     * @return mixed  The value returned by the matching handler method, or
+     *                null if no handler matches.
+     */
+    public function tokensToSpan($tokens, $options)
+    {
+        $this->initDefinitions();
+
+        // maybe it's a specific date
+        foreach ($this->definitions['date'] as $handler) {
+            if ($handler->match($tokens, $this->definitions)) {
+                $goodTokens = array_values(array_filter($tokens, create_function('$o', 'return !$o->getTag("separator");')));
+                $this->debug($handler->handlerMethod, $goodTokens, $options);
+                return call_user_func(array($this, $handler->handlerMethod), $goodTokens, $options);
+            }
+        }
+
+        // I guess it's not a specific date, maybe it's just an anchor
+        foreach ($this->definitions['anchor'] as $handler) {
+            if ($handler->match($tokens, $this->definitions)) {
+                $goodTokens = array_values(array_filter($tokens, create_function('$o', 'return !$o->getTag("separator");')));
+                $this->debug($handler->handlerMethod, $goodTokens, $options);
+                return call_user_func(array($this, $handler->handlerMethod), $goodTokens, $options);
+            }
+        }
+
+        // not an anchor, perhaps it's an arrow
+        // (this filter does not test 'separator_in', unlike the list in postTokenize())
+        foreach ($this->definitions['arrow'] as $handler) {
+            if ($handler->match($tokens, $this->definitions)) {
+                $goodTokens = array_values(array_filter($tokens, create_function('$o', 'return !$o->getTag("separator_at") && !$o->getTag("separator_slash_or_dash") && !$o->getTag("separator_comma");')));
+                $this->debug($handler->handlerMethod, $goodTokens, $options);
+                return call_user_func(array($this, $handler->handlerMethod), $goodTokens, $options);
+            }
+        }
+
+        // not an arrow, let's hope it's a narrow
+        foreach ($this->definitions['narrow'] as $handler) {
+            if ($handler->match($tokens, $this->definitions)) {
+                //good_tokens = tokens.select { |o| !o.get_tag Separator }
+                $this->debug($handler->handlerMethod, $tokens, $options);
+                return call_user_func(array($this, $handler->handlerMethod), $tokens, $options);
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Returns either the whole day starting at $dayStart or, when time tokens
+     * are present, the anchor resolved from those tokens.
+     *
+     * Side effect: when time tokens are present, $this->now is moved to the
+     * start of the day so the time is resolved relative to it.
+     *
+     * @param Horde_Date $dayStart    Start of the day.
+     * @param array      $timeTokens  Tokens describing a time; may be empty.
+     * @param array      $options     Parser options.
+     *
+     * @return mixed  A Horde_Date_Span covering one day, or the result of
+     *                getAnchor().
+     */
+    public function dayOrTime($dayStart, $timeTokens, $options)
+    {
+        $outerSpan = new Horde_Date_Span($dayStart, $dayStart->add(array('day' => 1)));
+
+        if (!empty($timeTokens)) {
+            $this->now = $outerSpan->begin;
+            return $this->getAnchor($this->dealiasAndDisambiguateTimes($timeTokens, $options), $options);
+        } else {
+            return $outerSpan;
+        }
+    }
+
+
+    /**
+     * Shared implementation for "month + day" handlers.
+     *
+     * Resolves the month repeater relative to $this->now using the configured
+     * context, takes year and month from the start of that span, and combines
+     * them with $day.
+     *
+     * @param object $month       Month repeater (the 'repeater_month_name' tag).
+     * @param mixed  $day         Day of month (a 'scalar_day' or 'ordinal_day' tag).
+     * @param array  $timeTokens  Optional trailing time tokens.
+     * @param array  $options     Parser options; 'context' is read here.
+     *
+     * @return mixed  Result of dayOrTime().
+     */
+    public function handle_m_d($month, $day, $timeTokens, $options)
+    {
+        $month->now = $this->now;
+        $span = $month->this($options['context']);
+
+        // NOTE(review): unlike the sibling handlers, this Horde_Date
+        // construction is not wrapped in try/catch.
+        $dayStart = new Horde_Date($span->begin->year, $span->begin->month, $day);
+        return $this->dayOrTime($dayStart, $timeTokens, $options);
+    }
+
+    /**
+     * Handles "month-name day [time]": token 0 is the month name, token 1 the
+     * scalar day, and any remaining tokens are passed on as time tokens.
+     */
+    public function handle_rmn_sd($tokens, $options)
+    {
+        return $this->handle_m_d($tokens[0]->getTag('repeater_month_name'), $tokens[1]->getTag('scalar_day'), array_slice($tokens, 2), $options);      // month first (day/year)
+    }
+
+    /**
+     * Handles "month-name ordinal-day [time]": token 0 is the month name,
+     * token 1 the ordinal day, and any remaining tokens are passed on as time
+     * tokens.
+     */
+    public function handle_rmn_od($tokens, $options)
+    {
+        return $this->handle_m_d($tokens[0]->getTag('repeater_month_name'), $tokens[1]->getTag('ordinal_day'), array_slice($tokens, 2), $options);
+    }
+
+    /**
+     * Handles "month-name year": returns the span from the first day of that
+     * month to the first day of the following month.
+     *
+     * @return Horde_Date_Span|null  Null if constructing the dates or the
+     *                               span throws.
+     */
+    public function handle_rmn_sy($tokens, $options)
+    {
+        $month = $tokens[0]->getTag('repeater_month_name')->index();
+        $year = $tokens[1]->getTag('scalar_year');
+
+        try {
+            // NOTE(review): for the last month this passes $month + 1 beyond
+            // the calendar range - presumably Horde_Date normalises it; confirm.
+            return new Horde_Date_Span(new Horde_Date($year, $month, 1), new Horde_Date($year, $month + 1, 1));
+        } catch (Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Handles "day-name month-name day time timezone year".
+     *
+     * Only the month (token 1), day (token 2), time (token 3) and year
+     * (token 5) are used; the day name (token 0) and the timezone (token 4)
+     * are not read.
+     *
+     * @return mixed  Result of dayOrTime(), or null if an exception is thrown.
+     */
+    public function handle_rdn_rmn_sd_t_tz_sy($tokens, $options)
+    {
+        $month = $tokens[1]->getTag('repeater_month_name')->index();
+        $day = $tokens[2]->getTag('scalar_day');
+        $year = $tokens[5]->getTag('scalar_year');
+
+        try {
+            $dayStart = new Horde_Date($year, $month, $day);
+            return $this->dayOrTime($dayStart, array($tokens[3]), $options);
+        } catch (Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Handles "month-name day year [time]": tokens 0-2 give month, day and
+     * year; everything from token 3 on is treated as time tokens.
+     *
+     * @return mixed  Result of dayOrTime(), or null if an exception is thrown.
+     */
+    public function handle_rmn_sd_sy($tokens, $options)
+    {
+        $month = $tokens[0]->getTag('repeater_month_name')->index();
+        $day = $tokens[1]->getTag('scalar_day');
+        $year = $tokens[2]->getTag('scalar_year');
+
+        $timeTokens = array_slice($tokens, 3);
+
+        try {
+            $dayStart = new Horde_Date($year, $month, $day);
+            return $this->dayOrTime($dayStart, $timeTokens, $options);
+        } catch (Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Handles "day month-name year [time]" by swapping the first two tokens
+     * and delegating to handle_rmn_sd_sy().
+     */
+    public function handle_sd_rmn_sy($tokens, $options)
+    {
+        // Reorder to month-name, day, year; trailing time tokens keep their order.
+        $newTokens = array($tokens[1], $tokens[0], $tokens[2]);
+        $timeTokens = array_slice($tokens, 3);
+        return $this->handle_rmn_sd_sy(array_merge($newTokens, $timeTokens), $options);
+    }
+
+
+    /**
+     * Handles "day month-name [time]": token 0 is the scalar day, token 1 the
+     * month name, and any remaining tokens are passed on as time tokens.
+     */
+    public function handle_sd_rmn($tokens, $options)
+    {
+               return $this->handle_m_d($tokens[1]->getTag('repeater_month_name'), $tokens[0]->getTag('scalar_day'), array_slice($tokens, 2), $options);
+       }
+
+    /**
+     * Handles numeric "month day year [time]": tokens 0-2 give month, day and
+     * year; everything from token 3 on is treated as time tokens.
+     *
+     * @return mixed  Result of dayOrTime(), or null if an exception is thrown.
+     */
+    public function handle_sm_sd_sy($tokens, $options)
+    {
+        $month = $tokens[0]->getTag('scalar_month');
+        $day = $tokens[1]->getTag('scalar_day');
+        $year = $tokens[2]->getTag('scalar_year');
+
+        $timeTokens = array_slice($tokens, 3);
+
+        try {
+            $dayStart = new Horde_Date($year, $month, $day);
+            return $this->dayOrTime($dayStart, $timeTokens, $options);
+        } catch (Exception $e) {
+            return null;
+        }
+    }
+
+    /**
+     * Handles numeric "day month year [time]" by swapping the first two
+     * tokens and delegating to handle_sm_sd_sy().
+     */
+    public function handle_sd_sm_sy($tokens, $options)
+    {
+        // Reorder to month, day, year.
+        $newTokens = array($tokens[1], $tokens[0], $tokens[2]);
+        $timeTokens = array_slice($tokens, 3);
+        return $this->handle_sm_sd_sy(array_merge($newTokens, $timeTokens), $options);
+    }
+
+    /**
+     * Handles numeric "year month day [time]" by rotating the first three
+     * tokens and delegating to handle_sm_sd_sy().
+     */
+    public function handle_sy_sm_sd($tokens, $options)
+    {
+        // Reorder to month, day, year.
+        $newTokens = array($tokens[1], $tokens[2], $tokens[0]);
+        $timeTokens = array_slice($tokens, 3);
+        return $this->handle_sm_sd_sy(array_merge($newTokens, $timeTokens), $options);
+    }
+
+    /**
+     * Handles numeric "month year": returns the span from the first day of
+     * that month to the first day of the following month.
+     *
+     * @return Horde_Date_Span|null  Null if constructing the dates or the
+     *                               span throws.
+     */
+    public function handle_sm_sy($tokens, $options)
+    {
+        $month = $tokens[0]->getTag('scalar_month');
+        $year = $tokens[1]->getTag('scalar_year');
+
+        try {
+            // NOTE(review): for month 12 this passes month 13 - presumably
+            // Horde_Date normalises it; confirm.
+            return new Horde_Date_Span(new Horde_Date($year, $month, 1), new Horde_Date($year, $month + 1, 1));
+        } catch (Exception $e) {
+            return null;
+        }
+    }
+
+
+    /*##########################################################################
+    # Anchors
+    ##########################################################################*/
+
+    /**
+     * Handles an anchor expression: normalises day-portion aliases and
+     * ambiguous times in the tokens, then resolves them with getAnchor().
+     */
+    public function handle_r($tokens, $options)
+    {
+        $ddTokens = $this->dealiasAndDisambiguateTimes($tokens, $options);
+        return $this->getAnchor($ddTokens, $options);
+    }
+
+    /**
+     * Handles "repeater grabber repeater" by moving the grabber to the front
+     * and delegating to handle_r().
+     */
+    public function handle_r_g_r($tokens, $options)
+    {
+        $newTokens = array($tokens[1], $tokens[0], $tokens[2]);
+        return $this->handle_r($newTokens, $options);
+    }
+
+
+    /*##########################################################################
+    # Arrows
+    ##########################################################################*/
+
+    /**
+     * Shared implementation for the arrow handlers: offsets $span by
+     * "scalar repeater pointer".
+     *
+     * @param array  $tokens   Tokens 0-2 carry the scalar, repeater and pointer.
+     * @param object $span     Span to offset from.
+     * @param array  $options  Parser options (not used here).
+     *
+     * @return mixed  Result of the repeater's offset() call.
+     */
+    public function handle_srp($tokens, $span, $options)
+    {
+        $distance = $tokens[0]->getTag('scalar');
+        $repeater = $tokens[1]->getTag('repeater');
+        $pointer = $tokens[2]->getTag('pointer');
+
+        return $repeater->offset($span, $distance, $pointer);
+    }
+
+    /**
+     * Handles "scalar repeater pointer", offsetting from a short span that
+     * starts at $this->now.
+     */
+    public function handle_s_r_p($tokens, $options)
+    {
+        // NOTE(review): add(1) presumably adds one second - confirm against Horde_Date::add().
+        $span = new Horde_Date_Span($this->now, $this->now->add(1));
+        return $this->handle_srp($tokens, $span, $options);
+    }
+
+    /**
+     * Handles "pointer scalar repeater" by moving the pointer to the end and
+     * delegating to handle_s_r_p().
+     */
+    public function handle_p_s_r($tokens, $options)
+    {
+        $newTokens = array($tokens[1], $tokens[2], $tokens[0]);
+        return $this->handle_s_r_p($newTokens, $options);
+    }
+
+    /**
+     * Handles "scalar repeater pointer anchor": resolves the anchor from the
+     * tokens after the first three, then offsets from that span.
+     */
+    public function handle_s_r_p_a($tokens, $options)
+    {
+        $anchorSpan = $this->getAnchor(array_slice($tokens, 3), $options);
+        return $this->handle_srp($tokens, $anchorSpan, $options);
+    }
+
+
+    /*##########################################################################
+    # Narrows
+    ##########################################################################*/
+
+    /**
+     * Shared implementation for the narrow handlers: finds the Nth occurrence
+     * of a repeater inside an outer span.
+     *
+     * @param array  $tokens     Token 0 carries the ordinal, token 1 the repeater.
+     * @param object $outerSpan  Span to search in.
+     * @param array  $options    Parser options (not used here).
+     *
+     * @return mixed  The span of the Nth occurrence, or null if an occurrence
+     *                begins after the end of $outerSpan (or the ordinal is
+     *                not positive, so the loop never runs).
+     */
+    public function handle_orr($tokens, $outerSpan, $options)
+    {
+        $repeater = $tokens[1]->getTag('repeater');
+        // Start just before the outer span so its first occurrence is found by next().
+        $repeater->now = $outerSpan->begin->sub(1);
+        $ordinal = $tokens[0]->getTag('ordinal');
+        $span = null;
+
+        for ($i = 0; $i < $ordinal; $i++) {
+            $span = $repeater->next('future');
+            if ($span->begin->after($outerSpan->end)) {
+                $span = null;
+                break;
+            }
+        }
+        return $span;
+    }
+
+    /**
+     * Handles "ordinal repeater separator repeater": the last token defines
+     * the outer span, the first two select the occurrence inside it.
+     */
+    public function handle_o_r_s_r($tokens, $options)
+    {
+        $outerSpan = $this->getAnchor(array($tokens[3]), $options);
+        return $this->handle_orr(array($tokens[0], $tokens[1]), $outerSpan, $options);
+    }
+
+    /**
+     * Handles "ordinal repeater grabber repeater": the grabber and the last
+     * repeater define the outer span, the first two tokens select the
+     * occurrence inside it.
+     */
+    public function handle_o_r_g_r($tokens, $options)
+    {
+        $outerSpan = $this->getAnchor(array($tokens[2], $tokens[3]), $options);
+        return $this->handle_orr(array($tokens[0], $tokens[1]), $outerSpan, $options);
+    }
+
+
+    /*##########################################################################
+    # Logging Methods
+    ##########################################################################*/
+
+    /**
+     * Debug hook called before a handler method is dispatched.
+     *
+     * Currently a no-op: the output statement is commented out. All passed
+     * arguments are collected with func_get_args(), so callers may pass more
+     * than the two declared parameters.
+     *
+     * @param string $method  Name of the handler method about to be called.
+     * @param mixed  $args    First of the arguments for that handler.
+     */
+    public function debug($method, $args)
+    {
+        $args = func_get_args();
+        $method = array_shift($args);
+        // echo "$method\n";
+    }
+
+
+    /*##########################################################################
+    # Support Methods
+    ##########################################################################*/
+
+    /**
+     * Resolves a list of (grabber +) repeater tokens into a span.
+     *
+     * The widest repeater is positioned relative to $this->now according to
+     * the grabber ('last', 'this' or 'next'; 'this' when no grabber token is
+     * present). The remaining repeaters are then narrowed down inside that
+     * span with findWithin().
+     *
+     * @param array $tokens   Tokens to resolve.
+     * @param array $options  Parser options; 'context' is read for a lone
+     *                        repeater with the 'this' grabber.
+     *
+     * @return mixed  The result of findWithin(), or null if $tokens contains
+     *                no repeater.
+     *
+     * @throws Horde_Date_Parser_Exception  If the grabber is not one of
+     *                                      'last', 'this', 'next'.
+     */
+    public function getAnchor($tokens, $options)
+    {
+        $grabber = 'this';
+        $pointer = 'future';
+
+        $repeaters = $this->getRepeaters($tokens);
+        for ($i = 0, $size = count($repeaters); $i < $size; $i++) {
+            array_pop($tokens);
+        }
+
+        if (count($tokens) && $tokens[0]->getTag('grabber')) {
+            $grabber = $tokens[0]->getTag('grabber');
+            array_pop($tokens);
+        }
+
+        // Without a repeater there is nothing to anchor on. Return null, like
+        // the other "no result" paths, instead of failing on a null $head.
+        $head = array_shift($repeaters);
+        if ($head === null) {
+            return null;
+        }
+        $head->now = $this->now;
+
+        switch ($grabber) {
+        case 'last':
+            $outerSpan = $head->next('past');
+            break;
+
+        case 'this':
+            // With further repeaters to narrow down, take the current
+            // occurrence regardless of the past/future context.
+            if (count($repeaters)) {
+                $outerSpan = $head->this('none');
+            } else {
+                $outerSpan = $head->this($options['context']);
+            }
+            break;
+
+        case 'next':
+            $outerSpan = $head->next('future');
+            break;
+
+        default:
+            throw new Horde_Date_Parser_Exception('Invalid grabber ' . $grabber);
+        }
+
+        return $this->findWithin($repeaters, $outerSpan, $pointer);
+    }
+
+    /**
+     * Collects the repeater tags of the given tokens, widest first.
+     *
+     * @param array $tokens  Tokens to inspect.
+     *
+     * @return array  The 'repeater' tags of all tokens that have one, sorted
+     *                by descending width().
+     */
+    public function getRepeaters($tokens)
+    {
+        $repeaters = array();
+        foreach ($tokens as $token) {
+            if ($t = $token->getTag('repeater')) {
+                $repeaters[] = $t;
+            }
+        }
+
+        // Return repeaters in order from widest (years) to smallest (seconds).
+        // usort() expects an integer less than, equal to or greater than zero.
+        // The previous boolean result could never be negative, so it was not
+        // a valid three-way comparison.
+        usort($repeaters, create_function('$a, $b', '$aw = $a->width(); $bw = $b->width(); if ($aw == $bw) { return 0; } return ($bw > $aw) ? 1 : -1;'));
+        return $repeaters;
+    }
+
+    /**
+     * Narrows a span down through a list of repeaters, each one being
+     * resolved inside the span produced by the previous one.
+     *
+     * @param array  $tags     Repeaters still to apply, outermost first.
+     * @param object $span     Span to start from.
+     * @param string $pointer  'future' positions each repeater at the start
+     *                         of the current span, anything else at its end.
+     *
+     * @return mixed  The innermost span, or null as soon as a repeater's span
+     *                has neither its begin nor its end inside the current
+     *                span.
+     */
+    public function findWithin($tags, $span, $pointer)
+    {
+        if (empty($tags)) {
+            return $span;
+        }
+
+        $current = $span;
+        foreach ($tags as $tag) {
+            if ($pointer == 'future') {
+                $tag->now = $current->begin;
+            } else {
+                $tag->now = $current->end;
+            }
+            $inner = $tag->this('none');
+
+            if (!($current->includes($inner->begin) || $current->includes($inner->end))) {
+                return null;
+            }
+            $current = $inner;
+        }
+
+        return $current;
+    }
+
+    /**
+     * Handle aliases of am/pm and disambiguate bare times.
+     *
+     * 5:00 in the morning -> 5:00 am
+     * 7:00 in the evening -> 7:00 pm
+     *
+     * Step 1: if the tokens contain both a day portion and a time, a
+     * 'morning' day portion is replaced by 'am' and an 'afternoon', 'evening'
+     * or 'night' day portion by 'pm'. The token object is re-tagged in place.
+     *
+     * Step 2: unless 'ambiguousTimeRange' is 'none', every ambiguous time
+     * that is not directly followed by a day portion gets a synthetic
+     * 'disambiguator' token inserted after it, carrying a day portion built
+     * from 'ambiguousTimeRange'.
+     *
+     * @param array $tokens   Tokens to process.
+     * @param array $options  Parser options; 'ambiguousTimeRange' is read.
+     *
+     * @return array  The token list, possibly with disambiguator tokens added.
+     */
+    public function dealiasAndDisambiguateTimes($tokens, $options)
+    {
+        // Index of the first token tagged as a day portion, if any.
+        $dayPortionIndex = null;
+        foreach ($tokens as $i => $t) {
+            if ($t->getTag('repeater_day_portion')) {
+                $dayPortionIndex = $i;
+                break;
+            }
+        }
+
+        // Index of the first token tagged as a time, if any.
+        $timeIndex = null;
+        foreach ($tokens as $i => $t) {
+            if ($t->getTag('repeater_time')) {
+                $timeIndex = $i;
+                break;
+            }
+        }
+
+        if ($dayPortionIndex !== null && $timeIndex !== null) {
+            $t1 = $tokens[$dayPortionIndex];
+            $t1tag = $t1->getTag('repeater_day_portion');
+
+            if ($t1tag->type == 'morning') {
+                $t1->untag('repeater_day_portion');
+                $t1->tag('repeater_day_portion', new Horde_Date_Repeater_DayPortion('am'));
+            } elseif (in_array($t1tag->type, array('afternoon', 'evening', 'night'))) {
+                $t1->untag('repeater_day_portion');
+                $t1->tag('repeater_day_portion', new Horde_Date_Repeater_DayPortion('pm'));
+            }
+        }
+
+        // handle ambiguous times if ambiguousTimeRange is specified
+        // NOTE(review): the comparison with 'none' is loose (!=), and when the
+        // option is not set at all the branch is still entered and the option
+        // is read below - confirm both are intended.
+        if (!isset($options['ambiguousTimeRange']) || $options['ambiguousTimeRange'] != 'none') {
+            $ttokens = array();
+            foreach ($tokens as $i => $t0) {
+                $ttokens[] = $t0;
+                $t1 = isset($tokens[$i + 1]) ? $tokens[$i + 1] : null;
+                if ($t0->getTag('repeater_time') && $t0->getTag('repeater_time')->ambiguous && (!$t1 || !$t1->getTag('repeater_day_portion'))) {
+                    $distoken = new Horde_Date_Parser_Token('disambiguator');
+                    $distoken->tag('repeater_day_portion', new Horde_Date_Repeater_DayPortion($options['ambiguousTimeRange']));
+                    $ttokens[] = $distoken;
+                }
+            }
+
+            $tokens = $ttokens;
+        }
+
+        return $tokens;
+    }
+
 }
index a408c0a..8ba01f5 100644 (file)
@@ -5,9 +5,28 @@ class Horde_Date_Parser_Locale_Pt_Grabber extends Horde_Date_Parser_Locale_Base_
      * Regex tokens
      */
     public $scanner = array(
+        // Each pattern is anchored (^...$) and is tested against the word of
+        // a single token by scanForAll(); the value is the grabber tag.
-        '/\b(passado|[uú]ltimo)\b/' => 'last',
-        '/\best[ea]\b/' => 'this',
-        '/\bpr[oó]ximo\b/' => 'next',
+        '/^(passado|[uú]ltim[ao]|anterior)$/' => 'last',                      
+        '/^n?est[ea]$/' => 'this',                                     
+        '/^(pr[oó]xim[oa]|seguinte)$/' => 'next',     
     );
 
+    /**
+     * Tags every token whose word matches one of the grabber patterns.
+     *
+     * @param array $tokens  Tokens to inspect (objects exposing ->word and
+     *                       tag()).
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens)
+    {
+        foreach (array_keys($tokens) as $position) {
+            $grabber = $this->scanForAll($tokens[$position]);
+            if ($grabber) {
+                $tokens[$position]->tag('grabber', $grabber);
+            }
+        }
+        return $tokens;
+    }
+
+    /**
+     * Returns the tag of the first scanner pattern matching the token word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'last', 'this' or 'next'; null without a match.
+     */
+    public function scanForAll($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->scanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->scanner[$pattern];
+            }
+        }
+        return null;
+    }
+
 }
index c053e70..76251e6 100644 (file)
@@ -2,11 +2,49 @@
 class Horde_Date_Parser_Locale_Pt_Ordinal extends Horde_Date_Parser_Locale_Base_Ordinal
 {
 
-/*
-    public $ordinalRegex = '/^(\d*)(\.|\xBA|\xAA)$/';
-    public $ordinalDayRegex = '/^(\d*)(\.|\xBA|\xAA)$/';
-*/
-    public $ordinalRegex = '/\b(\d*)(\.|\xBA|\xAA)?\b/';
-    public $ordinalDayRegex = '/\b(\d*)(\.|\xBA|\xAA)?\b/';
+    // Digits with an optional ordinal marker: '.', the single bytes
+    // \xBA / \xAA, or the literal characters º / ª as encoded in this file.
+    // The marker is optional, so a bare number matches as well.
+    public $ordinalRegex = '/^(\d*)(\.|\xBA|\xAA|º|ª)?$/';
+    // Two-digit day of month, 01-31, with the same optional marker.
+    public $ordinalDayRegex = '/^(0[1-9]|[12][0-9]|3[01])(\.|\xBA|\xAA|º|ª)?$/';
+    // Two-digit month number, 01-12, with the same optional marker.
+    public $ordinalMonthsRegex = '/^(0[1-9]|1[012])(\.|\xBA|\xAA|º|ª)?$/';
 
+    /**
+     * Tags tokens that look like ordinals, ordinal days or ordinal months.
+     *
+     * The three checks are independent, so one token can receive several
+     * tags.
+     *
+     * @param array $tokens  Tokens to inspect (objects exposing ->word and
+     *                       tag()).
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens)
+    {
+        $scanners = array(
+            'ordinal'       => 'scanForOrdinals',
+            'ordinal_day'   => 'scanForDays',
+            'ordinal_month' => 'scanForMonths',
+        );
+
+        foreach ($tokens as &$token) {
+            foreach ($scanners as $tagName => $method) {
+                $value = $this->$method($token);
+                if ($value !== null) {
+                    $token->tag($tagName, $value);
+                }
+            }
+        }
+        unset($token);
+
+        return $tokens;
+    }
+
+    /**
+     * Extracts the numeric part of a token matching $ordinalRegex.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return integer|null  The number, or null if the word does not match.
+     */
+    public function scanForOrdinals($token)
+    {
+        $found = array();
+        if (!preg_match($this->ordinalRegex, $token->word, $found)) {
+            return null;
+        }
+        return (int)$found[1];
+    }
+
+    /**
+     * Extracts a day of month from a token matching $ordinalDayRegex.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return integer|null  The day if it is not greater than 31, else null.
+     */
+    public function scanForDays($token)
+    {
+        $found = array();
+        if (!preg_match($this->ordinalDayRegex, $token->word, $found)) {
+            return null;
+        }
+        return $found[1] <= 31 ? (int)$found[1] : null;
+    }
+
+    /**
+     * Extracts a month number from a token matching $ordinalMonthsRegex.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return integer|null  The month if it is not greater than 12, else
+     *                       null.
+     */
+    public function scanForMonths($token)
+    {
+        $found = array();
+        if (!preg_match($this->ordinalMonthsRegex, $token->word, $found)) {
+            return null;
+        }
+        return $found[1] <= 12 ? (int)$found[1] : null;
+    }
 }
index 6f29293..9a6f0ce 100644 (file)
@@ -2,8 +2,27 @@
 class Horde_Date_Parser_Locale_Pt_Pointer extends Horde_Date_Parser_Locale_Base_Pointer
 {
     public $scanner = array(
+        // Anchored patterns tested against one token word by scanForAll().
+        // NOTE(review): the 'future' alternatives containing \s+ can only
+        // match if a token word itself contains whitespace -- confirm that
+        // pre-normalisation/tokenising keeps such phrases in one token.
-        '/\bantes\b/' => 'past',
-        '/\b(depois|ap[oó]s|dentro(\s+de)?|daqui(\s+a)?)\b/' => 'future',
+        '/^antes$/' => 'past',
+        '/^(depois(\s+de)?|ap[oó]s|dentro\s+de|daqui\s+a)$/' => 'future',
     );
-}
 
+    /**
+     * Tags every token whose word matches one of the pointer patterns.
+     *
+     * @param array $tokens  Tokens to inspect (objects exposing ->word and
+     *                       tag()).
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens)
+    {
+        foreach (array_keys($tokens) as $position) {
+            $direction = $this->scanForAll($tokens[$position]);
+            if ($direction) {
+                $tokens[$position]->tag('pointer', $direction);
+            }
+        }
+        return $tokens;
+    }
+
+    /**
+     * Returns the tag of the first scanner pattern matching the token word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'past' or 'future'; null without a match.
+     */
+    public function scanForAll($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->scanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->scanner[$pattern];
+            }
+        }
+        return null;
+    }
+
+}
index b3214f6..2e46c6c 100644 (file)
 <?php
 class Horde_Date_Parser_Locale_Pt_Repeater extends Horde_Date_Parser_Locale_Base_Repeater
 {
+
     public $monthNameScanner = array(
-        '/\bjan(\.|eiro)?\b/' => 'january',
-        '/\bfev(\.|ereiro)?\b/' => 'february',
-        '/\bmar(\.|([cç]o))?\b/' => 'march',
-        '/\babr(\.|(il))?\b/' => 'april',
-        '/\bmai(\.|o)?\b/' => 'may',
-        '/\bjun(\.|ho)?\b/' => 'june',
-        '/\bjul(\.|ho)?\b/' => 'july',
-        '/\bago(\.|sto)?\b/' => 'august',
-        '/\bset(\.|embro)?\b/' => 'september',
-        '/\bout(\.|ubro)?\b/' => 'october',
-        '/\bnov(\.|embro)?\b/' => 'november',
-        '/\bdez(\.|embro)?\b/' => 'december',
+        '/^jan(eiro)?$/' => 'january',
+        '/^fev(reiro)?$/' => 'february',
+        '/^mar(co)?$/' => 'march',
+        '/^abr(il)?$/' => 'april',
+        '/^mai(o)?$/' => 'may',
+        '/^jun(ho)?$/' => 'june',     
+        '/^jul(ho)?$/' => 'july',
+               '/^ago(sto)?$/' => 'august',
+        '/^set(embro)?$/' => 'september',
+        '/^out(ubro)?$/' => 'october',
+        '/^nov(embro)?$/' => 'november',
+        '/^dez(embro)?$/' => 'december',
     );
 
+/*
+        '/^jan(\.|eiro)?$/' => 'january',
+        '/^fev(\.|ereiro)?$/' => 'february',
+        '/^mar(\.|(co))?$/' => 'march',
+        '/^abr(\.|(il))?$/' => 'april',
+        '/^mai(\.|o)?$/' => 'may',
+        '/^jun(\.|ho)?$/' => 'june',
+        '/^jul(\.|ho)?$/' => 'july',
+        '/^ago(\.|sto)?$/' => 'august',
+        '/^set(\.|embro)?$/' => 'september',
+        '/^out(\.|ubro)?$/' => 'october',
+        '/^nov(\.|embro)?$/' => 'november',
+        '/^dez(\.|embro)?$/' => 'december',
+
+*/
+
     public $dayNameScanner = array(
-        '/\bseg(d?(unda?(\s|\-)feira))?\b/' => 'monday',
-        '/\bter([cç]a?(\s|\-)feira)?\b/' => 'tuesday',
-        '/\bqua(rta?(\s|\-)feira)?\b/' => 'wednesday',
-        '/\bqui(nta?(\s|\-)feira)?\b/' => 'thursday',
-        '/\bsex(ta?(\s|\-)feira)?\b/' => 'friday',
-        '/\bs[aá]b(ado)?\b/' => 'saturday',
-        '/\bdom(ingo)?\b/' => 'sunday',
+        // Three-letter abbreviations. The 'sab' and 'dom' entries here are
+        // also covered by the '(ado)?' / '(ingo)?' patterns further down.
+               '/^seg$/' => 'monday',
+               '/^ter$/' => 'tuesday',
+               '/^qua$/' => 'wednesday',
+               '/^qui$/' => 'thursday',
+        '/^sex$/' => 'friday',
+               '/^sab$/' => 'saturday',
+               '/^dom$/' => 'sunday',
+        // Weekday names without a "-feira" suffix and without accents or
+        // cedilla -- presumably stripped before scanning; TODO confirm.
+               '/^segunda$/' => 'monday',
+               '/^terca$/' => 'tuesday',
+               '/^quarta$/' => 'wednesday',
+        '/^quinta$/' => 'thursday',
+               '/^sexta$/' => 'friday',
+        '/^sab(ado)?$/' => 'saturday',
+        '/^dom(ingo)?$/' => 'sunday',
     );
 
+/*
+        '/^seg((unda)?(\s|\-)feira)?$/' => 'monday',
+        '/^ter(([cç]a)?(\s|\-)feira)?$/' => 'tuesday',
+       '/^qua((rta)?(\s|\-)feira)?$/' => 'wednesday',
+        '/^qui((nta)?([ \-]feira)?)?$/' => 'thursday',
+        '/^quinta-feira$/' => 'thursday',
+               '/^sex((ta)?(\s|\-)feira)?$/' => 'friday',
+
+*/
+// scalar timeSignifiers?
     public $dayPortionScanner = array(
-        '/\b(\d*)\s?ams?\b/' => 'am',
-        '/\b(\d*)\s?pms?\b/' => 'pm',
-        '/\bmanh[aã]\b/' => 'morning',
-        '/\btarde\b/' => 'afternoon',
-        '/\b(fim\s(d[ea]\s)tarde)\b/' => 'evening',
-        '/\bnoite\b/' => 'night',
+        // Optional digits followed by am/pm.
+        '/^(\d*)\s?ams?$/' => 'am',
+        '/^(\d*)\s?pms?$/' => 'pm',
+        // Morning and afternoon need a leading "de", "na", "a" or
+        // "durante a" inside the same token word.
+        '/^(?:de|na|a|durante\s+a) (manh[aã]|madrugada)$/' => 'morning',
+        '/^(?:de|na|a|durante\s+a) tarde$/' => 'afternoon',
+        '/^((fim\s(d[ea]\s)tarde)|anoitecer)$/' => 'evening',
+        '/^noite$/' => 'night',
     );
 
     public $unitScanner = array(
-        '/\bano(s)?\b/' => 'year',
-        '/\b(esta[cç][aã]o|[eé]poca)\b/' => 'season',
-        '/\bm[eê]s\b/' => 'month',
-        '/\bquinzena\b/' => 'fortnight',
-        '/\bsemana(s)?\b/' => 'week',
-        '/\b(fds|fim(\s|(\s|-)de(\s|-))semana)?\b/' => 'weekend',
-        '/\bdia(s)?\b/' => 'day',
-        '/\bhora(s)?\b/' => 'hour',
-        '/\bminuto(s)?\b/' => 'minute',
-        '/\bsegundo(s)?\b/' => 'second',
+        '/^anos?$/' => 'year',
+        '/^(estacao|epoca)$/' => 'season',
+        '/^mes$/' => 'month',
+        '/^quinzena$/' => 'fortnight',
+        '/^semanas?$/' => 'week',
+        '/^(fds|fim( |( |\-)de( |\-))semana)?$/' => 'weekend',
+        '/^dias?$/' => 'day',
+        '/^horas?$/' => 'hour',
+        '/^minutos?$/' => 'minute',
+        '/^segundos?$/' => 'second',
     );
 
+    // One or two digits, optionally followed by two digits (colon optional)
+    // and by two more digits (optional '.' or ':' separator).
+    public $timeRegex = '/^\d{1,2}(:?\d{2})?([\.:]?\d{2})?$/';
+
+    /**
+     * Tags each token with at most one repeater tag.
+     *
+     * The scanners run in a fixed order (month name, day name, day portion,
+     * time, unit) and the first one yielding a repeater wins.
+     *
+     * @param array $tokens   Tokens to inspect (objects exposing ->word and
+     *                        tag()).
+     * @param array $options  Options handed on to the time repeater.
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens, $options)
+    {
+        foreach ($tokens as &$token) {
+            $month = $this->scanForMonthNames($token);
+            if ($month) {
+                $token->tag('repeater_month_name', $month);
+                continue;
+            }
+
+            $day = $this->scanForDayNames($token);
+            if ($day) {
+                $token->tag('repeater_day_name', $day);
+                continue;
+            }
+
+            $portion = $this->scanForDayPortions($token);
+            if ($portion) {
+                $token->tag('repeater_day_portion', $portion);
+                continue;
+            }
+
+            $time = $this->scanForTimes($token, $options);
+            if ($time) {
+                $token->tag('repeater_time', $time);
+                continue;
+            }
+
+            $unit = $this->scanForUnits($token);
+            if ($unit) {
+                // The tag name is the lower-cased class name without its
+                // 'Horde_Date_' prefix.
+                $tagName = strtolower(str_replace('Horde_Date_', '', get_class($unit)));
+                $token->tag($tagName, $unit);
+            }
+        }
+        unset($token);
+
+        return $tokens;
+    }
+
+    /**
+     * Builds a month-name repeater for a token naming a month.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return Horde_Date_Repeater_MonthName|null  Null without a match.
+     */
+    public function scanForMonthNames($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->monthNameScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return new Horde_Date_Repeater_MonthName($this->monthNameScanner[$pattern]);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Builds a day-name repeater for a token naming a weekday.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return Horde_Date_Repeater_DayName|null  Null without a match.
+     */
+    public function scanForDayNames($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->dayNameScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return new Horde_Date_Repeater_DayName($this->dayNameScanner[$pattern]);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Builds a day-portion repeater for a token naming a part of the day.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return Horde_Date_Repeater_DayPortion|null  Null without a match.
+     */
+    public function scanForDayPortions($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->dayPortionScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return new Horde_Date_Repeater_DayPortion($this->dayPortionScanner[$pattern]);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Builds a time repeater for a token whose word matches $timeRegex.
+     *
+     * @param object $token   Token exposing ->word.
+     * @param array $options  Passed unchanged to the time repeater.
+     *
+     * @return Horde_Date_Repeater_Time|null  Null without a match.
+     */
+    public function scanForTimes($token, $options)
+    {
+        if (!preg_match($this->timeRegex, $token->word)) {
+            return null;
+        }
+        return new Horde_Date_Repeater_Time($token->word, $options);
+    }
+
+    /**
+     * Builds the unit repeater for a token naming a time unit.
+     *
+     * The class is 'Horde_Date_Repeater_' followed by the capitalised unit
+     * tag, and the tag is passed to its constructor.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return object|null  The repeater, or null without a match.
+     */
+    public function scanForUnits($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->unitScanner) as $pattern) {
+            if (!preg_match($pattern, $word)) {
+                continue;
+            }
+            $unit = $this->unitScanner[$pattern];
+            $class = 'Horde_Date_Repeater_' . ucfirst($unit);
+            return new $class($unit);
+        }
+        return null;
+    }
+
 }
index 7d69428..2236460 100644 (file)
@@ -1,4 +1,66 @@
 <?php
 class Horde_Date_Parser_Locale_Pt_Scalar extends Horde_Date_Parser_Locale_Base_Scalar
 {
+    // Any run of digits.
+    public $scalarRegex = '/^\d*$/';
+    // Two-digit day of month, 01-31. The group is mandatory: with a trailing
+    // "?" an empty word matched and could be tagged as a day.
+    public $dayRegex = '/^(0[1-9]|[12][0-9]|3[01])$/';
+    // Two-digit month number, 01-12.
+    public $monthRegex = '/^(0[1-9]|1[012])$/';
+    // Two-digit year, or four digits starting with 19 or 20.
+    public $yearRegex = '/^(19|20)?\d\d$/';
+    // A number directly followed by one of these words is not tagged as a
+    // scalar, day, month or year.
+    public $timeSignifiers = array('manha', 'manhã', 'tarde', 'noite', 'madrugada', 'anoitecer');
+
+    /**
+     * Tags numeric tokens as scalar, day, month and/or year.
+     *
+     * The four checks are independent, so one token can receive several
+     * tags. Each check also sees the word of the following token, or null
+     * for the last token.
+     *
+     * @param array $tokens  Tokens to inspect (objects exposing ->word and
+     *                       tag()).
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens)
+    {
+        $scanners = array(
+            'scalar'       => 'scanForScalars',
+            'scalar_day'   => 'scanForDays',
+            'scalar_month' => 'scanForMonths',
+            'scalar_year'  => 'scanForYears',
+        );
+
+        foreach ($tokens as $i => &$token) {
+            $next = $i + 1;
+            $postToken = isset($tokens[$next]) ? $tokens[$next]->word : null;
+            foreach ($scanners as $tagName => $method) {
+                $value = $this->$method($token, $postToken);
+                if ($value !== null) {
+                    $token->tag($tagName, $value);
+                }
+            }
+        }
+        unset($token);
+
+        return $tokens;
+    }
+
+    /**
+     * Returns the token word if it is a plain number not followed by a
+     * time signifier.
+     *
+     * @param object $token           Token exposing ->word.
+     * @param string|null $postToken  Word of the following token, if any.
+     *
+     * @return string|null  The word, or null if it does not qualify.
+     */
+    public function scanForScalars($token, $postToken)
+    {
+        if (!preg_match($this->scalarRegex, $token->word)) {
+            return null;
+        }
+        return in_array($postToken, $this->timeSignifiers) ? null : $token->word;
+    }
+
+    /**
+     * Returns the token word if it is a day of month not followed by a
+     * time signifier.
+     *
+     * @param object $token           Token exposing ->word.
+     * @param string|null $postToken  Word of the following token, if any.
+     *
+     * @return string|null  The word, or null if it does not qualify.
+     */
+    public function scanForDays($token, $postToken)
+    {
+        if (!preg_match($this->dayRegex, $token->word)) {
+            return null;
+        }
+        $isDay = $token->word <= 31 && !in_array($postToken, $this->timeSignifiers);
+        return $isDay ? $token->word : null;
+    }
+
+    /**
+     * Returns the token word if it is a month number not followed by a
+     * time signifier.
+     *
+     * @param object $token           Token exposing ->word.
+     * @param string|null $postToken  Word of the following token, if any.
+     *
+     * @return string|null  The word, or null if it does not qualify.
+     */
+    public function scanForMonths($token, $postToken)
+    {
+        if (!preg_match($this->monthRegex, $token->word)) {
+            return null;
+        }
+        $isMonth = $token->word <= 12 && !in_array($postToken, $this->timeSignifiers);
+        return $isMonth ? $token->word : null;
+    }
+
+    /**
+     * Returns the token word if it looks like a year and is not followed by
+     * a time signifier.
+     *
+     * @param object $token           Token exposing ->word.
+     * @param string|null $postToken  Word of the following token, if any.
+     *
+     * @return string|null  The word, or null if it does not qualify.
+     */
+    public function scanForYears($token, $postToken)
+    {
+        if (!preg_match($this->yearRegex, $token->word)) {
+            return null;
+        }
+        return in_array($postToken, $this->timeSignifiers) ? null : $token->word;
+    }
+
 }
index 17ef875..42cf175 100644 (file)
@@ -2,12 +2,74 @@
 class Horde_Date_Parser_Locale_Pt_Separator extends Horde_Date_Parser_Locale_Base_Separator
 {
 
+    // A token consisting solely of a comma.
+    public $commaScanner = array(
+        '/^,$/' => 'comma',
+    );
+
+    // A token consisting solely of a dash or a slash.
+    public $slashOrDashScanner = array(
+        '/^-$/' => 'dash',
+        '/^\/$/' => 'slash',
+    );
+
     public $atScanner = array(
+        // Both patterns yield the same 'at' tag. "as"/"ao" appear without
+        // accents -- presumably stripped before scanning; TODO confirm.
-        '/\b(em|@)\b/' => 'at',
+        '/^(em|@|de)$/' => 'at',
+               '/^(as|ao)$/' => 'at',
     );
 
+
     public $inScanner = array(
+        // Only the exact word "no" is tagged as an 'in' separator.
-        '/\bno\b/' => 'in',
+        '/^no$/' => 'in',
     );
 
+    /**
+     * Tags each token with at most one separator tag.
+     *
+     * The checks run in a fixed order (comma, slash or dash, "at", "in")
+     * and stop at the first hit.
+     *
+     * @param array $tokens  Tokens to inspect (objects exposing ->word and
+     *                       tag()).
+     *
+     * @return array  The token list that was passed in.
+     */
+    public function scan($tokens)
+    {
+        $checks = array(
+            'separator_comma'         => 'scanForCommas',
+            'separator_slash_or_dash' => 'scanForSlashOrDash',
+            'separator_at'            => 'scanForAt',
+            'separator_in'            => 'scanForIn',
+        );
+
+        foreach ($tokens as &$token) {
+            foreach ($checks as $tagName => $method) {
+                $hit = $this->$method($token);
+                if ($hit) {
+                    $token->tag($tagName, $hit);
+                    break;
+                }
+            }
+        }
+        unset($token);
+
+        return $tokens;
+    }
+
+    /**
+     * Returns the tag of the first comma pattern matching the token word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'comma', or null without a match.
+     */
+    public function scanForCommas($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->commaScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->commaScanner[$pattern];
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the tag of the first slash/dash pattern matching the token
+     * word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'dash' or 'slash'; null without a match.
+     */
+    public function scanForSlashOrDash($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->slashOrDashScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->slashOrDashScanner[$pattern];
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the tag of the first "at" pattern matching the token word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'at', or null without a match.
+     */
+    public function scanForAt($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->atScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->atScanner[$pattern];
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the tag of the first "in" pattern matching the token word.
+     *
+     * @param object $token  Token exposing ->word.
+     *
+     * @return string|null  'in', or null without a match.
+     */
+    public function scanForIn($token)
+    {
+        $word = $token->word;
+        foreach (array_keys($this->inScanner) as $pattern) {
+            if (preg_match($pattern, $word)) {
+                return $this->inScanner[$pattern];
+            }
+        }
+        return null;
+    }
 }
index c6e0f41..512d75c 100644 (file)
@@ -3,7 +3,7 @@ class Horde_Date_Parser_Locale_Pt_Timezone extends Horde_Date_Parser_Locale_Base
 {
 
     public $scanner = array(
-        '/((E[SD]T|C[SD]T|M[SD]T|P[SD]T)|((GMT)?\s*[+-]\s*\d{3,4}?)|GMT|UTC)/i' => 'tz',
+        // NOTE(review): the trailing remark says modifiers are not allowed,
+        // yet the pattern carries the "i" modifier -- confirm what was meant.
+        '/((E[SD]T|C[SD]T|M[SD]T|P[SD]T)|((GMT)?\s*[+-]\s*\d{3,4}?)|GMT|UTC)/i' => 'tz',               // must not have modifiers; causes an error if used
     );
 
 }
diff --git a/framework/Support/lib/Horde/Support/Numerizer/Locale/Pt.php b/framework/Support/lib/Horde/Support/Numerizer/Locale/Pt.php
new file mode 100644 (file)
index 0000000..3106ee7
--- /dev/null
@@ -0,0 +1,134 @@
+<?php
+class Horde_Support_Numerizer_Locale_Pt extends Horde_Support_Numerizer_Locale_Base
+{
+    // Number words and their digits. Keys are regex fragments that
+    // _directReplacements() wraps as "/<key>/i" and applies in this order,
+    // so the longer "dez..." words are replaced before plain "dez".
+    // NOTE(review): the fragments have no word boundaries, so they also
+    // match inside longer words (e.g. "dez" inside "dezembro") -- confirm
+    // callers cope with that.
+    public $DIRECT_NUMS = array(
+        'treze' => '13',
+        'catorze' => '14',
+               'quatorze' => '14',
+        'quinze' => '15',
+        'dezasseis' => '16',
+        'dezassete' => '17',
+        'dezoito' => '18',
+        'dezanove' => '19',
+        // "um"/"uma" only count when followed by a non-word character or the
+        // end of the string; that character is put back via $1.
+        'um(\W|$)' => '1$1',
+               'uma(\W|$)' => '1$1',
+        'dois' => '2',
+               'duas' => '2',
+        'tres' => '3',
+        'quatro' => '4',
+        'cinco' => '5',
+        'seis' => '6',
+        'sete' => '7',
+        'oito' => '8',
+        'nove' => '9',
+        'dez' => '10',
+        'onze' => '11',
+        'doze' => '12',
+    );
+
+    // Words for the tens 20-90; keys are regex fragments consumed by
+    // _replaceTenPrefixes().
+    public $TEN_PREFIXES = array(
+        'vinte' => '20',
+        'trinta' => '30',
+        'quarenta' => '40',
+        'cinquenta' => '50',
+        'sessenta' => '60',
+        'setenta' => '70',
+        'oitenta' => '80',
+        'noventa' => '90',
+    );
+
+    // Multiplier words; keys are regex fragments consumed by
+    // _replaceBigPrefixes() (a trailing " *" allows optional spaces).
+    // NOTE(review): 'milhar de' is mapped to 1000000000 -- confirm this is
+    // the intended value for that phrase.
+    public $BIG_PREFIXES = array(
+        'cem' => '100',
+        'mil' => '1000',
+        'milhao *' => '1000000',
+        'milhar de *' => '1000000000',
+        'biliao *' => '1000000000000',
+    );
+
+    /**
+     * Replaces Portuguese number words in a string with digits.
+     *
+     * The stages run in a fixed order: whitespace/conjunction clean-up, tens,
+     * direct number words, then the big multipliers. _fractionalAddition()
+     * is deliberately left out of the pipeline.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text with number words replaced.
+     */
+    public function numerize($string)
+    {
+        $pipeline = array(
+            '_splitHyphenateWords',
+            '_replaceTenPrefixes',
+            '_directReplacements',
+            '_replaceBigPrefixes',
+        );
+        foreach ($pipeline as $stage) {
+            $string = $this->$stage($string);
+        }
+
+        return $string;
+    }
+
+
+    /**
+     * Pre-processing step for numerize().
+     *
+     * Despite the name, the pattern contains no hyphen. It collapses every
+     * run of spaces into a single space, and where a non-digit character is
+     * followed by a space, an optional "e" and another space, it keeps only
+     * one space between the surrounding characters.
+     *
+     * NOTE(review): the last character class is [^d] (anything but the
+     * letter "d"), not [^\d]; this may be a typo, but the later _andition()
+     * step relies on some " e " conjunctions surviving, so confirm the
+     * intended behaviour before changing it.
+     *
+     * @param string $string  Text to pre-process.
+     *
+     * @return string  The pre-processed text.
+     */
+    protected function _splitHyphenateWords($string)
+    {
+        return preg_replace('/ +|([^\d]) e? ([^d])/', '$1 $2', $string);
+    }
+
+    /**
+     * Replaces the number words listed in $DIRECT_NUMS with their digits.
+     *
+     * Every key is used as a case-insensitive regex fragment, and the
+     * entries are applied one after the other in array order.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text after all replacements.
+     */
+    protected function _directReplacements($string)
+    {
+        $words = array_keys($this->DIRECT_NUMS);
+        $digits = array_values($this->DIRECT_NUMS);
+        $total = count($words);
+
+        for ($n = 0; $n < $total; $n++) {
+            $pattern = '/' . $words[$n] . '/i';
+            $string = preg_replace($pattern, $digits[$n], $string);
+        }
+
+        return $string;
+    }
+
+    /**
+     * Replaces the words for the tens (twenty, thirty, ...) with their
+     * value, adding a single digit that directly follows the word.
+     *
+     * The pattern matches the word followed by any number of "spaces plus
+     * one digit" groups, each digit being followed by a non-digit or the
+     * end of the string; the last such group is added to the value.
+     *
+     * Uses a closure rather than create_function(), which is deprecated as
+     * of PHP 7.2 and removed in PHP 8.0.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text with the tens replaced.
+     */
+    protected function _replaceTenPrefixes($string)
+    {
+        foreach ($this->TEN_PREFIXES as $tp => $tp_replacement) {
+            $tens = (int)$tp_replacement;
+            $string = preg_replace_callback(
+                "/(?:$tp)( *\d(?=[^\d]|\$))*/i",
+                function ($m) use ($tens) {
+                    // $m[1] exists only if a digit followed the word.
+                    return (string)($tens + (isset($m[1]) ? (int)$m[1] : 0));
+                },
+                $string);
+        }
+        return $string;
+    }
+
+    /**
+     * Replaces the multiplier words (hundreds, thousands, millions, etc.)
+     * with their value times the number written before them.
+     *
+     * Each match is rewritten as "<value * factor>e<digit>", where the "e"
+     * marks a pending addition that _andition() resolves right after. For
+     * the value 100 the marker is only emitted when a non-zero digit
+     * follows. A missing or zero factor counts as 1.
+     *
+     * Uses a closure rather than create_function(), which is deprecated as
+     * of PHP 7.2 and removed in PHP 8.0.
+     *
+     * @param string $string  Text to convert.
+     *
+     * @return string  The text with the multipliers replaced.
+     */
+    protected function _replaceBigPrefixes($string)
+    {
+        foreach ($this->BIG_PREFIXES as $bp => $bp_replacement) {
+            $isHundred = ($bp_replacement == 100);
+            $string = preg_replace_callback(
+                '/(\d*) *' . $bp . '(\d?)/i',
+                function ($m) use ($bp_replacement, $isHundred) {
+                    $factor = (int)$m[1];
+                    if (!$factor) {
+                        $factor = 1;
+                    }
+                    $marker = ($isHundred && !$m[2]) ? '' : 'e';
+                    // The numeric string is multiplied as-is so that large
+                    // values are handled exactly like a numeric literal.
+                    return ($bp_replacement * $factor) . $marker . $m[2];
+                },
+                $string);
+            $string = $this->_andition($string);
+        }
+        return $string;
+    }
+
+    /**
+     * Adds up numbers joined by the conjunction "e".
+     *
+     * While the string contains "<digits> e <digits>" (one or more "e",
+     * optional spaces, second number possibly empty) followed by a word
+     * character or the end of the string, that span is replaced by the sum
+     * of both numbers; an empty second number counts as 0.
+     *
+     * @param string $string  Text to process.
+     *
+     * @return string  The text with the additions carried out.
+     */
+    protected function _andition($string)
+    {
+        while (preg_match('/(\d+)((?: *e *)+)(\d*)(?=\w|$)/i', $string, $sc, PREG_OFFSET_CAPTURE)) {
+            // With PREG_OFFSET_CAPTURE each group is array(text, offset):
+            // keep what precedes the first number, insert the sum, then
+            // continue right after the second number.
+            $string = substr($string, 0, $sc[1][1]) . ((int)$sc[1][0] + (int)$sc[3][0]) . substr($string, $sc[3][1] + strlen($sc[3][0]));
+        }
+        return $string;
+    }
+
+    /**
+     * Adds 0.5 to every number in the string, consuming any spaces, " e "
+     * or hyphens that follow it.
+     *
+     * Not part of numerize(): the call there is commented out.
+     * NOTE(review): presumably a placeholder for "and a half" handling --
+     * confirm before enabling it.
+     *
+     * Uses a closure rather than create_function(), which is deprecated as
+     * of PHP 7.2 and removed in PHP 8.0.
+     *
+     * @param string $string  Text to process.
+     *
+     * @return string  The text with every number increased by 0.5.
+     */
+    protected function _fractionalAddition($string)
+    {
+        return preg_replace_callback(
+            '/(\d+)(?: | e |-)*/i',
+            function ($m) {
+                return (string)((float)$m[1] + 0.5);
+            },
+            $string);
+    }
+
+}
index e6f8270..85fa66e 100644 (file)
@@ -1,22 +1,19 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<package packagerversion="1.4.9" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0
-http://pear.php.net/dtd/tasks-1.0.xsd
-http://pear.php.net/dtd/package-2.0
-http://pear.php.net/dtd/package-2.0.xsd">
+<package packagerversion="1.9.1" version="2.0" xmlns="http://pear.php.net/dtd/package-2.0" xmlns:tasks="http://pear.php.net/dtd/tasks-1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pear.php.net/dtd/tasks-1.0 http://pear.php.net/dtd/tasks-1.0.xsd http://pear.php.net/dtd/package-2.0 http://pear.php.net/dtd/package-2.0.xsd">
  <name>Support</name>
  <channel>pear.horde.org</channel>
  <summary>Horde support package</summary>
- <description>This package provides supporting functionality for Horde that is not tied to Horde but is used by it. These classes can be used outside of Horde as well.
- </description>
+ <description>This package provides supporting functionality for Horde that is not tied to Horde but is used by it. These classes can be used outside of Horde as well.</description>
  <lead>
   <name>Chuck Hagenbuch</name>
   <user>chuck</user>
   <email>chuck@horde.org</email>
   <active>yes</active>
  </lead>
- <date>2008-08-01</date>
+ <date>2010-07-10</date>
+ <time>10:39:15</time>
  <version>
-  <release>0.1.0</release>
+  <release>0.2.0</release>
   <api>0.1.0</api>
  </version>
  <stability>
@@ -25,21 +22,10 @@ http://pear.php.net/dtd/package-2.0.xsd">
  </stability>
  <license uri="http://opensource.org/licenses/bsd-license.php">BSD</license>
  <notes>
-   * Add Horde_Support_CombineStream::.
-   * Initial horde/support package
-   * Initial Horde_Support_Array object
-   * Initial Horde_Support_Backtrace object
-   * Initial Horde_Support_ConsistentHash object
-   * Initial Horde_Support_Inflector object
-   * Initial Horde_Support_Stack object
-   * Initial Horde_Support_StringStream object
-   * Initial Horde_Support_Stub object
-   * Initial Horde_Support_Timer object
-   * Initial Horde_Support_Uuid object
-   * Initial Horde_Support_Numerizer objects
+* Add Portuguese numerizer.
  </notes>
  <contents>
-  <dir name="/">
+  <dir baseinstalldir="/" name="/">
    <dir name="lib">
     <dir name="Horde">
      <dir name="Support">
@@ -47,6 +33,7 @@ http://pear.php.net/dtd/package-2.0.xsd">
        <dir name="Locale">
         <file name="Base.php" role="php" />
         <file name="De.php" role="php" />
+        <file name="Pt.php" role="php" />
        </dir> <!-- /lib/Horde/Support/Numerizer/Locale -->
       </dir> <!-- /lib/Horde/Support/Numerizer -->
       <file name="Array.php" role="php" />
@@ -64,6 +51,28 @@ http://pear.php.net/dtd/package-2.0.xsd">
      </dir> <!-- /lib/Horde/Support -->
     </dir> <!-- /lib/Horde -->
    </dir> <!-- /lib -->
+   <dir name="test">
+    <dir name="Horde">
+     <dir name="Support">
+      <dir name="Numerizer">
+       <dir name="Locale">
+        <file name="BaseTest.php" role="test" />
+        <file name="DeTest.php" role="test" />
+       </dir> <!-- /test/Horde/Support/Numerizer/Locale -->
+      </dir> <!-- /test/Horde/Support/Numerizer -->
+      <file name="AllTests.php" role="test" />
+      <file name="ArrayTest.php" role="test" />
+      <file name="BacktraceTest.php" role="test" />
+      <file name="CombineStreamTest.php" role="test" />
+      <file name="ConsistentHashTest.php" role="test" />
+      <file name="InflectorTest.php" role="test" />
+      <file name="StringStreamTest.php" role="test" />
+      <file name="StubTest.php" role="test" />
+      <file name="TimerTest.php" role="test" />
+      <file name="UuidTest.php" role="test" />
+     </dir> <!-- /test/Horde/Support -->
+    </dir> <!-- /test/Horde -->
+   </dir> <!-- /test -->
   </dir> <!-- / -->
  </contents>
  <dependencies>
@@ -82,20 +91,76 @@ http://pear.php.net/dtd/package-2.0.xsd">
  </dependencies>
  <phprelease>
   <filelist>
-   <install name="lib/Horde/Support/Array.php" as="Horde/Support/Array.php" />
-   <install name="lib/Horde/Support/Backtrace.php" as="Horde/Support/Backtrace.php" />
-   <install name="lib/Horde/Support/CombineStream.php" as="Horde/Support/CombineStream.php" />
-   <install name="lib/Horde/Support/ConsistentHash.php" as="Horde/Support/ConsistentHash.php" />
-   <install name="lib/Horde/Support/Guid.php" as="Horde/Support/Guid.php" />
-   <install name="lib/Horde/Support/Inflector.php" as="Horde/Support/Inflector.php" />
-   <install name="lib/Horde/Support/Numerizer/Locale/Base.php" as="Horde/Support/Numerizer/Locale/Base.php" />
-   <install name="lib/Horde/Support/Numerizer/Locale/De.php" as="Horde/Support/Numerizer/Locale/De.php" />
-   <install name="lib/Horde/Support/Numerizer.php" as="Horde/Support/Numerizer.php" />
-   <install name="lib/Horde/Support/Stack.php" as="Horde/Support/Stack.php" />
-   <install name="lib/Horde/Support/StringStream.php" as="Horde/Support/StringStream.php" />
-   <install name="lib/Horde/Support/Stub.php" as="Horde/Support/Stub.php" />
-   <install name="lib/Horde/Support/Timer.php" as="Horde/Support/Timer.php" />
-   <install name="lib/Horde/Support/Uuid.php" as="Horde/Support/Uuid.php" />
+   <install as="Horde/Support/Array.php" name="lib/Horde/Support/Array.php" />
+   <install as="Horde/Support/Backtrace.php" name="lib/Horde/Support/Backtrace.php" />
+   <install as="Horde/Support/CombineStream.php" name="lib/Horde/Support/CombineStream.php" />
+   <install as="Horde/Support/ConsistentHash.php" name="lib/Horde/Support/ConsistentHash.php" />
+   <install as="Horde/Support/Guid.php" name="lib/Horde/Support/Guid.php" />
+   <install as="Horde/Support/Inflector.php" name="lib/Horde/Support/Inflector.php" />
+   <install as="Horde/Support/Numerizer.php" name="lib/Horde/Support/Numerizer.php" />
+   <install as="Horde/Support/Stack.php" name="lib/Horde/Support/Stack.php" />
+   <install as="Horde/Support/StringStream.php" name="lib/Horde/Support/StringStream.php" />
+   <install as="Horde/Support/Stub.php" name="lib/Horde/Support/Stub.php" />
+   <install as="Horde/Support/Timer.php" name="lib/Horde/Support/Timer.php" />
+   <install as="Horde/Support/Uuid.php" name="lib/Horde/Support/Uuid.php" />
+   <install as="Horde/Support/Numerizer/Locale/Base.php" name="lib/Horde/Support/Numerizer/Locale/Base.php" />
+   <install as="Horde/Support/Numerizer/Locale/De.php" name="lib/Horde/Support/Numerizer/Locale/De.php" />
+   <install as="Horde/Support/Numerizer/Locale/Pt.php" name="lib/Horde/Support/Numerizer/Locale/Pt.php" />
+   <install as="Horde/Support/AllTests.php" name="test/Horde/Support/AllTests.php" />
+   <install as="Horde/Support/ArrayTest.php" name="test/Horde/Support/ArrayTest.php" />
+   <install as="Horde/Support/BacktraceTest.php" name="test/Horde/Support/BacktraceTest.php" />
+   <install as="Horde/Support/CombineStreamTest.php" name="test/Horde/Support/CombineStreamTest.php" />
+   <install as="Horde/Support/ConsistentHashTest.php" name="test/Horde/Support/ConsistentHashTest.php" />
+   <install as="Horde/Support/InflectorTest.php" name="test/Horde/Support/InflectorTest.php" />
+   <install as="Horde/Support/StringStreamTest.php" name="test/Horde/Support/StringStreamTest.php" />
+   <install as="Horde/Support/StubTest.php" name="test/Horde/Support/StubTest.php" />
+   <install as="Horde/Support/TimerTest.php" name="test/Horde/Support/TimerTest.php" />
+   <install as="Horde/Support/UuidTest.php" name="test/Horde/Support/UuidTest.php" />
+   <install as="Horde/Support/Numerizer/Locale/BaseTest.php" name="test/Horde/Support/Numerizer/Locale/BaseTest.php" />
+   <install as="Horde/Support/Numerizer/Locale/DeTest.php" name="test/Horde/Support/Numerizer/Locale/DeTest.php" />
   </filelist>
  </phprelease>
+ <changelog>
+  <release>
+   <version>
+    <release>0.1.0</release>
+    <api>0.1.0</api>
+   </version>
+   <stability>
+    <release>beta</release>
+    <api>beta</api>
+   </stability>
+   <date>2008-08-01</date>
+   <license uri="http://opensource.org/licenses/bsd-license.php">BSD</license>
+   <notes>
+* Add Horde_Support_CombineStream::.
+* Initial horde/support package
+* Initial Horde_Support_Array object
+* Initial Horde_Support_Backtrace object
+* Initial Horde_Support_ConsistentHash object
+* Initial Horde_Support_Inflector object
+* Initial Horde_Support_Stack object
+* Initial Horde_Support_StringStream object
+* Initial Horde_Support_Stub object
+* Initial Horde_Support_Timer object
+* Initial Horde_Support_Uuid object
+* Initial Horde_Support_Numerizer objects
+   </notes>
+  </release>
+  <release>
+   <version>
+    <release>0.2.0</release>
+    <api>0.1.0</api>
+   </version>
+   <stability>
+    <release>beta</release>
+    <api>beta</api>
+   </stability>
+   <date>2010-07-10</date>
+   <license uri="http://opensource.org/licenses/bsd-license.php">BSD</license>
+   <notes>
+* Add Portuguese numerizer.
+   </notes>
+  </release>
+ </changelog>
 </package>