diff -r -u icu/source/common/unicode/utypes.h icu.new/source/common/unicode/utypes.h
--- icu/source/common/unicode/utypes.h	2005-06-28 23:18:08.000000000 +0100
+++ icu.new/source/common/unicode/utypes.h	2006-08-06 14:40:17.000000000 +0100
@@ -706,6 +706,8 @@
     U_REGEX_INVALID_FLAG,                 /**< Invalid value for match mode flags.                */
     U_REGEX_LOOK_BEHIND_LIMIT,            /**< Look-Behind pattern matches must have a bounded maximum length.    */
     U_REGEX_SET_CONTAINS_STRING,          /**< Regexps cannot have UnicodeSets containing strings.*/
+    U_REGEX_UNKNOWN_GROUP_NAME,           /**< Unrecognized group name.                           */
+    U_REGEX_UNTERMINATED_GROUP_NAME,      /**< Unterminated group name.                           */
     U_REGEX_ERROR_LIMIT,                  /**< This must always be the last value to indicate the limit for regexp errors */
 
     /*
diff -r -u icu/source/common/utypes.c icu.new/source/common/utypes.c
--- icu/source/common/utypes.c	2005-07-12 21:32:00.000000000 +0100
+++ icu.new/source/common/utypes.c	2006-08-05 23:27:54.000000000 +0100
@@ -154,7 +154,9 @@
     "U_REGEX_INVALID_BACK_REF",
     "U_REGEX_INVALID_FLAG",
     "U_REGEX_LOOK_BEHIND_LIMIT",
-    "U_REGEX_SET_CONTAINS_STRING"
+    "U_REGEX_SET_CONTAINS_STRING",
+    "U_REGEX_UNKNOWN_GROUP_NAME",
+    "U_REGEX_UNTERMINATED_GROUP_NAME"
 };
 /* TODO: replace the definition with _uIDNAErrorName[U_IDNA_ERROR_LIMIT - U_IDNA_ERROR_START ]
  * in ICU 3.6
diff -r -u icu/source/i18n/regexcmp.cpp icu.new/source/i18n/regexcmp.cpp
--- icu/source/i18n/regexcmp.cpp	2004-12-30 07:25:50.000000000 +0000
+++ icu.new/source/i18n/regexcmp.cpp	2006-08-06 14:41:08.000000000 +0100
@@ -1,4 +1,3 @@
-
 //
 //  file:  regexcmp.cpp
 //
@@ -24,6 +23,7 @@
 #include "util.h"
 #include "cmemory.h"
 #include "cstring.h"
+#include "uvector.h"
 #include "uvectr32.h"
 #include "uassert.h"
 #include "ucln_in.h"
@@ -48,7 +48,7 @@
 //  Constructor.
 //
 //------------------------------------------------------------------------------
-RegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) : fParenStack(status)
+RegexCompile::RegexCompile(RegexPattern *rxp, UErrorCode &status) : fParenStack(status), fParenNameStack(status)
 {
     fStatus           = &status;
 
@@ -402,7 +402,22 @@
         }
         break;
 
+    case doOpenPyNamedParen:
+        // Python-style named paren, e.g. (?P<name>...).
+        //   Scan the name and push it onto the name stack, then proceed as for
+        //   a normal capturing paren.  The pushed name is popped again by the
+        //   'named' frame-type case when the group is closed.
+        {
+            UnicodeString *name = scanName ();
+            // NULL: scanName() failed; any new error was already passed to error().
+            if (!name) {
+                break;
+            }
 
+            fParenNameStack.push(name, *fStatus);
+        }
+        // No break; deliberately fall through into doOpenCaptureParen.
+            
     case doOpenCaptureParen:
         // Open Paren.
         //   Compile to a
@@ -434,7 +449,11 @@
             //   NOPs may be changed to SAVE_STATE or JMP ops, with a target
             //   address of the end of the parenthesized group.
             fParenStack.push(fModeFlags, *fStatus);                       // Match mode state
-            fParenStack.push(capturing, *fStatus);                        // Frame type.
+            if ((Regex_PatternParseAction)action == doOpenCaptureParen) {
+                fParenStack.push(capturing, *fStatus);                    // Frame type.
+            } else if ((Regex_PatternParseAction)action == doOpenPyNamedParen) {
+                fParenStack.push(named, *fStatus);
+            }
             fParenStack.push(fRXPat->fCompiledPat->size()-3, *fStatus);   // The first  NOP location
             fParenStack.push(fRXPat->fCompiledPat->size()-1, *fStatus);   // The second NOP loc
 
@@ -1199,6 +1218,35 @@
         }
         break;
 
+    case doPyNamedBackRef:
+        // Python-style named back-reference, (?P=name).
+        //   Emits URX_NAMEDBACKREF whose operand is the index of the name
+        //   in the pattern's list of named groups.
+        {
+            // Find the name in the name table.  We must have seen the name
+            // already at this point, otherwise using it is invalid.
+            UnicodeString *name = scanName();
+            if (!name) {
+                break;              // scanName() failed; nothing to free.
+            }
+            int32_t nameIndex;
+            int32_t nameCount = fRXPat->fNamedGroups->size ();
+            for (nameIndex = 0; nameIndex < nameCount; ++nameIndex) {
+                UnicodeString *otherName = (UnicodeString *)fRXPat->fNamedGroups->elementAt(nameIndex);
+                if (*otherName == *name) {
+                    break;
+                }
+            }
+            delete name;            // Only needed for the lookup; scanName() heap-allocated it.
+            if (nameIndex >= nameCount) {
+                error(U_REGEX_UNKNOWN_GROUP_NAME);
+                break;
+            }
+
+            int32_t op = URX_BUILD(URX_NAMEDBACKREF, nameIndex);
+            fRXPat->fCompiledPat->addElement(op, *fStatus);
+        }
+        break;
 
     case doPossessivePlus:
         // Possessive ++ quantifier.
@@ -1750,6 +1798,38 @@
             fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
         }
         break;
+    case named:
+        // Named Parentheses.
+        //   As for capturing, but a URX_NAME_CAPTURE op carrying the index of
+        //   the group's name is emitted just before the URX_END_CAPTURE.
+        {
+            int32_t   captureOp = fRXPat->fCompiledPat->elementAti(fMatchOpenParen+1);
+            U_ASSERT(URX_TYPE(captureOp) == URX_START_CAPTURE);
+
+            // Pop the name pushed when the group was opened; re-use the existing
+            // entry if it is already in this pattern's list of named groups.
+            UnicodeString *name = (UnicodeString *)fParenNameStack.pop();
+            int32_t   nameIndex;
+            int32_t   nameCount = fRXPat->fNamedGroups->size ();
+            for (nameIndex = 0; nameIndex < nameCount; ++nameIndex) {
+                UnicodeString *otherName = (UnicodeString *)fRXPat->fNamedGroups->elementAt(nameIndex);
+                if (*otherName == *name) {
+                    break;
+                }
+            }
+            if (nameIndex == nameCount) {
+                fRXPat->fNamedGroups->addElement(name, *fStatus);   // New name: the list keeps the pointer.
+            } else {
+                delete name;        // Duplicate: the popped copy is not stored anywhere, so free it.
+            }
+
+            int32_t   endNameCaptureOp = URX_BUILD(URX_NAME_CAPTURE, nameIndex);
+            fRXPat->fCompiledPat->addElement(endNameCaptureOp, *fStatus);
+            int32_t   frameVarLocation = URX_VAL(captureOp);
+            int32_t   endCaptureOp = URX_BUILD(URX_END_CAPTURE, frameVarLocation);
+            fRXPat->fCompiledPat->addElement(endCaptureOp, *fStatus);
+        }
+        break;
     case atomic:
         // Atomic Parenthesis.
         //   Insert a LD_SP operation to restore the state stack to the position
@@ -2109,6 +2189,7 @@
         case URX_STRING_LEN:
         case URX_NOP:
         case URX_START_CAPTURE:
+        case URX_NAME_CAPTURE:
         case URX_END_CAPTURE:
         case URX_BACKSLASH_B:
         case URX_BACKSLASH_BU:
@@ -2121,6 +2202,7 @@
         case URX_BACKTRACK:
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
+        case URX_NAMEDBACKREF:
 
         case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
         case URX_LD_SP:
@@ -2580,6 +2662,7 @@
         case URX_STRING_LEN:
         case URX_NOP:
         case URX_START_CAPTURE:
+        case URX_NAME_CAPTURE:
         case URX_END_CAPTURE:
         case URX_BACKSLASH_B:
         case URX_BACKSLASH_BU:
@@ -2594,6 +2677,7 @@
         case URX_BACKTRACK:
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
+        case URX_NAMEDBACKREF:
 
         case URX_STO_SP:          // Setup for atomic or possessive blocks.  Doesn't change what can match.
         case URX_LD_SP:
@@ -2820,6 +2904,7 @@
         case URX_STRING_LEN:
         case URX_NOP:
         case URX_START_CAPTURE:
+        case URX_NAME_CAPTURE:
         case URX_END_CAPTURE:
         case URX_BACKSLASH_B:
         case URX_BACKSLASH_BU:
@@ -2848,6 +2933,7 @@
             //   Call the max length unbounded, and stop further checking.
         case URX_BACKREF:         // BackRef.  Must assume that it might be a zero length match
         case URX_BACKREF_I:
+        case URX_NAMEDBACKREF:
         case URX_BACKSLASH_X:   // Grahpeme Cluster.  Minimum is 1, max unbounded.
         case URX_DOTANY_PL:
         case URX_DOTANY_ALL_PL:
@@ -3069,6 +3155,7 @@
         case URX_STRING:
         case URX_STRING_LEN:
         case URX_START_CAPTURE:
+        case URX_NAME_CAPTURE:
         case URX_END_CAPTURE:
         case URX_STATIC_SETREF:
         case URX_STAT_SETREF_N:
@@ -3091,6 +3178,7 @@
         case URX_STO_SP:
         case URX_LD_SP:
         case URX_BACKREF:
+        case URX_NAMEDBACKREF:
         case URX_STO_INP_LOC:
         case URX_LA_START:
         case URX_LA_END:
@@ -3157,6 +3245,7 @@
             
         case URX_END:
         case URX_NOP:
+        case URX_NAME_CAPTURE:
         case URX_END_CAPTURE:
         case URX_DOLLAR_M:
         case URX_DOLLAR:
@@ -3247,7 +3336,9 @@
 static const UChar      chUpperN    = 0x4E;
 static const UChar      chLowerP    = 0x70;
 static const UChar      chUpperP    = 0x50;
-
+static const UChar      chLAngle    = 0x3c;     // '<'
+static const UChar      chEquals    = 0x3d;     // '='
+static const UChar      chRAngle    = 0x3e;     // '>'
 
 //------------------------------------------------------------------------------
 //
@@ -3527,6 +3618,62 @@
     return uset;
 }
 
+
+//------------------------------------------------------------------------------
+//
+//  scanName   Scan the name for a named group, e.g. (?P<name>foo), or a named
+//             backreference, e.g. (?P=name).
+//
+//             Case 1: Named group
+//             The scan position will be at the '<'.  On return, the scan
+//             position should be just after the '>'.
+//
+//             Case 2: Named backreference
+//             The scan position will be at the '='.  On return the scan
+//             position should be just after the ')'.
+//
+//             Return a heap-allocated UnicodeString containing the name;
+//             the caller owns it and must delete it or store it somewhere
+//             that will.  Return NULL if *fStatus is already a failure, if
+//             the allocation fails, or if the pattern ends before the
+//             terminating character is found.
+//
+//------------------------------------------------------------------------------
+UnicodeString *RegexCompile::scanName(void)
+{
+    if (U_FAILURE(*fStatus)) {
+        return NULL;
+    }
+
+    U_ASSERT(fC.fChar == chLAngle || fC.fChar == chEquals);
+
+    // '<' opens a group name that ends at '>'; '=' opens a back-reference
+    // name that ends at the ')' closing the whole (?P=name) construct.
+    UChar chTerm = (fC.fChar == chLAngle) ? chRAngle : chRParen;
+
+    UnicodeString *name = new UnicodeString();
+    if (!name) {
+        error(U_MEMORY_ALLOCATION_ERROR);
+        return NULL;
+    }
+    do {
+        nextChar(fC);
+        if (fC.fChar == -1) {
+            // Hit the end of the input string without finding the closing
+            // character.  Free the partial name; the caller only sees NULL.
+            error(U_REGEX_UNTERMINATED_GROUP_NAME);
+            delete name;
+            return NULL;
+        }
+        if (fC.fChar != chTerm)
+            name->append (fC.fChar);
+    } while (fC.fChar != chTerm);
+
+    nextChar(fC);       // Step past the terminator.
+
+    return name;
+}
+
 U_NAMESPACE_END
 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
 
diff -r -u icu/source/i18n/regexcmp.h icu.new/source/i18n/regexcmp.h
--- icu/source/i18n/regexcmp.h	2003-04-03 00:10:16.000000000 +0100
+++ icu.new/source/i18n/regexcmp.h	2006-08-06 14:41:46.000000000 +0100
@@ -77,7 +77,8 @@
         negLookAhead = -5,
         flags        = -6,
         lookBehind   = -7,
-        lookBehindN  = -8
+        lookBehindN  = -8,
+        named        = -9
     };
 
 private:
@@ -90,6 +91,7 @@
     UChar32     peekCharLL();
     UnicodeSet  *scanSet();
     UnicodeSet  *scanProp();
+    UnicodeString *scanName();
     void        handleCloseParen();
     int32_t     blockTopLoc(UBool reserve);          // Locate a position in the compiled pattern
                                                      //  at the top of the just completed block
@@ -175,6 +177,7 @@
                                                      //   the kind of paren that opened the frame.  Some
                                                      //   need special handling on close.
 
+    UStack                        fParenNameStack;   // Parenthesis name stack.
 
     int32_t                       fMatchOpenParen;   // The position in the compiled pattern
                                                      //   of the slot reserved for a state save
diff -r -u icu/source/i18n/regexcst.h icu.new/source/i18n/regexcst.h
--- icu/source/i18n/regexcst.h	2003-11-08 02:01:42.000000000 +0000
+++ icu.new/source/i18n/regexcst.h	2006-08-06 12:10:11.000000000 +0100
@@ -16,75 +16,77 @@
 //
 // Character classes for regex pattern scanning.
 //
-    static const uint8_t kRuleSet_digit_char = 128;
-    static const uint8_t kRuleSet_white_space = 129;
+    static const uint8_t kRuleSet_white_space = 128;
+    static const uint8_t kRuleSet_digit_char = 129;
     static const uint8_t kRuleSet_rule_char = 130;
 
 
 enum Regex_PatternParseAction {
-    doPossessivePlus,
-    doCloseParen,
-    doProperty,
-    doBeginMatchMode,
-    doOrOperator,
-    doOpenCaptureParen,
-    doBadOpenParenType,
-    doRuleError,
+    doLiteralChar,
+    doBackslashA,
+    doNOP,
+    doBackslashG,
+    doPerlInline,
+    doPyNamedBackRef,
     doIntevalLowerDigit,
-    doBackslashs,
-    doNGOpt,
-    doBackslashw,
-    doMismatchedParenErr,
-    doOpenLookBehind,
-    doBackslashz,
+    doProperty,
+    doBackslashX,
+    doOpenAtomicParen,
+    doOpenPyNamedParen,
+    doPatFinish,
+    doNGPlus,
+    doOpenLookBehindNeg,
     doIntervalError,
-    doStar,
-    doCaret,
-    doEnterQuoteMode,
-    doNGStar,
-    doMatchMode,
-    doIntervalUpperDigit,
-    doOpenLookAheadNeg,
+    doIntervalSame,
+    doBackRef,
     doPlus,
+    doOpenCaptureParen,
+    doMismatchedParenErr,
+    doBeginMatchMode,
+    doEscapeError,
     doOpenNonCaptureParen,
-    doBackslashA,
-    doBackslashB,
-    doNGPlus,
+    doDollar,
+    doIntervalUpperDigit,
+    doBackslashs,
+    doOpenLookBehind,
     doSetMatchMode,
-    doPatFinish,
-    doBackslashD,
-    doPossessiveInterval,
-    doEscapeError,
-    doBackslashG,
-    doSuppressComments,
+    doOrOperator,
+    doCaret,
     doMatchModeParen,
+    doStar,
     doOpt,
-    doInterval,
-    doLiteralChar,
-    doIntervalInit,
-    doOpenAtomicParen,
-    doBackslashS,
-    doOpenLookAhead,
-    doBackRef,
-    doDollar,
-    doDotAny,
+    doMatchMode,
+    doSuppressComments,
+    doPossessiveInterval,
+    doOpenLookAheadNeg,
     doBackslashW,
-    doBackslashX,
+    doCloseParen,
+    doIntervalInit,
     doScanUnicodeSet,
-    doBackslashZ,
-    doPerlInline,
+    doNGStar,
+    doEnterQuoteMode,
+    doBackslashB,
+    doBackslashw,
     doPossessiveOpt,
-    doNOP,
+    doRuleError,
+    doBackslashb,
     doConditionalExpr,
-    doExit,
+    doPossessivePlus,
+    doBadOpenParenType,
     doNGInterval,
-    doPatStart,
+    doBackslashd,
+    doBackslashD,
+    doExit,
+    doInterval,
+    doNGOpt,
+    doBackslashS,
+    doBackslashZ,
+    doOpenLookAhead,
     doBadModeFlag,
-    doBackslashb,
+    doPatStart,
     doPossessiveStar,
-    doBackslashd,
-    doIntervalSame,
-    doOpenLookBehindNeg,
+    doBackslashz,
+    doDotAny,
     rbbiLastAction};
 
 //-------------------------------------------------------------------------------
@@ -112,15 +114,15 @@
     , {doDotAny, 46 /* . */, 14,0,  TRUE}     //  6 
     , {doCaret, 94 /* ^ */, 2,0,  TRUE}     //  7 
     , {doDollar, 36 /* $ */, 2,0,  TRUE}     //  8 
-    , {doNOP, 92 /* \ */, 81,0,  TRUE}     //  9 
+    , {doNOP, 92 /* \ */, 84,0,  TRUE}     //  9 
     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  10 
     , {doCloseParen, 41 /* ) */, 255,0,  TRUE}     //  11 
     , {doPatFinish, 253, 2,0,  FALSE}     //  12 
-    , {doRuleError, 255, 101,0,  FALSE}     //  13 
-    , {doNOP, 42 /* * */, 59,0,  TRUE}     //  14      expr-quant
-    , {doNOP, 43 /* + */, 62,0,  TRUE}     //  15 
-    , {doNOP, 63 /* ? */, 65,0,  TRUE}     //  16 
-    , {doIntervalInit, 123 /* { */, 68,0,  TRUE}     //  17 
+    , {doRuleError, 255, 104,0,  FALSE}     //  13 
+    , {doNOP, 42 /* * */, 62,0,  TRUE}     //  14      expr-quant
+    , {doNOP, 43 /* + */, 65,0,  TRUE}     //  15 
+    , {doNOP, 63 /* ? */, 68,0,  TRUE}     //  16 
+    , {doIntervalInit, 123 /* { */, 71,0,  TRUE}     //  17 
     , {doNOP, 40 /* ( */, 23,0,  TRUE}     //  18 
     , {doNOP, 255, 20,0,  FALSE}     //  19 
     , {doOrOperator, 124 /* | */, 2,0,  TRUE}     //  20      expr-cont
@@ -128,7 +130,7 @@
     , {doNOP, 255, 2,0,  FALSE}     //  22 
     , {doSuppressComments, 63 /* ? */, 25,0,  TRUE}     //  23      open-paren-quant
     , {doNOP, 255, 27,0,  FALSE}     //  24 
-    , {doNOP, 35 /* # */, 47, 14, TRUE}     //  25      open-paren-quant2
+    , {doNOP, 35 /* # */, 50, 14, TRUE}     //  25      open-paren-quant2
     , {doNOP, 255, 29,0,  FALSE}     //  26 
     , {doSuppressComments, 63 /* ? */, 29,0,  TRUE}     //  27      open-paren
     , {doOpenCaptureParen, 255, 2, 14, FALSE}     //  28 
@@ -136,75 +138,78 @@
     , {doOpenAtomicParen, 62 /* > */, 2, 14, TRUE}     //  30 
     , {doOpenLookAhead, 61 /* = */, 2, 20, TRUE}     //  31 
     , {doOpenLookAheadNeg, 33 /* ! */, 2, 20, TRUE}     //  32 
-    , {doNOP, 60 /* < */, 44,0,  TRUE}     //  33 
-    , {doNOP, 35 /* # */, 47, 2, TRUE}     //  34 
-    , {doBeginMatchMode, 105 /* i */, 50,0,  FALSE}     //  35 
-    , {doBeginMatchMode, 109 /* m */, 50,0,  FALSE}     //  36 
-    , {doBeginMatchMode, 115 /* s */, 50,0,  FALSE}     //  37 
-    , {doBeginMatchMode, 119 /* w */, 50,0,  FALSE}     //  38 
-    , {doBeginMatchMode, 120 /* x */, 50,0,  FALSE}     //  39 
-    , {doBeginMatchMode, 45 /* - */, 50,0,  FALSE}     //  40 
-    , {doConditionalExpr, 40 /* ( */, 101,0,  TRUE}     //  41 
-    , {doPerlInline, 123 /* { */, 101,0,  TRUE}     //  42 
-    , {doBadOpenParenType, 255, 101,0,  FALSE}     //  43 
-    , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  44      open-paren-lookbehind
-    , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  45 
-    , {doBadOpenParenType, 255, 101,0,  FALSE}     //  46 
-    , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  47      paren-comment
-    , {doMismatchedParenErr, 253, 101,0,  FALSE}     //  48 
-    , {doNOP, 255, 47,0,  TRUE}     //  49 
-    , {doMatchMode, 105 /* i */, 50,0,  TRUE}     //  50      paren-flag
-    , {doMatchMode, 109 /* m */, 50,0,  TRUE}     //  51 
-    , {doMatchMode, 115 /* s */, 50,0,  TRUE}     //  52 
-    , {doMatchMode, 119 /* w */, 50,0,  TRUE}     //  53 
-    , {doMatchMode, 120 /* x */, 50,0,  TRUE}     //  54 
-    , {doMatchMode, 45 /* - */, 50,0,  TRUE}     //  55 
-    , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  56 
-    , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  57 
-    , {doBadModeFlag, 255, 101,0,  FALSE}     //  58 
-    , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  59      quant-star
-    , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  60 
-    , {doStar, 255, 20,0,  FALSE}     //  61 
-    , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  62      quant-plus
-    , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  63 
-    , {doPlus, 255, 20,0,  FALSE}     //  64 
-    , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  65      quant-opt
-    , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  66 
-    , {doOpt, 255, 20,0,  FALSE}     //  67 
-    , {doNOP, 129, 68,0,  TRUE}     //  68      interval-open
-    , {doNOP, 128, 71,0,  FALSE}     //  69 
-    , {doIntervalError, 255, 101,0,  FALSE}     //  70 
-    , {doIntevalLowerDigit, 128, 71,0,  TRUE}     //  71      interval-lower
-    , {doNOP, 44 /* , */, 75,0,  TRUE}     //  72 
-    , {doIntervalSame, 125 /* } */, 78,0,  TRUE}     //  73 
-    , {doIntervalError, 255, 101,0,  FALSE}     //  74 
-    , {doIntervalUpperDigit, 128, 75,0,  TRUE}     //  75      interval-upper
-    , {doNOP, 125 /* } */, 78,0,  TRUE}     //  76 
-    , {doIntervalError, 255, 101,0,  FALSE}     //  77 
-    , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  78      interval-type
-    , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  79 
-    , {doInterval, 255, 20,0,  FALSE}     //  80 
-    , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  81      backslash
-    , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  82 
-    , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  83 
-    , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  84 
-    , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  85 
-    , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  86 
-    , {doProperty, 78 /* N */, 14,0,  FALSE}     //  87 
-    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  88 
-    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  89 
-    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  90 
-    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  91 
-    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  92 
-    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  93 
-    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  94 
-    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  95 
-    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  96 
-    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  97 
-    , {doBackRef, 128, 14,0,  TRUE}     //  98 
-    , {doEscapeError, 253, 101,0,  FALSE}     //  99 
-    , {doLiteralChar, 255, 14,0,  TRUE}     //  100 
-    , {doExit, 255, 101,0,  TRUE}     //  101      errorDeath
+    , {doNOP, 60 /* < */, 45,0,  TRUE}     //  33 
+    , {doNOP, 35 /* # */, 50, 2, TRUE}     //  34 
+    , {doBeginMatchMode, 105 /* i */, 53,0,  FALSE}     //  35 
+    , {doBeginMatchMode, 109 /* m */, 53,0,  FALSE}     //  36 
+    , {doBeginMatchMode, 115 /* s */, 53,0,  FALSE}     //  37 
+    , {doBeginMatchMode, 119 /* w */, 53,0,  FALSE}     //  38 
+    , {doBeginMatchMode, 120 /* x */, 53,0,  FALSE}     //  39 
+    , {doBeginMatchMode, 45 /* - */, 53,0,  FALSE}     //  40 
+    , {doConditionalExpr, 40 /* ( */, 104,0,  TRUE}     //  41 
+    , {doPerlInline, 123 /* { */, 104,0,  TRUE}     //  42 
+    , {doNOP, 80 /* P */, 48,0,  TRUE}     //  43 
+    , {doBadOpenParenType, 255, 104,0,  FALSE}     //  44 
+    , {doOpenLookBehind, 61 /* = */, 2, 20, TRUE}     //  45      open-paren-lookbehind
+    , {doOpenLookBehindNeg, 33 /* ! */, 2, 20, TRUE}     //  46 
+    , {doBadOpenParenType, 255, 104,0,  FALSE}     //  47 
+    , {doOpenPyNamedParen, 60 /* < */, 2, 14, FALSE}     //  48      paren-python
+    , {doPyNamedBackRef, 61 /* = */, 14,0,  FALSE}     //  49 
+    , {doNOP, 41 /* ) */, 255,0,  TRUE}     //  50      paren-comment
+    , {doMismatchedParenErr, 253, 104,0,  FALSE}     //  51 
+    , {doNOP, 255, 50,0,  TRUE}     //  52 
+    , {doMatchMode, 105 /* i */, 53,0,  TRUE}     //  53      paren-flag
+    , {doMatchMode, 109 /* m */, 53,0,  TRUE}     //  54 
+    , {doMatchMode, 115 /* s */, 53,0,  TRUE}     //  55 
+    , {doMatchMode, 119 /* w */, 53,0,  TRUE}     //  56 
+    , {doMatchMode, 120 /* x */, 53,0,  TRUE}     //  57 
+    , {doMatchMode, 45 /* - */, 53,0,  TRUE}     //  58 
+    , {doSetMatchMode, 41 /* ) */, 2,0,  TRUE}     //  59 
+    , {doMatchModeParen, 58 /* : */, 2, 14, TRUE}     //  60 
+    , {doBadModeFlag, 255, 104,0,  FALSE}     //  61 
+    , {doNGStar, 63 /* ? */, 20,0,  TRUE}     //  62      quant-star
+    , {doPossessiveStar, 43 /* + */, 20,0,  TRUE}     //  63 
+    , {doStar, 255, 20,0,  FALSE}     //  64 
+    , {doNGPlus, 63 /* ? */, 20,0,  TRUE}     //  65      quant-plus
+    , {doPossessivePlus, 43 /* + */, 20,0,  TRUE}     //  66 
+    , {doPlus, 255, 20,0,  FALSE}     //  67 
+    , {doNGOpt, 63 /* ? */, 20,0,  TRUE}     //  68      quant-opt
+    , {doPossessiveOpt, 43 /* + */, 20,0,  TRUE}     //  69 
+    , {doOpt, 255, 20,0,  FALSE}     //  70 
+    , {doNOP, 128, 71,0,  TRUE}     //  71      interval-open
+    , {doNOP, 129, 74,0,  FALSE}     //  72 
+    , {doIntervalError, 255, 104,0,  FALSE}     //  73 
+    , {doIntevalLowerDigit, 129, 74,0,  TRUE}     //  74      interval-lower
+    , {doNOP, 44 /* , */, 78,0,  TRUE}     //  75 
+    , {doIntervalSame, 125 /* } */, 81,0,  TRUE}     //  76 
+    , {doIntervalError, 255, 104,0,  FALSE}     //  77 
+    , {doIntervalUpperDigit, 129, 78,0,  TRUE}     //  78      interval-upper
+    , {doNOP, 125 /* } */, 81,0,  TRUE}     //  79 
+    , {doIntervalError, 255, 104,0,  FALSE}     //  80 
+    , {doNGInterval, 63 /* ? */, 20,0,  TRUE}     //  81      interval-type
+    , {doPossessiveInterval, 43 /* + */, 20,0,  TRUE}     //  82 
+    , {doInterval, 255, 20,0,  FALSE}     //  83 
+    , {doBackslashA, 65 /* A */, 2,0,  TRUE}     //  84      backslash
+    , {doBackslashB, 66 /* B */, 2,0,  TRUE}     //  85 
+    , {doBackslashb, 98 /* b */, 2,0,  TRUE}     //  86 
+    , {doBackslashd, 100 /* d */, 14,0,  TRUE}     //  87 
+    , {doBackslashD, 68 /* D */, 14,0,  TRUE}     //  88 
+    , {doBackslashG, 71 /* G */, 2,0,  TRUE}     //  89 
+    , {doProperty, 78 /* N */, 14,0,  FALSE}     //  90 
+    , {doProperty, 112 /* p */, 14,0,  FALSE}     //  91 
+    , {doProperty, 80 /* P */, 14,0,  FALSE}     //  92 
+    , {doEnterQuoteMode, 81 /* Q */, 2,0,  TRUE}     //  93 
+    , {doBackslashS, 83 /* S */, 14,0,  TRUE}     //  94 
+    , {doBackslashs, 115 /* s */, 14,0,  TRUE}     //  95 
+    , {doBackslashW, 87 /* W */, 14,0,  TRUE}     //  96 
+    , {doBackslashw, 119 /* w */, 14,0,  TRUE}     //  97 
+    , {doBackslashX, 88 /* X */, 14,0,  TRUE}     //  98 
+    , {doBackslashZ, 90 /* Z */, 2,0,  TRUE}     //  99 
+    , {doBackslashz, 122 /* z */, 2,0,  TRUE}     //  100 
+    , {doBackRef, 129, 14,0,  TRUE}     //  101 
+    , {doEscapeError, 253, 104,0,  FALSE}     //  102 
+    , {doLiteralChar, 255, 14,0,  TRUE}     //  103 
+    , {doExit, 255, 104,0,  TRUE}     //  104      errorDeath
  };
 static const char * const RegexStateNames[] = {    0,
      "start",
@@ -250,9 +255,12 @@
     0,
     0,
     0,
+    0,
      "open-paren-lookbehind",
     0,
     0,
+     "paren-python",
+    0,
      "paren-comment",
     0,
     0,
diff -r -u icu/source/i18n/regexcst.txt icu.new/source/i18n/regexcst.txt
--- icu/source/i18n/regexcst.txt	2003-11-08 02:01:42.000000000 +0000
+++ icu.new/source/i18n/regexcst.txt	2006-08-06 12:09:38.000000000 +0100
@@ -1,4 +1,3 @@
-
 #*****************************************************************************
 #
 #   Copyright (C) 2002-2003, International Business Machines Corporation and others.
@@ -74,7 +73,7 @@
     '\'                  n backslash
     '|'                  n  term                                    doOrOperator
     ')'                  n  pop                                     doCloseParen
-    eof	                   term                                     doPatFinish
+    eof                    term                                     doPatFinish
     default                errorDeath                               doRuleError
     
 
@@ -140,13 +139,21 @@
     '-'                     paren-flag                              doBeginMatchMode
     '('                  n  errorDeath                              doConditionalExpr
     '{'                  n  errorDeath                              doPerlInline
+    'P'                  n  paren-python
     default                 errorDeath                              doBadOpenParenType
     
 open-paren-lookbehind:
     '='                  n  term            ^expr-cont              doOpenLookBehind       #  (?<=
     '!'                  n  term            ^expr-cont              doOpenLookBehindNeg    #  (?<!
     default                 errorDeath                              doBadOpenParenType
-    
+
+#
+#  paren-python      We've got a Python-style named group, e.g. (?P<id>...), or a Python named
+#                    group reference, e.g. (?P=id).  The '<' or '=' is left for the action to scan.
+#                    NOTE(review): unlike its neighbours, no 'default' row -- confirm (?Px is rejected.
+paren-python:
+    '<'                     term            ^expr-quant             doOpenPyNamedParen    # (?P<name>
+    '='                     expr-quant                              doPyNamedBackRef      # (?P=name)
 
 #
 #   paren-comment    We've got a (?# ... )  style comment.  Eat pattern text till we get to the ')'
@@ -154,7 +161,7 @@
 #
 paren-comment:
     ')'                  n  pop
-    eof		                errorDeath                              doMismatchedParenErr
+    eof                     errorDeath                              doMismatchedParenErr
     default              n  paren-comment
 
 #
@@ -213,7 +220,7 @@
     
 interval-lower:
     digit_char           n  interval-lower                          doIntevalLowerDigit
-    ','			         n  interval-upper
+    ','                  n  interval-upper
     '}'                  n  interval-type                           doIntervalSame             # {n}
     default                 errorDeath                              doIntervalError
 
@@ -250,9 +257,9 @@
    'X'                   n  expr-quant                              doBackslashX
    'Z'                   n  term                                    doBackslashZ
    'z'                   n  term                                    doBackslashz
-   digit_char	         n  expr-quant                              doBackRef         #  Will scan multiple digits
+   digit_char            n  expr-quant                              doBackRef         #  Will scan multiple digits
    eof                      errorDeath                              doEscapeError
-   default               n  expr-quant		                    doLiteralChar     #  Escaped literal char.		       
+   default               n  expr-quant                          doLiteralChar     #  Escaped literal char.             
 
     
 #
diff -r -u icu/source/i18n/regeximp.h icu.new/source/i18n/regeximp.h
--- icu/source/i18n/regeximp.h	2004-12-30 07:25:50.000000000 +0000
+++ icu.new/source/i18n/regeximp.h	2006-08-06 13:31:08.000000000 +0100
@@ -168,9 +168,12 @@
                                //   Operand value:
                                //      0:  Normal (. doesn't match new-line) mode.
                                //      1:  . matches new-line mode.
-     URX_BACKSLASH_BU  = 53    // \b or \B in UREGEX_UWORD mode, using Unicode style
+     URX_BACKSLASH_BU  = 53,   // \b or \B in UREGEX_UWORD mode, using Unicode style
                                //   word boundaries.
-
+     URX_NAME_CAPTURE  = 54,   // When processing the next URX_END_CAPTURE, mark the
+                               // group as matching the specified name.
+                               //   Parameter is the group name index.
+     URX_NAMEDBACKREF  = 55    // A back-reference to a named group; parameter is the group name index.
 };           
 
 // Keep this list of opcode names in sync with the above enum
@@ -229,8 +232,9 @@
         "LOOP_SR_I",           \
         "LOOP_C",              \
         "LOOP_DOT_I",          \
-        "BACKSLASH_BU"
-
+        "BACKSLASH_BU",        \
+        "NAME_CAPTURE",        \
+        "NAMEDBACKREF"
 
 //
 //  Convenience macros for assembling and disassembling a compiled operation.
diff -r -u icu/source/i18n/rematch.cpp icu.new/source/i18n/rematch.cpp
--- icu/source/i18n/rematch.cpp	2005-06-17 01:42:54.000000000 +0100
+++ icu.new/source/i18n/rematch.cpp	2006-08-06 14:44:20.000000000 +0100
@@ -48,10 +48,12 @@
         fDeferredStatus = U_ILLEGAL_ARGUMENT_ERROR;
         return;
     }
+    fNamedGroups       = new UVector32(pat->fNamedGroups->size(),
+                                       fDeferredStatus);
     if (pat->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
         fData = (int32_t *)uprv_malloc(pat->fDataSize * sizeof(int32_t)); 
     }
-    if (fStack == NULL || fData == NULL) {
+    if (fStack == NULL || fData == NULL || fNamedGroups == NULL) {
         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
     }
         
@@ -73,10 +75,12 @@
     if (U_FAILURE(status)) {
         return;
     }
+    fNamedGroups       = new UVector32(fPatternOwned->fNamedGroups->size(),
+                                       fDeferredStatus);
     if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
         fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); 
     }
-    if (fStack == NULL || fData == NULL) {
+    if (fStack == NULL || fData == NULL || fNamedGroups == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     reset(input);
@@ -96,11 +100,12 @@
     if (U_FAILURE(status)) {
         return;
     }
-
+    fNamedGroups       = new UVector32(fPatternOwned->fNamedGroups->size(),
+                                       fDeferredStatus);
     if (fPattern->fDataSize > (int32_t)(sizeof(fSmallData)/sizeof(int32_t))) {
         fData = (int32_t *)uprv_malloc(fPattern->fDataSize * sizeof(int32_t)); 
     }
-    if (fStack == NULL || fData == NULL) {
+    if (fStack == NULL || fData == NULL || fNamedGroups == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     reset(*RegexStaticSets::gStaticSets->fEmptyString);
@@ -119,6 +124,8 @@
         fPatternOwned = NULL;
         fPattern = NULL;
     }
+    delete fNamedGroups;
+    fNamedGroups = NULL;
     #if UCONFIG_NO_BREAK_ITERATION==0
     delete fWordBreakItr;
     #endif
@@ -299,6 +306,43 @@
 
 
 
+int32_t RegexMatcher::end(const UnicodeString &groupName, UErrorCode &err) const {
+    // Index just past the text captured by the named group in the last match,
+    // or -1 (err untouched) if the group exists but took no part in that match.
+    if (U_FAILURE(err)) {
+        return -1;
+    }
+    if (U_FAILURE(fDeferredStatus)) {
+        err = fDeferredStatus;    // same deferred-error handling as group(name)
+        return -1;
+    }
+    if (fMatch == FALSE) {
+        err = U_REGEX_INVALID_STATE;
+        return -1;
+    }
+    // Find the name; fNamedGroups holds the frame offset recorded for each name.
+    int32_t namedGroupCount = fPattern->fNamedGroups->size();
+    int32_t nameIndex;
+    int32_t groupOffset = -1;
+    for (nameIndex = 0; nameIndex < namedGroupCount; ++nameIndex) {
+        UnicodeString *name = (UnicodeString *)fPattern->fNamedGroups->elementAt(nameIndex);
+        if (*name == groupName) {
+            groupOffset = fNamedGroups->elementAti(nameIndex);
+            break;
+        }
+    }
+    if (nameIndex >= namedGroupCount) {
+        err = U_REGEX_UNKNOWN_GROUP_NAME;
+        return -1;
+    }
+    if (groupOffset < 0) {
+        return -1;                // The capture group wasn't part of the match
+    }
+    U_ASSERT(groupOffset < fPattern->fFrameSize);
+    return fFrame->fExtra[groupOffset + 1];
+}
+
+
 //--------------------------------------------------------------------------------
 //
 //   find()
@@ -541,6 +585,102 @@
 }
 
 
+UnicodeString RegexMatcher::group(const UnicodeString &groupName,
+                                  UErrorCode &status) const {
+    // Returns the text captured by the named group in the last match, or an
+    // empty string if the group took no part in it.  We don't use start() and
+    // end() in here, because it would mean scanning the name list twice, which
+    // is unnecessarily inefficient.
+    if (U_FAILURE(status)) {
+        return UnicodeString();
+    }
+    if (U_FAILURE(fDeferredStatus)) {
+        status = fDeferredStatus;
+        return UnicodeString();
+    }
+    if (fMatch == FALSE) {
+        status = U_REGEX_INVALID_STATE;
+        return UnicodeString();
+    }
+
+    int32_t namedGroupCount = fPattern->fNamedGroups->size();
+    int32_t nameIndex;
+    int32_t groupOffset = -1;
+    for (nameIndex = 0; nameIndex < namedGroupCount; ++nameIndex) {
+        UnicodeString *name = (UnicodeString *)fPattern->fNamedGroups->elementAt(nameIndex);
+        if (*name == groupName) {
+            groupOffset = fNamedGroups->elementAti(nameIndex);
+            break;
+        }
+    }
+    if (nameIndex >= namedGroupCount) {
+        status = U_REGEX_UNKNOWN_GROUP_NAME;
+        return UnicodeString();
+    }
+    if (groupOffset < 0) {
+        // The capture group wasn't part of the match
+        return UnicodeString();
+    }
+    U_ASSERT(groupOffset < fPattern->fFrameSize);
+
+    int32_t s = fFrame->fExtra[groupOffset];
+    int32_t e = fFrame->fExtra[groupOffset + 1];
+    if (s < 0) {
+        // No start recorded in the final frame: treat as "not part of the match"
+        return UnicodeString();
+    }
+    U_ASSERT(s <= e);
+    return UnicodeString (*fInput, s, e-s);
+}
+
+
+int32_t RegexMatcher::groupIndexFromName(const UnicodeString &groupName,
+                                         UErrorCode &status) const {
+    // Maps a group name to its 1-based capture group number.  Returns 0 if the
+    // name exists but the group took no part in the last match, and -1 with
+    // U_REGEX_UNKNOWN_GROUP_NAME if the pattern has no group of that name.
+    if (U_FAILURE(status)) {
+        return -1;
+    }
+    if (U_FAILURE(fDeferredStatus)) {
+        status = fDeferredStatus;
+        return -1;
+    }
+
+    // First, find the name in the name list
+    int32_t namedGroupCount = fPattern->fNamedGroups->size();
+    int32_t nameIndex;
+    int32_t groupOffset = -1;
+    for (nameIndex = 0; nameIndex < namedGroupCount; ++nameIndex) {
+        UnicodeString *name = (UnicodeString *)fPattern->fNamedGroups->elementAt(nameIndex);
+        if (*name == groupName) {
+            groupOffset = fNamedGroups->elementAti(nameIndex);
+            break;
+        }
+    }
+    if (nameIndex >= namedGroupCount) {
+        status = U_REGEX_UNKNOWN_GROUP_NAME;
+        return -1;
+    }
+
+    // The recorded offsets only describe the last successful match.  An entry
+    // may also have been recorded on a path that was later abandoned; the
+    // group's start slot in the final frame is then still negative.
+    if (fMatch == FALSE || groupOffset < 0 || fFrame->fExtra[groupOffset] < 0) {
+        return 0;
+    }
+
+    // Next, locate the group index by scanning the group map
+    int32_t groupCount = fPattern->fGroupMap->size();
+    for (int32_t groupIndex = 0; groupIndex < groupCount; ++groupIndex) {
+        if (fPattern->fGroupMap->elementAti(groupIndex) == groupOffset) {
+            return groupIndex + 1;
+        }
+    }
+    status = U_REGEX_UNKNOWN_GROUP_NAME;
+    return -1;
+}
+
 
 
 int32_t RegexMatcher::groupCount() const {
@@ -858,7 +998,6 @@
 
 
 
-
 int32_t RegexMatcher::start(int group, UErrorCode &status) const {
     if (U_FAILURE(status)) {
         return -1;
@@ -889,6 +1028,44 @@
 
 
 
+int32_t RegexMatcher::start(const UnicodeString &groupName,
+                            UErrorCode &err) const {
+    // Index of the first character captured by the named group in the last match,
+    // or -1 (err untouched) if the group exists but took no part in that match.
+    if (U_FAILURE(err)) {
+        return -1;
+    }
+    if (U_FAILURE(fDeferredStatus)) {
+        err = fDeferredStatus;    // same deferred-error handling as group(name)
+        return -1;
+    }
+    if (fMatch == FALSE) {
+        err = U_REGEX_INVALID_STATE;
+        return -1;
+    }
+    // Find the name; fNamedGroups holds the frame offset recorded for each name.
+    int32_t namedGroupCount = fPattern->fNamedGroups->size();
+    int32_t nameIndex;
+    int32_t groupOffset = -1;
+    for (nameIndex = 0; nameIndex < namedGroupCount; ++nameIndex) {
+        UnicodeString *name = (UnicodeString *)fPattern->fNamedGroups->elementAt(nameIndex);
+        if (*name == groupName) {
+            groupOffset = fNamedGroups->elementAti(nameIndex);
+            break;
+        }
+    }
+    if (nameIndex >= namedGroupCount) {
+        err = U_REGEX_UNKNOWN_GROUP_NAME;
+        return -1;
+    }
+    if (groupOffset < 0) {
+        return -1;                // The capture group wasn't part of the match
+    }
+    U_ASSERT(groupOffset < fPattern->fFrameSize);
+    return fFrame->fExtra[groupOffset];
+}
+
+
 //================================================================================
 //
 //    Code following this point in this file is the internal
@@ -1093,6 +1270,18 @@
         fData[i] = 0;
     }
 
+    // Index of the capture name set by URX_NAME_CAPTURE and not yet consumed, or -1
+    int32_t captureNameIdx = -1;
+
+    // Reset the named group indices
+    int32_t namedGroupCount = fPattern->fNamedGroups->size();
+    if (!fNamedGroups->ensureCapacity(namedGroupCount, status))
+      return;
+    fNamedGroups->setSize(namedGroupCount);
+    for (i = 0; i < namedGroupCount; ++i) {
+        fNamedGroups->setElementAt (-1, i);
+    }
+
     //
     //  Main loop for interpreting the compiled pattern.
     //  One iteration of the loop per pattern operation performed.
@@ -1210,12 +1399,22 @@
             break;
 
 
+        case URX_NAME_CAPTURE:
+            U_ASSERT(opValue >= 0 && opValue < namedGroupCount);
+            captureNameIdx = opValue;
+            break;
+
+
         case URX_END_CAPTURE:
             U_ASSERT(opValue >= 0 && opValue < frameSize-3);
             U_ASSERT(fp->fExtra[opValue+2] >= 0);            // Start pos for this group must be set.
             fp->fExtra[opValue]   = fp->fExtra[opValue+2];   // Tentative start becomes real.
             fp->fExtra[opValue+1] = fp->fInputIdx;           // End position
             U_ASSERT(fp->fExtra[opValue] <= fp->fExtra[opValue+1]);
+            if (captureNameIdx >= 0) {
+                fNamedGroups->setElementAt(opValue, captureNameIdx);
+                captureNameIdx = -1;
+            }
             break;
 
             
@@ -1794,6 +1993,19 @@
             }
             break;
 
+        case URX_NAMEDBACKREF:
+            {
+                // opValue is a name index; valid indices are 0 .. namedGroupCount-1.
+                U_ASSERT(opValue >= 0 && opValue < namedGroupCount);
+                // Translate the name index into the frame offset recorded for it.
+                opValue = fNamedGroups->elementAti(opValue);
+                if (opValue < 0) {
+                    // This group hasn't been matched yet
+                    fp = (REStackFrame *)fStack->popFrame(frameSize);   // FAIL, no match.
+                    break;
+                }
+                // Fall through
+            }
         case URX_BACKREF:
         case URX_BACKREF_I:
             {
@@ -1816,7 +2028,7 @@
 
                 UBool  haveMatch = FALSE;
                 if (fp->fInputIdx + len <= inputLen) {
-                    if (opType == URX_BACKREF) {
+                    if (opType == URX_BACKREF || opType == URX_NAMEDBACKREF) {
                         if (u_strncmp(inputBuf+groupStartIdx, inputBuf+fp->fInputIdx, len) == 0) {
                             haveMatch = TRUE;
                         }
diff -r -u icu/source/i18n/repattrn.cpp icu.new/source/i18n/repattrn.cpp
--- icu/source/i18n/repattrn.cpp	2004-12-30 07:25:50.000000000 +0000
+++ icu.new/source/i18n/repattrn.cpp	2006-08-06 14:45:20.000000000 +0100
@@ -57,6 +57,11 @@
 //    Assignmenet Operator
 //
 //--------------------------------------------------------------------------
+// UTokenAssigner callback for UVector::assign(): deep-copies a UnicodeString element.
+static void U_CALLCONV copyStringTok(UHashTok *dst, UHashTok *src) {
+    dst->pointer = new UnicodeString(*(const UnicodeString *)src->pointer);
+}
+
 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
     if (this == &other) {
         // Source and destination are the same.  Don't do anything.
@@ -91,6 +96,7 @@
     //  Copy the pattern.  It's just values, nothing deep to copy.
     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
+    fNamedGroups->assign(*other.fNamedGroups, (UTokenAssigner *)copyStringTok, fDeferredStatus);
 
     //  Copy the Unicode Sets.  
     //    Could be made more efficient if the sets were reference counted and shared,
@@ -135,6 +141,7 @@
     fFrameSize        = 0;
     fDataSize         = 0;
     fGroupMap         = NULL;
+    fNamedGroups      = NULL;
     fMaxCaptureDigits = 1;  
     fStaticSets       = NULL;
     fStaticSets8      = NULL;
@@ -147,13 +154,14 @@
     
     fCompiledPat      = new UVector32(fDeferredStatus);
     fGroupMap         = new UVector32(fDeferredStatus);
+    fNamedGroups      = new UVector(fDeferredStatus);
     fSets             = new UVector(fDeferredStatus);
     fInitialChars     = new UnicodeSet;
     fInitialChars8    = new Regex8BitSet;
     if (U_FAILURE(fDeferredStatus)) {
         return;
     }
-    if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
+    if (fCompiledPat == NULL  || fGroupMap == NULL || fNamedGroups == NULL || fSets == NULL ||
         fInitialChars == NULL || fInitialChars8 == NULL) {
         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
         return;
@@ -186,6 +194,15 @@
     fSets8 = NULL;
     delete fGroupMap;
     fGroupMap = NULL;
+    if (fNamedGroups != NULL) {
+        // The names are heap-allocated and owned by this pattern; they are
+        // stored starting at index 0, so every element must be freed.
+        for (i = 0; i < fNamedGroups->size(); ++i) {
+            delete (UnicodeString *)fNamedGroups->elementAt(i);
+        }
+        delete fNamedGroups;
+        fNamedGroups = NULL;
+    }
     delete fInitialChars;
     fInitialChars = NULL;
     delete fInitialChars8;
diff -r -u icu/source/i18n/unicode/regex.h icu.new/source/i18n/unicode/regex.h
--- icu/source/i18n/unicode/regex.h	2005-06-23 00:38:06.000000000 +0100
+++ icu.new/source/i18n/unicode/regex.h	2006-08-06 14:45:56.000000000 +0100
@@ -405,6 +405,7 @@
 
     UVector32       *fGroupMap;    // Map from capture group number to position of
                                    //   the group's variables in the matcher stack frame.
+    UVector         *fNamedGroups;     // A vector of group names.
 
     int32_t         fMaxCaptureDigits;
 
@@ -622,6 +623,32 @@
 
 
    /**
+    *    Returns a string containing the text captured by the given group
+    *    during the previous match operation.
+    *
+    *    @param groupName    the capture group name
+    *    @param   status     A reference to a UErrorCode to receive any errors.
+    *                        Possible errors are  U_REGEX_INVALID_STATE if no match
+    *                        has been attempted or the last match failed and
+    *                        U_REGEX_UNKNOWN_GROUP_NAME for a bad capture group name.
+    *    @return the captured text
+    */
+    virtual UnicodeString group(const UnicodeString &groupName,
+                                UErrorCode &status) const;
+
+   /**
+    *   Returns the group index for the named group with the specified name.
+    *
+    *   @param groupName    the capture group name
+    *   @param  status      A reference to a UErrorCode to receive any errors.
+    *
+    *   @return the index of the group, 0 if it was not part of the match, or -1 on failure.
+    *
+    */
+    virtual int32_t groupIndexFromName(const UnicodeString &groupName,
+                                       UErrorCode &status) const;
+
+   /**
     *   Returns the number of capturing groups in this matcher's pattern.
     *   @return the number of capture groups
     *   @stable ICU 2.4
@@ -656,6 +683,22 @@
 
 
    /**
+    *   Returns the index in the input string of the start of the text matched by the
+    *    specified capture group during the previous match operation.  Return -1 if
+    *    the capture group exists in the pattern, but was not part of the last match.
+    *
+    *    @param  groupName   the capture group name
+    *    @param  status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed, and
+    *                        U_REGEX_UNKNOWN_GROUP_NAME for a bad capture group name
+    *    @return the start position of substring matched by the specified group.
+    */
+    virtual int32_t start(const UnicodeString &groupName,
+                          UErrorCode &status) const;
+
+
+   /**
     *    Returns the index in the input string of the first character following the
     *    text matched during the previous match operation.
     *   @param   status      A reference to a UErrorCode to receive any errors.  Possible
@@ -684,6 +727,21 @@
 
 
    /**
+    *    Returns the index in the input string of the character following the
+    *    text matched by the specified capture group during the previous match operation.
+    *    @param groupName    the capture group name
+    *    @param   status      A reference to a UErrorCode to receive any errors.  Possible
+    *                        errors are  U_REGEX_INVALID_STATE if no match has been
+    *                        attempted or the last match failed and
+    *                        U_REGEX_UNKNOWN_GROUP_NAME for a bad capture group name
+    *    @return  the index of the first character following the text
+    *              captured by the specified group during the previous match operation.
+    *              Return -1 if the capture group exists in the pattern but was not part of the match.
+    */
+    virtual int32_t end(const UnicodeString &groupName, UErrorCode &status) const;
+
+
+   /**
     *   Resets this matcher.  The effect is to remove any memory of previous matches,
     *       and to cause subsequent find() operations to begin at the beginning of
     *       the input string.
@@ -935,6 +993,8 @@
     int32_t             *fData;            // Data area for use by the compiled pattern.
     int32_t             fSmallData[8];     //   Use this for data if it's enough.
 
+    UVector32           *fNamedGroups;     // Frame offset recorded for each named group of the pattern, or -1.
+
     UBool               fTraceDebug;       // Set true for debug tracing of match engine.
 
     UErrorCode          fDeferredStatus;   // Save error state if that cannot be immediately
diff -r -u icu/source/i18n/unicode/uregex.h icu.new/source/i18n/unicode/uregex.h
--- icu/source/i18n/unicode/uregex.h	2005-06-28 23:23:06.000000000 +0100
+++ icu.new/source/i18n/unicode/uregex.h	2006-08-06 14:46:26.000000000 +0100
@@ -332,7 +332,8 @@
 uregex_groupCount(URegularExpression *regexp,
                   UErrorCode         *status);
 
-/** Extract the string for the specified matching expression or subexpression.
+/**
+  * Extract the string for the specified matching expression or subexpression.
   * Group #0 is the complete string of matched text.
   * Group #1 is the text matched by the first set of capturing parentheses.
   *
@@ -355,6 +356,24 @@
              int32_t             destCapacity,
              UErrorCode          *status);
 
+/**
+ *    Returns the group index for the named group with the specified name,
+ *    provided that the named group was matched by the regular expression.
+ *
+ *    @param   regexp       The compiled regular expression.
+ *    @param   name         The capture group name.
+ *    @param   nameLen      The length of name, or -1 if it is NUL terminated.
+ *    @param   status       A reference to a UErrorCode to receive any errors.
+ *    @return               if the named group was matched, the index of the
+ *                          corresponding numbered group, otherwise 0.
+ *                          If no group has the specified name, -1 is returned
+ *                          with U_REGEX_UNKNOWN_GROUP_NAME in status.
+ */
+U_DRAFT int32_t U_EXPORT2
+uregex_groupIndexFromName(URegularExpression *regexp,
+                          const UChar        *name,
+                          int32_t            nameLen,
+                          UErrorCode         *status);
 
 /**
   *   Returns the index in the input string of the start of the text matched by the
diff -r -u icu/source/i18n/uregex.cpp icu.new/source/i18n/uregex.cpp
--- icu/source/i18n/uregex.cpp	2005-06-21 17:54:40.000000000 +0100
+++ icu.new/source/i18n/uregex.cpp	2006-08-06 14:47:02.000000000 +0100
@@ -432,6 +432,28 @@
 
 //------------------------------------------------------------------------------
 //
+//    uregex_groupIndexFromName
+//
+//------------------------------------------------------------------------------
+U_CAPI int32_t U_EXPORT2
+uregex_groupIndexFromName(URegularExpression *regexp,
+                          const UChar        *name,
+                          int32_t            nameLen,
+                          UErrorCode         *status) {
+    if (U_FAILURE(*status)) {
+        return -1;                        // Leave an incoming error untouched.
+    }
+    if (regexp == NULL || name == NULL || nameLen < -1) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;                        // int32_t result, so -1 rather than NULL.
+    }
+    UnicodeString nameString(nameLen == -1, name, nameLen);   // read-only alias, no copy
+    return regexp->fMatcher->groupIndexFromName(nameString, *status);
+}
+
+
+//------------------------------------------------------------------------------
+//
 //    uregex_start
 //
 //------------------------------------------------------------------------------
diff -r -u icu/source/test/cintltst/reapits.c icu.new/source/test/cintltst/reapits.c
--- icu/source/test/cintltst/reapits.c	2004-12-16 02:54:24.000000000 +0000
+++ icu.new/source/test/cintltst/reapits.c	2006-08-06 14:29:29.000000000 +0100
@@ -531,6 +531,50 @@
     }
 
     /*
+     *  groupIndexFromName()
+     */
+    {
+        UChar   text1[80];
+        UChar   buf[80];
+        UBool   result;
+        int32_t groupIndex;
+
+        u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
+        
+        status = U_ZERO_ERROR;
+        re = uregex_openC("(?:abc(?P<foo>.*?)def|(?P<bar>ergodic))",
+                          0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        uregex_setText(re, text1, -1, &status);
+        result = uregex_find(re, 0, &status);
+        TEST_ASSERT(result == TRUE);
+
+        /* Capture group "foo".  Should succeed. */
+        status = U_ZERO_ERROR;
+        u_uastrncpy(buf, "foo", sizeof(buf)/2);
+        groupIndex = uregex_groupIndexFromName(re, buf, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(groupIndex == 1);
+
+        /* Capture group "bar".  Should succeed and return 0. */
+        status = U_ZERO_ERROR;
+        u_uastrncpy(buf, "bar", sizeof(buf)/2);
+        groupIndex = uregex_groupIndexFromName(re, buf, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(groupIndex == 0);
+
+        /* Capture group "pants".  Should fail. */
+        status = U_ZERO_ERROR;
+        u_uastrncpy(buf, "pants", sizeof(buf)/2);
+        groupIndex = uregex_groupIndexFromName(re, buf, -1, &status);
+        TEST_ASSERT(status == U_REGEX_UNKNOWN_GROUP_NAME);
+        TEST_ASSERT(groupIndex == -1);
+
+        uregex_close(re);
+    }
+
+    /*
      *  replaceFirst()
      */
     {
diff -r -u icu/source/test/intltest/regextst.cpp icu.new/source/test/intltest/regextst.cpp
--- icu/source/test/intltest/regextst.cpp	2005-07-05 19:39:00.000000000 +0100
+++ icu.new/source/test/intltest/regextst.cpp	2006-08-06 14:47:53.000000000 +0100
@@ -67,7 +67,6 @@
             if (exec) PerlTests();
             break;
 
-
         default: name = "";
             break; //needed to end loop
     }
@@ -775,6 +774,39 @@
 
     }
 
+    // Named capture group
+    {
+        int32_t       flags = 0;
+        UParseError   pe;
+        UErrorCode    status = U_ZERO_ERROR;
+        UnicodeString re("01(?P<foo>23(?P<bar>45)67)(?P=bar)"
+                         "(?P<blat>kerpow)?");
+        RegexPattern *pat = RegexPattern::compile(re, flags, pe, status);
+        REGEX_CHECK_STATUS;
+        UnicodeString data = "0123456745";
+        RegexMatcher  *matcher = pat->matcher(data, status);
+        REGEX_CHECK_STATUS;
+        REGEX_ASSERT(matcher->lookingAt(status) == TRUE);
+
+        REGEX_ASSERT(matcher->groupIndexFromName("foo", status) == 1);
+        REGEX_ASSERT(matcher->groupIndexFromName("bar", status) == 2);
+        REGEX_ASSERT(matcher->group(0, status) == "0123456745");
+        REGEX_ASSERT(matcher->group(1, status) == "234567");
+        REGEX_ASSERT(matcher->group(2, status) == "45");
+        REGEX_ASSERT(matcher->group("foo", status) == "234567");
+        REGEX_ASSERT(matcher->group("bar", status) == "45");
+        REGEX_ASSERT(matcher->group("blat", status) == "");   // unmatched group: empty text
+        REGEX_ASSERT(matcher->start("foo", status) == 2);
+        REGEX_ASSERT(matcher->end("foo", status) == 8);
+        REGEX_ASSERT(matcher->start("bar", status) == 4);
+        REGEX_ASSERT(matcher->end("bar", status) == 6);
+        REGEX_CHECK_STATUS;
+        REGEX_ASSERT_FAIL(matcher->group("pants", status),
+                          U_REGEX_UNKNOWN_GROUP_NAME);
+        delete matcher;    // matcher and pattern are owned by the test
+        delete pat;
+    }
+
     //
     //  find
     //

