RegexCode.cs source code in C# .NET

Source code for the .NET framework in C#

                        

Code:

/ Dotnetfx_Win7_3.5.1 / Dotnetfx_Win7_3.5.1 / 3.5.1 / DEVDIV / depot / DevDiv / releases / whidbey / NetFXspW7 / ndp / fx / src / Regex / System / Text / RegularExpressions / RegexCode.cs / 1 / RegexCode.cs

                            //------------------------------------------------------------------------------ 
// 
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//----------------------------------------------------------------------------- 

// This RegexCode class is internal to the regular expression package. 
// It provides operator constants for use by the Builder and the Machine. 

// Implementation notes: 
//
// Regexps are built into RegexCodes, which contain an operation array,
// a string table, and some constants.
// 
// Each operation is one of the codes below, followed by the integer
// operands specified for each op. 
// 
// Strings and sets are indices into a string table.
 

namespace System.Text.RegularExpressions {

    using System.Collections; 
    using System.Diagnostics;
    using System.Globalization; 
 
    internal sealed class RegexCode {
        // the following primitive operations come directly from the parser 

        // lef/back operands        description

        internal const int Onerep         = 0;    // lef,back char,min,max    a {n}
        internal const int Notonerep      = 1;    // lef,back char,min,max    .{n}
        internal const int Setrep         = 2;    // lef,back set,min,max     [\d]{n}

        internal const int Oneloop        = 3;    // lef,back char,min,max    a {,n}
        internal const int Notoneloop     = 4;    // lef,back char,min,max    .{,n}
        internal const int Setloop        = 5;    // lef,back set,min,max     [\d]{,n}

        internal const int Onelazy        = 6;    // lef,back char,min,max    a {,n}?
        internal const int Notonelazy     = 7;    // lef,back char,min,max    .{,n}?
        internal const int Setlazy        = 8;    // lef,back set,min,max     [\d]{,n}?

        internal const int One            = 9;    // lef      char            a
        internal const int Notone         = 10;   // lef      char            [^a]
        internal const int Set            = 11;   // lef      set             [a-z\s]  \w \s \d

        internal const int Multi          = 12;   // lef      string          abcd
        internal const int Ref            = 13;   // lef      group           \#

        internal const int Bol            = 14;   //                          ^
        internal const int Eol            = 15;   //                          $
        internal const int Boundary       = 16;   //                          \b
        internal const int Nonboundary    = 17;   //                          \B
        internal const int Beginning      = 18;   //                          \A
        internal const int Start          = 19;   //                          \G
        internal const int EndZ           = 20;   //                          \Z
        internal const int End            = 21;   //                          \z

        internal const int Nothing        = 22;   //                          Reject!

        // primitive control structures

        internal const int Lazybranch     = 23;   // back     jump            straight first
        internal const int Branchmark     = 24;   // back     jump            branch first for loop
        internal const int Lazybranchmark = 25;   // back     jump            straight first for loop
        internal const int Nullcount      = 26;   // back     val             set counter, null mark
        internal const int Setcount       = 27;   // back     val             set counter, make mark
        internal const int Branchcount    = 28;   // back     jump,limit      branch++ if zero<=c<limit

        // NOTE(review): the declarations from Lazybranchcount down to _caps were
        // missing from this copy of the file (the text between a '<' and a later
        // '>' had been dropped). The opcode numbers 29-42 follow the order of the
        // CodeStr table, which is indexed by opcode; operand lists follow
        // OpcodeSize/OpcodeDescription.
        internal const int Lazybranchcount= 29;   //          jump,limit
        internal const int Nullmark       = 30;   //          (no operand)
        internal const int Setmark        = 31;   //          (no operand)
        internal const int Capturemark    = 32;   //          index,unindex
        internal const int Getmark        = 33;   //          (no operand)
        internal const int Setjump        = 34;   //          (no operand)
        internal const int Backjump       = 35;   //          (no operand)
        internal const int Forejump       = 36;   //          (no operand)
        internal const int Testref        = 37;   //          index
        internal const int Goto           = 38;   //          jump
        internal const int Prune          = 39;   //          one operand (see OpcodeSize)
        internal const int Stop           = 40;   //          (no operand)

        internal const int ECMABoundary   = 41;   //          (no operand)
        internal const int NonECMABoundary= 42;   //          (no operand)

        // modifiers that are OR'ed onto an opcode
        // NOTE(review): these five values are restored from the shipped
        // framework source rather than from anything visible in this file -
        // confirm before relying on them.

        internal const int Mask           = 63;   // isolates the base opcode (Op & Mask)
        internal const int Rtl            = 64;   // shown as "-Rtl" by OperatorDescription
        internal const int Back           = 128;  // shown as "-Back" by OperatorDescription
        internal const int Back2          = 256;  // shown as "-Back2" by OperatorDescription
        internal const int Ci             = 512;  // shown as "-Ci" by OperatorDescription

        // the code

        internal int[]           _codes;                 // the opcode/operand array
        internal String[]        _strings;               // the string/set table indexed by operands
        internal int             _trackcount;            // trackcount value passed to the constructor
        internal Hashtable       _caps;                  // caps table passed to the constructor
        internal int             _capsize;               // number of impl group slots
        internal RegexPrefix     _fcPrefix;              // the set of candidate first characters (may be null)
        internal RegexBoyerMoore _bmPrefix;              // the fixed prefix string as a Boyer-Moore machine (may be null)
        internal int             _anchors;               // the set of zero-length start anchors (RegexFCD.Bol, etc)
        internal bool            _rightToLeft;           // true if right to left
 
        // optimizations
 
        // constructor

        // Stores the compiled program together with its tables and prefix
        // information. The entries of stringlist are copied into a newly
        // allocated String[]; every other argument is stored as passed.
        internal RegexCode(int [] codes, ArrayList stringlist, int trackcount,
                           Hashtable caps, int capsize,
                           RegexBoyerMoore bmPrefix, RegexPrefix fcPrefix,
                           int anchors, bool rightToLeft) {
            // Snapshot the string table first.
            int stringCount = stringlist.Count;
            String[] table = new String[stringCount];
            stringlist.CopyTo(0, table, 0, stringCount);

            // program and tables
            _codes      = codes;
            _strings    = table;
            _trackcount = trackcount;
            _caps       = caps;
            _capsize    = capsize;

            // prefix / anchor information and scan direction
            _bmPrefix    = bmPrefix;
            _fcPrefix    = fcPrefix;
            _anchors     = anchors;
            _rightToLeft = rightToLeft;
        }
 
        // Returns true when the base opcode of Op (Op with everything outside
        // Mask stripped) is one of the opcodes listed below, false otherwise.
        internal static bool OpcodeBacktracks(int Op) {
            switch (Op & Mask) {
                // loop and lazy-loop forms
                case Oneloop:
                case Notoneloop:
                case Setloop:
                case Onelazy:
                case Notonelazy:
                case Setlazy:

                // branches and counters
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Nullcount:
                case Setcount:
                case Branchcount:
                case Lazybranchcount:

                // marks and jumps
                case Setmark:
                case Capturemark:
                case Getmark:
                case Setjump:
                case Backjump:
                case Forejump:
                case Goto:
                    return true;
            }

            return false;
        }
 
        // Returns the number of int slots (1, 2 or 3) assigned to the
        // instruction whose opcode is Opcode. Bits outside Mask are ignored.
        // An opcode not listed below raises the exception built by
        // MakeException, with the masked opcode value in the message.
        internal static int OpcodeSize(int Opcode) {
            int op = Opcode & Mask;
            int size;

            switch (op) {
                // size 1
                case Nothing:
                case Bol:
                case Eol:
                case Boundary:
                case Nonboundary:
                case ECMABoundary:
                case NonECMABoundary:
                case Beginning:
                case Start:
                case EndZ:
                case End:
                case Nullmark:
                case Setmark:
                case Getmark:
                case Setjump:
                case Backjump:
                case Forejump:
                case Stop:
                    size = 1;
                    break;

                // size 2
                case One:
                case Notone:
                case Multi:
                case Ref:
                case Testref:
                case Goto:
                case Nullcount:
                case Setcount:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Prune:
                case Set:
                    size = 2;
                    break;

                // size 3
                case Capturemark:
                case Branchcount:
                case Lazybranchcount:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setlazy:
                case Setrep:
                case Setloop:
                    size = 3;
                    break;

                default:
                    throw MakeException(SR.GetString(SR.UnexpectedOpcode, op.ToString(CultureInfo.CurrentCulture)));
            }

            return size;
        }
 
        // Builds (but does not throw) an ArgumentException carrying the
        // given message; the caller decides whether to throw it.
        internal static ArgumentException MakeException(String message) {
            ArgumentException error = new ArgumentException(message);
            return error;
        }

        // Debug only code below
 
#if DBG
        // Opcode names for debug output. OperatorDescription indexes this
        // table with (Opcode & Mask), so the position of each name is its
        // opcode number.
        internal static String[] CodeStr = new String[]
        {
            // 0 - 8
            "Onerep",  "Notonerep",  "Setrep",
            "Oneloop", "Notoneloop", "Setloop",
            "Onelazy", "Notonelazy", "Setlazy",
            // 9 - 13
            "One", "Notone", "Set", "Multi", "Ref",
            // 14 - 22
            "Bol", "Eol",
            "Boundary", "Nonboundary",
            "Beginning", "Start", "EndZ", "End",
            "Nothing",
            // 23 - 29
            "Lazybranch", "Branchmark", "Lazybranchmark",
            "Nullcount", "Setcount",
            "Branchcount", "Lazybranchcount",
            // 30 - 38
            "Nullmark", "Setmark", "Capturemark", "Getmark",
            "Setjump", "Backjump", "Forejump",
            "Testref", "Goto",
            // 39 - 40
            "Prune", "Stop",
#if ECMA
            // 41 - 42
            "ECMABoundary", "NonECMABoundary",
#endif
        };
 
        // Returns the CodeStr name of the base opcode (Opcode & Mask),
        // followed by "-Ci", "-Rtl", "-Back" and "-Back2" - in that order -
        // for each of those modifier bits that is set in Opcode.
        internal static String OperatorDescription(int Opcode) {
            StringBuilder name = new StringBuilder(CodeStr[Opcode & Mask]);

            if ((Opcode & Ci) != 0) {
                name.Append("-Ci");
            }
            if ((Opcode & Rtl) != 0) {
                name.Append("-Rtl");
            }
            if ((Opcode & Back) != 0) {
                name.Append("-Back");
            }
            if ((Opcode & Back2) != 0) {
                name.Append("-Back2");
            }

            return name.ToString();
        }
 
        // Renders the instruction stored at _codes[offset] as one line of
        // debug text:
        //   - the offset as a six-digit number and a space,
        //   - '*' if OpcodeBacktracks reports true for the opcode, else ' ',
        //   - the name and modifier suffixes from OperatorDescription,
        //   - the operands in parentheses.
        internal String OpcodeDescription(int offset) {
            int raw = _codes[offset];
            int op  = raw & Mask;
            StringBuilder text = new StringBuilder();

            text.AppendFormat("{0:D6} ", offset);
            text.Append(OpcodeBacktracks(op) ? '*' : ' ');
            text.Append(OperatorDescription(raw));
            text.Append('(');

            // First operand (slot offset+1), labelled according to the opcode.
            switch (op) {
                case One:
                case Notone:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                    text.Append("Ch = ");
                    text.Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
                    break;

                case Set:
                case Setrep:
                case Setloop:
                case Setlazy:
                    text.Append("Set = ");
                    text.Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
                    break;

                case Multi:
                    text.Append("String = ");
                    text.Append(_strings[_codes[offset + 1]]);
                    break;

                case Ref:
                case Testref:
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Capturemark:
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);
                    // The second slot is only printed when it is not -1.
                    if (_codes[offset + 2] != -1) {
                        text.Append(", Unindex = ");
                        text.Append(_codes[offset + 2]);
                    }
                    break;

                case Nullcount:
                case Setcount:
                    text.Append("Value = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Goto:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Branchcount:
                case Lazybranchcount:
                    text.Append("Addr = ");
                    text.Append(_codes[offset + 1]);
                    break;
            }

            // Second operand (slot offset+2) for the opcodes that print one;
            // Int32.MaxValue is shown as "inf".
            String countLabel = null;

            switch (op) {
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setrep:
                case Setloop:
                case Setlazy:
                    countLabel = ", Rep = ";
                    break;

                case Branchcount:
                case Lazybranchcount:
                    countLabel = ", Limit = ";
                    break;
            }

            if (countLabel != null) {
                text.Append(countLabel);

                int count = _codes[offset + 2];
                if (count == Int32.MaxValue) {
                    text.Append("inf");
                }
                else {
                    text.Append(count);
                }
            }

            text.Append(')');

            return text.ToString();
        }
 
        // Writes a description of this RegexCode to the debug output: scan
        // direction, first-character set, fixed prefix, anchors, the
        // Boyer-Moore dump when a prefix machine is present, and then one
        // line per instruction in _codes.
        internal void Dump() {
            String direction = _rightToLeft ? "right-to-left" : "left-to-right";
            Debug.WriteLine("Direction:  " + direction);

            String firstChars = (_fcPrefix == null) ? "n/a" : RegexCharClass.SetDescription(_fcPrefix.Prefix);
            Debug.WriteLine("Firstchars: " + firstChars);

            String prefix = (_bmPrefix == null) ? "n/a" : Regex.Escape(_bmPrefix.ToString());
            Debug.WriteLine("Prefix:     " + prefix);

            Debug.WriteLine("Anchors:    " + RegexFCD.AnchorDescription(_anchors));
            Debug.WriteLine("");

            if (_bmPrefix != null) {
                Debug.WriteLine("BoyerMoore:");
                Debug.WriteLine(_bmPrefix.Dump("    "));
            }

            // Step through the program one instruction at a time; OpcodeSize
            // gives the distance to the next instruction.
            int pos = 0;
            while (pos < _codes.Length) {
                Debug.WriteLine(OpcodeDescription(pos));
                pos += OpcodeSize(_codes[pos]);
            }

            Debug.WriteLine("");
        }
#endif

    } 
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
//------------------------------------------------------------------------------ 
// 
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// 
//----------------------------------------------------------------------------- 

// This RegexCode class is internal to the regular expression package. 
// It provides operator constants for use by the Builder and the Machine. 

// Implementation notes: 
//
// Regexps are built into RegexCodes, which contain an operation array,
// a string table, and some constants.
// 
// Each operation is one of the codes below, followed by the integer
// operands specified for each op. 
// 
// Strings and sets are indices into a string table.
 

namespace System.Text.RegularExpressions {

    using System.Collections; 
    using System.Diagnostics;
    using System.Globalization; 
 
    internal sealed class RegexCode {
        // the following primitive operations come directly from the parser 

        // lef/back operands        description

        internal const int Onerep         = 0;    // lef,back char,min,max    a {n}
        internal const int Notonerep      = 1;    // lef,back char,min,max    .{n}
        internal const int Setrep         = 2;    // lef,back set,min,max     [\d]{n}

        internal const int Oneloop        = 3;    // lef,back char,min,max    a {,n}
        internal const int Notoneloop     = 4;    // lef,back char,min,max    .{,n}
        internal const int Setloop        = 5;    // lef,back set,min,max     [\d]{,n}

        internal const int Onelazy        = 6;    // lef,back char,min,max    a {,n}?
        internal const int Notonelazy     = 7;    // lef,back char,min,max    .{,n}?
        internal const int Setlazy        = 8;    // lef,back set,min,max     [\d]{,n}?

        internal const int One            = 9;    // lef      char            a
        internal const int Notone         = 10;   // lef      char            [^a]
        internal const int Set            = 11;   // lef      set             [a-z\s]  \w \s \d

        internal const int Multi          = 12;   // lef      string          abcd
        internal const int Ref            = 13;   // lef      group           \#

        internal const int Bol            = 14;   //                          ^
        internal const int Eol            = 15;   //                          $
        internal const int Boundary       = 16;   //                          \b
        internal const int Nonboundary    = 17;   //                          \B
        internal const int Beginning      = 18;   //                          \A
        internal const int Start          = 19;   //                          \G
        internal const int EndZ           = 20;   //                          \Z
        internal const int End            = 21;   //                          \z

        internal const int Nothing        = 22;   //                          Reject!

        // primitive control structures

        internal const int Lazybranch     = 23;   // back     jump            straight first
        internal const int Branchmark     = 24;   // back     jump            branch first for loop
        internal const int Lazybranchmark = 25;   // back     jump            straight first for loop
        internal const int Nullcount      = 26;   // back     val             set counter, null mark
        internal const int Setcount       = 27;   // back     val             set counter, make mark
        internal const int Branchcount    = 28;   // back     jump,limit      branch++ if zero<=c<limit

        // NOTE(review): the declarations from Lazybranchcount down to _caps were
        // missing from this copy of the file (the text between a '<' and a later
        // '>' had been dropped). The opcode numbers 29-42 follow the order of the
        // CodeStr table, which is indexed by opcode; operand lists follow
        // OpcodeSize/OpcodeDescription.
        internal const int Lazybranchcount= 29;   //          jump,limit
        internal const int Nullmark       = 30;   //          (no operand)
        internal const int Setmark        = 31;   //          (no operand)
        internal const int Capturemark    = 32;   //          index,unindex
        internal const int Getmark        = 33;   //          (no operand)
        internal const int Setjump        = 34;   //          (no operand)
        internal const int Backjump       = 35;   //          (no operand)
        internal const int Forejump       = 36;   //          (no operand)
        internal const int Testref        = 37;   //          index
        internal const int Goto           = 38;   //          jump
        internal const int Prune          = 39;   //          one operand (see OpcodeSize)
        internal const int Stop           = 40;   //          (no operand)

        internal const int ECMABoundary   = 41;   //          (no operand)
        internal const int NonECMABoundary= 42;   //          (no operand)

        // modifiers that are OR'ed onto an opcode
        // NOTE(review): these five values are restored from the shipped
        // framework source rather than from anything visible in this file -
        // confirm before relying on them.

        internal const int Mask           = 63;   // isolates the base opcode (Op & Mask)
        internal const int Rtl            = 64;   // shown as "-Rtl" by OperatorDescription
        internal const int Back           = 128;  // shown as "-Back" by OperatorDescription
        internal const int Back2          = 256;  // shown as "-Back2" by OperatorDescription
        internal const int Ci             = 512;  // shown as "-Ci" by OperatorDescription

        // the code

        internal int[]           _codes;                 // the opcode/operand array
        internal String[]        _strings;               // the string/set table indexed by operands
        internal int             _trackcount;            // trackcount value passed to the constructor
        internal Hashtable       _caps;                  // caps table passed to the constructor
        internal int             _capsize;               // number of impl group slots
        internal RegexPrefix     _fcPrefix;              // the set of candidate first characters (may be null)
        internal RegexBoyerMoore _bmPrefix;              // the fixed prefix string as a Boyer-Moore machine (may be null)
        internal int             _anchors;               // the set of zero-length start anchors (RegexFCD.Bol, etc)
        internal bool            _rightToLeft;           // true if right to left
 
        // optimizations
 
        // constructor

        // Stores the compiled program together with its tables and prefix
        // information. The entries of stringlist are copied into a newly
        // allocated String[]; every other argument is stored as passed.
        internal RegexCode(int [] codes, ArrayList stringlist, int trackcount,
                           Hashtable caps, int capsize,
                           RegexBoyerMoore bmPrefix, RegexPrefix fcPrefix,
                           int anchors, bool rightToLeft) {
            // Snapshot the string table first.
            int stringCount = stringlist.Count;
            String[] table = new String[stringCount];
            stringlist.CopyTo(0, table, 0, stringCount);

            // program and tables
            _codes      = codes;
            _strings    = table;
            _trackcount = trackcount;
            _caps       = caps;
            _capsize    = capsize;

            // prefix / anchor information and scan direction
            _bmPrefix    = bmPrefix;
            _fcPrefix    = fcPrefix;
            _anchors     = anchors;
            _rightToLeft = rightToLeft;
        }
 
        // Returns true when the base opcode of Op (Op with everything outside
        // Mask stripped) is one of the opcodes listed below, false otherwise.
        internal static bool OpcodeBacktracks(int Op) {
            switch (Op & Mask) {
                // loop and lazy-loop forms
                case Oneloop:
                case Notoneloop:
                case Setloop:
                case Onelazy:
                case Notonelazy:
                case Setlazy:

                // branches and counters
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Nullcount:
                case Setcount:
                case Branchcount:
                case Lazybranchcount:

                // marks and jumps
                case Setmark:
                case Capturemark:
                case Getmark:
                case Setjump:
                case Backjump:
                case Forejump:
                case Goto:
                    return true;
            }

            return false;
        }
 
        // Returns the number of int slots (1, 2 or 3) assigned to the
        // instruction whose opcode is Opcode. Bits outside Mask are ignored.
        // An opcode not listed below raises the exception built by
        // MakeException, with the masked opcode value in the message.
        internal static int OpcodeSize(int Opcode) {
            int op = Opcode & Mask;
            int size;

            switch (op) {
                // size 1
                case Nothing:
                case Bol:
                case Eol:
                case Boundary:
                case Nonboundary:
                case ECMABoundary:
                case NonECMABoundary:
                case Beginning:
                case Start:
                case EndZ:
                case End:
                case Nullmark:
                case Setmark:
                case Getmark:
                case Setjump:
                case Backjump:
                case Forejump:
                case Stop:
                    size = 1;
                    break;

                // size 2
                case One:
                case Notone:
                case Multi:
                case Ref:
                case Testref:
                case Goto:
                case Nullcount:
                case Setcount:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Prune:
                case Set:
                    size = 2;
                    break;

                // size 3
                case Capturemark:
                case Branchcount:
                case Lazybranchcount:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setlazy:
                case Setrep:
                case Setloop:
                    size = 3;
                    break;

                default:
                    throw MakeException(SR.GetString(SR.UnexpectedOpcode, op.ToString(CultureInfo.CurrentCulture)));
            }

            return size;
        }
 
        // Builds (but does not throw) an ArgumentException carrying the
        // given message; the caller decides whether to throw it.
        internal static ArgumentException MakeException(String message) {
            ArgumentException error = new ArgumentException(message);
            return error;
        }

        // Debug only code below
 
#if DBG
        // Opcode names for debug output. OperatorDescription indexes this
        // table with (Opcode & Mask), so the position of each name is its
        // opcode number.
        internal static String[] CodeStr = new String[]
        {
            // 0 - 8
            "Onerep",  "Notonerep",  "Setrep",
            "Oneloop", "Notoneloop", "Setloop",
            "Onelazy", "Notonelazy", "Setlazy",
            // 9 - 13
            "One", "Notone", "Set", "Multi", "Ref",
            // 14 - 22
            "Bol", "Eol",
            "Boundary", "Nonboundary",
            "Beginning", "Start", "EndZ", "End",
            "Nothing",
            // 23 - 29
            "Lazybranch", "Branchmark", "Lazybranchmark",
            "Nullcount", "Setcount",
            "Branchcount", "Lazybranchcount",
            // 30 - 38
            "Nullmark", "Setmark", "Capturemark", "Getmark",
            "Setjump", "Backjump", "Forejump",
            "Testref", "Goto",
            // 39 - 40
            "Prune", "Stop",
#if ECMA
            // 41 - 42
            "ECMABoundary", "NonECMABoundary",
#endif
        };
 
        // Returns the CodeStr name of the base opcode (Opcode & Mask),
        // followed by "-Ci", "-Rtl", "-Back" and "-Back2" - in that order -
        // for each of those modifier bits that is set in Opcode.
        internal static String OperatorDescription(int Opcode) {
            StringBuilder name = new StringBuilder(CodeStr[Opcode & Mask]);

            if ((Opcode & Ci) != 0) {
                name.Append("-Ci");
            }
            if ((Opcode & Rtl) != 0) {
                name.Append("-Rtl");
            }
            if ((Opcode & Back) != 0) {
                name.Append("-Back");
            }
            if ((Opcode & Back2) != 0) {
                name.Append("-Back2");
            }

            return name.ToString();
        }
 
        // Renders the instruction stored at _codes[offset] as one line of
        // debug text:
        //   - the offset as a six-digit number and a space,
        //   - '*' if OpcodeBacktracks reports true for the opcode, else ' ',
        //   - the name and modifier suffixes from OperatorDescription,
        //   - the operands in parentheses.
        internal String OpcodeDescription(int offset) {
            int raw = _codes[offset];
            int op  = raw & Mask;
            StringBuilder text = new StringBuilder();

            text.AppendFormat("{0:D6} ", offset);
            text.Append(OpcodeBacktracks(op) ? '*' : ' ');
            text.Append(OperatorDescription(raw));
            text.Append('(');

            // First operand (slot offset+1), labelled according to the opcode.
            switch (op) {
                case One:
                case Notone:
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                    text.Append("Ch = ");
                    text.Append(RegexCharClass.CharDescription((char)_codes[offset + 1]));
                    break;

                case Set:
                case Setrep:
                case Setloop:
                case Setlazy:
                    text.Append("Set = ");
                    text.Append(RegexCharClass.SetDescription(_strings[_codes[offset + 1]]));
                    break;

                case Multi:
                    text.Append("String = ");
                    text.Append(_strings[_codes[offset + 1]]);
                    break;

                case Ref:
                case Testref:
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Capturemark:
                    text.Append("Index = ");
                    text.Append(_codes[offset + 1]);
                    // The second slot is only printed when it is not -1.
                    if (_codes[offset + 2] != -1) {
                        text.Append(", Unindex = ");
                        text.Append(_codes[offset + 2]);
                    }
                    break;

                case Nullcount:
                case Setcount:
                    text.Append("Value = ");
                    text.Append(_codes[offset + 1]);
                    break;

                case Goto:
                case Lazybranch:
                case Branchmark:
                case Lazybranchmark:
                case Branchcount:
                case Lazybranchcount:
                    text.Append("Addr = ");
                    text.Append(_codes[offset + 1]);
                    break;
            }

            // Second operand (slot offset+2) for the opcodes that print one;
            // Int32.MaxValue is shown as "inf".
            String countLabel = null;

            switch (op) {
                case Onerep:
                case Notonerep:
                case Oneloop:
                case Notoneloop:
                case Onelazy:
                case Notonelazy:
                case Setrep:
                case Setloop:
                case Setlazy:
                    countLabel = ", Rep = ";
                    break;

                case Branchcount:
                case Lazybranchcount:
                    countLabel = ", Limit = ";
                    break;
            }

            if (countLabel != null) {
                text.Append(countLabel);

                int count = _codes[offset + 2];
                if (count == Int32.MaxValue) {
                    text.Append("inf");
                }
                else {
                    text.Append(count);
                }
            }

            text.Append(')');

            return text.ToString();
        }
 
        // Writes a description of this RegexCode to the debug output: scan
        // direction, first-character set, fixed prefix, anchors, the
        // Boyer-Moore dump when a prefix machine is present, and then one
        // line per instruction in _codes.
        internal void Dump() {
            String direction = _rightToLeft ? "right-to-left" : "left-to-right";
            Debug.WriteLine("Direction:  " + direction);

            String firstChars = (_fcPrefix == null) ? "n/a" : RegexCharClass.SetDescription(_fcPrefix.Prefix);
            Debug.WriteLine("Firstchars: " + firstChars);

            String prefix = (_bmPrefix == null) ? "n/a" : Regex.Escape(_bmPrefix.ToString());
            Debug.WriteLine("Prefix:     " + prefix);

            Debug.WriteLine("Anchors:    " + RegexFCD.AnchorDescription(_anchors));
            Debug.WriteLine("");

            if (_bmPrefix != null) {
                Debug.WriteLine("BoyerMoore:");
                Debug.WriteLine(_bmPrefix.Dump("    "));
            }

            // Step through the program one instruction at a time; OpcodeSize
            // gives the distance to the next instruction.
            int pos = 0;
            while (pos < _codes.Length) {
                Debug.WriteLine(OpcodeDescription(pos));
                pos += OpcodeSize(_codes[pos]);
            }

            Debug.WriteLine("");
        }
#endif

    } 
}

// File provided for Reference Use Only by Microsoft Corporation (c) 2007.
                        

Link Menu

Network programming in C#, Network Programming in VB.NET, Network Programming in .NET
This book is available now!
Buy at Amazon US or
Buy at Amazon UK