class pRegex;

/*
 * C++ Wrapper for the "Perl Compatible Regular Expressions" library
 *  The PCRE lib can be found at: 
 *   ftp://ftp.cus.cam.ac.uk/pub/software/programs/pcre/
 *
 * I've tried to keep the interface as similar to Perl's as possible.
 * There are two main operations, match and substitute.  The match
 * function m() returns a bool indicating whether the pattern matched
 * the string.  The mn() function returns an int indicating how many
 * parentheticals in the pattern matched, plus one for the entire pattern.
 * The second major operation is substitution, using the s() function
 * as in Perl.  s() returns the number of patterns replaced.
 *
 * Match example:
 *    string s = "man on the roof";
 *    if (pRegex("on the").m(s))
 *        cout << "matched";
 *    // prints "matched"
 *
 * Substitution example:
 *    string t = "see with a telescope";
 *    if (pRegex("a (telescope|sled)").s(t, "the $1"))
 *        cout << "new str:\n";
 *    cout << t << "\n";
 *
 *    should print "new str:" followed by "see with the telescope"
 *
 * I think there may be some bugs in the s() function when matching
 * empty strings when using the global "g" modifier.  bugs, as in it
 * doesn't behave like Perl.  It is my intention to fix this, when I
 * have time.
 *
 * this wrapper coded by Michael Tesch with ideas from Max Okumoto.
 * Released under the GPL.  A copy of the GPL can be found almost anywhere,
 * but the most official is at www.gnu.org
 *
 * Copyright (c) 2000 Michael Tesch
 * (send bug fixes / improvements to tesch@cs.wisc.edu)
 * version 0.01
 */

#ifndef PREGEX_H
#define PREGEX_H

#include <iosfwd>
#include <string>
#include "pcre.h"

/*
 * Bit flags private to the pRegex wrapper.
 *
 * pRegex keeps PCRE's own option bits and the wrapper's modifier bits
 * together in a single unsigned word (pRegex::_pcre_opts).  The wrapper
 * bits occupy the three highest bits of a 32-bit word; _opt_mask lists
 * the genuine PCRE bits so the two groups can be told apart.
 *
 * NOTE(review): names beginning with an underscore followed by an
 * upper-case letter are reserved for the C++ implementation.  They are
 * left untouched here because code outside this header may refer to
 * them -- consider renaming in a coordinated change.
 */
namespace PREGEX {
    // Wrapper-only modifier bits, merged into _pcre_opts.
    // (presumably set by pRegex::parse_mods() from the modifier string,
    // e.g. "g" for global -- confirm against the implementation)
    const unsigned _PREGEX_GLOBAL = 0x80000000;
    const unsigned _PREGEX_OPTIMIZE = 0x40000000;
    const unsigned _PREGEX_DEBUG_MSGS = 0x20000000;

    // Every PCRE option bit the wrapper recognises.  ANDing _pcre_opts
    // with this mask keeps the PCRE bits and drops the wrapper-only
    // bits above.
    const unsigned int _opt_mask = 
	PCRE_CASELESS | PCRE_MULTILINE | PCRE_DOTALL | PCRE_EXTENDED | 
	PCRE_ANCHORED | PCRE_DOLLAR_ENDONLY | PCRE_EXTRA | PCRE_NOTBOL |
	PCRE_NOTEOL | PCRE_UNGREEDY | PCRE_NOTEMPTY | PCRE_UTF8;
}

class pRegex {
  public:
    ~pRegex();
    pRegex() 
	: _pcre(NULL), _pcre_extra(NULL), _pcre_opts(0), _m_offset(0) {};
    pRegex(const char *pat)
	: _pattern(pat), _pcre(NULL), _pcre_extra(NULL), 
	   _pcre_opts(0), _m_offset(0) {};
    pRegex(pRegex &p) 
	:  _pattern(p._pattern), _pcre(NULL), _pcre_extra(NULL),
	   _pcre_opts(p._pcre_opts), _m_offset(0), _last_m_str("") {};

    pRegex & operator=(const pRegex &p) {
	if (this != &p) {
	    _pattern = p._pattern;
	    _pcre = NULL;
	    _pcre_extra = NULL;
	    _pcre_opts = p._pcre_opts;
	    _m_offset = 0;
	    _last_m_str = "";
	}
	return *this;
    };

    /* print */
    friend std::ostream& operator<< (std::ostream&, const pRegex&);

    /* numerical match */
    int mn(const string &s, const char *mods = "");
    /* boolean match */
    bool m(const string &s, const char *mods = "") {
	return (0 < mn(s, mods));
    }
    /* substitution */
    int s(string &s, const char *replace, const char *mods = "");

  private:
    // priv fxns
    void compile();
    void study();	// for extra optimization
    unsigned parse_mods(const char *mods);
    int do_subs(const string s, string & final, const char *replace_pat, 
		int *ovec, int nmatch);

    // data members
    string _pattern;
    pcre * _pcre;
    pcre_extra * _pcre_extra;

    unsigned _pcre_opts;

    size_t _m_offset;
    string _last_m_str;
};

#endif
