"""shellwords.py

Parse a string into words like a (POSIX) shell does.

License: Python Software Foundation License
         http://www.opensource.org/licenses/PythonSoftFoundation.html

This module parses a string into words according to the parsing rules
of a POSIX shell. These parsing rules are (quoted after 'man bash'):

1) Words are split at whitespace characters; these are Space, Tab,
   Newline, Carriage-Return, Vertical-Tab (0B) and Form-Feed (0C).

   NB: Quotes do _not_ separate words! Thus
        "My"Fancy"Computer"
   will be parsed into a single word:
        MyFancyComputer

2) A non-quoted backslash (\) is the escape character. It preserves
   the literal value of the next character that follows.

3) Enclosing characters in single quotes preserves the literal value
   of each character within the quotes. A single quote may not occur
   between single quotes, even when preceded by a backslash.

   This means: backslash (\) has no special meaning within single
   quotes. All characters within single quotes are taken as-is.

4) Enclosing characters in double quotes preserves the literal value
   of all characters within the quotes, with the exception of \. The
   backslash retains its special meaning only when followed by " or \. A
   double quote may be quoted within double quotes by preceding it
   with a backslash.

http://www.crazy-compilers.com/py-lib/#shellwords

""" # " emacs happy

# Module metadata: author, release version, copyright notice, license
# name and project home page.
__author__ = "Hartmut Goebel <h.goebel@crazy-compilers.com>"
__version__ = "0.2"
__copyright__ = "(C) Copyright 2002 by Hartmut Goebel"
__license__ = "Python Software Foundation License"
__url__ = 'http://www.crazy-compilers.com/py-lib/#shellwords'


from types import ListType, TupleType
import re

# Names exported by ``from shellwords import *``.
# NOTE: 'EOFError' is this module's own exception class (a ValueError
# subclass), not the built-in one; a star-import therefore rebinds the
# name EOFError in the importing namespace.
__all__ = ['shellwords', 'EOFError', 'UnmatchedQuoteError',
           'UnmatchedSingleQuoteError', 'UnmatchedDoubleQuoteError']

## Semantics:
## w/o   quotes: \ escapes everything
## w/i d-quotes: \ escapes only d-quotes and back-slash
## w/i s-quotes: no escaping is done at all

# Pre-compiled token patterns. Each one is meant to be applied with
# .match(line, pos) at the position of the character that selects it.
#
# A newline must be treated like any other character inside quotes and
# after a backslash; otherwise a quoted string spanning several lines,
# or a backslash followed by a newline, would be mis-reported as an
# unmatched quote / premature end of input. Hence re.DOTALL wherever
# '.' is used, and a negated character class for single quotes.

# Double-quoted string; group(1) is the raw content. Inside, a
# backslash always shields the next character from closing the string.
re_dquote  = re.compile(r'"(([^"\\]|\\.)*)"', re.DOTALL)
# Single-quoted string; group(1) is the content, taken verbatim (there
# is no escape character, so the next single quote always closes it).
re_squote  = re.compile(r"'([^']*)'")
# Backslash escape outside of quotes; group(1) is the escaped character
# (any character at all, including a newline).
re_escaped = re.compile(r'\\(.)', re.DOTALL)
# The only escapes resolved inside double quotes: \\ and \" (for .sub()).
re_esc_quote = re.compile(r'\\([\\"])')
# A run of ordinary characters: no whitespace, backslash or quote.
re_outside = re.compile(r"""([^\s\\'"]+)""") # " emacs happy


class EOFError(ValueError):
    """
    Raised when the input ends where more text is required.

    Root of this module's exception hierarchy. NB: this is a
    ValueError subclass of its own and not the built-in exception of
    the same name. The offending input is kept in the `line` attribute.
    """

    def __init__(self, line):
        self.line = line


class UnmatchedQuoteError(EOFError):
    """Common base class of the two 'quote was never closed' errors."""


class UnmatchedSingleQuoteError(UnmatchedQuoteError):
    """A single quote (') was opened but not closed."""

    def __str__(self):
        message = "Unmatched single quote: %s"
        return message % self.line


class UnmatchedDoubleQuoteError(UnmatchedQuoteError):
    """A double quote was opened but not closed."""

    def __str__(self):
        message = "Unmatched double quote: %s"
        return message % self.line


class Arg:
    """
    Accumulator for the word that is currently being assembled.

    A minimal string-like helper which distinguishes between 'empty'
    (the empty string has been appended, e.g. by an empty pair of
    quotes) and 'undefined' (nothing appended yet, stored as None).
    """

    def __init__(self):
        # None means: nothing has been appended so far
        self.arg = None

    def __ne__(self, other):
        # Compares the accumulated text, not the wrapper object, so
        # `arg != None` asks "has anything been appended yet?".
        return self.arg != other

    def __str__(self):
        return str(self.arg)

    def append(self, text):
        """Add `text` to the accumulated word; the first call defines it."""
        if self.arg is not None:
            self.arg += text
        else:
            self.arg = text
        

def shellwords(line):
    """
    Split `line` into a list of words the way a POSIX shell would.

    Words are separated by unquoted whitespace. Double-quoted text,
    single-quoted text, backslash escapes and plain text that touch
    each other are joined into one word; an empty pair of quotes
    yields an empty word.

    Raises UnmatchedDoubleQuoteError or UnmatchedSingleQuoteError if a
    quote is not closed, and EOFError if a backslash outside of quotes
    is not followed by a character it can escape. Each exception
    carries the complete input as its `line` attribute.
    """
    words = []
    word = Arg()
    pos = 0
    length = len(line)

    while pos < length:
        char = line[pos]

        if char.isspace():
            # Unquoted whitespace finishes the current word, if there
            # is one, and the whole run of whitespace is skipped.
            # NB: `word != None` goes through Arg.__ne__; it is true as
            # soon as anything (even '') has been appended.
            if word != None:
                words.append(str(word))
            word = Arg()
            while pos < length and line[pos].isspace():
                pos += 1
            continue

        # Every other character starts a token that is matched by one
        # of the pre-compiled patterns and appended to the current word.
        if char == '"':
            found = re_dquote.match(line, pos)
            if not found:
                raise UnmatchedDoubleQuoteError(line)
            # within double quotes only \\ and \" are unescaped
            text = re_esc_quote.sub(r'\1', found.group(1))
        elif char == "'":
            found = re_squote.match(line, pos)
            if not found:
                raise UnmatchedSingleQuoteError(line)
            # there is _no_ escape character within single quotes!
            text = found.group(1)
        elif char == "\\":
            # backslash = escape character: take the next one literally
            found = re_escaped.match(line, pos)
            if not found:
                raise EOFError(line)
            text = found.group(1)
        else:
            # an ordinary character always starts a non-empty plain run
            found = re_outside.match(line, pos)
            assert found
            text = found.group()

        pos = found.end()
        word.append(text)

    # the input may end without trailing whitespace: flush the last word
    if word != None:
        words.append(str(word))

    return words
