filelist.py 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9
"""distutils.filelist

Provides the FileList class, used for poking about the filesystem
and building lists of files.
"""

# created 2000/07/17, Rene Liebscher (as template.py)
# most parts taken from commands/sdist.py
# renamed 2000/07/29 (to filelist.py) and officially added to
#  the Distutils source, Greg Ward

__revision__ = "$Id$"

import sys, os, string, re
import fnmatch
from types import *
from glob import glob
from distutils.util import convert_path
19
from distutils.errors import DistutilsTemplateError, DistutilsInternalError
20 21 22

class FileList:
    """A list of files built by exploring the filesystem and filtered by
    applying various patterns to what we find there.

    Instance attributes:
      files
        list of filenames currently being built/filtered/manipulated
      allfiles
        complete list of files under consideration (ie. without any
        filtering applied); None until 'set_allfiles()' or 'findall()'
        has been called
      warn
        callable taking one message string; called when a template
        pattern matches nothing
      debug_print
        callable taking one message string; called with debugging output
    """

    def __init__(self,
                 warn=None,
                 debug_print=None):
        """Create an empty file list.  'warn' and 'debug_print', if given,
        replace the fallback reporting functions defined below.
        """
        # use standard warning and debug functions if no other given
        self.warn = warn or self.__warn
        self.debug_print = debug_print or self.__debug_print

        self.allfiles = None
        self.files = []

    def set_allfiles(self, allfiles):
        """Use 'allfiles' as the list of candidate files to select from."""
        self.allfiles = allfiles

    def findall(self, dir=os.curdir):
        """Set the list of candidate files to the result of the
        module-level 'findall(dir)'.
        """
        self.allfiles = findall(dir)

    # -- Fallback warning/debug functions ------------------------------

    def __warn(self, msg):
        """Write 'msg' to stderr, prefixed with "warning: "."""
        sys.stderr.write("warning: %s\n" % msg)

    def __debug_print(self, msg):
        """Print 'msg' to stdout if the global DEBUG (taken from the
        DISTUTILS_DEBUG environment variable) flag is true.
        """
        from distutils.core import DEBUG
        if DEBUG:
            # Parenthesised single argument: prints the same text whether
            # 'print' is a statement or a function.
            print(msg)

    # -- List-like methods ---------------------------------------------

    def append(self, item):
        """Add 'item' to the end of 'self.files'."""
        self.files.append(item)

    def extend(self, items):
        """Add every element of 'items' to the end of 'self.files'."""
        self.files.extend(items)

    def sort(self):
        """Sort 'self.files' by (directory part, base name)."""
        # Not a strict lexical sort!
        sortable_files = sorted(map(os.path.split, self.files))
        self.files = [os.path.join(*sort_tuple)
                      for sort_tuple in sortable_files]

    # -- Other miscellaneous utility methods ---------------------------

    def remove_duplicates(self):
        """Remove adjacent equal entries from 'self.files' in place."""
        # Assumes list has been sorted!
        # Walk backwards so deleting an entry never shifts an index that
        # has not been examined yet.
        for i in range(len(self.files) - 1, 0, -1):
            if self.files[i] == self.files[i - 1]:
                del self.files[i]

    # -- "File template" methods ---------------------------------------

    def _parse_template_line(self, line):
        """Split the template line 'line' into words and validate it.

        Return the tuple (action, patterns, dir, dir_pattern).  'action'
        is the first word of the line; of the other three, only the ones
        relevant to that action are set, the rest are None.  Paths are
        passed through 'convert_path()'.

        Raise DistutilsTemplateError if the action is unknown or has the
        wrong number of arguments.  A line without any word raises
        IndexError, since there is no first word to take as the action.
        """
        words = line.split()
        action = words[0]

        patterns = dir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistutilsTemplateError(
                      "'%s' expects <pattern1> <pattern2> ..." % action)

            # A real list: the caller both joins and iterates over it.
            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistutilsTemplateError(
                      "'%s' expects <dir> <pattern1> <pattern2> ..." % action)

            dir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistutilsTemplateError(
                     "'%s' expects a single <dir_pattern>" % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistutilsTemplateError("unknown action '%s'" % action)

        return (action, patterns, dir, dir_pattern)

    # _parse_template_line ()

    def process_template_line(self, line):
        """Parse the template line 'line' and apply it to this file list.

        Including actions go through 'include_pattern()', excluding
        actions through 'exclude_pattern()'; whenever such a call reports
        that nothing matched, 'self.warn' is called with a message.
        Errors raised by '_parse_template_line()' propagate to the caller.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dir_pattern).
        (action, patterns, dir, dir_pattern) = self._parse_template_line(line)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            self.debug_print("include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=1):
                    self.warn("no files found matching '%s'" % pattern)

        elif action == 'exclude':
            self.debug_print("exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=1):
                    self.warn(
                        "no previously-included files found matching '%s'" %
                        pattern)

        elif action == 'global-include':
            self.debug_print("global-include " + ' '.join(patterns))
            for pattern in patterns:
                if not self.include_pattern(pattern, anchor=0):
                    self.warn(("no files found matching '%s' " +
                               "anywhere in distribution") %
                              pattern)

        elif action == 'global-exclude':
            self.debug_print("global-exclude " + ' '.join(patterns))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, anchor=0):
                    self.warn(("no previously-included files matching '%s' " +
                               "found anywhere in distribution") %
                              pattern)

        elif action == 'recursive-include':
            self.debug_print("recursive-include %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.include_pattern(pattern, prefix=dir):
                    self.warn(("no files found matching '%s' " +
                               "under directory '%s'") %
                              (pattern, dir))

        elif action == 'recursive-exclude':
            self.debug_print("recursive-exclude %s %s" %
                             (dir, ' '.join(patterns)))
            for pattern in patterns:
                if not self.exclude_pattern(pattern, prefix=dir):
                    self.warn(("no previously-included files matching '%s' " +
                               "found under directory '%s'") %
                              (pattern, dir))

        elif action == 'graft':
            self.debug_print("graft " + dir_pattern)
            if not self.include_pattern(None, prefix=dir_pattern):
                self.warn("no directories found matching '%s'" % dir_pattern)

        elif action == 'prune':
            self.debug_print("prune " + dir_pattern)
            if not self.exclude_pattern(None, prefix=dir_pattern):
                self.warn(("no previously-included directories found " +
                           "matching '%s'") %
                          dir_pattern)
        else:
            raise DistutilsInternalError(
                  "this cannot happen: invalid action '%s'" % action)

    # process_template_line ()

    # -- Filtering/selection methods -----------------------------------

    def include_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.  Patterns
        are not quite the same as implemented by the 'fnmatch' module: '*'
        and '?'  match non-special characters, where "special" is platform-
        dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files; no check is made
        for names that are already there.

        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("include_pattern: applying regex r'%s'" %
                         pattern_re.pattern)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.debug_print(" adding " + name)
                self.files.append(name)
                files_found = 1

        return files_found

    # include_pattern ()

    def exclude_pattern(self, pattern,
                        anchor=1, prefix=None, is_regex=0):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.  Other parameters are the same as for
        'include_pattern()', above.
        The list 'self.files' is modified in place.
        Return 1 if files are found, 0 otherwise.
        """
        files_found = 0
        pattern_re = translate_pattern(pattern, anchor, prefix, is_regex)
        self.debug_print("exclude_pattern: applying regex r'%s'" %
                         pattern_re.pattern)
        # Walk backwards so deleting an entry never shifts an index that
        # has not been examined yet.
        for i in range(len(self.files) - 1, -1, -1):
            if pattern_re.search(self.files[i]):
                self.debug_print(" removing " + self.files[i])
                del self.files[i]
                files_found = 1

        return files_found

    # exclude_pattern ()

# class FileList


# ----------------------------------------------------------------------
# Utility functions

def findall(dir=os.curdir):
    """Find all regular files under 'dir' and return the list of their
    names.

    Each name is 'dir' joined with the path of the file below it, except
    when 'dir' is os.curdir, in which case no "./" prefix is added.
    Directories are descended into.  Because os.stat() follows symbolic
    links, a link to a directory is descended into as well and a link to
    a regular file is reported as a file.  Entries for which os.stat()
    fails (for example a dangling symbolic link) are skipped instead of
    aborting the whole walk.
    """
    from stat import ST_MODE, S_ISREG, S_ISDIR

    found = []
    pending = [dir]             # directories still to be listed

    while pending:
        current = pending.pop()

        for name in os.listdir(current):
            if current != os.curdir:    # avoid the dreaded "./" syndrome
                fullname = os.path.join(current, name)
            else:
                fullname = name

            # Avoid excess stat calls -- just one will do, thank you!
            try:
                mode = os.stat(fullname)[ST_MODE]
            except OSError:
                # Nothing usable behind this name; ignore it.
                continue

            if S_ISREG(mode):
                found.append(fullname)
            elif S_ISDIR(mode):
                pending.append(fullname)

    return found


def glob_to_re(pattern):
    """Translate a shell-like glob pattern to a regular expression; return
    a string containing the regex.  Differs from 'fnmatch.translate()' in
    that '*' and '?' do not match "special characters" (which are
    platform-specific; currently only slash is treated as special).
    """
    pattern_re = fnmatch.translate(pattern)

    # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
    # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
    # and by extension they shouldn't match such "special characters" under
    # any OS.  So change all non-escaped dots in the RE to match any
    # character except the special characters.
    # XXX currently the "special characters" are just slash -- i.e. this is
    # Unix-only.
    #
    # A dot is non-escaped when it is preceded by an even number (possibly
    # zero) of backslashes.  The look-behind checks the preceding character
    # without consuming it, so consecutive dots (as produced by "??") are
    # all rewritten.
    pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', r'\1[^/]', pattern_re)
    return pattern_re

# glob_to_re ()


def translate_pattern(pattern, anchor=1, prefix=None, is_regex=0):
    """Translate a shell-like wildcard pattern to a compiled regular
    expression.  Return the compiled regex.  If 'is_regex' true,
    then 'pattern' is directly compiled to a regex (if it's a string)
    or just returned as-is (assumes it's a regex object).

    Otherwise 'pattern' (which may be None or empty, meaning "no
    restriction") is translated with 'glob_to_re()'.  If 'prefix' is not
    None the regex matches names that start with 'prefix' (itself a glob
    pattern), followed by a path separator, anything, and finally
    'pattern'; 'anchor' is ignored in that case.  Without a prefix, a true
    'anchor' ties the pattern to the start of the name.
    """
    if is_regex:
        if isinstance(pattern, str):
            return re.compile(pattern)
        return pattern

    # fnmatch.translate() wraps the translated glob in leading and/or
    # trailing boilerplate whose exact text depends on the Python version.
    # Translate a plain literal to learn what that wrapper looks like
    # rather than assuming it is a single trailing character.
    head, _, tail = glob_to_re('_').partition('_')

    if pattern:
        pattern_re = glob_to_re(pattern)
        pattern_body = pattern_re[len(head):len(pattern_re) - len(tail)]
    else:
        pattern_re = ''
        pattern_body = ''

    if prefix is not None:
        prefix_re = glob_to_re(prefix)
        prefix_body = prefix_re[len(head):len(prefix_re) - len(tail)]

        # Add a separator between prefix and the rest unless the prefix is
        # empty or already ends with one.
        if prefix_body and not prefix_body.endswith(('/', os.sep)):
            sep = os.sep
            if sep == '\\':
                # a lone backslash would escape the '.' that follows it
                sep = r'\\'
            prefix_body = prefix_body + sep

        pattern_re = head + r'\A' + prefix_body + '.*' + pattern_body + tail
    elif anchor:                        # no prefix -- respect anchor flag
        if pattern_re:
            pattern_re = head + r'\A' + pattern_body + tail
        else:
            # nothing to match against: anchored, but accepts every name
            pattern_re = r'\A'

    return re.compile(pattern_re)
366 367

# translate_pattern ()