summaryrefslogtreecommitdiffstats
path: root/Lib/csv.py
blob: 5ae5a730468ff6bc8a4bf4047973c56277f1900c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438

"""
csv.py - read/write/investigate CSV files
"""

import re
from _csv import Error, __version__, writer, reader, register_dialect, \
                 unregister_dialect, get_dialect, list_dialects, \
                 field_size_limit, \
                 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
                 __doc__
from _csv import Dialect as _Dialect

from io import StringIO

__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
            "Error", "Dialect", "__doc__", "excel", "excel_tab",
            "field_size_limit", "reader", "writer",
            "register_dialect", "get_dialect", "list_dialects", "Sniffer",
            "unregister_dialect", "__version__", "DictReader", "DictWriter" ]

class Dialect:
    """Describe an Excel dialect.

    This must be subclassed (see csv.excel).  Valid attributes are:
    delimiter, quotechar, escapechar, doublequote, skipinitialspace,
    lineterminator, quoting.

    """
    _name = ""
    _valid = False
    # placeholders
    delimiter = None
    quotechar = None
    escapechar = None
    doublequote = None
    skipinitialspace = None
    lineterminator = None
    quoting = None

    def __init__(self):
        if self.__class__ != Dialect:
            self._valid = True
        self._validate()

    def _validate(self):
        try:
            _Dialect(self)
        except TypeError as e:
            # We do this for compatibility with py2.3
            raise Error(str(e))

class excel(Dialect):
    """Describe the usual properties of Excel-generated CSV files."""
    delimiter = ','
    quotechar = '"'
    doublequote = True
    skipinitialspace = False
    lineterminator = '\r\n'
    quoting = QUOTE_MINIMAL
register_dialect("excel", excel)

class excel_tab(excel):
    """Describe the usual properties of Excel-generated TAB-delimited files."""
    delimiter = '\t'
register_dialect("excel-tab", excel_tab)


class DictReader:
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        self._fieldnames = fieldnames   # list of keys for the dict
        self.restkey = restkey          # key to catch long rows
        self.restval = restval          # default value for short rows
        self.reader = reader(f, dialect, *args, **kwds)
        self.dialect = dialect
        self.line_num = 0

    def __iter__(self):
        return self

    @property
    def fieldnames(self):
        if self._fieldnames is None:
            try:
                self._fieldnames = next(self.reader)
            except StopIteration:
                pass
        self.line_num = self.reader.line_num
        return self._fieldnames

    @fieldnames.setter
    def fieldnames(self, value):
        self._fieldnames = value

    def __next__(self):
        if self.line_num == 0:
            # Used only for its side effect.
            self.fieldnames
        row = next(self.reader)
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = next(self.reader)
        d = dict(zip(self.fieldnames, row))
        lf = len(self.fieldnames)
        lr = len(row)
        if lf < lr:
            d[self.restkey] = row[lf:]
        elif lf > lr:
            for key in self.fieldnames[lr:]:
                d[key] = self.restval
        return d


class DictWriter:
    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                 dialect="excel", *args, **kwds):
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restval = restval          # for writing short dicts
        if extrasaction.lower() not in ("raise", "ignore"):
            raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
                             % extrasaction)
        self.extrasaction = extrasaction
        self.writer = writer(f, dialect, *args, **kwds)

    def writeheader(self):
        header = dict(zip(self.fieldnames, self.fieldnames))
        self.writerow(header)

    def _dict_to_list(self, rowdict):
        if self.extrasaction == "raise":
            wrong_fields = [k for k in rowdict if k not in self.fieldnames]
            if wrong_fields:
                raise ValueError("dict contains fields not in fieldnames: "
                                 + ", ".join(wrong_fields))
        return [rowdict.get(key, self.restval) for key in self.fieldnames]

    def writerow(self, rowdict):
        return self.writer.writerow(self._dict_to_list(rowdict))

    def writerows(self, rowdicts):
        rows = []
        for rowdict in rowdicts:
            rows.append(self._dict_to_list(rowdict))
        return self.writer.writerows(rows)

# Guard Sniffer's type checking against builds that exclude complex()
try:
    complex
except NameError:
    complex = float

class Sniffer:
    '''
    "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
    Returns a Dialect object.
    '''
    def __init__(self):
        # in case there is more than one possible delimiter
        self.preferred = [',', '\t', ';', ' ', ':']


    def sniff(self, sample, delimiters=None):
        """
        Returns a dialect (or None) corresponding to the sample
        """

        quotechar, doublequote, delimiter, skipinitialspace = \
                   self._guess_quote_and_delimiter(sample, delimiters)
        if not delimiter:
            delimiter, skipinitialspace = self._guess_delimiter(sample,
                                                                delimiters)

        if not delimiter:
            raise Error("Could not determine delimiter")

        class dialect(Dialect):
            _name = "sniffed"
            lineterminator = '\r\n'
            quoting = QUOTE_MINIMAL
            # escapechar = ''

        dialect.doublequote = doublequote
        dialect.delimiter = delimiter
        # _csv.reader won't accept a quotechar of ''
        dialect.quotechar = quotechar or '"'
        dialect.skipinitialspace = skipinitialspace

        return dialect


    def _guess_quote_and_delimiter(self, data, delimiters):
        """
        Looks for text enclosed between two identical quotes
        (the probable quotechar) which are preceded and followed
        by the same character (the probable delimiter).
        For example:
                         ,'some text',
        The quote with the most wins, same with the delimiter.
        If there is no quotechar the delimiter can't be determined
        this way.
        """

        matches = []
        for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
                      '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
                      '(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',  # ,".*?"
                      '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
            matches = regexp.findall(data)
            if matches:
                break

        if not matches:
            # (quotechar, doublequote, delimiter, skipinitialspace)
            return ('', False, None, 0)
        quotes = {}
        delims = {}
        spaces = 0
        for m in matches:
            n = regexp.groupindex['quote'] - 1
            key = m[n]
            if key:
                quotes[key] = quotes.get(key, 0) + 1
            try:
                n = regexp.groupindex['delim'] - 1
                key = m[n]
            except KeyError:
                continue
            if key and (delimiters is None or key in delimiters):
                delims[key] = delims.get(key, 0) + 1
            try:
                n = regexp.groupindex['space'] - 1
            except KeyError:
                continue
            if m[n]:
                spaces += 1

        quotechar = max(quotes, key=quotes.get)

        if delims:
            delim = max(delims, key=delims.get)
            skipinitialspace = delims[delim] == spaces
            if delim == '\n': # most likely a file with a single column
                delim = ''
        else:
            # there is *no* delimiter, it's a single column of quoted data
            delim = ''
            skipinitialspace = 0

        # if we see an extra quote between delimiters, we've got a
        # double quoted format
        dq_regexp = re.compile(r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
                               {'delim':delim, 'quote':quotechar}, re.MULTILINE)



        if dq_regexp.search(data):
            doublequote = True
        else:
            doublequote = False

        return (quotechar, doublequote, delim, skipinitialspace)


    def _guess_delimiter(self, data, delimiters):
        """
        The delimiter /should/ occur the same number of times on
        each row. However, due to malformed data, it may not. We don't want
        an all or nothing approach, so we allow for small variations in this
        number.
          1) build a table of the frequency of each character on every line.
          2) build a table of freqencies of this frequency (meta-frequency?),
             e.g.  'x occurred 5 times in 10 rows, 6 times in 1000 rows,
             7 times in 2 rows'
          3) use the mode of the meta-frequency to determine the /expected/
             frequency for that character
          4) find out how often the character actually meets that goal
          5) the character that best meets its goal is the delimiter
        For performance reasons, the data is evaluated in chunks, so it can
        try and evaluate the smallest portion of the data possible, evaluating
        additional chunks as necessary.
        """

        data = list(filter(None, data.split('\n')))

        ascii = [chr(c) for c in range(127)] # 7-bit ASCII

        # build frequency tables
        chunkLength = min(10, len(data))
        iteration = 0
        charFrequency = {}
        modes = {}
        delims = {}
        start, end = 0, min(chunkLength, len(data))
        while start < len(data):
            iteration += 1
            for line in data[start:end]:
                for char in ascii:
                    metaFrequency = charFrequency.get(char, {})
                    # must count even if frequency is 0
                    freq = line.count(char)
                    # value is the mode
                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
                    charFrequency[char] = metaFrequency

            for char in charFrequency.keys():
                items = list(charFrequency[char].items())
                if len(items) == 1 and items[0][0] == 0:
                    continue
                # get the mode of the frequencies
                if len(items) > 1:
                    modes[char] = max(items, key=lambda x: x[1])
                    # adjust the mode - subtract the sum of all
                    # other frequencies
                    items.remove(modes[char])
                    modes[char] = (modes[char][0], modes[char][1]
                                   - sum(item[1] for item in items))
                else:
                    modes[char] = items[0]

            # build a list of possible delimiters
            modeList = modes.items()
            total = float(chunkLength * iteration)
            # (rows of consistent data) / (number of rows) = 100%
            consistency = 1.0
            # minimum consistency threshold
            threshold = 0.9
            while len(delims) == 0 and consistency >= threshold:
                for k, v in modeList:
                    if v[0] > 0 and v[1] > 0:
                        if ((v[1]/total) >= consistency and
                            (delimiters is None or k in delimiters)):
                            delims[k] = v
                consistency -= 0.01

            if len(delims) == 1:
                delim = list(delims.keys())[0]
                skipinitialspace = (data[0].count(delim) ==
                                    data[0].count("%c " % delim))
                return (delim, skipinitialspace)

            # analyze another chunkLength lines
            start = end
            end += chunkLength

        if not delims:
            return ('', 0)

        # if there's more than one, fall back to a 'preferred' list
        if len(delims) > 1:
            for d in self.preferred:
                if d in delims.keys():
                    skipinitialspace = (data[0].count(d) ==
                                        data[0].count("%c " % d))
                    return (d, skipinitialspace)

        # nothing else indicates a preference, pick the character that
        # dominates(?)
        items = [(v,k) for (k,v) in delims.items()]
        items.sort()
        delim = items[-1][1]

        skipinitialspace = (data[0].count(delim) ==
                            data[0].count("%c " % delim))
        return (delim, skipinitialspace)


    def has_header(self, sample):
        # Creates a dictionary of types of data in each column. If any
        # column is of a single type (say, integers), *except* for the first
        # row, then the first row is presumed to be labels. If the type
        # can't be determined, it is assumed to be a string in which case
        # the length of the string is the determining factor: if all of the
        # rows except for the first are the same length, it's a header.
        # Finally, a 'vote' is taken at the end for each column, adding or
        # subtracting from the likelihood of the first row being a header.

        rdr = reader(StringIO(sample), self.sniff(sample))

        header = next(rdr) # assume first row is header

        columns = len(header)
        columnTypes = {}
        for i in range(columns): columnTypes[i] = None

        checked = 0
        for row in rdr:
            # arbitrary number of rows to check, to keep it sane
            if checked > 20:
                break
            checked += 1

            if len(row) != columns:
                continue # skip rows that have irregular number of columns

            for col in list(columnTypes.keys()):

                for thisType in [int, float, complex]:
                    try:
                        thisType(row[col])
                        break
                    except (ValueError, OverflowError):
                        pass
                else:
                    # fallback to length of string
                    thisType = len(row[col])

                if thisType != columnTypes[col]:
                    if columnTypes[col] is None: # add new column type
                        columnTypes[col] = thisType
                    else:
                        # type is inconsistent, remove column from
                        # consideration
                        del columnTypes[col]

        # finally, compare results against first row and "vote"
        # on whether it's a header
        hasHeader = 0
        for col, colType in columnTypes.items():
            if type(colType) == type(0): # it's a length
                if len(header[col]) != colType:
                    hasHeader += 1
                else:
                    hasHeader -= 1
            else: # attempt typecast
                try:
                    colType(header[col])
                except (ValueError, TypeError):
                    hasHeader += 1
                else:
                    hasHeader -= 1

        return hasHeader > 0
s") return _Semaphore.release(self) def Event(*args, **kwargs): return _Event(*args, **kwargs) class _Event(_Verbose): # After Tim Peters' event class (without is_posted()) def __init__(self, verbose=None): _Verbose.__init__(self, verbose) self._cond = Condition(Lock()) self._flag = False def is_set(self): return self._flag isSet = is_set def set(self): self._cond.acquire() try: self._flag = True self._cond.notify_all() finally: self._cond.release() def clear(self): self._cond.acquire() try: self._flag = False finally: self._cond.release() def wait(self, timeout=None): self._cond.acquire() try: if not self._flag: self._cond.wait(timeout) return self._flag finally: self._cond.release() # Helper to generate new thread names _counter = 0 def _newname(template="Thread-%d"): global _counter _counter = _counter + 1 return template % _counter # Active thread administration _active_limbo_lock = _allocate_lock() _active = {} # maps thread id to Thread object _limbo = {} # Main class for threads class Thread(_Verbose): __initialized = False # Need to store a reference to sys.exc_info for printing # out exceptions when a thread tries to use a global var. during interp. # shutdown and thus raises an exception about trying to perform some # operation on/with a NoneType __exc_info = _sys.exc_info # Keep sys.exc_clear too to clear the exception just before # allowing .join() to return. #XXX __exc_clear = _sys.exc_clear def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, verbose=None): assert group is None, "group argument must be None for now" _Verbose.__init__(self, verbose) if kwargs is None: kwargs = {} self._target = target self._name = str(name or _newname()) self._args = args self._kwargs = kwargs self._daemonic = self._set_daemon() self._ident = None self._started = Event() self._stopped = False self._block = Condition(Lock()) self._initialized = True # sys.stderr is not stored in the class like # sys.exc_info since it can be changed between instances self._stderr = _sys.stderr def _set_daemon(self): # Overridden in _MainThread and _DummyThread return current_thread().daemon def __repr__(self): assert self._initialized, "Thread.__init__() was not called" status = "initial" if self._started.is_set(): status = "started" if self._stopped: status = "stopped" if self._daemonic: status += " daemon" if self._ident is not None: status += " %s" % self._ident return "<%s(%s, %s)>" % (self.__class__.__name__, self._name, status) def start(self): if not self._initialized: raise RuntimeError("thread.__init__() not called") if self._started.is_set(): raise RuntimeError("threads can only be started once") if __debug__: self._note("%s.start(): starting thread", self) with _active_limbo_lock: _limbo[self] = self try: _start_new_thread(self._bootstrap, ()) except Exception: with _active_limbo_lock: del _limbo[self] raise self._started.wait() def run(self): try: if self._target: self._target(*self._args, **self._kwargs) finally: # Avoid a refcycle if the thread is running a function with # an argument that has a member that points to the thread. del self._target, self._args, self._kwargs def _bootstrap(self): # Wrapper around the real bootstrap code that ignores # exceptions during interpreter cleanup. Those typically # happen when a daemon thread wakes up at an unfortunate # moment, finds the world around it destroyed, and raises some # random exception *** while trying to report the exception in # _bootstrap_inner() below ***. Those random exceptions # don't help anybody, and they confuse users, so we suppress # them. We suppress them only when it appears that the world # indeed has already been destroyed, so that exceptions in # _bootstrap_inner() during normal business hours are properly # reported. Also, we only suppress them for daemonic threads; # if a non-daemonic encounters this, something else is wrong. try: self._bootstrap_inner() except: if self._daemonic and _sys is None: return raise def _set_ident(self): self._ident = _get_ident() def _bootstrap_inner(self): try: self._set_ident() self._started.set() with _active_limbo_lock: _active[self._ident] = self del _limbo[self] if __debug__: self._note("%s._bootstrap(): thread started", self) if _trace_hook: self._note("%s._bootstrap(): registering trace hook", self) _sys.settrace(_trace_hook) if _profile_hook: self._note("%s._bootstrap(): registering profile hook", self) _sys.setprofile(_profile_hook) try: self.run() except SystemExit: if __debug__: self._note("%s._bootstrap(): raised SystemExit", self) except: if __debug__: self._note("%s._bootstrap(): unhandled exception", self) # If sys.stderr is no more (most likely from interpreter # shutdown) use self._stderr. Otherwise still use sys (as in # _sys) in case sys.stderr was redefined since the creation of # self. if _sys: _sys.stderr.write("Exception in thread %s:\n%s\n" % (self.name, _format_exc())) else: # Do the best job possible w/o a huge amt. of code to # approximate a traceback (code ideas from # Lib/traceback.py) exc_type, exc_value, exc_tb = self._exc_info() try: print(( "Exception in thread " + self.name + " (most likely raised during interpreter shutdown):"), file=self._stderr) print(( "Traceback (most recent call last):"), file=self._stderr) while exc_tb: print(( ' File "%s", line %s, in %s' % (exc_tb.tb_frame.f_code.co_filename, exc_tb.tb_lineno, exc_tb.tb_frame.f_code.co_name)), file=self._stderr) exc_tb = exc_tb.tb_next print(("%s: %s" % (exc_type, exc_value)), file=self._stderr) # Make sure that exc_tb gets deleted since it is a memory # hog; deleting everything else is just for thoroughness finally: del exc_type, exc_value, exc_tb else: if __debug__: self._note("%s._bootstrap(): normal return", self) finally: # Prevent a race in # test_threading.test_no_refcycle_through_target when # the exception keeps the target alive past when we # assert that it's dead. #XXX self.__exc_clear() pass finally: with _active_limbo_lock: self._stop() try: # We don't call self._delete() because it also # grabs _active_limbo_lock. del _active[_get_ident()] except: pass def _stop(self): self._block.acquire() self._stopped = True self._block.notify_all() self._block.release() def _delete(self): "Remove current thread from the dict of currently running threads." # Notes about running with _dummy_thread: # # Must take care to not raise an exception if _dummy_thread is being # used (and thus this module is being used as an instance of # dummy_threading). _dummy_thread.get_ident() always returns -1 since # there is only one thread if _dummy_thread is being used. Thus # len(_active) is always <= 1 here, and any Thread instance created # overwrites the (if any) thread currently registered in _active. # # An instance of _MainThread is always created by 'threading'. This # gets overwritten the instant an instance of Thread is created; both # threads return -1 from _dummy_thread.get_ident() and thus have the # same key in the dict. So when the _MainThread instance created by # 'threading' tries to clean itself up when atexit calls this method # it gets a KeyError if another Thread instance was created. # # This all means that KeyError from trying to delete something from # _active if dummy_threading is being used is a red herring. But # since it isn't if dummy_threading is *not* being used then don't # hide the exception. try: with _active_limbo_lock: del _active[_get_ident()] # There must not be any python code between the previous line # and after the lock is released. Otherwise a tracing function # could try to acquire the lock again in the same thread, (in # current_thread()), and would block. except KeyError: if 'dummy_threading' not in _sys.modules: raise def join(self, timeout=None): if not self._initialized: raise RuntimeError("Thread.__init__() not called") if not self._started.is_set(): raise RuntimeError("cannot join thread before it is started") if self is current_thread(): raise RuntimeError("cannot join current thread") if __debug__: if not self._stopped: self._note("%s.join(): waiting until thread stops", self) self._block.acquire() try: if timeout is None: while not self._stopped: self._block.wait() if __debug__: self._note("%s.join(): thread stopped", self) else: deadline = _time() + timeout while not self._stopped: delay = deadline - _time() if delay <= 0: if __debug__: self._note("%s.join(): timed out", self) break self._block.wait(delay) else: if __debug__: self._note("%s.join(): thread stopped", self) finally: self._block.release() @property def name(self): assert self._initialized, "Thread.__init__() not called" return self._name @name.setter def name(self, name): assert self._initialized, "Thread.__init__() not called" self._name = str(name) @property def ident(self): assert self._initialized, "Thread.__init__() not called" return self._ident def is_alive(self): assert self._initialized, "Thread.__init__() not called" return self._started.is_set() and not self._stopped isAlive = is_alive @property def daemon(self): assert self._initialized, "Thread.__init__() not called" return self._daemonic @daemon.setter def daemon(self, daemonic): if not self._initialized: raise RuntimeError("Thread.__init__() not called") if self._started.is_set(): raise RuntimeError("cannot set daemon status of active thread"); self._daemonic = daemonic def isDaemon(self): return self.daemon def setDaemon(self, daemonic): self.daemon = daemonic def getName(self): return self.name def setName(self, name): self.name = name # The timer class was contributed by Itamar Shtull-Trauring def Timer(*args, **kwargs): return _Timer(*args, **kwargs) class _Timer(Thread): """Call a function after a specified number of seconds: t = Timer(30.0, f, args=[], kwargs={}) t.start() t.cancel() # stop the timer's action if it's still waiting """ def __init__(self, interval, function, args=[], kwargs={}): Thread.__init__(self) self.interval = interval self.function = function self.args = args self.kwargs = kwargs self.finished = Event() def cancel(self): """Stop the timer if it hasn't finished yet""" self.finished.set() def run(self): self.finished.wait(self.interval) if not self.finished.is_set(): self.function(*self.args, **self.kwargs) self.finished.set() # Special thread class to represent the main thread # This is garbage collected through an exit handler class _MainThread(Thread): def __init__(self): Thread.__init__(self, name="MainThread") self._started.set() self._set_ident() with _active_limbo_lock: _active[self._ident] = self def _set_daemon(self): return False def _exitfunc(self): self._stop() t = _pickSomeNonDaemonThread() if t: if __debug__: self._note("%s: waiting for other threads", self) while t: t.join() t = _pickSomeNonDaemonThread() if __debug__: self._note("%s: exiting", self) self._delete() def _pickSomeNonDaemonThread(): for t in enumerate(): if not t.daemon and t.is_alive(): return t return None # Dummy thread class to represent threads not started here. # These aren't garbage collected when they die, nor can they be waited for. # If they invoke anything in threading.py that calls current_thread(), they # leave an entry in the _active dict forever after. # Their purpose is to return *something* from current_thread(). # They are marked as daemon threads so we won't wait for them # when we exit (conform previous semantics). class _DummyThread(Thread): def __init__(self): Thread.__init__(self, name=_newname("Dummy-%d")) # Thread.__block consumes an OS-level locking primitive, which # can never be used by a _DummyThread. Since a _DummyThread # instance is immortal, that's bad, so release this resource. del self._block self._started.set() self._set_ident() with _active_limbo_lock: _active[self._ident] = self def _set_daemon(self): return True def join(self, timeout=None): assert False, "cannot join a dummy thread" # Global API functions def current_thread(): try: return _active[_get_ident()] except KeyError: ##print "current_thread(): no current thread for", _get_ident() return _DummyThread() currentThread = current_thread def active_count(): with _active_limbo_lock: return len(_active) + len(_limbo) activeCount = active_count def _enumerate(): # Same as enumerate(), but without the lock. Internal use only. return list(_active.values()) + list(_limbo.values()) def enumerate(): with _active_limbo_lock: return list(_active.values()) + list(_limbo.values()) from _thread import stack_size # Create the main thread object, # and make it available for the interpreter # (Py_Main) as threading._shutdown. _shutdown = _MainThread()._exitfunc # get thread-local implementation, either from the thread # module, or from the python fallback try: from _thread import _local as local except ImportError: from _threading_local import local def _after_fork(): # This function is called by Python/ceval.c:PyEval_ReInitThreads which # is called from PyOS_AfterFork. Here we cleanup threading module state # that should not exist after a fork. # Reset _active_limbo_lock, in case we forked while the lock was held # by another (non-forked) thread. http://bugs.python.org/issue874900 global _active_limbo_lock _active_limbo_lock = _allocate_lock() # fork() only copied the current thread; clear references to others. new_active = {} current = current_thread() with _active_limbo_lock: for thread in _active.values(): if thread is current: # There is only one active thread. We reset the ident to # its new value since it can have changed. ident = _get_ident() thread._ident = ident new_active[ident] = thread else: # All the others are already stopped. # We don't call _Thread__stop() because it tries to acquire # thread._Thread__block which could also have been held while # we forked. thread._stopped = True _limbo.clear() _active.clear() _active.update(new_active) assert len(_active) == 1