From fa97d3f88da75e7bd9db9e68db54b6e3f474450e Mon Sep 17 00:00:00 2001 From: JonnyWong16 Date: Mon, 23 Mar 2020 18:45:35 -0700 Subject: [PATCH] Add future 0.18.2 --- lib/future/__init__.py | 93 + lib/future/backports/__init__.py | 26 + lib/future/backports/_markupbase.py | 422 +++ lib/future/backports/datetime.py | 2152 ++++++++++++ lib/future/backports/email/__init__.py | 78 + lib/future/backports/email/_encoded_words.py | 232 ++ .../backports/email/_header_value_parser.py | 2965 +++++++++++++++++ lib/future/backports/email/_parseaddr.py | 546 +++ lib/future/backports/email/_policybase.py | 365 ++ lib/future/backports/email/base64mime.py | 120 + lib/future/backports/email/charset.py | 409 +++ lib/future/backports/email/encoders.py | 90 + lib/future/backports/email/errors.py | 111 + lib/future/backports/email/feedparser.py | 525 +++ lib/future/backports/email/generator.py | 498 +++ lib/future/backports/email/header.py | 581 ++++ lib/future/backports/email/headerregistry.py | 592 ++++ lib/future/backports/email/iterators.py | 74 + lib/future/backports/email/message.py | 882 +++++ lib/future/backports/email/mime/__init__.py | 0 .../backports/email/mime/application.py | 39 + lib/future/backports/email/mime/audio.py | 74 + lib/future/backports/email/mime/base.py | 25 + lib/future/backports/email/mime/image.py | 48 + lib/future/backports/email/mime/message.py | 36 + lib/future/backports/email/mime/multipart.py | 49 + .../backports/email/mime/nonmultipart.py | 24 + lib/future/backports/email/mime/text.py | 44 + lib/future/backports/email/parser.py | 135 + lib/future/backports/email/policy.py | 193 ++ lib/future/backports/email/quoprimime.py | 326 ++ lib/future/backports/email/utils.py | 400 +++ lib/future/backports/html/__init__.py | 27 + lib/future/backports/html/entities.py | 2514 ++++++++++++++ lib/future/backports/html/parser.py | 536 +++ lib/future/backports/http/__init__.py | 0 lib/future/backports/http/client.py | 1346 ++++++++ lib/future/backports/http/cookiejar.py | 2110 ++++++++++++ lib/future/backports/http/cookies.py | 598 ++++ lib/future/backports/http/server.py | 1226 +++++++ lib/future/backports/misc.py | 944 ++++++ lib/future/backports/socket.py | 454 +++ lib/future/backports/socketserver.py | 747 +++++ lib/future/backports/test/__init__.py | 9 + lib/future/backports/test/badcert.pem | 36 + lib/future/backports/test/badkey.pem | 40 + lib/future/backports/test/dh512.pem | 9 + .../test/https_svn_python_org_root.pem | 41 + lib/future/backports/test/keycert.passwd.pem | 33 + lib/future/backports/test/keycert.pem | 31 + lib/future/backports/test/keycert2.pem | 31 + lib/future/backports/test/nokia.pem | 31 + lib/future/backports/test/nullbytecert.pem | 90 + lib/future/backports/test/nullcert.pem | 0 lib/future/backports/test/pystone.py | 272 ++ lib/future/backports/test/sha256.pem | 128 + lib/future/backports/test/ssl_cert.pem | 15 + lib/future/backports/test/ssl_key.passwd.pem | 18 + lib/future/backports/test/ssl_key.pem | 16 + lib/future/backports/test/ssl_servers.py | 207 ++ lib/future/backports/test/support.py | 2048 ++++++++++++ lib/future/backports/total_ordering.py | 38 + lib/future/backports/urllib/__init__.py | 0 lib/future/backports/urllib/error.py | 75 + lib/future/backports/urllib/parse.py | 991 ++++++ lib/future/backports/urllib/request.py | 2647 +++++++++++++++ lib/future/backports/urllib/response.py | 103 + lib/future/backports/urllib/robotparser.py | 211 ++ lib/future/backports/xmlrpc/__init__.py | 1 + lib/future/backports/xmlrpc/client.py | 1496 +++++++++ lib/future/backports/xmlrpc/server.py | 999 ++++++ lib/future/builtins/__init__.py | 51 + lib/future/builtins/disabled.py | 66 + lib/future/builtins/iterators.py | 52 + lib/future/builtins/misc.py | 135 + lib/future/builtins/new_min_max.py | 59 + lib/future/builtins/newnext.py | 70 + lib/future/builtins/newround.py | 102 + lib/future/builtins/newsuper.py | 114 + lib/future/moves/__init__.py | 8 + lib/future/moves/_dummy_thread.py | 8 + lib/future/moves/_markupbase.py | 8 + lib/future/moves/_thread.py | 8 + lib/future/moves/builtins.py | 10 + lib/future/moves/collections.py | 18 + lib/future/moves/configparser.py | 8 + lib/future/moves/copyreg.py | 12 + lib/future/moves/dbm/__init__.py | 20 + lib/future/moves/dbm/dumb.py | 9 + lib/future/moves/dbm/gnu.py | 9 + lib/future/moves/dbm/ndbm.py | 9 + lib/future/moves/html/__init__.py | 31 + lib/future/moves/html/entities.py | 8 + lib/future/moves/html/parser.py | 8 + lib/future/moves/http/__init__.py | 4 + lib/future/moves/http/client.py | 8 + lib/future/moves/http/cookiejar.py | 8 + lib/future/moves/http/cookies.py | 9 + lib/future/moves/http/server.py | 20 + lib/future/moves/itertools.py | 8 + lib/future/moves/pickle.py | 11 + lib/future/moves/queue.py | 8 + lib/future/moves/reprlib.py | 8 + lib/future/moves/socketserver.py | 8 + lib/future/moves/subprocess.py | 11 + lib/future/moves/sys.py | 8 + lib/future/moves/test/__init__.py | 5 + lib/future/moves/test/support.py | 10 + lib/future/moves/tkinter/__init__.py | 27 + lib/future/moves/tkinter/colorchooser.py | 12 + lib/future/moves/tkinter/commondialog.py | 12 + lib/future/moves/tkinter/constants.py | 12 + lib/future/moves/tkinter/dialog.py | 12 + lib/future/moves/tkinter/dnd.py | 12 + lib/future/moves/tkinter/filedialog.py | 12 + lib/future/moves/tkinter/font.py | 12 + lib/future/moves/tkinter/messagebox.py | 12 + lib/future/moves/tkinter/scrolledtext.py | 12 + lib/future/moves/tkinter/simpledialog.py | 12 + lib/future/moves/tkinter/tix.py | 12 + lib/future/moves/tkinter/ttk.py | 12 + lib/future/moves/urllib/__init__.py | 5 + lib/future/moves/urllib/error.py | 16 + lib/future/moves/urllib/parse.py | 28 + lib/future/moves/urllib/request.py | 94 + lib/future/moves/urllib/response.py | 12 + lib/future/moves/urllib/robotparser.py | 8 + lib/future/moves/winreg.py | 8 + lib/future/moves/xmlrpc/__init__.py | 0 lib/future/moves/xmlrpc/client.py | 7 + lib/future/moves/xmlrpc/server.py | 7 + lib/future/standard_library/__init__.py | 815 +++++ lib/future/tests/__init__.py | 0 lib/future/tests/base.py | 539 +++ lib/future/types/__init__.py | 257 ++ lib/future/types/newbytes.py | 460 +++ lib/future/types/newdict.py | 111 + lib/future/types/newint.py | 381 +++ lib/future/types/newlist.py | 95 + lib/future/types/newmemoryview.py | 29 + lib/future/types/newobject.py | 117 + lib/future/types/newopen.py | 32 + lib/future/types/newrange.py | 170 + lib/future/types/newstr.py | 426 +++ lib/future/utils/__init__.py | 767 +++++ lib/future/utils/surrogateescape.py | 198 ++ lib/libfuturize/__init__.py | 1 + lib/libfuturize/fixer_util.py | 520 +++ lib/libfuturize/fixes/__init__.py | 97 + lib/libfuturize/fixes/fix_UserDict.py | 102 + lib/libfuturize/fixes/fix_absolute_import.py | 91 + ...future__imports_except_unicode_literals.py | 26 + lib/libfuturize/fixes/fix_basestring.py | 17 + lib/libfuturize/fixes/fix_bytes.py | 24 + lib/libfuturize/fixes/fix_cmp.py | 33 + lib/libfuturize/fixes/fix_division.py | 12 + lib/libfuturize/fixes/fix_division_safe.py | 104 + lib/libfuturize/fixes/fix_execfile.py | 37 + lib/libfuturize/fixes/fix_future_builtins.py | 59 + .../fixes/fix_future_standard_library.py | 24 + .../fix_future_standard_library_urllib.py | 28 + lib/libfuturize/fixes/fix_input.py | 32 + lib/libfuturize/fixes/fix_metaclass.py | 262 ++ lib/libfuturize/fixes/fix_next_call.py | 104 + lib/libfuturize/fixes/fix_object.py | 17 + lib/libfuturize/fixes/fix_oldstr_wrap.py | 39 + .../fixes/fix_order___future__imports.py | 36 + lib/libfuturize/fixes/fix_print.py | 94 + .../fixes/fix_print_with_import.py | 22 + lib/libfuturize/fixes/fix_raise.py | 107 + .../fixes/fix_remove_old__future__imports.py | 26 + lib/libfuturize/fixes/fix_unicode_keep_u.py | 24 + .../fixes/fix_unicode_literals_import.py | 18 + .../fixes/fix_xrange_with_import.py | 20 + lib/libfuturize/main.py | 322 ++ lib/libpasteurize/__init__.py | 1 + lib/libpasteurize/fixes/__init__.py | 54 + lib/libpasteurize/fixes/feature_base.py | 57 + .../fixes/fix_add_all__future__imports.py | 24 + .../fixes/fix_add_all_future_builtins.py | 37 + .../fix_add_future_standard_library_import.py | 23 + lib/libpasteurize/fixes/fix_annotations.py | 48 + lib/libpasteurize/fixes/fix_division.py | 28 + lib/libpasteurize/fixes/fix_features.py | 86 + lib/libpasteurize/fixes/fix_fullargspec.py | 16 + .../fixes/fix_future_builtins.py | 46 + lib/libpasteurize/fixes/fix_getcwd.py | 26 + lib/libpasteurize/fixes/fix_imports.py | 112 + lib/libpasteurize/fixes/fix_imports2.py | 174 + lib/libpasteurize/fixes/fix_kwargs.py | 147 + lib/libpasteurize/fixes/fix_memoryview.py | 21 + lib/libpasteurize/fixes/fix_metaclass.py | 78 + lib/libpasteurize/fixes/fix_newstyle.py | 33 + lib/libpasteurize/fixes/fix_next.py | 43 + lib/libpasteurize/fixes/fix_printfunction.py | 17 + lib/libpasteurize/fixes/fix_raise.py | 25 + lib/libpasteurize/fixes/fix_raise_.py | 35 + lib/libpasteurize/fixes/fix_throw.py | 23 + lib/libpasteurize/fixes/fix_unpacking.py | 120 + lib/libpasteurize/main.py | 204 ++ lib/past/__init__.py | 90 + lib/past/builtins/__init__.py | 72 + lib/past/builtins/misc.py | 94 + lib/past/builtins/noniterators.py | 272 ++ lib/past/translation/__init__.py | 485 +++ lib/past/types/__init__.py | 29 + lib/past/types/basestring.py | 39 + lib/past/types/olddict.py | 96 + lib/past/types/oldstr.py | 135 + lib/past/utils/__init__.py | 97 + 210 files changed, 43159 insertions(+) create mode 100644 lib/future/__init__.py create mode 100644 lib/future/backports/__init__.py create mode 100644 lib/future/backports/_markupbase.py create mode 100644 lib/future/backports/datetime.py create mode 100644 lib/future/backports/email/__init__.py create mode 100644 lib/future/backports/email/_encoded_words.py create mode 100644 lib/future/backports/email/_header_value_parser.py create mode 100644 lib/future/backports/email/_parseaddr.py create mode 100644 lib/future/backports/email/_policybase.py create mode 100644 lib/future/backports/email/base64mime.py create mode 100644 lib/future/backports/email/charset.py create mode 100644 lib/future/backports/email/encoders.py create mode 100644 lib/future/backports/email/errors.py create mode 100644 lib/future/backports/email/feedparser.py create mode 100644 lib/future/backports/email/generator.py create mode 100644 lib/future/backports/email/header.py create mode 100644 lib/future/backports/email/headerregistry.py create mode 100644 lib/future/backports/email/iterators.py create mode 100644 lib/future/backports/email/message.py create mode 100644 lib/future/backports/email/mime/__init__.py create mode 100644 lib/future/backports/email/mime/application.py create mode 100644 lib/future/backports/email/mime/audio.py create mode 100644 lib/future/backports/email/mime/base.py create mode 100644 lib/future/backports/email/mime/image.py create mode 100644 lib/future/backports/email/mime/message.py create mode 100644 lib/future/backports/email/mime/multipart.py create mode 100644 lib/future/backports/email/mime/nonmultipart.py create mode 100644 lib/future/backports/email/mime/text.py create mode 100644 lib/future/backports/email/parser.py create mode 100644 lib/future/backports/email/policy.py create mode 100644 lib/future/backports/email/quoprimime.py create mode 100644 lib/future/backports/email/utils.py create mode 100644 lib/future/backports/html/__init__.py create mode 100644 lib/future/backports/html/entities.py create mode 100644 lib/future/backports/html/parser.py create mode 100644 lib/future/backports/http/__init__.py create mode 100644 lib/future/backports/http/client.py create mode 100644 lib/future/backports/http/cookiejar.py create mode 100644 lib/future/backports/http/cookies.py create mode 100644 lib/future/backports/http/server.py create mode 100644 lib/future/backports/misc.py create mode 100644 lib/future/backports/socket.py create mode 100644 lib/future/backports/socketserver.py create mode 100644 lib/future/backports/test/__init__.py create mode 100644 lib/future/backports/test/badcert.pem create mode 100644 lib/future/backports/test/badkey.pem create mode 100644 lib/future/backports/test/dh512.pem create mode 100644 lib/future/backports/test/https_svn_python_org_root.pem create mode 100644 lib/future/backports/test/keycert.passwd.pem create mode 100644 lib/future/backports/test/keycert.pem create mode 100644 lib/future/backports/test/keycert2.pem create mode 100644 lib/future/backports/test/nokia.pem create mode 100644 lib/future/backports/test/nullbytecert.pem create mode 100644 lib/future/backports/test/nullcert.pem create mode 100644 lib/future/backports/test/pystone.py create mode 100644 lib/future/backports/test/sha256.pem create mode 100644 lib/future/backports/test/ssl_cert.pem create mode 100644 lib/future/backports/test/ssl_key.passwd.pem create mode 100644 lib/future/backports/test/ssl_key.pem create mode 100644 lib/future/backports/test/ssl_servers.py create mode 100644 lib/future/backports/test/support.py create mode 100644 lib/future/backports/total_ordering.py create mode 100644 lib/future/backports/urllib/__init__.py create mode 100644 lib/future/backports/urllib/error.py create mode 100644 lib/future/backports/urllib/parse.py create mode 100644 lib/future/backports/urllib/request.py create mode 100644 lib/future/backports/urllib/response.py create mode 100644 lib/future/backports/urllib/robotparser.py create mode 100644 lib/future/backports/xmlrpc/__init__.py create mode 100644 lib/future/backports/xmlrpc/client.py create mode 100644 lib/future/backports/xmlrpc/server.py create mode 100644 lib/future/builtins/__init__.py create mode 100644 lib/future/builtins/disabled.py create mode 100644 lib/future/builtins/iterators.py create mode 100644 lib/future/builtins/misc.py create mode 100644 lib/future/builtins/new_min_max.py create mode 100644 lib/future/builtins/newnext.py create mode 100644 lib/future/builtins/newround.py create mode 100644 lib/future/builtins/newsuper.py create mode 100644 lib/future/moves/__init__.py create mode 100644 lib/future/moves/_dummy_thread.py create mode 100644 lib/future/moves/_markupbase.py create mode 100644 lib/future/moves/_thread.py create mode 100644 lib/future/moves/builtins.py create mode 100644 lib/future/moves/collections.py create mode 100644 lib/future/moves/configparser.py create mode 100644 lib/future/moves/copyreg.py create mode 100644 lib/future/moves/dbm/__init__.py create mode 100644 lib/future/moves/dbm/dumb.py create mode 100644 lib/future/moves/dbm/gnu.py create mode 100644 lib/future/moves/dbm/ndbm.py create mode 100644 lib/future/moves/html/__init__.py create mode 100644 lib/future/moves/html/entities.py create mode 100644 lib/future/moves/html/parser.py create mode 100644 lib/future/moves/http/__init__.py create mode 100644 lib/future/moves/http/client.py create mode 100644 lib/future/moves/http/cookiejar.py create mode 100644 lib/future/moves/http/cookies.py create mode 100644 lib/future/moves/http/server.py create mode 100644 lib/future/moves/itertools.py create mode 100644 lib/future/moves/pickle.py create mode 100644 lib/future/moves/queue.py create mode 100644 lib/future/moves/reprlib.py create mode 100644 lib/future/moves/socketserver.py create mode 100644 lib/future/moves/subprocess.py create mode 100644 lib/future/moves/sys.py create mode 100644 lib/future/moves/test/__init__.py create mode 100644 lib/future/moves/test/support.py create mode 100644 lib/future/moves/tkinter/__init__.py create mode 100644 lib/future/moves/tkinter/colorchooser.py create mode 100644 lib/future/moves/tkinter/commondialog.py create mode 100644 lib/future/moves/tkinter/constants.py create mode 100644 lib/future/moves/tkinter/dialog.py create mode 100644 lib/future/moves/tkinter/dnd.py create mode 100644 lib/future/moves/tkinter/filedialog.py create mode 100644 lib/future/moves/tkinter/font.py create mode 100644 lib/future/moves/tkinter/messagebox.py create mode 100644 lib/future/moves/tkinter/scrolledtext.py create mode 100644 lib/future/moves/tkinter/simpledialog.py create mode 100644 lib/future/moves/tkinter/tix.py create mode 100644 lib/future/moves/tkinter/ttk.py create mode 100644 lib/future/moves/urllib/__init__.py create mode 100644 lib/future/moves/urllib/error.py create mode 100644 lib/future/moves/urllib/parse.py create mode 100644 lib/future/moves/urllib/request.py create mode 100644 lib/future/moves/urllib/response.py create mode 100644 lib/future/moves/urllib/robotparser.py create mode 100644 lib/future/moves/winreg.py create mode 100644 lib/future/moves/xmlrpc/__init__.py create mode 100644 lib/future/moves/xmlrpc/client.py create mode 100644 lib/future/moves/xmlrpc/server.py create mode 100644 lib/future/standard_library/__init__.py create mode 100644 lib/future/tests/__init__.py create mode 100644 lib/future/tests/base.py create mode 100644 lib/future/types/__init__.py create mode 100644 lib/future/types/newbytes.py create mode 100644 lib/future/types/newdict.py create mode 100644 lib/future/types/newint.py create mode 100644 lib/future/types/newlist.py create mode 100644 lib/future/types/newmemoryview.py create mode 100644 lib/future/types/newobject.py create mode 100644 lib/future/types/newopen.py create mode 100644 lib/future/types/newrange.py create mode 100644 lib/future/types/newstr.py create mode 100644 lib/future/utils/__init__.py create mode 100644 lib/future/utils/surrogateescape.py create mode 100644 lib/libfuturize/__init__.py create mode 100644 lib/libfuturize/fixer_util.py create mode 100644 lib/libfuturize/fixes/__init__.py create mode 100644 lib/libfuturize/fixes/fix_UserDict.py create mode 100644 lib/libfuturize/fixes/fix_absolute_import.py create mode 100644 lib/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py create mode 100644 lib/libfuturize/fixes/fix_basestring.py create mode 100644 lib/libfuturize/fixes/fix_bytes.py create mode 100644 lib/libfuturize/fixes/fix_cmp.py create mode 100644 lib/libfuturize/fixes/fix_division.py create mode 100644 lib/libfuturize/fixes/fix_division_safe.py create mode 100644 lib/libfuturize/fixes/fix_execfile.py create mode 100644 lib/libfuturize/fixes/fix_future_builtins.py create mode 100644 lib/libfuturize/fixes/fix_future_standard_library.py create mode 100644 lib/libfuturize/fixes/fix_future_standard_library_urllib.py create mode 100644 lib/libfuturize/fixes/fix_input.py create mode 100644 lib/libfuturize/fixes/fix_metaclass.py create mode 100644 lib/libfuturize/fixes/fix_next_call.py create mode 100644 lib/libfuturize/fixes/fix_object.py create mode 100644 lib/libfuturize/fixes/fix_oldstr_wrap.py create mode 100644 lib/libfuturize/fixes/fix_order___future__imports.py create mode 100644 lib/libfuturize/fixes/fix_print.py create mode 100644 lib/libfuturize/fixes/fix_print_with_import.py create mode 100644 lib/libfuturize/fixes/fix_raise.py create mode 100644 lib/libfuturize/fixes/fix_remove_old__future__imports.py create mode 100644 lib/libfuturize/fixes/fix_unicode_keep_u.py create mode 100644 lib/libfuturize/fixes/fix_unicode_literals_import.py create mode 100644 lib/libfuturize/fixes/fix_xrange_with_import.py create mode 100644 lib/libfuturize/main.py create mode 100644 lib/libpasteurize/__init__.py create mode 100644 lib/libpasteurize/fixes/__init__.py create mode 100644 lib/libpasteurize/fixes/feature_base.py create mode 100644 lib/libpasteurize/fixes/fix_add_all__future__imports.py create mode 100644 lib/libpasteurize/fixes/fix_add_all_future_builtins.py create mode 100644 lib/libpasteurize/fixes/fix_add_future_standard_library_import.py create mode 100644 lib/libpasteurize/fixes/fix_annotations.py create mode 100644 lib/libpasteurize/fixes/fix_division.py create mode 100644 lib/libpasteurize/fixes/fix_features.py create mode 100644 lib/libpasteurize/fixes/fix_fullargspec.py create mode 100644 lib/libpasteurize/fixes/fix_future_builtins.py create mode 100644 lib/libpasteurize/fixes/fix_getcwd.py create mode 100644 lib/libpasteurize/fixes/fix_imports.py create mode 100644 lib/libpasteurize/fixes/fix_imports2.py create mode 100644 lib/libpasteurize/fixes/fix_kwargs.py create mode 100644 lib/libpasteurize/fixes/fix_memoryview.py create mode 100644 lib/libpasteurize/fixes/fix_metaclass.py create mode 100644 lib/libpasteurize/fixes/fix_newstyle.py create mode 100644 lib/libpasteurize/fixes/fix_next.py create mode 100644 lib/libpasteurize/fixes/fix_printfunction.py create mode 100644 lib/libpasteurize/fixes/fix_raise.py create mode 100644 lib/libpasteurize/fixes/fix_raise_.py create mode 100644 lib/libpasteurize/fixes/fix_throw.py create mode 100644 lib/libpasteurize/fixes/fix_unpacking.py create mode 100644 lib/libpasteurize/main.py create mode 100644 lib/past/__init__.py create mode 100644 lib/past/builtins/__init__.py create mode 100644 lib/past/builtins/misc.py create mode 100644 lib/past/builtins/noniterators.py create mode 100644 lib/past/translation/__init__.py create mode 100644 lib/past/types/__init__.py create mode 100644 lib/past/types/basestring.py create mode 100644 lib/past/types/olddict.py create mode 100644 lib/past/types/oldstr.py create mode 100644 lib/past/utils/__init__.py diff --git a/lib/future/__init__.py b/lib/future/__init__.py new file mode 100644 index 00000000..ad419d67 --- /dev/null +++ b/lib/future/__init__.py @@ -0,0 +1,93 @@ +""" +future: Easy, safe support for Python 2/3 compatibility +======================================================= + +``future`` is the missing compatibility layer between Python 2 and Python +3. It allows you to use a single, clean Python 3.x-compatible codebase to +support both Python 2 and Python 3 with minimal overhead. + +It is designed to be used as follows:: + + from __future__ import (absolute_import, division, + print_function, unicode_literals) + from builtins import ( + bytes, dict, int, list, object, range, str, + ascii, chr, hex, input, next, oct, open, + pow, round, super, + filter, map, zip) + +followed by predominantly standard, idiomatic Python 3 code that then runs +similarly on Python 2.6/2.7 and Python 3.3+. + +The imports have no effect on Python 3. On Python 2, they shadow the +corresponding builtins, which normally have different semantics on Python 3 +versus 2, to provide their Python 3 semantics. + + +Standard library reorganization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``future`` supports the standard library reorganization (PEP 3108) through the +following Py3 interfaces: + + >>> # Top-level packages with Py3 names provided on Py2: + >>> import html.parser + >>> import queue + >>> import tkinter.dialog + >>> import xmlrpc.client + >>> # etc. + + >>> # Aliases provided for extensions to existing Py2 module names: + >>> from future.standard_library import install_aliases + >>> install_aliases() + + >>> from collections import Counter, OrderedDict # backported to Py2.6 + >>> from collections import UserDict, UserList, UserString + >>> import urllib.request + >>> from itertools import filterfalse, zip_longest + >>> from subprocess import getoutput, getstatusoutput + + +Automatic conversion +-------------------- + +An included script called `futurize +`_ aids in converting +code (from either Python 2 or Python 3) to code compatible with both +platforms. It is similar to ``python-modernize`` but goes further in +providing Python 3 compatibility through the use of the backported types +and builtin functions in ``future``. + + +Documentation +------------- + +See: http://python-future.org + + +Credits +------- + +:Author: Ed Schofield, Jordan M. Adler, et al +:Sponsor: Python Charmers Pty Ltd, Australia, and Python Charmers Pte + Ltd, Singapore. http://pythoncharmers.com +:Others: See docs/credits.rst or http://python-future.org/credits.html + + +Licensing +--------- +Copyright 2013-2019 Python Charmers Pty Ltd, Australia. +The software is distributed under an MIT licence. See LICENSE.txt. + +""" + +__title__ = 'future' +__author__ = 'Ed Schofield' +__license__ = 'MIT' +__copyright__ = 'Copyright 2013-2019 Python Charmers Pty Ltd' +__ver_major__ = 0 +__ver_minor__ = 18 +__ver_patch__ = 2 +__ver_sub__ = '' +__version__ = "%d.%d.%d%s" % (__ver_major__, __ver_minor__, + __ver_patch__, __ver_sub__) diff --git a/lib/future/backports/__init__.py b/lib/future/backports/__init__.py new file mode 100644 index 00000000..c71e0653 --- /dev/null +++ b/lib/future/backports/__init__.py @@ -0,0 +1,26 @@ +""" +future.backports package +""" + +from __future__ import absolute_import + +import sys + +__future_module__ = True +from future.standard_library import import_top_level_modules + + +if sys.version_info[0] >= 3: + import_top_level_modules() + + +from .misc import (ceil, + OrderedDict, + Counter, + ChainMap, + check_output, + count, + recursive_repr, + _count_elements, + cmp_to_key + ) diff --git a/lib/future/backports/_markupbase.py b/lib/future/backports/_markupbase.py new file mode 100644 index 00000000..d51bfc7e --- /dev/null +++ b/lib/future/backports/_markupbase.py @@ -0,0 +1,422 @@ +"""Shared support for scanning document type declarations in HTML and XHTML. + +Backported for python-future from Python 3.3. Reason: ParserBase is an +old-style class in the Python 2.7 source of markupbase.py, which I suspect +might be the cause of sporadic unit-test failures on travis-ci.org with +test_htmlparser.py. The test failures look like this: + + ====================================================================== + +ERROR: test_attr_entity_replacement (future.tests.test_htmlparser.AttributesStrictTestCase) + +---------------------------------------------------------------------- + +Traceback (most recent call last): + File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 661, in test_attr_entity_replacement + [("starttag", "a", [("b", "&><\"'")])]) + File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 93, in _run_check + collector = self.get_collector() + File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 617, in get_collector + return EventCollector(strict=True) + File "/home/travis/build/edschofield/python-future/future/tests/test_htmlparser.py", line 27, in __init__ + html.parser.HTMLParser.__init__(self, *args, **kw) + File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 135, in __init__ + self.reset() + File "/home/travis/build/edschofield/python-future/future/backports/html/parser.py", line 143, in reset + _markupbase.ParserBase.reset(self) + +TypeError: unbound method reset() must be called with ParserBase instance as first argument (got EventCollector instance instead) + +This module is used as a foundation for the html.parser module. It has no +documented public API and should not be used directly. + +""" + +import re + +_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match +_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match +_commentclose = re.compile(r'--\s*>') +_markedsectionclose = re.compile(r']\s*]\s*>') + +# An analysis of the MS-Word extensions is available at +# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf + +_msmarkedsectionclose = re.compile(r']\s*>') + +del re + + +class ParserBase(object): + """Parser base class which provides some common support methods used + by the SGML/HTML and XHTML parsers.""" + + def __init__(self): + if self.__class__ is ParserBase: + raise RuntimeError( + "_markupbase.ParserBase must be subclassed") + + def error(self, message): + raise NotImplementedError( + "subclasses of ParserBase must override error()") + + def reset(self): + self.lineno = 1 + self.offset = 0 + + def getpos(self): + """Return current line number and offset.""" + return self.lineno, self.offset + + # Internal -- update line number and offset. This should be + # called for each piece of data exactly once, in order -- in other + # words the concatenation of all the input strings to this + # function should be exactly the entire input. + def updatepos(self, i, j): + if i >= j: + return j + rawdata = self.rawdata + nlines = rawdata.count("\n", i, j) + if nlines: + self.lineno = self.lineno + nlines + pos = rawdata.rindex("\n", i, j) # Should not fail + self.offset = j-(pos+1) + else: + self.offset = self.offset + j-i + return j + + _decl_otherchars = '' + + # Internal -- parse declaration (for use by subclasses). + def parse_declaration(self, i): + # This is some sort of declaration; in "HTML as + # deployed," this should only be the document type + # declaration (""). + # ISO 8879:1986, however, has more complex + # declaration syntax for elements in , including: + # --comment-- + # [marked section] + # name in the following list: ENTITY, DOCTYPE, ELEMENT, + # ATTLIST, NOTATION, SHORTREF, USEMAP, + # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM + rawdata = self.rawdata + j = i + 2 + assert rawdata[i:j] == "": + # the empty comment + return j + 1 + if rawdata[j:j+1] in ("-", ""): + # Start of comment followed by buffer boundary, + # or just a buffer boundary. + return -1 + # A simple, practical version could look like: ((name|stringlit) S*) + '>' + n = len(rawdata) + if rawdata[j:j+2] == '--': #comment + # Locate --.*-- as the body of the comment + return self.parse_comment(i) + elif rawdata[j] == '[': #marked section + # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section + # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA + # Note that this is extended by Microsoft Office "Save as Web" function + # to include [if...] and [endif]. + return self.parse_marked_section(i) + else: #all other declaration elements + decltype, j = self._scan_name(j, i) + if j < 0: + return j + if decltype == "doctype": + self._decl_otherchars = '' + while j < n: + c = rawdata[j] + if c == ">": + # end of declaration syntax + data = rawdata[i+2:j] + if decltype == "doctype": + self.handle_decl(data) + else: + # According to the HTML5 specs sections "8.2.4.44 Bogus + # comment state" and "8.2.4.45 Markup declaration open + # state", a comment token should be emitted. + # Calling unknown_decl provides more flexibility though. + self.unknown_decl(data) + return j + 1 + if c in "\"'": + m = _declstringlit_match(rawdata, j) + if not m: + return -1 # incomplete + j = m.end() + elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": + name, j = self._scan_name(j, i) + elif c in self._decl_otherchars: + j = j + 1 + elif c == "[": + # this could be handled in a separate doctype parser + if decltype == "doctype": + j = self._parse_doctype_subset(j + 1, i) + elif decltype in set(["attlist", "linktype", "link", "element"]): + # must tolerate []'d groups in a content model in an element declaration + # also in data attribute specifications of attlist declaration + # also link type declaration subsets in linktype declarations + # also link attribute specification lists in link declarations + self.error("unsupported '[' char in %s declaration" % decltype) + else: + self.error("unexpected '[' char in declaration") + else: + self.error( + "unexpected %r char in declaration" % rawdata[j]) + if j < 0: + return j + return -1 # incomplete + + # Internal -- parse a marked section + # Override this to handle MS-word extension syntax content + def parse_marked_section(self, i, report=1): + rawdata= self.rawdata + assert rawdata[i:i+3] == ' ending + match= _markedsectionclose.search(rawdata, i+3) + elif sectName in set(["if", "else", "endif"]): + # look for MS Office ]> ending + match= _msmarkedsectionclose.search(rawdata, i+3) + else: + self.error('unknown status keyword %r in marked section' % rawdata[i+3:j]) + if not match: + return -1 + if report: + j = match.start(0) + self.unknown_decl(rawdata[i+3: j]) + return match.end(0) + + # Internal -- parse comment, return length or -1 if not terminated + def parse_comment(self, i, report=1): + rawdata = self.rawdata + if rawdata[i:i+4] != ' delimiter transport-padding + # --> CRLF body-part + for body_part in msgtexts: + # delimiter transport-padding CRLF + self.write(self._NL + '--' + boundary + self._NL) + # body-part + self._fp.write(body_part) + # close-delimiter transport-padding + self.write(self._NL + '--' + boundary + '--') + if msg.epilogue is not None: + self.write(self._NL) + if self._mangle_from_: + epilogue = fcre.sub('>From ', msg.epilogue) + else: + epilogue = msg.epilogue + self._write_lines(epilogue) + + def _handle_multipart_signed(self, msg): + # The contents of signed parts has to stay unmodified in order to keep + # the signature intact per RFC1847 2.1, so we disable header wrapping. + # RDM: This isn't enough to completely preserve the part, but it helps. + p = self.policy + self.policy = p.clone(max_line_length=0) + try: + self._handle_multipart(msg) + finally: + self.policy = p + + def _handle_message_delivery_status(self, msg): + # We can't just write the headers directly to self's file object + # because this will leave an extra newline between the last header + # block and the boundary. Sigh. + blocks = [] + for part in msg.get_payload(): + s = self._new_buffer() + g = self.clone(s) + g.flatten(part, unixfrom=False, linesep=self._NL) + text = s.getvalue() + lines = text.split(self._encoded_NL) + # Strip off the unnecessary trailing empty line + if lines and lines[-1] == self._encoded_EMPTY: + blocks.append(self._encoded_NL.join(lines[:-1])) + else: + blocks.append(text) + # Now join all the blocks with an empty line. This has the lovely + # effect of separating each block with an empty line, but not adding + # an extra one after the last one. + self._fp.write(self._encoded_NL.join(blocks)) + + def _handle_message(self, msg): + s = self._new_buffer() + g = self.clone(s) + # The payload of a message/rfc822 part should be a multipart sequence + # of length 1. The zeroth element of the list should be the Message + # object for the subpart. Extract that object, stringify it, and + # write it out. + # Except, it turns out, when it's a string instead, which happens when + # and only when HeaderParser is used on a message of mime type + # message/rfc822. Such messages are generated by, for example, + # Groupwise when forwarding unadorned messages. (Issue 7970.) So + # in that case we just emit the string body. + payload = msg._payload + if isinstance(payload, list): + g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL) + payload = s.getvalue() + else: + payload = self._encode(payload) + self._fp.write(payload) + + # This used to be a module level function; we use a classmethod for this + # and _compile_re so we can continue to provide the module level function + # for backward compatibility by doing + # _make_boudary = Generator._make_boundary + # at the end of the module. It *is* internal, so we could drop that... + @classmethod + def _make_boundary(cls, text=None): + # Craft a random boundary. If text is given, ensure that the chosen + # boundary doesn't appear in the text. + token = random.randrange(sys.maxsize) + boundary = ('=' * 15) + (_fmt % token) + '==' + if text is None: + return boundary + b = boundary + counter = 0 + while True: + cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE) + if not cre.search(text): + break + b = boundary + '.' + str(counter) + counter += 1 + return b + + @classmethod + def _compile_re(cls, s, flags): + return re.compile(s, flags) + +class BytesGenerator(Generator): + """Generates a bytes version of a Message object tree. + + Functionally identical to the base Generator except that the output is + bytes and not string. When surrogates were used in the input to encode + bytes, these are decoded back to bytes for output. If the policy has + cte_type set to 7bit, then the message is transformed such that the + non-ASCII bytes are properly content transfer encoded, using the charset + unknown-8bit. + + The outfp object must accept bytes in its write method. + """ + + # Bytes versions of this constant for use in manipulating data from + # the BytesIO buffer. + _encoded_EMPTY = b'' + + def write(self, s): + self._fp.write(str(s).encode('ascii', 'surrogateescape')) + + def _new_buffer(self): + return BytesIO() + + def _encode(self, s): + return s.encode('ascii') + + def _write_headers(self, msg): + # This is almost the same as the string version, except for handling + # strings with 8bit bytes. + for h, v in msg.raw_items(): + self._fp.write(self.policy.fold_binary(h, v)) + # A blank line always separates headers from body + self.write(self._NL) + + def _handle_text(self, msg): + # If the string has surrogates the original source was bytes, so + # just write it back out. + if msg._payload is None: + return + if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit': + if self._mangle_from_: + msg._payload = fcre.sub(">From ", msg._payload) + self._write_lines(msg._payload) + else: + super(BytesGenerator,self)._handle_text(msg) + + # Default body handler + _writeBody = _handle_text + + @classmethod + def _compile_re(cls, s, flags): + return re.compile(s.encode('ascii'), flags) + + +_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' + +class DecodedGenerator(Generator): + """Generates a text representation of a message. + + Like the Generator base class, except that non-text parts are substituted + with a format string representing the part. + """ + def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): + """Like Generator.__init__() except that an additional optional + argument is allowed. + + Walks through all subparts of a message. If the subpart is of main + type `text', then it prints the decoded payload of the subpart. + + Otherwise, fmt is a format string that is used instead of the message + payload. fmt is expanded with the following keywords (in + %(keyword)s format): + + type : Full MIME type of the non-text part + maintype : Main MIME type of the non-text part + subtype : Sub-MIME type of the non-text part + filename : Filename of the non-text part + description: Description associated with the non-text part + encoding : Content transfer encoding of the non-text part + + The default value for fmt is None, meaning + + [Non-text (%(type)s) part of message omitted, filename %(filename)s] + """ + Generator.__init__(self, outfp, mangle_from_, maxheaderlen) + if fmt is None: + self._fmt = _FMT + else: + self._fmt = fmt + + def _dispatch(self, msg): + for part in msg.walk(): + maintype = part.get_content_maintype() + if maintype == 'text': + print(part.get_payload(decode=False), file=self) + elif maintype == 'multipart': + # Just skip this + pass + else: + print(self._fmt % { + 'type' : part.get_content_type(), + 'maintype' : part.get_content_maintype(), + 'subtype' : part.get_content_subtype(), + 'filename' : part.get_filename('[no filename]'), + 'description': part.get('Content-Description', + '[no description]'), + 'encoding' : part.get('Content-Transfer-Encoding', + '[no encoding]'), + }, file=self) + + +# Helper used by Generator._make_boundary +_width = len(repr(sys.maxsize-1)) +_fmt = '%%0%dd' % _width + +# Backward compatibility +_make_boundary = Generator._make_boundary diff --git a/lib/future/backports/email/header.py b/lib/future/backports/email/header.py new file mode 100644 index 00000000..63bf038c --- /dev/null +++ b/lib/future/backports/email/header.py @@ -0,0 +1,581 @@ +# Copyright (C) 2002-2007 Python Software Foundation +# Author: Ben Gertzfield, Barry Warsaw +# Contact: email-sig@python.org + +"""Header encoding and decoding functionality.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import bytes, range, str, super, zip + +__all__ = [ + 'Header', + 'decode_header', + 'make_header', + ] + +import re +import binascii + +from future.backports import email +from future.backports.email import base64mime +from future.backports.email.errors import HeaderParseError +import future.backports.email.charset as _charset + +# Helpers +from future.backports.email.quoprimime import _max_append, header_decode + +Charset = _charset.Charset + +NL = '\n' +SPACE = ' ' +BSPACE = b' ' +SPACE8 = ' ' * 8 +EMPTYSTRING = '' +MAXLINELEN = 78 +FWS = ' \t' + +USASCII = Charset('us-ascii') +UTF8 = Charset('utf-8') + +# Match encoded-word strings in the form =?charset?q?Hello_World?= +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the encoded string + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE) + +# Field name regexp, including trailing colon, but not separating whitespace, +# according to RFC 2822. Character range is from tilde to exclamation mark. +# For use with .match() +fcre = re.compile(r'[\041-\176]+:$') + +# Find a header embedded in a putative header value. Used to check for +# header injection attack. +_embeded_header = re.compile(r'\n[^ \t]+:') + + +def decode_header(header): + """Decode a message header value without converting charset. + + Returns a list of (string, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. + + header may be a string that may or may not contain RFC2047 encoded words, + or it may be a Header object. + + An email.errors.HeaderParseError may be raised when certain decoding error + occurs (e.g. a base64 decoding exception). + """ + # If it is a Header object, we can just return the encoded chunks. + if hasattr(header, '_chunks'): + return [(_charset._encode(string, str(charset)), str(charset)) + for string, charset in header._chunks] + # If no encoding, just return the header with no charset. + if not ecre.search(header): + return [(header, None)] + # First step is to parse all the encoded parts into triplets of the form + # (encoded_string, encoding, charset). For unencoded strings, the last + # two parts will be None. + words = [] + for line in header.splitlines(): + parts = ecre.split(line) + first = True + while parts: + unencoded = parts.pop(0) + if first: + unencoded = unencoded.lstrip() + first = False + if unencoded: + words.append((unencoded, None, None)) + if parts: + charset = parts.pop(0).lower() + encoding = parts.pop(0).lower() + encoded = parts.pop(0) + words.append((encoded, encoding, charset)) + # Now loop over words and remove words that consist of whitespace + # between two encoded strings. + import sys + droplist = [] + for n, w in enumerate(words): + if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace(): + droplist.append(n-1) + for d in reversed(droplist): + del words[d] + + # The next step is to decode each encoded word by applying the reverse + # base64 or quopri transformation. decoded_words is now a list of the + # form (decoded_word, charset). + decoded_words = [] + for encoded_string, encoding, charset in words: + if encoding is None: + # This is an unencoded word. + decoded_words.append((encoded_string, charset)) + elif encoding == 'q': + word = header_decode(encoded_string) + decoded_words.append((word, charset)) + elif encoding == 'b': + paderr = len(encoded_string) % 4 # Postel's law: add missing padding + if paderr: + encoded_string += '==='[:4 - paderr] + try: + word = base64mime.decode(encoded_string) + except binascii.Error: + raise HeaderParseError('Base64 decoding error') + else: + decoded_words.append((word, charset)) + else: + raise AssertionError('Unexpected encoding: ' + encoding) + # Now convert all words to bytes and collapse consecutive runs of + # similarly encoded words. + collapsed = [] + last_word = last_charset = None + for word, charset in decoded_words: + if isinstance(word, str): + word = bytes(word, 'raw-unicode-escape') + if last_word is None: + last_word = word + last_charset = charset + elif charset != last_charset: + collapsed.append((last_word, last_charset)) + last_word = word + last_charset = charset + elif last_charset is None: + last_word += BSPACE + word + else: + last_word += word + collapsed.append((last_word, last_charset)) + return collapsed + + +def make_header(decoded_seq, maxlinelen=None, header_name=None, + continuation_ws=' '): + """Create a Header from a sequence of pairs as returned by decode_header() + + decode_header() takes a header value string and returns a sequence of + pairs of the format (decoded_string, charset) where charset is the string + name of the character set. + + This function takes one of those sequence of pairs and returns a Header + instance. Optional maxlinelen, header_name, and continuation_ws are as in + the Header constructor. + """ + h = Header(maxlinelen=maxlinelen, header_name=header_name, + continuation_ws=continuation_ws) + for s, charset in decoded_seq: + # None means us-ascii but we can simply pass it on to h.append() + if charset is not None and not isinstance(charset, Charset): + charset = Charset(charset) + h.append(s, charset) + return h + + +class Header(object): + def __init__(self, s=None, charset=None, + maxlinelen=None, header_name=None, + continuation_ws=' ', errors='strict'): + """Create a MIME-compliant header that can contain many character sets. + + Optional s is the initial header value. If None, the initial header + value is not set. You can later append to the header with .append() + method calls. s may be a byte string or a Unicode string, but see the + .append() documentation for semantics. + + Optional charset serves two purposes: it has the same meaning as the + charset argument to the .append() method. It also sets the default + character set for all subsequent .append() calls that omit the charset + argument. If charset is not provided in the constructor, the us-ascii + charset is used both as s's initial charset and as the default for + subsequent .append() calls. + + The maximum line length can be specified explicitly via maxlinelen. For + splitting the first line to a shorter value (to account for the field + header which isn't included in s, e.g. `Subject') pass in the name of + the field in header_name. The default maxlinelen is 78 as recommended + by RFC 2822. + + continuation_ws must be RFC 2822 compliant folding whitespace (usually + either a space or a hard tab) which will be prepended to continuation + lines. + + errors is passed through to the .append() call. + """ + if charset is None: + charset = USASCII + elif not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + self._continuation_ws = continuation_ws + self._chunks = [] + if s is not None: + self.append(s, charset, errors) + if maxlinelen is None: + maxlinelen = MAXLINELEN + self._maxlinelen = maxlinelen + if header_name is None: + self._headerlen = 0 + else: + # Take the separating colon and space into account. + self._headerlen = len(header_name) + 2 + + def __str__(self): + """Return the string value of the header.""" + self._normalize() + uchunks = [] + lastcs = None + lastspace = None + for string, charset in self._chunks: + # We must preserve spaces between encoded and non-encoded word + # boundaries, which means for us we need to add a space when we go + # from a charset to None/us-ascii, or from None/us-ascii to a + # charset. Only do this for the second and subsequent chunks. + # Don't add a space if the None/us-ascii string already has + # a space (trailing or leading depending on transition) + nextcs = charset + if nextcs == _charset.UNKNOWN8BIT: + original_bytes = string.encode('ascii', 'surrogateescape') + string = original_bytes.decode('ascii', 'replace') + if uchunks: + hasspace = string and self._nonctext(string[0]) + if lastcs not in (None, 'us-ascii'): + if nextcs in (None, 'us-ascii') and not hasspace: + uchunks.append(SPACE) + nextcs = None + elif nextcs not in (None, 'us-ascii') and not lastspace: + uchunks.append(SPACE) + lastspace = string and self._nonctext(string[-1]) + lastcs = nextcs + uchunks.append(string) + return EMPTYSTRING.join(uchunks) + + # Rich comparison operators for equality only. BAW: does it make sense to + # have or explicitly disable <, <=, >, >= operators? + def __eq__(self, other): + # other may be a Header or a string. Both are fine so coerce + # ourselves to a unicode (of the unencoded header value), swap the + # args and do another comparison. + return other == str(self) + + def __ne__(self, other): + return not self == other + + def append(self, s, charset=None, errors='strict'): + """Append a string to the MIME header. + + Optional charset, if given, should be a Charset instance or the name + of a character set (which will be converted to a Charset instance). A + value of None (the default) means that the charset given in the + constructor is used. + + s may be a byte string or a Unicode string. If it is a byte string + (i.e. isinstance(s, str) is false), then charset is the encoding of + that byte string, and a UnicodeError will be raised if the string + cannot be decoded with that charset. If s is a Unicode string, then + charset is a hint specifying the character set of the characters in + the string. In either case, when producing an RFC 2822 compliant + header using RFC 2047 rules, the string will be encoded using the + output codec of the charset. If the string cannot be encoded to the + output codec, a UnicodeError will be raised. + + Optional `errors' is passed as the errors argument to the decode + call if s is a byte string. + """ + if charset is None: + charset = self._charset + elif not isinstance(charset, Charset): + charset = Charset(charset) + if not isinstance(s, str): + input_charset = charset.input_codec or 'us-ascii' + if input_charset == _charset.UNKNOWN8BIT: + s = s.decode('us-ascii', 'surrogateescape') + else: + s = s.decode(input_charset, errors) + # Ensure that the bytes we're storing can be decoded to the output + # character set, otherwise an early error is raised. + output_charset = charset.output_codec or 'us-ascii' + if output_charset != _charset.UNKNOWN8BIT: + try: + s.encode(output_charset, errors) + except UnicodeEncodeError: + if output_charset!='us-ascii': + raise + charset = UTF8 + self._chunks.append((s, charset)) + + def _nonctext(self, s): + """True if string s is not a ctext character of RFC822. + """ + return s.isspace() or s in ('(', ')', '\\') + + def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): + r"""Encode a message header into an RFC-compliant format. + + There are many issues involved in converting a given string for use in + an email header. Only certain character sets are readable in most + email clients, and as header strings can only contain a subset of + 7-bit ASCII, care must be taken to properly convert and encode (with + Base64 or quoted-printable) header strings. In addition, there is a + 75-character length limit on any given encoded header field, so + line-wrapping must be performed, even with double-byte character sets. + + Optional maxlinelen specifies the maximum length of each generated + line, exclusive of the linesep string. Individual lines may be longer + than maxlinelen if a folding point cannot be found. The first line + will be shorter by the length of the header name plus ": " if a header + name was specified at Header construction time. The default value for + maxlinelen is determined at header construction time. + + Optional splitchars is a string containing characters which should be + given extra weight by the splitting algorithm during normal header + wrapping. This is in very rough support of RFC 2822's `higher level + syntactic breaks': split points preceded by a splitchar are preferred + during line splitting, with the characters preferred in the order in + which they appear in the string. Space and tab may be included in the + string to indicate whether preference should be given to one over the + other as a split point when other split chars do not appear in the line + being split. Splitchars does not affect RFC 2047 encoded lines. + + Optional linesep is a string to be used to separate the lines of + the value. The default value is the most useful for typical + Python applications, but it can be set to \r\n to produce RFC-compliant + line separators when needed. + """ + self._normalize() + if maxlinelen is None: + maxlinelen = self._maxlinelen + # A maxlinelen of 0 means don't wrap. For all practical purposes, + # choosing a huge number here accomplishes that and makes the + # _ValueFormatter algorithm much simpler. + if maxlinelen == 0: + maxlinelen = 1000000 + formatter = _ValueFormatter(self._headerlen, maxlinelen, + self._continuation_ws, splitchars) + lastcs = None + hasspace = lastspace = None + for string, charset in self._chunks: + if hasspace is not None: + hasspace = string and self._nonctext(string[0]) + import sys + if lastcs not in (None, 'us-ascii'): + if not hasspace or charset not in (None, 'us-ascii'): + formatter.add_transition() + elif charset not in (None, 'us-ascii') and not lastspace: + formatter.add_transition() + lastspace = string and self._nonctext(string[-1]) + lastcs = charset + hasspace = False + lines = string.splitlines() + if lines: + formatter.feed('', lines[0], charset) + else: + formatter.feed('', '', charset) + for line in lines[1:]: + formatter.newline() + if charset.header_encoding is not None: + formatter.feed(self._continuation_ws, ' ' + line.lstrip(), + charset) + else: + sline = line.lstrip() + fws = line[:len(line)-len(sline)] + formatter.feed(fws, sline, charset) + if len(lines) > 1: + formatter.newline() + if self._chunks: + formatter.add_transition() + value = formatter._str(linesep) + if _embeded_header.search(value): + raise HeaderParseError("header value appears to contain " + "an embedded header: {!r}".format(value)) + return value + + def _normalize(self): + # Step 1: Normalize the chunks so that all runs of identical charsets + # get collapsed into a single unicode string. + chunks = [] + last_charset = None + last_chunk = [] + for string, charset in self._chunks: + if charset == last_charset: + last_chunk.append(string) + else: + if last_charset is not None: + chunks.append((SPACE.join(last_chunk), last_charset)) + last_chunk = [string] + last_charset = charset + if last_chunk: + chunks.append((SPACE.join(last_chunk), last_charset)) + self._chunks = chunks + + +class _ValueFormatter(object): + def __init__(self, headerlen, maxlen, continuation_ws, splitchars): + self._maxlen = maxlen + self._continuation_ws = continuation_ws + self._continuation_ws_len = len(continuation_ws) + self._splitchars = splitchars + self._lines = [] + self._current_line = _Accumulator(headerlen) + + def _str(self, linesep): + self.newline() + return linesep.join(self._lines) + + def __str__(self): + return self._str(NL) + + def newline(self): + end_of_line = self._current_line.pop() + if end_of_line != (' ', ''): + self._current_line.push(*end_of_line) + if len(self._current_line) > 0: + if self._current_line.is_onlyws(): + self._lines[-1] += str(self._current_line) + else: + self._lines.append(str(self._current_line)) + self._current_line.reset() + + def add_transition(self): + self._current_line.push(' ', '') + + def feed(self, fws, string, charset): + # If the charset has no header encoding (i.e. it is an ASCII encoding) + # then we must split the header at the "highest level syntactic break" + # possible. Note that we don't have a lot of smarts about field + # syntax; we just try to break on semi-colons, then commas, then + # whitespace. Eventually, this should be pluggable. + if charset.header_encoding is None: + self._ascii_split(fws, string, self._splitchars) + return + # Otherwise, we're doing either a Base64 or a quoted-printable + # encoding which means we don't need to split the line on syntactic + # breaks. We can basically just find enough characters to fit on the + # current line, minus the RFC 2047 chrome. What makes this trickier + # though is that we have to split at octet boundaries, not character + # boundaries but it's only safe to split at character boundaries so at + # best we can only get close. + encoded_lines = charset.header_encode_lines(string, self._maxlengths()) + # The first element extends the current line, but if it's None then + # nothing more fit on the current line so start a new line. + try: + first_line = encoded_lines.pop(0) + except IndexError: + # There are no encoded lines, so we're done. + return + if first_line is not None: + self._append_chunk(fws, first_line) + try: + last_line = encoded_lines.pop() + except IndexError: + # There was only one line. + return + self.newline() + self._current_line.push(self._continuation_ws, last_line) + # Everything else are full lines in themselves. + for line in encoded_lines: + self._lines.append(self._continuation_ws + line) + + def _maxlengths(self): + # The first line's length. + yield self._maxlen - len(self._current_line) + while True: + yield self._maxlen - self._continuation_ws_len + + def _ascii_split(self, fws, string, splitchars): + # The RFC 2822 header folding algorithm is simple in principle but + # complex in practice. Lines may be folded any place where "folding + # white space" appears by inserting a linesep character in front of the + # FWS. The complication is that not all spaces or tabs qualify as FWS, + # and we are also supposed to prefer to break at "higher level + # syntactic breaks". We can't do either of these without intimate + # knowledge of the structure of structured headers, which we don't have + # here. So the best we can do here is prefer to break at the specified + # splitchars, and hope that we don't choose any spaces or tabs that + # aren't legal FWS. (This is at least better than the old algorithm, + # where we would sometimes *introduce* FWS after a splitchar, or the + # algorithm before that, where we would turn all white space runs into + # single spaces or tabs.) + parts = re.split("(["+FWS+"]+)", fws+string) + if parts[0]: + parts[:0] = [''] + else: + parts.pop(0) + for fws, part in zip(*[iter(parts)]*2): + self._append_chunk(fws, part) + + def _append_chunk(self, fws, string): + self._current_line.push(fws, string) + if len(self._current_line) > self._maxlen: + # Find the best split point, working backward from the end. + # There might be none, on a long first line. + for ch in self._splitchars: + for i in range(self._current_line.part_count()-1, 0, -1): + if ch.isspace(): + fws = self._current_line[i][0] + if fws and fws[0]==ch: + break + prevpart = self._current_line[i-1][1] + if prevpart and prevpart[-1]==ch: + break + else: + continue + break + else: + fws, part = self._current_line.pop() + if self._current_line._initial_size > 0: + # There will be a header, so leave it on a line by itself. + self.newline() + if not fws: + # We don't use continuation_ws here because the whitespace + # after a header should always be a space. + fws = ' ' + self._current_line.push(fws, part) + return + remainder = self._current_line.pop_from(i) + self._lines.append(str(self._current_line)) + self._current_line.reset(remainder) + + +class _Accumulator(list): + + def __init__(self, initial_size=0): + self._initial_size = initial_size + super().__init__() + + def push(self, fws, string): + self.append((fws, string)) + + def pop_from(self, i=0): + popped = self[i:] + self[i:] = [] + return popped + + def pop(self): + if self.part_count()==0: + return ('', '') + return super().pop() + + def __len__(self): + return sum((len(fws)+len(part) for fws, part in self), + self._initial_size) + + def __str__(self): + return EMPTYSTRING.join((EMPTYSTRING.join((fws, part)) + for fws, part in self)) + + def reset(self, startval=None): + if startval is None: + startval = [] + self[:] = startval + self._initial_size = 0 + + def is_onlyws(self): + return self._initial_size==0 and (not self or str(self).isspace()) + + def part_count(self): + return super().__len__() diff --git a/lib/future/backports/email/headerregistry.py b/lib/future/backports/email/headerregistry.py new file mode 100644 index 00000000..9aaad65a --- /dev/null +++ b/lib/future/backports/email/headerregistry.py @@ -0,0 +1,592 @@ +"""Representing and manipulating email headers via custom objects. + +This module provides an implementation of the HeaderRegistry API. +The implementation is designed to flexibly follow RFC5322 rules. + +Eventually HeaderRegistry will be a public API, but it isn't yet, +and will probably change some before that happens. + +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +from future.builtins import super +from future.builtins import str +from future.utils import text_to_native_str +from future.backports.email import utils +from future.backports.email import errors +from future.backports.email import _header_value_parser as parser + +class Address(object): + + def __init__(self, display_name='', username='', domain='', addr_spec=None): + """Create an object represeting a full email address. + + An address can have a 'display_name', a 'username', and a 'domain'. In + addition to specifying the username and domain separately, they may be + specified together by using the addr_spec keyword *instead of* the + username and domain keywords. If an addr_spec string is specified it + must be properly quoted according to RFC 5322 rules; an error will be + raised if it is not. + + An Address object has display_name, username, domain, and addr_spec + attributes, all of which are read-only. The addr_spec and the string + value of the object are both quoted according to RFC5322 rules, but + without any Content Transfer Encoding. + + """ + # This clause with its potential 'raise' may only happen when an + # application program creates an Address object using an addr_spec + # keyword. The email library code itself must always supply username + # and domain. + if addr_spec is not None: + if username or domain: + raise TypeError("addrspec specified when username and/or " + "domain also specified") + a_s, rest = parser.get_addr_spec(addr_spec) + if rest: + raise ValueError("Invalid addr_spec; only '{}' " + "could be parsed from '{}'".format( + a_s, addr_spec)) + if a_s.all_defects: + raise a_s.all_defects[0] + username = a_s.local_part + domain = a_s.domain + self._display_name = display_name + self._username = username + self._domain = domain + + @property + def display_name(self): + return self._display_name + + @property + def username(self): + return self._username + + @property + def domain(self): + return self._domain + + @property + def addr_spec(self): + """The addr_spec (username@domain) portion of the address, quoted + according to RFC 5322 rules, but with no Content Transfer Encoding. + """ + nameset = set(self.username) + if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS): + lp = parser.quote_string(self.username) + else: + lp = self.username + if self.domain: + return lp + '@' + self.domain + if not lp: + return '<>' + return lp + + def __repr__(self): + return "Address(display_name={!r}, username={!r}, domain={!r})".format( + self.display_name, self.username, self.domain) + + def __str__(self): + nameset = set(self.display_name) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(self.display_name) + else: + disp = self.display_name + if disp: + addr_spec = '' if self.addr_spec=='<>' else self.addr_spec + return "{} <{}>".format(disp, addr_spec) + return self.addr_spec + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.username == other.username and + self.domain == other.domain) + + +class Group(object): + + def __init__(self, display_name=None, addresses=None): + """Create an object representing an address group. + + An address group consists of a display_name followed by colon and an + list of addresses (see Address) terminated by a semi-colon. The Group + is created by specifying a display_name and a possibly empty list of + Address objects. A Group can also be used to represent a single + address that is not in a group, which is convenient when manipulating + lists that are a combination of Groups and individual Addresses. In + this case the display_name should be set to None. In particular, the + string representation of a Group whose display_name is None is the same + as the Address object, if there is one and only one Address object in + the addresses list. + + """ + self._display_name = display_name + self._addresses = tuple(addresses) if addresses else tuple() + + @property + def display_name(self): + return self._display_name + + @property + def addresses(self): + return self._addresses + + def __repr__(self): + return "Group(display_name={!r}, addresses={!r}".format( + self.display_name, self.addresses) + + def __str__(self): + if self.display_name is None and len(self.addresses)==1: + return str(self.addresses[0]) + disp = self.display_name + if disp is not None: + nameset = set(disp) + if len(nameset) > len(nameset-parser.SPECIALS): + disp = parser.quote_string(disp) + adrstr = ", ".join(str(x) for x in self.addresses) + adrstr = ' ' + adrstr if adrstr else adrstr + return "{}:{};".format(disp, adrstr) + + def __eq__(self, other): + if type(other) != type(self): + return False + return (self.display_name == other.display_name and + self.addresses == other.addresses) + + +# Header Classes # + +class BaseHeader(str): + + """Base class for message headers. + + Implements generic behavior and provides tools for subclasses. + + A subclass must define a classmethod named 'parse' that takes an unfolded + value string and a dictionary as its arguments. The dictionary will + contain one key, 'defects', initialized to an empty list. After the call + the dictionary must contain two additional keys: parse_tree, set to the + parse tree obtained from parsing the header, and 'decoded', set to the + string value of the idealized representation of the data from the value. + (That is, encoded words are decoded, and values that have canonical + representations are so represented.) + + The defects key is intended to collect parsing defects, which the message + parser will subsequently dispose of as appropriate. The parser should not, + insofar as practical, raise any errors. Defects should be added to the + list instead. The standard header parsers register defects for RFC + compliance issues, for obsolete RFC syntax, and for unrecoverable parsing + errors. + + The parse method may add additional keys to the dictionary. In this case + the subclass must define an 'init' method, which will be passed the + dictionary as its keyword arguments. The method should use (usually by + setting them as the value of similarly named attributes) and remove all the + extra keys added by its parse method, and then use super to call its parent + class with the remaining arguments and keywords. + + The subclass should also make sure that a 'max_count' attribute is defined + that is either None or 1. XXX: need to better define this API. + + """ + + def __new__(cls, name, value): + kwds = {'defects': []} + cls.parse(value, kwds) + if utils._has_surrogates(kwds['decoded']): + kwds['decoded'] = utils._sanitize(kwds['decoded']) + self = str.__new__(cls, kwds['decoded']) + # del kwds['decoded'] + self.init(name, **kwds) + return self + + def init(self, name, **_3to2kwargs): + defects = _3to2kwargs['defects']; del _3to2kwargs['defects'] + parse_tree = _3to2kwargs['parse_tree']; del _3to2kwargs['parse_tree'] + self._name = name + self._parse_tree = parse_tree + self._defects = defects + + @property + def name(self): + return self._name + + @property + def defects(self): + return tuple(self._defects) + + def __reduce__(self): + return ( + _reconstruct_header, + ( + self.__class__.__name__, + self.__class__.__bases__, + str(self), + ), + self.__dict__) + + @classmethod + def _reconstruct(cls, value): + return str.__new__(cls, value) + + def fold(self, **_3to2kwargs): + policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + """Fold header according to policy. + + The parsed representation of the header is folded according to + RFC5322 rules, as modified by the policy. If the parse tree + contains surrogateescaped bytes, the bytes are CTE encoded using + the charset 'unknown-8bit". + + Any non-ASCII characters in the parse tree are CTE encoded using + charset utf-8. XXX: make this a policy setting. + + The returned value is an ASCII-only string possibly containing linesep + characters, and ending with a linesep character. The string includes + the header name and the ': ' separator. + + """ + # At some point we need to only put fws here if it was in the source. + header = parser.Header([ + parser.HeaderLabel([ + parser.ValueTerminal(self.name, 'header-name'), + parser.ValueTerminal(':', 'header-sep')]), + parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]), + self._parse_tree]) + return header.fold(policy=policy) + + +def _reconstruct_header(cls_name, bases, value): + return type(text_to_native_str(cls_name), bases, {})._reconstruct(value) + + +class UnstructuredHeader(object): + + max_count = None + value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = cls.value_parser(value) + kwds['decoded'] = str(kwds['parse_tree']) + + +class UniqueUnstructuredHeader(UnstructuredHeader): + + max_count = 1 + + +class DateHeader(object): + + """Header whose value consists of a single timestamp. + + Provides an additional attribute, datetime, which is either an aware + datetime using a timezone, or a naive datetime if the timezone + in the input string is -0000. Also accepts a datetime as input. + The 'value' attribute is the normalized form of the timestamp, + which means it is the output of format_datetime on the datetime. + """ + + max_count = None + + # This is used only for folding, not for creating 'decoded'. + value_parser = staticmethod(parser.get_unstructured) + + @classmethod + def parse(cls, value, kwds): + if not value: + kwds['defects'].append(errors.HeaderMissingRequiredValue()) + kwds['datetime'] = None + kwds['decoded'] = '' + kwds['parse_tree'] = parser.TokenList() + return + if isinstance(value, str): + value = utils.parsedate_to_datetime(value) + kwds['datetime'] = value + kwds['decoded'] = utils.format_datetime(kwds['datetime']) + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._datetime = kw.pop('datetime') + super().init(*args, **kw) + + @property + def datetime(self): + return self._datetime + + +class UniqueDateHeader(DateHeader): + + max_count = 1 + + +class AddressHeader(object): + + max_count = None + + @staticmethod + def value_parser(value): + address_list, value = parser.get_address_list(value) + assert not value, 'this should not happen' + return address_list + + @classmethod + def parse(cls, value, kwds): + if isinstance(value, str): + # We are translating here from the RFC language (address/mailbox) + # to our API language (group/address). + kwds['parse_tree'] = address_list = cls.value_parser(value) + groups = [] + for addr in address_list.addresses: + groups.append(Group(addr.display_name, + [Address(mb.display_name or '', + mb.local_part or '', + mb.domain or '') + for mb in addr.all_mailboxes])) + defects = list(address_list.all_defects) + else: + # Assume it is Address/Group stuff + if not hasattr(value, '__iter__'): + value = [value] + groups = [Group(None, [item]) if not hasattr(item, 'addresses') + else item + for item in value] + defects = [] + kwds['groups'] = groups + kwds['defects'] = defects + kwds['decoded'] = ', '.join([str(item) for item in groups]) + if 'parse_tree' not in kwds: + kwds['parse_tree'] = cls.value_parser(kwds['decoded']) + + def init(self, *args, **kw): + self._groups = tuple(kw.pop('groups')) + self._addresses = None + super().init(*args, **kw) + + @property + def groups(self): + return self._groups + + @property + def addresses(self): + if self._addresses is None: + self._addresses = tuple([address for group in self._groups + for address in group.addresses]) + return self._addresses + + +class UniqueAddressHeader(AddressHeader): + + max_count = 1 + + +class SingleAddressHeader(AddressHeader): + + @property + def address(self): + if len(self.addresses)!=1: + raise ValueError(("value of single address header {} is not " + "a single address").format(self.name)) + return self.addresses[0] + + +class UniqueSingleAddressHeader(SingleAddressHeader): + + max_count = 1 + + +class MIMEVersionHeader(object): + + max_count = 1 + + value_parser = staticmethod(parser.parse_mime_version) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + kwds['major'] = None if parse_tree.minor is None else parse_tree.major + kwds['minor'] = parse_tree.minor + if parse_tree.minor is not None: + kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor']) + else: + kwds['version'] = None + + def init(self, *args, **kw): + self._version = kw.pop('version') + self._major = kw.pop('major') + self._minor = kw.pop('minor') + super().init(*args, **kw) + + @property + def major(self): + return self._major + + @property + def minor(self): + return self._minor + + @property + def version(self): + return self._version + + +class ParameterizedMIMEHeader(object): + + # Mixin that handles the params dict. Must be subclassed and + # a property value_parser for the specific header provided. + + max_count = 1 + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + if parse_tree.params is None: + kwds['params'] = {} + else: + # The MIME RFCs specify that parameter ordering is arbitrary. + kwds['params'] = dict((utils._sanitize(name).lower(), + utils._sanitize(value)) + for name, value in parse_tree.params) + + def init(self, *args, **kw): + self._params = kw.pop('params') + super().init(*args, **kw) + + @property + def params(self): + return self._params.copy() + + +class ContentTypeHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_type_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._maintype = utils._sanitize(self._parse_tree.maintype) + self._subtype = utils._sanitize(self._parse_tree.subtype) + + @property + def maintype(self): + return self._maintype + + @property + def subtype(self): + return self._subtype + + @property + def content_type(self): + return self.maintype + '/' + self.subtype + + +class ContentDispositionHeader(ParameterizedMIMEHeader): + + value_parser = staticmethod(parser.parse_content_disposition_header) + + def init(self, *args, **kw): + super().init(*args, **kw) + cd = self._parse_tree.content_disposition + self._content_disposition = cd if cd is None else utils._sanitize(cd) + + @property + def content_disposition(self): + return self._content_disposition + + +class ContentTransferEncodingHeader(object): + + max_count = 1 + + value_parser = staticmethod(parser.parse_content_transfer_encoding_header) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + def init(self, *args, **kw): + super().init(*args, **kw) + self._cte = utils._sanitize(self._parse_tree.cte) + + @property + def cte(self): + return self._cte + + +# The header factory # + +_default_header_map = { + 'subject': UniqueUnstructuredHeader, + 'date': UniqueDateHeader, + 'resent-date': DateHeader, + 'orig-date': UniqueDateHeader, + 'sender': UniqueSingleAddressHeader, + 'resent-sender': SingleAddressHeader, + 'to': UniqueAddressHeader, + 'resent-to': AddressHeader, + 'cc': UniqueAddressHeader, + 'resent-cc': AddressHeader, + 'bcc': UniqueAddressHeader, + 'resent-bcc': AddressHeader, + 'from': UniqueAddressHeader, + 'resent-from': AddressHeader, + 'reply-to': UniqueAddressHeader, + 'mime-version': MIMEVersionHeader, + 'content-type': ContentTypeHeader, + 'content-disposition': ContentDispositionHeader, + 'content-transfer-encoding': ContentTransferEncodingHeader, + } + +class HeaderRegistry(object): + + """A header_factory and header registry.""" + + def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader, + use_default_map=True): + """Create a header_factory that works with the Policy API. + + base_class is the class that will be the last class in the created + header class's __bases__ list. default_class is the class that will be + used if "name" (see __call__) does not appear in the registry. + use_default_map controls whether or not the default mapping of names to + specialized classes is copied in to the registry when the factory is + created. The default is True. + + """ + self.registry = {} + self.base_class = base_class + self.default_class = default_class + if use_default_map: + self.registry.update(_default_header_map) + + def map_to_type(self, name, cls): + """Register cls as the specialized class for handling "name" headers. + + """ + self.registry[name.lower()] = cls + + def __getitem__(self, name): + cls = self.registry.get(name.lower(), self.default_class) + return type(text_to_native_str('_'+cls.__name__), (cls, self.base_class), {}) + + def __call__(self, name, value): + """Create a header instance for header 'name' from 'value'. + + Creates a header instance by creating a specialized class for parsing + and representing the specified header by combining the factory + base_class with a specialized class from the registry or the + default_class, and passing the name and value to the constructed + class's constructor. + + """ + return self[name](name, value) diff --git a/lib/future/backports/email/iterators.py b/lib/future/backports/email/iterators.py new file mode 100644 index 00000000..82d320f8 --- /dev/null +++ b/lib/future/backports/email/iterators.py @@ -0,0 +1,74 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Various types of useful iterators and generators.""" +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = [ + 'body_line_iterator', + 'typed_subpart_iterator', + 'walk', + # Do not include _structure() since it's part of the debugging API. + ] + +import sys +from io import StringIO + + +# This function will become a method of the Message class +def walk(self): + """Walk over the message tree, yielding each subpart. + + The walk is performed in depth-first order. This method is a + generator. + """ + yield self + if self.is_multipart(): + for subpart in self.get_payload(): + for subsubpart in subpart.walk(): + yield subsubpart + + +# These two functions are imported into the Iterators.py interface module. +def body_line_iterator(msg, decode=False): + """Iterate over the parts, returning string payloads line-by-line. + + Optional decode (default False) is passed through to .get_payload(). + """ + for subpart in msg.walk(): + payload = subpart.get_payload(decode=decode) + if isinstance(payload, str): + for line in StringIO(payload): + yield line + + +def typed_subpart_iterator(msg, maintype='text', subtype=None): + """Iterate over the subparts with a given MIME type. + + Use `maintype' as the main MIME type to match against; this defaults to + "text". Optional `subtype' is the MIME subtype to match against; if + omitted, only the main type is matched. + """ + for subpart in msg.walk(): + if subpart.get_content_maintype() == maintype: + if subtype is None or subpart.get_content_subtype() == subtype: + yield subpart + + +def _structure(msg, fp=None, level=0, include_default=False): + """A handy debugging aid""" + if fp is None: + fp = sys.stdout + tab = ' ' * (level * 4) + print(tab + msg.get_content_type(), end='', file=fp) + if include_default: + print(' [%s]' % msg.get_default_type(), file=fp) + else: + print(file=fp) + if msg.is_multipart(): + for subpart in msg.get_payload(): + _structure(subpart, fp, level+1, include_default) diff --git a/lib/future/backports/email/message.py b/lib/future/backports/email/message.py new file mode 100644 index 00000000..d8d9615d --- /dev/null +++ b/lib/future/backports/email/message.py @@ -0,0 +1,882 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Basic message object for the email package object model.""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import list, range, str, zip + +__all__ = ['Message'] + +import re +import uu +import base64 +import binascii +from io import BytesIO, StringIO + +# Intrapackage imports +from future.utils import as_native_str +from future.backports.email import utils +from future.backports.email import errors +from future.backports.email._policybase import compat32 +from future.backports.email import charset as _charset +from future.backports.email._encoded_words import decode_b +Charset = _charset.Charset + +SEMISPACE = '; ' + +# Regular expression that matches `special' characters in parameters, the +# existence of which force quoting of the parameter value. +tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') + + +def _splitparam(param): + # Split header parameters. BAW: this may be too simple. It isn't + # strictly RFC 2045 (section 5.1) compliant, but it catches most headers + # found in the wild. We may eventually need a full fledged parser. + # RDM: we might have a Header here; for now just stringify it. + a, sep, b = str(param).partition(';') + if not sep: + return a.strip(), None + return a.strip(), b.strip() + +def _formatparam(param, value=None, quote=True): + """Convenience function to format and return a key=value pair. + + This will quote the value if needed or if quote is true. If value is a + three tuple (charset, language, value), it will be encoded according + to RFC2231 rules. If it contains non-ascii characters it will likewise + be encoded according to RFC2231 rules, using the utf-8 charset and + a null language. + """ + if value is not None and len(value) > 0: + # A tuple is used for RFC 2231 encoded parameter values where items + # are (charset, language, value). charset is a string, not a Charset + # instance. RFC 2231 encoded values are never quoted, per RFC. + if isinstance(value, tuple): + # Encode as per RFC 2231 + param += '*' + value = utils.encode_rfc2231(value[2], value[0], value[1]) + return '%s=%s' % (param, value) + else: + try: + value.encode('ascii') + except UnicodeEncodeError: + param += '*' + value = utils.encode_rfc2231(value, 'utf-8', '') + return '%s=%s' % (param, value) + # BAW: Please check this. I think that if quote is set it should + # force quoting even if not necessary. + if quote or tspecials.search(value): + return '%s="%s"' % (param, utils.quote(value)) + else: + return '%s=%s' % (param, value) + else: + return param + +def _parseparam(s): + # RDM This might be a Header, so for now stringify it. + s = ';' + str(s) + plist = [] + while s[:1] == ';': + s = s[1:] + end = s.find(';') + while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: + end = s.find(';', end + 1) + if end < 0: + end = len(s) + f = s[:end] + if '=' in f: + i = f.index('=') + f = f[:i].strip().lower() + '=' + f[i+1:].strip() + plist.append(f.strip()) + s = s[end:] + return plist + + +def _unquotevalue(value): + # This is different than utils.collapse_rfc2231_value() because it doesn't + # try to convert the value to a unicode. Message.get_param() and + # Message.get_params() are both currently defined to return the tuple in + # the face of RFC 2231 parameters. + if isinstance(value, tuple): + return value[0], value[1], utils.unquote(value[2]) + else: + return utils.unquote(value) + + +class Message(object): + """Basic message object. + + A message object is defined as something that has a bunch of RFC 2822 + headers and a payload. It may optionally have an envelope header + (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a + multipart or a message/rfc822), then the payload is a list of Message + objects, otherwise it is a string. + + Message objects implement part of the `mapping' interface, which assumes + there is exactly one occurrence of the header per message. Some headers + do in fact appear multiple times (e.g. Received) and for those headers, + you must use the explicit API to set or get all the headers. Not all of + the mapping methods are implemented. + """ + def __init__(self, policy=compat32): + self.policy = policy + self._headers = list() + self._unixfrom = None + self._payload = None + self._charset = None + # Defaults for multipart messages + self.preamble = self.epilogue = None + self.defects = [] + # Default content type + self._default_type = 'text/plain' + + @as_native_str(encoding='utf-8') + def __str__(self): + """Return the entire formatted message as a string. + This includes the headers, body, and envelope header. + """ + return self.as_string() + + def as_string(self, unixfrom=False, maxheaderlen=0): + """Return the entire formatted message as a (unicode) string. + Optional `unixfrom' when True, means include the Unix From_ envelope + header. + + This is a convenience method and may not generate the message exactly + as you intend. For more flexibility, use the flatten() method of a + Generator instance. + """ + from future.backports.email.generator import Generator + fp = StringIO() + g = Generator(fp, mangle_from_=False, maxheaderlen=maxheaderlen) + g.flatten(self, unixfrom=unixfrom) + return fp.getvalue() + + def is_multipart(self): + """Return True if the message consists of multiple parts.""" + return isinstance(self._payload, list) + + # + # Unix From_ line + # + def set_unixfrom(self, unixfrom): + self._unixfrom = unixfrom + + def get_unixfrom(self): + return self._unixfrom + + # + # Payload manipulation. + # + def attach(self, payload): + """Add the given payload to the current payload. + + The current payload will always be a list of objects after this method + is called. If you want to set the payload to a scalar object, use + set_payload() instead. + """ + if self._payload is None: + self._payload = [payload] + else: + self._payload.append(payload) + + def get_payload(self, i=None, decode=False): + """Return a reference to the payload. + + The payload will either be a list object or a string. If you mutate + the list object, you modify the message's payload in place. Optional + i returns that index into the payload. + + Optional decode is a flag indicating whether the payload should be + decoded or not, according to the Content-Transfer-Encoding header + (default is False). + + When True and the message is not a multipart, the payload will be + decoded if this header's value is `quoted-printable' or `base64'. If + some other encoding is used, or the header is missing, or if the + payload has bogus data (i.e. bogus base64 or uuencoded data), the + payload is returned as-is. + + If the message is a multipart and the decode flag is True, then None + is returned. + """ + # Here is the logic table for this code, based on the email5.0.0 code: + # i decode is_multipart result + # ------ ------ ------------ ------------------------------ + # None True True None + # i True True None + # None False True _payload (a list) + # i False True _payload element i (a Message) + # i False False error (not a list) + # i True False error (not a list) + # None False False _payload + # None True False _payload decoded (bytes) + # Note that Barry planned to factor out the 'decode' case, but that + # isn't so easy now that we handle the 8 bit data, which needs to be + # converted in both the decode and non-decode path. + if self.is_multipart(): + if decode: + return None + if i is None: + return self._payload + else: + return self._payload[i] + # For backward compatibility, Use isinstance and this error message + # instead of the more logical is_multipart test. + if i is not None and not isinstance(self._payload, list): + raise TypeError('Expected list, got %s' % type(self._payload)) + payload = self._payload + # cte might be a Header, so for now stringify it. + cte = str(self.get('content-transfer-encoding', '')).lower() + # payload may be bytes here. + if isinstance(payload, str): + payload = str(payload) # for Python-Future, so surrogateescape works + if utils._has_surrogates(payload): + bpayload = payload.encode('ascii', 'surrogateescape') + if not decode: + try: + payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace') + except LookupError: + payload = bpayload.decode('ascii', 'replace') + elif decode: + try: + bpayload = payload.encode('ascii') + except UnicodeError: + # This won't happen for RFC compliant messages (messages + # containing only ASCII codepoints in the unicode input). + # If it does happen, turn the string into bytes in a way + # guaranteed not to fail. + bpayload = payload.encode('raw-unicode-escape') + if not decode: + return payload + if cte == 'quoted-printable': + return utils._qdecode(bpayload) + elif cte == 'base64': + # XXX: this is a bit of a hack; decode_b should probably be factored + # out somewhere, but I haven't figured out where yet. + value, defects = decode_b(b''.join(bpayload.splitlines())) + for defect in defects: + self.policy.handle_defect(self, defect) + return value + elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'): + in_file = BytesIO(bpayload) + out_file = BytesIO() + try: + uu.decode(in_file, out_file, quiet=True) + return out_file.getvalue() + except uu.Error: + # Some decoding problem + return bpayload + if isinstance(payload, str): + return bpayload + return payload + + def set_payload(self, payload, charset=None): + """Set the payload to the given value. + + Optional charset sets the message's default character set. See + set_charset() for details. + """ + self._payload = payload + if charset is not None: + self.set_charset(charset) + + def set_charset(self, charset): + """Set the charset of the payload to a given character set. + + charset can be a Charset instance, a string naming a character set, or + None. If it is a string it will be converted to a Charset instance. + If charset is None, the charset parameter will be removed from the + Content-Type field. Anything else will generate a TypeError. + + The message will be assumed to be of type text/* encoded with + charset.input_charset. It will be converted to charset.output_charset + and encoded properly, if needed, when generating the plain text + representation of the message. MIME headers (MIME-Version, + Content-Type, Content-Transfer-Encoding) will be added as needed. + """ + if charset is None: + self.del_param('charset') + self._charset = None + return + if not isinstance(charset, Charset): + charset = Charset(charset) + self._charset = charset + if 'MIME-Version' not in self: + self.add_header('MIME-Version', '1.0') + if 'Content-Type' not in self: + self.add_header('Content-Type', 'text/plain', + charset=charset.get_output_charset()) + else: + self.set_param('charset', charset.get_output_charset()) + if charset != charset.get_output_charset(): + self._payload = charset.body_encode(self._payload) + if 'Content-Transfer-Encoding' not in self: + cte = charset.get_body_encoding() + try: + cte(self) + except TypeError: + self._payload = charset.body_encode(self._payload) + self.add_header('Content-Transfer-Encoding', cte) + + def get_charset(self): + """Return the Charset instance associated with the message's payload. + """ + return self._charset + + # + # MAPPING INTERFACE (partial) + # + def __len__(self): + """Return the total number of headers, including duplicates.""" + return len(self._headers) + + def __getitem__(self, name): + """Get a header value. + + Return None if the header is missing instead of raising an exception. + + Note that if the header appeared multiple times, exactly which + occurrence gets returned is undefined. Use get_all() to get all + the values matching a header field name. + """ + return self.get(name) + + def __setitem__(self, name, val): + """Set the value of a header. + + Note: this does not overwrite an existing header with the same field + name. Use __delitem__() first to delete any existing headers. + """ + max_count = self.policy.header_max_count(name) + if max_count: + lname = name.lower() + found = 0 + for k, v in self._headers: + if k.lower() == lname: + found += 1 + if found >= max_count: + raise ValueError("There may be at most {} {} headers " + "in a message".format(max_count, name)) + self._headers.append(self.policy.header_store_parse(name, val)) + + def __delitem__(self, name): + """Delete all occurrences of a header, if present. + + Does not raise an exception if the header is missing. + """ + name = name.lower() + newheaders = list() + for k, v in self._headers: + if k.lower() != name: + newheaders.append((k, v)) + self._headers = newheaders + + def __contains__(self, name): + return name.lower() in [k.lower() for k, v in self._headers] + + def __iter__(self): + for field, value in self._headers: + yield field + + def keys(self): + """Return a list of all the message's header field names. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [k for k, v in self._headers] + + def values(self): + """Return a list of all the message's header values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [self.policy.header_fetch_parse(k, v) + for k, v in self._headers] + + def items(self): + """Get all the message's header fields and values. + + These will be sorted in the order they appeared in the original + message, or were added to the message, and may contain duplicates. + Any fields deleted and re-inserted are always appended to the header + list. + """ + return [(k, self.policy.header_fetch_parse(k, v)) + for k, v in self._headers] + + def get(self, name, failobj=None): + """Get a header value. + + Like __getitem__() but return failobj instead of None when the field + is missing. + """ + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + return self.policy.header_fetch_parse(k, v) + return failobj + + # + # "Internal" methods (public API, but only intended for use by a parser + # or generator, not normal application code. + # + + def set_raw(self, name, value): + """Store name and value in the model without modification. + + This is an "internal" API, intended only for use by a parser. + """ + self._headers.append((name, value)) + + def raw_items(self): + """Return the (name, value) header pairs without modification. + + This is an "internal" API, intended only for use by a generator. + """ + return iter(self._headers.copy()) + + # + # Additional useful stuff + # + + def get_all(self, name, failobj=None): + """Return a list of all the values for the named field. + + These will be sorted in the order they appeared in the original + message, and may contain duplicates. Any fields deleted and + re-inserted are always appended to the header list. + + If no such fields exist, failobj is returned (defaults to None). + """ + values = [] + name = name.lower() + for k, v in self._headers: + if k.lower() == name: + values.append(self.policy.header_fetch_parse(k, v)) + if not values: + return failobj + return values + + def add_header(self, _name, _value, **_params): + """Extended header setting. + + name is the header field to add. keyword arguments can be used to set + additional parameters for the header field, with underscores converted + to dashes. Normally the parameter will be added as key="value" unless + value is None, in which case only the key will be added. If a + parameter value contains non-ASCII characters it can be specified as a + three-tuple of (charset, language, value), in which case it will be + encoded according to RFC2231 rules. Otherwise it will be encoded using + the utf-8 charset and a language of ''. + + Examples: + + msg.add_header('content-disposition', 'attachment', filename='bud.gif') + msg.add_header('content-disposition', 'attachment', + filename=('utf-8', '', 'Fußballer.ppt')) + msg.add_header('content-disposition', 'attachment', + filename='Fußballer.ppt')) + """ + parts = [] + for k, v in _params.items(): + if v is None: + parts.append(k.replace('_', '-')) + else: + parts.append(_formatparam(k.replace('_', '-'), v)) + if _value is not None: + parts.insert(0, _value) + self[_name] = SEMISPACE.join(parts) + + def replace_header(self, _name, _value): + """Replace a header. + + Replace the first matching header found in the message, retaining + header order and case. If no matching header was found, a KeyError is + raised. + """ + _name = _name.lower() + for i, (k, v) in zip(range(len(self._headers)), self._headers): + if k.lower() == _name: + self._headers[i] = self.policy.header_store_parse(k, _value) + break + else: + raise KeyError(_name) + + # + # Use these three methods instead of the three above. + # + + def get_content_type(self): + """Return the message's content type. + + The returned string is coerced to lower case of the form + `maintype/subtype'. If there was no Content-Type header in the + message, the default type as given by get_default_type() will be + returned. Since according to RFC 2045, messages always have a default + type this will always return a value. + + RFC 2045 defines a message's default type to be text/plain unless it + appears inside a multipart/digest container, in which case it would be + message/rfc822. + """ + missing = object() + value = self.get('content-type', missing) + if value is missing: + # This should have no parameters + return self.get_default_type() + ctype = _splitparam(value)[0].lower() + # RFC 2045, section 5.2 says if its invalid, use text/plain + if ctype.count('/') != 1: + return 'text/plain' + return ctype + + def get_content_maintype(self): + """Return the message's main content type. + + This is the `maintype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[0] + + def get_content_subtype(self): + """Returns the message's sub-content type. + + This is the `subtype' part of the string returned by + get_content_type(). + """ + ctype = self.get_content_type() + return ctype.split('/')[1] + + def get_default_type(self): + """Return the `default' content type. + + Most messages have a default content type of text/plain, except for + messages that are subparts of multipart/digest containers. Such + subparts have a default content type of message/rfc822. + """ + return self._default_type + + def set_default_type(self, ctype): + """Set the `default' content type. + + ctype should be either "text/plain" or "message/rfc822", although this + is not enforced. The default content type is not stored in the + Content-Type header. + """ + self._default_type = ctype + + def _get_params_preserve(self, failobj, header): + # Like get_params() but preserves the quoting of values. BAW: + # should this be part of the public interface? + missing = object() + value = self.get(header, missing) + if value is missing: + return failobj + params = [] + for p in _parseparam(value): + try: + name, val = p.split('=', 1) + name = name.strip() + val = val.strip() + except ValueError: + # Must have been a bare attribute + name = p.strip() + val = '' + params.append((name, val)) + params = utils.decode_params(params) + return params + + def get_params(self, failobj=None, header='content-type', unquote=True): + """Return the message's Content-Type parameters, as a list. + + The elements of the returned list are 2-tuples of key/value pairs, as + split on the `=' sign. The left hand side of the `=' is the key, + while the right hand side is the value. If there is no `=' sign in + the parameter the value is the empty string. The value is as + described in the get_param() method. + + Optional failobj is the object to return if there is no Content-Type + header. Optional header is the header to search instead of + Content-Type. If unquote is True, the value is unquoted. + """ + missing = object() + params = self._get_params_preserve(missing, header) + if params is missing: + return failobj + if unquote: + return [(k, _unquotevalue(v)) for k, v in params] + else: + return params + + def get_param(self, param, failobj=None, header='content-type', + unquote=True): + """Return the parameter value if found in the Content-Type header. + + Optional failobj is the object to return if there is no Content-Type + header, or the Content-Type header has no such parameter. Optional + header is the header to search instead of Content-Type. + + Parameter keys are always compared case insensitively. The return + value can either be a string, or a 3-tuple if the parameter was RFC + 2231 encoded. When it's a 3-tuple, the elements of the value are of + the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and + LANGUAGE can be None, in which case you should consider VALUE to be + encoded in the us-ascii charset. You can usually ignore LANGUAGE. + The parameter value (either the returned string, or the VALUE item in + the 3-tuple) is always unquoted, unless unquote is set to False. + + If your application doesn't care whether the parameter was RFC 2231 + encoded, it can turn the return value into a string as follows: + + param = msg.get_param('foo') + param = email.utils.collapse_rfc2231_value(rawparam) + + """ + if header not in self: + return failobj + for k, v in self._get_params_preserve(failobj, header): + if k.lower() == param.lower(): + if unquote: + return _unquotevalue(v) + else: + return v + return failobj + + def set_param(self, param, value, header='Content-Type', requote=True, + charset=None, language=''): + """Set a parameter in the Content-Type header. + + If the parameter already exists in the header, its value will be + replaced with the new value. + + If header is Content-Type and has not yet been defined for this + message, it will be set to "text/plain" and the new parameter and + value will be appended as per RFC 2045. + + An alternate header can specified in the header argument, and all + parameters will be quoted as necessary unless requote is False. + + If charset is specified, the parameter will be encoded according to RFC + 2231. Optional language specifies the RFC 2231 language, defaulting + to the empty string. Both charset and language should be strings. + """ + if not isinstance(value, tuple) and charset: + value = (charset, language, value) + + if header not in self and header.lower() == 'content-type': + ctype = 'text/plain' + else: + ctype = self.get(header) + if not self.get_param(param, header=header): + if not ctype: + ctype = _formatparam(param, value, requote) + else: + ctype = SEMISPACE.join( + [ctype, _formatparam(param, value, requote)]) + else: + ctype = '' + for old_param, old_value in self.get_params(header=header, + unquote=requote): + append_param = '' + if old_param.lower() == param.lower(): + append_param = _formatparam(param, value, requote) + else: + append_param = _formatparam(old_param, old_value, requote) + if not ctype: + ctype = append_param + else: + ctype = SEMISPACE.join([ctype, append_param]) + if ctype != self.get(header): + del self[header] + self[header] = ctype + + def del_param(self, param, header='content-type', requote=True): + """Remove the given parameter completely from the Content-Type header. + + The header will be re-written in place without the parameter or its + value. All values will be quoted as necessary unless requote is + False. Optional header specifies an alternative to the Content-Type + header. + """ + if header not in self: + return + new_ctype = '' + for p, v in self.get_params(header=header, unquote=requote): + if p.lower() != param.lower(): + if not new_ctype: + new_ctype = _formatparam(p, v, requote) + else: + new_ctype = SEMISPACE.join([new_ctype, + _formatparam(p, v, requote)]) + if new_ctype != self.get(header): + del self[header] + self[header] = new_ctype + + def set_type(self, type, header='Content-Type', requote=True): + """Set the main type and subtype for the Content-Type header. + + type must be a string in the form "maintype/subtype", otherwise a + ValueError is raised. + + This method replaces the Content-Type header, keeping all the + parameters in place. If requote is False, this leaves the existing + header's quoting as is. Otherwise, the parameters will be quoted (the + default). + + An alternative header can be specified in the header argument. When + the Content-Type header is set, we'll always also add a MIME-Version + header. + """ + # BAW: should we be strict? + if not type.count('/') == 1: + raise ValueError + # Set the Content-Type, you get a MIME-Version + if header.lower() == 'content-type': + del self['mime-version'] + self['MIME-Version'] = '1.0' + if header not in self: + self[header] = type + return + params = self.get_params(header=header, unquote=requote) + del self[header] + self[header] = type + # Skip the first param; it's the old type. + for p, v in params[1:]: + self.set_param(p, v, header, requote) + + def get_filename(self, failobj=None): + """Return the filename associated with the payload if present. + + The filename is extracted from the Content-Disposition header's + `filename' parameter, and it is unquoted. If that header is missing + the `filename' parameter, this method falls back to looking for the + `name' parameter. + """ + missing = object() + filename = self.get_param('filename', missing, 'content-disposition') + if filename is missing: + filename = self.get_param('name', missing, 'content-type') + if filename is missing: + return failobj + return utils.collapse_rfc2231_value(filename).strip() + + def get_boundary(self, failobj=None): + """Return the boundary associated with the payload if present. + + The boundary is extracted from the Content-Type header's `boundary' + parameter, and it is unquoted. + """ + missing = object() + boundary = self.get_param('boundary', missing) + if boundary is missing: + return failobj + # RFC 2046 says that boundaries may begin but not end in w/s + return utils.collapse_rfc2231_value(boundary).rstrip() + + def set_boundary(self, boundary): + """Set the boundary parameter in Content-Type to 'boundary'. + + This is subtly different than deleting the Content-Type header and + adding a new one with a new boundary parameter via add_header(). The + main difference is that using the set_boundary() method preserves the + order of the Content-Type header in the original message. + + HeaderParseError is raised if the message has no Content-Type header. + """ + missing = object() + params = self._get_params_preserve(missing, 'content-type') + if params is missing: + # There was no Content-Type header, and we don't know what type + # to set it to, so raise an exception. + raise errors.HeaderParseError('No Content-Type header found') + newparams = list() + foundp = False + for pk, pv in params: + if pk.lower() == 'boundary': + newparams.append(('boundary', '"%s"' % boundary)) + foundp = True + else: + newparams.append((pk, pv)) + if not foundp: + # The original Content-Type header had no boundary attribute. + # Tack one on the end. BAW: should we raise an exception + # instead??? + newparams.append(('boundary', '"%s"' % boundary)) + # Replace the existing Content-Type header with the new value + newheaders = list() + for h, v in self._headers: + if h.lower() == 'content-type': + parts = list() + for k, v in newparams: + if v == '': + parts.append(k) + else: + parts.append('%s=%s' % (k, v)) + val = SEMISPACE.join(parts) + newheaders.append(self.policy.header_store_parse(h, val)) + + else: + newheaders.append((h, v)) + self._headers = newheaders + + def get_content_charset(self, failobj=None): + """Return the charset parameter of the Content-Type header. + + The returned string is always coerced to lower case. If there is no + Content-Type header, or if that header has no charset parameter, + failobj is returned. + """ + missing = object() + charset = self.get_param('charset', missing) + if charset is missing: + return failobj + if isinstance(charset, tuple): + # RFC 2231 encoded, so decode it, and it better end up as ascii. + pcharset = charset[0] or 'us-ascii' + try: + # LookupError will be raised if the charset isn't known to + # Python. UnicodeError will be raised if the encoded text + # contains a character not in the charset. + as_bytes = charset[2].encode('raw-unicode-escape') + charset = str(as_bytes, pcharset) + except (LookupError, UnicodeError): + charset = charset[2] + # charset characters must be in us-ascii range + try: + charset.encode('us-ascii') + except UnicodeError: + return failobj + # RFC 2046, $4.1.2 says charsets are not case sensitive + return charset.lower() + + def get_charsets(self, failobj=None): + """Return a list containing the charset(s) used in this message. + + The returned list of items describes the Content-Type headers' + charset parameter for this message and all the subparts in its + payload. + + Each item will either be a string (the value of the charset parameter + in the Content-Type header of that part) or the value of the + 'failobj' parameter (defaults to None), if the part does not have a + main MIME type of "text", or the charset is not defined. + + The list will contain one string for each part of the message, plus + one for the container message (i.e. self), so that a non-multipart + message will still return a list of length 1. + """ + return [part.get_content_charset(failobj) for part in self.walk()] + + # I.e. def walk(self): ... + from future.backports.email.iterators import walk diff --git a/lib/future/backports/email/mime/__init__.py b/lib/future/backports/email/mime/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/backports/email/mime/application.py b/lib/future/backports/email/mime/application.py new file mode 100644 index 00000000..5cbfb174 --- /dev/null +++ b/lib/future/backports/email/mime/application.py @@ -0,0 +1,39 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Keith Dart +# Contact: email-sig@python.org + +"""Class representing application/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +from future.backports.email import encoders +from future.backports.email.mime.nonmultipart import MIMENonMultipart + +__all__ = ["MIMEApplication"] + + +class MIMEApplication(MIMENonMultipart): + """Class for generating application/* MIME documents.""" + + def __init__(self, _data, _subtype='octet-stream', + _encoder=encoders.encode_base64, **_params): + """Create an application/* type MIME document. + + _data is a string containing the raw application data. + + _subtype is the MIME content type subtype, defaulting to + 'octet-stream'. + + _encoder is a function which will perform the actual encoding for + transport of the application data, defaulting to base64 encoding. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + raise TypeError('Invalid application MIME subtype') + MIMENonMultipart.__init__(self, 'application', _subtype, **_params) + self.set_payload(_data) + _encoder(self) diff --git a/lib/future/backports/email/mime/audio.py b/lib/future/backports/email/mime/audio.py new file mode 100644 index 00000000..4989c114 --- /dev/null +++ b/lib/future/backports/email/mime/audio.py @@ -0,0 +1,74 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Anthony Baxter +# Contact: email-sig@python.org + +"""Class representing audio/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEAudio'] + +import sndhdr + +from io import BytesIO +from future.backports.email import encoders +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +_sndhdr_MIMEmap = {'au' : 'basic', + 'wav' :'x-wav', + 'aiff':'x-aiff', + 'aifc':'x-aiff', + } + +# There are others in sndhdr that don't have MIME types. :( +# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma?? +def _whatsnd(data): + """Try to identify a sound file type. + + sndhdr.what() has a pretty cruddy interface, unfortunately. This is why + we re-do it here. It would be easier to reverse engineer the Unix 'file' + command and use the standard 'magic' file, as shipped with a modern Unix. + """ + hdr = data[:512] + fakefile = BytesIO(hdr) + for testfn in sndhdr.tests: + res = testfn(hdr, fakefile) + if res is not None: + return _sndhdr_MIMEmap.get(res[0]) + return None + + +class MIMEAudio(MIMENonMultipart): + """Class for generating audio/* MIME documents.""" + + def __init__(self, _audiodata, _subtype=None, + _encoder=encoders.encode_base64, **_params): + """Create an audio/* type MIME document. + + _audiodata is a string containing the raw audio data. If this data + can be decoded by the standard Python `sndhdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific audio subtype via the + _subtype parameter. If _subtype is not given, and no subtype can be + guessed, a TypeError is raised. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + _subtype = _whatsnd(_audiodata) + if _subtype is None: + raise TypeError('Could not find audio MIME subtype') + MIMENonMultipart.__init__(self, 'audio', _subtype, **_params) + self.set_payload(_audiodata) + _encoder(self) diff --git a/lib/future/backports/email/mime/base.py b/lib/future/backports/email/mime/base.py new file mode 100644 index 00000000..e77f3ca4 --- /dev/null +++ b/lib/future/backports/email/mime/base.py @@ -0,0 +1,25 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME specializations.""" +from __future__ import absolute_import, division, unicode_literals +from future.backports.email import message + +__all__ = ['MIMEBase'] + + +class MIMEBase(message.Message): + """Base class for MIME specializations.""" + + def __init__(self, _maintype, _subtype, **_params): + """This constructor adds a Content-Type: and a MIME-Version: header. + + The Content-Type: header is taken from the _maintype and _subtype + arguments. Additional parameters for this header are taken from the + keyword arguments. + """ + message.Message.__init__(self) + ctype = '%s/%s' % (_maintype, _subtype) + self.add_header('Content-Type', ctype, **_params) + self['MIME-Version'] = '1.0' diff --git a/lib/future/backports/email/mime/image.py b/lib/future/backports/email/mime/image.py new file mode 100644 index 00000000..a0360246 --- /dev/null +++ b/lib/future/backports/email/mime/image.py @@ -0,0 +1,48 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing image/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEImage'] + +import imghdr + +from future.backports.email import encoders +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +class MIMEImage(MIMENonMultipart): + """Class for generating image/* type MIME documents.""" + + def __init__(self, _imagedata, _subtype=None, + _encoder=encoders.encode_base64, **_params): + """Create an image/* type MIME document. + + _imagedata is a string containing the raw image data. If this data + can be decoded by the standard Python `imghdr' module, then the + subtype will be automatically included in the Content-Type header. + Otherwise, you can specify the specific image subtype via the _subtype + parameter. + + _encoder is a function which will perform the actual encoding for + transport of the image data. It takes one argument, which is this + Image instance. It should use get_payload() and set_payload() to + change the payload to the encoded form. It should also add any + Content-Transfer-Encoding or other headers to the message as + necessary. The default encoding is Base64. + + Any additional keyword arguments are passed to the base class + constructor, which turns them into parameters on the Content-Type + header. + """ + if _subtype is None: + _subtype = imghdr.what(None, _imagedata) + if _subtype is None: + raise TypeError('Could not guess image MIME subtype') + MIMENonMultipart.__init__(self, 'image', _subtype, **_params) + self.set_payload(_imagedata) + _encoder(self) diff --git a/lib/future/backports/email/mime/message.py b/lib/future/backports/email/mime/message.py new file mode 100644 index 00000000..7f920751 --- /dev/null +++ b/lib/future/backports/email/mime/message.py @@ -0,0 +1,36 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing message/* MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEMessage'] + +from future.backports.email import message +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +class MIMEMessage(MIMENonMultipart): + """Class representing message/* MIME documents.""" + + def __init__(self, _msg, _subtype='rfc822'): + """Create a message/* type MIME document. + + _msg is a message object and must be an instance of Message, or a + derived class of Message, otherwise a TypeError is raised. + + Optional _subtype defines the subtype of the contained message. The + default is "rfc822" (this is defined by the MIME standard, even though + the term "rfc822" is technically outdated by RFC 2822). + """ + MIMENonMultipart.__init__(self, 'message', _subtype) + if not isinstance(_msg, message.Message): + raise TypeError('Argument is not an instance of Message') + # It's convenient to use this base class method. We need to do it + # this way or we'll get an exception + message.Message.attach(self, _msg) + # And be sure our default type is set correctly + self.set_default_type('message/rfc822') diff --git a/lib/future/backports/email/mime/multipart.py b/lib/future/backports/email/mime/multipart.py new file mode 100644 index 00000000..6d7ed3dc --- /dev/null +++ b/lib/future/backports/email/mime/multipart.py @@ -0,0 +1,49 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME multipart/* type messages.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEMultipart'] + +from future.backports.email.mime.base import MIMEBase + + +class MIMEMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def __init__(self, _subtype='mixed', boundary=None, _subparts=None, + **_params): + """Creates a multipart/* type message. + + By default, creates a multipart/mixed message, with proper + Content-Type and MIME-Version headers. + + _subtype is the subtype of the multipart content type, defaulting to + `mixed'. + + boundary is the multipart boundary string. By default it is + calculated as needed. + + _subparts is a sequence of initial subparts for the payload. It + must be an iterable object, such as a list. You can always + attach new subparts to the message by using the attach() method. + + Additional parameters for the Content-Type header are taken from the + keyword arguments (or passed into the _params argument). + """ + MIMEBase.__init__(self, 'multipart', _subtype, **_params) + + # Initialise _payload to an empty list as the Message superclass's + # implementation of is_multipart assumes that _payload is a list for + # multipart messages. + self._payload = [] + + if _subparts: + for p in _subparts: + self.attach(p) + if boundary: + self.set_boundary(boundary) diff --git a/lib/future/backports/email/mime/nonmultipart.py b/lib/future/backports/email/mime/nonmultipart.py new file mode 100644 index 00000000..08c37c36 --- /dev/null +++ b/lib/future/backports/email/mime/nonmultipart.py @@ -0,0 +1,24 @@ +# Copyright (C) 2002-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Base class for MIME type messages that are not multipart.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMENonMultipart'] + +from future.backports.email import errors +from future.backports.email.mime.base import MIMEBase + + +class MIMENonMultipart(MIMEBase): + """Base class for MIME multipart/* type messages.""" + + def attach(self, payload): + # The public API prohibits attaching multiple subparts to MIMEBase + # derived subtypes since none of them are, by definition, of content + # type multipart/* + raise errors.MultipartConversionError( + 'Cannot attach additional subparts to non-multipart/*') diff --git a/lib/future/backports/email/mime/text.py b/lib/future/backports/email/mime/text.py new file mode 100644 index 00000000..6269f4a6 --- /dev/null +++ b/lib/future/backports/email/mime/text.py @@ -0,0 +1,44 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Class representing text/* type MIME documents.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['MIMEText'] + +from future.backports.email.encoders import encode_7or8bit +from future.backports.email.mime.nonmultipart import MIMENonMultipart + + +class MIMEText(MIMENonMultipart): + """Class for generating text/* type MIME documents.""" + + def __init__(self, _text, _subtype='plain', _charset=None): + """Create a text/* type MIME document. + + _text is the string for this message object. + + _subtype is the MIME sub content type, defaulting to "plain". + + _charset is the character set parameter added to the Content-Type + header. This defaults to "us-ascii". Note that as a side-effect, the + Content-Transfer-Encoding header will also be set. + """ + + # If no _charset was specified, check to see if there are non-ascii + # characters present. If not, use 'us-ascii', otherwise use utf-8. + # XXX: This can be removed once #7304 is fixed. + if _charset is None: + try: + _text.encode('us-ascii') + _charset = 'us-ascii' + except UnicodeEncodeError: + _charset = 'utf-8' + + MIMENonMultipart.__init__(self, 'text', _subtype, + **{'charset': _charset}) + + self.set_payload(_text, _charset) diff --git a/lib/future/backports/email/parser.py b/lib/future/backports/email/parser.py new file mode 100644 index 00000000..df1c6e28 --- /dev/null +++ b/lib/future/backports/email/parser.py @@ -0,0 +1,135 @@ +# Copyright (C) 2001-2007 Python Software Foundation +# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter +# Contact: email-sig@python.org + +"""A parser of RFC 2822 and MIME email messages.""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import + +__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser'] + +import warnings +from io import StringIO, TextIOWrapper + +from future.backports.email.feedparser import FeedParser, BytesFeedParser +from future.backports.email.message import Message +from future.backports.email._policybase import compat32 + + +class Parser(object): + def __init__(self, _class=Message, **_3to2kwargs): + """Parser of RFC 2822 and MIME email messages. + + Creates an in-memory object tree representing the email message, which + can then be manipulated and turned over to a Generator to return the + textual representation of the message. + + The string must be formatted as a block of RFC 2822 headers and header + continuation lines, optionally preceeded by a `Unix-from' header. The + header block is terminated either by the end of the string or by a + blank line. + + _class is the class to instantiate for new message objects when they + must be created. This class must have a constructor that can take + zero arguments. Default is Message.Message. + + The policy keyword specifies a policy object that controls a number of + aspects of the parser's operation. The default policy maintains + backward compatibility. + + """ + if 'policy' in _3to2kwargs: policy = _3to2kwargs['policy']; del _3to2kwargs['policy'] + else: policy = compat32 + self._class = _class + self.policy = policy + + def parse(self, fp, headersonly=False): + """Create a message structure from the data in a file. + + Reads all the data from the file and returns the root of the message + structure. Optional headersonly is a flag specifying whether to stop + parsing after reading the headers or not. The default is False, + meaning it parses the entire contents of the file. + """ + feedparser = FeedParser(self._class, policy=self.policy) + if headersonly: + feedparser._set_headersonly() + while True: + data = fp.read(8192) + if not data: + break + feedparser.feed(data) + return feedparser.close() + + def parsestr(self, text, headersonly=False): + """Create a message structure from a string. + + Returns the root of the message structure. Optional headersonly is a + flag specifying whether to stop parsing after reading the headers or + not. The default is False, meaning it parses the entire contents of + the file. + """ + return self.parse(StringIO(text), headersonly=headersonly) + + + +class HeaderParser(Parser): + def parse(self, fp, headersonly=True): + return Parser.parse(self, fp, True) + + def parsestr(self, text, headersonly=True): + return Parser.parsestr(self, text, True) + + +class BytesParser(object): + + def __init__(self, *args, **kw): + """Parser of binary RFC 2822 and MIME email messages. + + Creates an in-memory object tree representing the email message, which + can then be manipulated and turned over to a Generator to return the + textual representation of the message. + + The input must be formatted as a block of RFC 2822 headers and header + continuation lines, optionally preceeded by a `Unix-from' header. The + header block is terminated either by the end of the input or by a + blank line. + + _class is the class to instantiate for new message objects when they + must be created. This class must have a constructor that can take + zero arguments. Default is Message.Message. + """ + self.parser = Parser(*args, **kw) + + def parse(self, fp, headersonly=False): + """Create a message structure from the data in a binary file. + + Reads all the data from the file and returns the root of the message + structure. Optional headersonly is a flag specifying whether to stop + parsing after reading the headers or not. The default is False, + meaning it parses the entire contents of the file. + """ + fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape') + with fp: + return self.parser.parse(fp, headersonly) + + + def parsebytes(self, text, headersonly=False): + """Create a message structure from a byte string. + + Returns the root of the message structure. Optional headersonly is a + flag specifying whether to stop parsing after reading the headers or + not. The default is False, meaning it parses the entire contents of + the file. + """ + text = text.decode('ASCII', errors='surrogateescape') + return self.parser.parsestr(text, headersonly) + + +class BytesHeaderParser(BytesParser): + def parse(self, fp, headersonly=True): + return BytesParser.parse(self, fp, headersonly=True) + + def parsebytes(self, text, headersonly=True): + return BytesParser.parsebytes(self, text, headersonly=True) diff --git a/lib/future/backports/email/policy.py b/lib/future/backports/email/policy.py new file mode 100644 index 00000000..2f609a23 --- /dev/null +++ b/lib/future/backports/email/policy.py @@ -0,0 +1,193 @@ +"""This will be the home for the policy that hooks in the new +code that adds all the email6 features. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import super + +from future.standard_library.email._policybase import (Policy, Compat32, + compat32, _extend_docstrings) +from future.standard_library.email.utils import _has_surrogates +from future.standard_library.email.headerregistry import HeaderRegistry as HeaderRegistry + +__all__ = [ + 'Compat32', + 'compat32', + 'Policy', + 'EmailPolicy', + 'default', + 'strict', + 'SMTP', + 'HTTP', + ] + +@_extend_docstrings +class EmailPolicy(Policy): + + """+ + PROVISIONAL + + The API extensions enabled by this policy are currently provisional. + Refer to the documentation for details. + + This policy adds new header parsing and folding algorithms. Instead of + simple strings, headers are custom objects with custom attributes + depending on the type of the field. The folding algorithm fully + implements RFCs 2047 and 5322. + + In addition to the settable attributes listed above that apply to + all Policies, this policy adds the following additional attributes: + + refold_source -- if the value for a header in the Message object + came from the parsing of some source, this attribute + indicates whether or not a generator should refold + that value when transforming the message back into + stream form. The possible values are: + + none -- all source values use original folding + long -- source values that have any line that is + longer than max_line_length will be + refolded + all -- all values are refolded. + + The default is 'long'. + + header_factory -- a callable that takes two arguments, 'name' and + 'value', where 'name' is a header field name and + 'value' is an unfolded header field value, and + returns a string-like object that represents that + header. A default header_factory is provided that + understands some of the RFC5322 header field types. + (Currently address fields and date fields have + special treatment, while all other fields are + treated as unstructured. This list will be + completed before the extension is marked stable.) + """ + + refold_source = 'long' + header_factory = HeaderRegistry() + + def __init__(self, **kw): + # Ensure that each new instance gets a unique header factory + # (as opposed to clones, which share the factory). + if 'header_factory' not in kw: + object.__setattr__(self, 'header_factory', HeaderRegistry()) + super().__init__(**kw) + + def header_max_count(self, name): + """+ + The implementation for this class returns the max_count attribute from + the specialized header class that would be used to construct a header + of type 'name'. + """ + return self.header_factory[name].max_count + + # The logic of the next three methods is chosen such that it is possible to + # switch a Message object between a Compat32 policy and a policy derived + # from this class and have the results stay consistent. This allows a + # Message object constructed with this policy to be passed to a library + # that only handles Compat32 objects, or to receive such an object and + # convert it to use the newer style by just changing its policy. It is + # also chosen because it postpones the relatively expensive full rfc5322 + # parse until as late as possible when parsing from source, since in many + # applications only a few headers will actually be inspected. + + def header_source_parse(self, sourcelines): + """+ + The name is parsed as everything up to the ':' and returned unmodified. + The value is determined by stripping leading whitespace off the + remainder of the first line, joining all subsequent lines together, and + stripping any trailing carriage return or linefeed characters. (This + is the same as Compat32). + + """ + name, value = sourcelines[0].split(':', 1) + value = value.lstrip(' \t') + ''.join(sourcelines[1:]) + return (name, value.rstrip('\r\n')) + + def header_store_parse(self, name, value): + """+ + The name is returned unchanged. If the input value has a 'name' + attribute and it matches the name ignoring case, the value is returned + unchanged. Otherwise the name and value are passed to header_factory + method, and the resulting custom header object is returned as the + value. In this case a ValueError is raised if the input value contains + CR or LF characters. + + """ + if hasattr(value, 'name') and value.name.lower() == name.lower(): + return (name, value) + if isinstance(value, str) and len(value.splitlines())>1: + raise ValueError("Header values may not contain linefeed " + "or carriage return characters") + return (name, self.header_factory(name, value)) + + def header_fetch_parse(self, name, value): + """+ + If the value has a 'name' attribute, it is returned to unmodified. + Otherwise the name and the value with any linesep characters removed + are passed to the header_factory method, and the resulting custom + header object is returned. Any surrogateescaped bytes get turned + into the unicode unknown-character glyph. + + """ + if hasattr(value, 'name'): + return value + return self.header_factory(name, ''.join(value.splitlines())) + + def fold(self, name, value): + """+ + Header folding is controlled by the refold_source policy setting. A + value is considered to be a 'source value' if and only if it does not + have a 'name' attribute (having a 'name' attribute means it is a header + object of some sort). If a source value needs to be refolded according + to the policy, it is converted into a custom header object by passing + the name and the value with any linesep characters removed to the + header_factory method. Folding of a custom header object is done by + calling its fold method with the current policy. + + Source values are split into lines using splitlines. If the value is + not to be refolded, the lines are rejoined using the linesep from the + policy and returned. The exception is lines containing non-ascii + binary data. In that case the value is refolded regardless of the + refold_source setting, which causes the binary data to be CTE encoded + using the unknown-8bit charset. + + """ + return self._fold(name, value, refold_binary=True) + + def fold_binary(self, name, value): + """+ + The same as fold if cte_type is 7bit, except that the returned value is + bytes. + + If cte_type is 8bit, non-ASCII binary data is converted back into + bytes. Headers with binary data are not refolded, regardless of the + refold_header setting, since there is no way to know whether the binary + data consists of single byte characters or multibyte characters. + + """ + folded = self._fold(name, value, refold_binary=self.cte_type=='7bit') + return folded.encode('ascii', 'surrogateescape') + + def _fold(self, name, value, refold_binary=False): + if hasattr(value, 'name'): + return value.fold(policy=self) + maxlen = self.max_line_length if self.max_line_length else float('inf') + lines = value.splitlines() + refold = (self.refold_source == 'all' or + self.refold_source == 'long' and + (lines and len(lines[0])+len(name)+2 > maxlen or + any(len(x) > maxlen for x in lines[1:]))) + if refold or refold_binary and _has_surrogates(value): + return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep + + +default = EmailPolicy() +# Make the default policy use the class default header_factory +del default.header_factory +strict = default.clone(raise_on_defect=True) +SMTP = default.clone(linesep='\r\n') +HTTP = default.clone(linesep='\r\n', max_line_length=None) diff --git a/lib/future/backports/email/quoprimime.py b/lib/future/backports/email/quoprimime.py new file mode 100644 index 00000000..b69d158b --- /dev/null +++ b/lib/future/backports/email/quoprimime.py @@ -0,0 +1,326 @@ +# Copyright (C) 2001-2006 Python Software Foundation +# Author: Ben Gertzfield +# Contact: email-sig@python.org + +"""Quoted-printable content transfer encoding per RFCs 2045-2047. + +This module handles the content transfer encoding method defined in RFC 2045 +to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +safely encode text that is in a character set similar to the 7-bit US ASCII +character set, but that includes some 8-bit characters that are normally not +allowed in email bodies or headers. + +Quoted-printable is very space-inefficient for encoding binary files; use the +email.base64mime module for that instead. + +This module provides an interface to encode and decode both headers and bodies +with quoted-printable encoding. + +RFC 2045 defines a method for including character set information in an +`encoded-word' in a header. This method is commonly used for 8-bit real names +in To:/From:/Cc: etc. fields, as well as Subject: lines. + +This module does not do the line wrapping or end-of-line character +conversion necessary for proper internationalized headers; it only +does dumb encoding and decoding. To deal with the various line +wrapping issues, use the email.header module. +""" +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future.builtins import bytes, chr, dict, int, range, super + +__all__ = [ + 'body_decode', + 'body_encode', + 'body_length', + 'decode', + 'decodestring', + 'header_decode', + 'header_encode', + 'header_length', + 'quote', + 'unquote', + ] + +import re +import io + +from string import ascii_letters, digits, hexdigits + +CRLF = '\r\n' +NL = '\n' +EMPTYSTRING = '' + +# Build a mapping of octets to the expansion of that octet. Since we're only +# going to have 256 of these things, this isn't terribly inefficient +# space-wise. Remember that headers and bodies have different sets of safe +# characters. Initialize both maps with the full expansion, and then override +# the safe bytes with the more compact form. +_QUOPRI_HEADER_MAP = dict((c, '=%02X' % c) for c in range(256)) +_QUOPRI_BODY_MAP = _QUOPRI_HEADER_MAP.copy() + +# Safe header bytes which need no encoding. +for c in bytes(b'-!*+/' + ascii_letters.encode('ascii') + digits.encode('ascii')): + _QUOPRI_HEADER_MAP[c] = chr(c) +# Headers have one other special encoding; spaces become underscores. +_QUOPRI_HEADER_MAP[ord(' ')] = '_' + +# Safe body bytes which need no encoding. +for c in bytes(b' !"#$%&\'()*+,-./0123456789:;<>' + b'?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`' + b'abcdefghijklmnopqrstuvwxyz{|}~\t'): + _QUOPRI_BODY_MAP[c] = chr(c) + + + +# Helpers +def header_check(octet): + """Return True if the octet should be escaped with header quopri.""" + return chr(octet) != _QUOPRI_HEADER_MAP[octet] + + +def body_check(octet): + """Return True if the octet should be escaped with body quopri.""" + return chr(octet) != _QUOPRI_BODY_MAP[octet] + + +def header_length(bytearray): + """Return a header quoted-printable encoding length. + + Note that this does not include any RFC 2047 chrome added by + `header_encode()`. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for headers. + """ + return sum(len(_QUOPRI_HEADER_MAP[octet]) for octet in bytearray) + + +def body_length(bytearray): + """Return a body quoted-printable encoding length. + + :param bytearray: An array of bytes (a.k.a. octets). + :return: The length in bytes of the byte array when it is encoded with + quoted-printable for bodies. + """ + return sum(len(_QUOPRI_BODY_MAP[octet]) for octet in bytearray) + + +def _max_append(L, s, maxlen, extra=''): + if not isinstance(s, str): + s = chr(s) + if not L: + L.append(s.lstrip()) + elif len(L[-1]) + len(s) <= maxlen: + L[-1] += extra + s + else: + L.append(s.lstrip()) + + +def unquote(s): + """Turn a string in the form =AB to the ASCII character with value 0xab""" + return chr(int(s[1:3], 16)) + + +def quote(c): + return '=%02X' % ord(c) + + + +def header_encode(header_bytes, charset='iso-8859-1'): + """Encode a single header line with quoted-printable (like) encoding. + + Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + used specifically for email header fields to allow charsets with mostly 7 + bit characters (and some 8 bit) to remain more or less readable in non-RFC + 2045 aware mail clients. + + charset names the character set to use in the RFC 2046 header. It + defaults to iso-8859-1. + """ + # Return empty headers as an empty string. + if not header_bytes: + return '' + # Iterate over every byte, encoding if necessary. + encoded = [] + for octet in header_bytes: + encoded.append(_QUOPRI_HEADER_MAP[octet]) + # Now add the RFC chrome to each encoded chunk and glue the chunks + # together. + return '=?%s?q?%s?=' % (charset, EMPTYSTRING.join(encoded)) + + +class _body_accumulator(io.StringIO): + + def __init__(self, maxlinelen, eol, *args, **kw): + super().__init__(*args, **kw) + self.eol = eol + self.maxlinelen = self.room = maxlinelen + + def write_str(self, s): + """Add string s to the accumulated body.""" + self.write(s) + self.room -= len(s) + + def newline(self): + """Write eol, then start new line.""" + self.write_str(self.eol) + self.room = self.maxlinelen + + def write_soft_break(self): + """Write a soft break, then start a new line.""" + self.write_str('=') + self.newline() + + def write_wrapped(self, s, extra_room=0): + """Add a soft line break if needed, then write s.""" + if self.room < len(s) + extra_room: + self.write_soft_break() + self.write_str(s) + + def write_char(self, c, is_last_char): + if not is_last_char: + # Another character follows on this line, so we must leave + # extra room, either for it or a soft break, and whitespace + # need not be quoted. + self.write_wrapped(c, extra_room=1) + elif c not in ' \t': + # For this and remaining cases, no more characters follow, + # so there is no need to reserve extra room (since a hard + # break will immediately follow). + self.write_wrapped(c) + elif self.room >= 3: + # It's a whitespace character at end-of-line, and we have room + # for the three-character quoted encoding. + self.write(quote(c)) + elif self.room == 2: + # There's room for the whitespace character and a soft break. + self.write(c) + self.write_soft_break() + else: + # There's room only for a soft break. The quoted whitespace + # will be the only content on the subsequent line. + self.write_soft_break() + self.write(quote(c)) + + +def body_encode(body, maxlinelen=76, eol=NL): + """Encode with quoted-printable, wrapping at maxlinelen characters. + + Each line of encoded text will end with eol, which defaults to "\\n". Set + this to "\\r\\n" if you will be using the result of this function directly + in an email. + + Each line will be wrapped at, at most, maxlinelen characters before the + eol string (maxlinelen defaults to 76 characters, the maximum value + permitted by RFC 2045). Long lines will have the 'soft line break' + quoted-printable character "=" appended to them, so the decoded text will + be identical to the original text. + + The minimum maxlinelen is 4 to have room for a quoted character ("=XX") + followed by a soft line break. Smaller values will generate a + ValueError. + + """ + + if maxlinelen < 4: + raise ValueError("maxlinelen must be at least 4") + if not body: + return body + + # The last line may or may not end in eol, but all other lines do. + last_has_eol = (body[-1] in '\r\n') + + # This accumulator will make it easier to build the encoded body. + encoded_body = _body_accumulator(maxlinelen, eol) + + lines = body.splitlines() + last_line_no = len(lines) - 1 + for line_no, line in enumerate(lines): + last_char_index = len(line) - 1 + for i, c in enumerate(line): + if body_check(ord(c)): + c = quote(c) + encoded_body.write_char(c, i==last_char_index) + # Add an eol if input line had eol. All input lines have eol except + # possibly the last one. + if line_no < last_line_no or last_has_eol: + encoded_body.newline() + + return encoded_body.getvalue() + + + +# BAW: I'm not sure if the intent was for the signature of this function to be +# the same as base64MIME.decode() or not... +def decode(encoded, eol=NL): + """Decode a quoted-printable string. + + Lines are separated with eol, which defaults to \\n. + """ + if not encoded: + return encoded + # BAW: see comment in encode() above. Again, we're building up the + # decoded string with string concatenation, which could be done much more + # efficiently. + decoded = '' + + for line in encoded.splitlines(): + line = line.rstrip() + if not line: + decoded += eol + continue + + i = 0 + n = len(line) + while i < n: + c = line[i] + if c != '=': + decoded += c + i += 1 + # Otherwise, c == "=". Are we at the end of the line? If so, add + # a soft line break. + elif i+1 == n: + i += 1 + continue + # Decode if in form =AB + elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits: + decoded += unquote(line[i:i+3]) + i += 3 + # Otherwise, not in form =AB, pass literally + else: + decoded += c + i += 1 + + if i == n: + decoded += eol + # Special case if original string did not end with eol + if encoded[-1] not in '\r\n' and decoded.endswith(eol): + decoded = decoded[:-1] + return decoded + + +# For convenience and backwards compatibility w/ standard base64 module +body_decode = decode +decodestring = decode + + + +def _unquote_match(match): + """Turn a match in the form =AB to the ASCII character with value 0xab""" + s = match.group(0) + return unquote(s) + + +# Header decoding is done a bit differently +def header_decode(s): + """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + + This function does not parse a full MIME header value encoded with + quoted-printable (like =?iso-8895-1?q?Hello_World?=) -- please use + the high level email.header class for that functionality. + """ + s = s.replace('_', ' ') + return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s, re.ASCII) diff --git a/lib/future/backports/email/utils.py b/lib/future/backports/email/utils.py new file mode 100644 index 00000000..4abebf7c --- /dev/null +++ b/lib/future/backports/email/utils.py @@ -0,0 +1,400 @@ +# Copyright (C) 2001-2010 Python Software Foundation +# Author: Barry Warsaw +# Contact: email-sig@python.org + +"""Miscellaneous utilities.""" + +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future import utils +from future.builtins import bytes, int, str + +__all__ = [ + 'collapse_rfc2231_value', + 'decode_params', + 'decode_rfc2231', + 'encode_rfc2231', + 'formataddr', + 'formatdate', + 'format_datetime', + 'getaddresses', + 'make_msgid', + 'mktime_tz', + 'parseaddr', + 'parsedate', + 'parsedate_tz', + 'parsedate_to_datetime', + 'unquote', + ] + +import os +import re +if utils.PY2: + re.ASCII = 0 +import time +import base64 +import random +import socket +from future.backports import datetime +from future.backports.urllib.parse import quote as url_quote, unquote as url_unquote +import warnings +from io import StringIO + +from future.backports.email._parseaddr import quote +from future.backports.email._parseaddr import AddressList as _AddressList +from future.backports.email._parseaddr import mktime_tz + +from future.backports.email._parseaddr import parsedate, parsedate_tz, _parsedate_tz + +from quopri import decodestring as _qdecode + +# Intrapackage imports +from future.backports.email.encoders import _bencode, _qencode +from future.backports.email.charset import Charset + +COMMASPACE = ', ' +EMPTYSTRING = '' +UEMPTYSTRING = '' +CRLF = '\r\n' +TICK = "'" + +specialsre = re.compile(r'[][\\()<>@,:;".]') +escapesre = re.compile(r'[\\"]') + +# How to figure out if we are processing strings that come from a byte +# source with undecodable characters. +_has_surrogates = re.compile( + '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search + +# How to deal with a string containing bytes before handing it to the +# application through the 'normal' interface. +def _sanitize(string): + # Turn any escaped bytes into unicode 'unknown' char. + original_bytes = string.encode('ascii', 'surrogateescape') + return original_bytes.decode('ascii', 'replace') + + +# Helpers + +def formataddr(pair, charset='utf-8'): + """The inverse of parseaddr(), this takes a 2-tuple of the form + (realname, email_address) and returns the string value suitable + for an RFC 2822 From, To or Cc header. + + If the first element of pair is false, then the second element is + returned unmodified. + + Optional charset if given is the character set that is used to encode + realname in case realname is not ASCII safe. Can be an instance of str or + a Charset-like object which has a header_encode method. Default is + 'utf-8'. + """ + name, address = pair + # The address MUST (per RFC) be ascii, so raise an UnicodeError if it isn't. + address.encode('ascii') + if name: + try: + name.encode('ascii') + except UnicodeEncodeError: + if isinstance(charset, str): + charset = Charset(charset) + encoded_name = charset.header_encode(name) + return "%s <%s>" % (encoded_name, address) + else: + quotes = '' + if specialsre.search(name): + quotes = '"' + name = escapesre.sub(r'\\\g<0>', name) + return '%s%s%s <%s>' % (quotes, name, quotes, address) + return address + + + +def getaddresses(fieldvalues): + """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" + all = COMMASPACE.join(fieldvalues) + a = _AddressList(all) + return a.addresslist + + + +ecre = re.compile(r''' + =\? # literal =? + (?P[^?]*?) # non-greedy up to the next ? is the charset + \? # literal ? + (?P[qb]) # either a "q" or a "b", case insensitive + \? # literal ? + (?P.*?) # non-greedy up to the next ?= is the atom + \?= # literal ?= + ''', re.VERBOSE | re.IGNORECASE) + + +def _format_timetuple_and_zone(timetuple, zone): + return '%s, %02d %s %04d %02d:%02d:%02d %s' % ( + ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'][timetuple[6]], + timetuple[2], + ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][timetuple[1] - 1], + timetuple[0], timetuple[3], timetuple[4], timetuple[5], + zone) + +def formatdate(timeval=None, localtime=False, usegmt=False): + """Returns a date string as specified by RFC 2822, e.g.: + + Fri, 09 Nov 2001 01:08:47 -0000 + + Optional timeval if given is a floating point time value as accepted by + gmtime() and localtime(), otherwise the current time is used. + + Optional localtime is a flag that when True, interprets timeval, and + returns a date relative to the local timezone instead of UTC, properly + taking daylight savings time into account. + + Optional argument usegmt means that the timezone is written out as + an ascii string, not numeric one (so "GMT" instead of "+0000"). This + is needed for HTTP, and is only used when localtime==False. + """ + # Note: we cannot use strftime() because that honors the locale and RFC + # 2822 requires that day and month names be the English abbreviations. + if timeval is None: + timeval = time.time() + if localtime: + now = time.localtime(timeval) + # Calculate timezone offset, based on whether the local zone has + # daylight savings time, and whether DST is in effect. + if time.daylight and now[-1]: + offset = time.altzone + else: + offset = time.timezone + hours, minutes = divmod(abs(offset), 3600) + # Remember offset is in seconds west of UTC, but the timezone is in + # minutes east of UTC, so the signs differ. + if offset > 0: + sign = '-' + else: + sign = '+' + zone = '%s%02d%02d' % (sign, hours, minutes // 60) + else: + now = time.gmtime(timeval) + # Timezone offset is always -0000 + if usegmt: + zone = 'GMT' + else: + zone = '-0000' + return _format_timetuple_and_zone(now, zone) + +def format_datetime(dt, usegmt=False): + """Turn a datetime into a date string as specified in RFC 2822. + + If usegmt is True, dt must be an aware datetime with an offset of zero. In + this case 'GMT' will be rendered instead of the normal +0000 required by + RFC2822. This is to support HTTP headers involving date stamps. + """ + now = dt.timetuple() + if usegmt: + if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc: + raise ValueError("usegmt option requires a UTC datetime") + zone = 'GMT' + elif dt.tzinfo is None: + zone = '-0000' + else: + zone = dt.strftime("%z") + return _format_timetuple_and_zone(now, zone) + + +def make_msgid(idstring=None, domain=None): + """Returns a string suitable for RFC 2822 compliant Message-ID, e.g: + + <20020201195627.33539.96671@nightshade.la.mastaler.com> + + Optional idstring if given is a string used to strengthen the + uniqueness of the message id. Optional domain if given provides the + portion of the message id after the '@'. It defaults to the locally + defined hostname. + """ + timeval = time.time() + utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval)) + pid = os.getpid() + randint = random.randrange(100000) + if idstring is None: + idstring = '' + else: + idstring = '.' + idstring + if domain is None: + domain = socket.getfqdn() + msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain) + return msgid + + +def parsedate_to_datetime(data): + _3to2list = list(_parsedate_tz(data)) + dtuple, tz, = [_3to2list[:-1]] + _3to2list[-1:] + if tz is None: + return datetime.datetime(*dtuple[:6]) + return datetime.datetime(*dtuple[:6], + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) + + +def parseaddr(addr): + addrs = _AddressList(addr).addresslist + if not addrs: + return '', '' + return addrs[0] + + +# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3. +def unquote(str): + """Remove quotes from a string.""" + if len(str) > 1: + if str.startswith('"') and str.endswith('"'): + return str[1:-1].replace('\\\\', '\\').replace('\\"', '"') + if str.startswith('<') and str.endswith('>'): + return str[1:-1] + return str + + + +# RFC2231-related functions - parameter encoding and decoding +def decode_rfc2231(s): + """Decode string according to RFC 2231""" + parts = s.split(TICK, 2) + if len(parts) <= 2: + return None, None, s + return parts + + +def encode_rfc2231(s, charset=None, language=None): + """Encode string according to RFC 2231. + + If neither charset nor language is given, then s is returned as-is. If + charset is given but not language, the string is encoded using the empty + string for language. + """ + s = url_quote(s, safe='', encoding=charset or 'ascii') + if charset is None and language is None: + return s + if language is None: + language = '' + return "%s'%s'%s" % (charset, language, s) + + +rfc2231_continuation = re.compile(r'^(?P\w+)\*((?P[0-9]+)\*?)?$', + re.ASCII) + +def decode_params(params): + """Decode parameters list according to RFC 2231. + + params is a sequence of 2-tuples containing (param name, string value). + """ + # Copy params so we don't mess with the original + params = params[:] + new_params = [] + # Map parameter's name to a list of continuations. The values are a + # 3-tuple of the continuation number, the string value, and a flag + # specifying whether a particular segment is %-encoded. + rfc2231_params = {} + name, value = params.pop(0) + new_params.append((name, value)) + while params: + name, value = params.pop(0) + if name.endswith('*'): + encoded = True + else: + encoded = False + value = unquote(value) + mo = rfc2231_continuation.match(name) + if mo: + name, num = mo.group('name', 'num') + if num is not None: + num = int(num) + rfc2231_params.setdefault(name, []).append((num, value, encoded)) + else: + new_params.append((name, '"%s"' % quote(value))) + if rfc2231_params: + for name, continuations in rfc2231_params.items(): + value = [] + extended = False + # Sort by number + continuations.sort() + # And now append all values in numerical order, converting + # %-encodings for the encoded segments. If any of the + # continuation names ends in a *, then the entire string, after + # decoding segments and concatenating, must have the charset and + # language specifiers at the beginning of the string. + for num, s, encoded in continuations: + if encoded: + # Decode as "latin-1", so the characters in s directly + # represent the percent-encoded octet values. + # collapse_rfc2231_value treats this as an octet sequence. + s = url_unquote(s, encoding="latin-1") + extended = True + value.append(s) + value = quote(EMPTYSTRING.join(value)) + if extended: + charset, language, value = decode_rfc2231(value) + new_params.append((name, (charset, language, '"%s"' % value))) + else: + new_params.append((name, '"%s"' % value)) + return new_params + +def collapse_rfc2231_value(value, errors='replace', + fallback_charset='us-ascii'): + if not isinstance(value, tuple) or len(value) != 3: + return unquote(value) + # While value comes to us as a unicode string, we need it to be a bytes + # object. We do not want bytes() normal utf-8 decoder, we want a straight + # interpretation of the string as character bytes. + charset, language, text = value + rawbytes = bytes(text, 'raw-unicode-escape') + try: + return str(rawbytes, charset, errors) + except LookupError: + # charset is not a known codec. + return unquote(text) + + +# +# datetime doesn't provide a localtime function yet, so provide one. Code +# adapted from the patch in issue 9527. This may not be perfect, but it is +# better than not having it. +# + +def localtime(dt=None, isdst=-1): + """Return local time as an aware datetime object. + + If called without arguments, return current time. Otherwise *dt* + argument should be a datetime instance, and it is converted to the + local time zone according to the system time zone database. If *dt* is + naive (that is, dt.tzinfo is None), it is assumed to be in local time. + In this case, a positive or zero value for *isdst* causes localtime to + presume initially that summer time (for example, Daylight Saving Time) + is or is not (respectively) in effect for the specified time. A + negative value for *isdst* causes the localtime() function to attempt + to divine whether summer time is in effect for the specified time. + + """ + if dt is None: + return datetime.datetime.now(datetime.timezone.utc).astimezone() + if dt.tzinfo is not None: + return dt.astimezone() + # We have a naive datetime. Convert to a (localtime) timetuple and pass to + # system mktime together with the isdst hint. System mktime will return + # seconds since epoch. + tm = dt.timetuple()[:-1] + (isdst,) + seconds = time.mktime(tm) + localtm = time.localtime(seconds) + try: + delta = datetime.timedelta(seconds=localtm.tm_gmtoff) + tz = datetime.timezone(delta, localtm.tm_zone) + except AttributeError: + # Compute UTC offset and compare with the value implied by tm_isdst. + # If the values match, use the zone name implied by tm_isdst. + delta = dt - datetime.datetime(*time.gmtime(seconds)[:6]) + dst = time.daylight and localtm.tm_isdst > 0 + gmtoff = -(time.altzone if dst else time.timezone) + if delta == datetime.timedelta(seconds=gmtoff): + tz = datetime.timezone(delta, time.tzname[dst]) + else: + tz = datetime.timezone(delta) + return dt.replace(tzinfo=tz) diff --git a/lib/future/backports/html/__init__.py b/lib/future/backports/html/__init__.py new file mode 100644 index 00000000..58e133fd --- /dev/null +++ b/lib/future/backports/html/__init__.py @@ -0,0 +1,27 @@ +""" +General functions for HTML manipulation, backported from Py3. + +Note that this uses Python 2.7 code with the corresponding Python 3 +module names and locations. +""" + +from __future__ import unicode_literals + + +_escape_map = {ord('&'): '&', ord('<'): '<', ord('>'): '>'} +_escape_map_full = {ord('&'): '&', ord('<'): '<', ord('>'): '>', + ord('"'): '"', ord('\''): '''} + +# NB: this is a candidate for a bytes/string polymorphic interface + +def escape(s, quote=True): + """ + Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true (the default), the quotation mark + characters, both double quote (") and single quote (') characters are also + translated. + """ + assert not isinstance(s, bytes), 'Pass a unicode string' + if quote: + return s.translate(_escape_map_full) + return s.translate(_escape_map) diff --git a/lib/future/backports/html/entities.py b/lib/future/backports/html/entities.py new file mode 100644 index 00000000..5c73f692 --- /dev/null +++ b/lib/future/backports/html/entities.py @@ -0,0 +1,2514 @@ +"""HTML character entity references. + +Backported for python-future from Python 3.3 +""" + +from __future__ import (absolute_import, division, + print_function, unicode_literals) +from future.builtins import * + + +# maps the HTML entity name to the Unicode codepoint +name2codepoint = { + 'AElig': 0x00c6, # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 + 'Aacute': 0x00c1, # latin capital letter A with acute, U+00C1 ISOlat1 + 'Acirc': 0x00c2, # latin capital letter A with circumflex, U+00C2 ISOlat1 + 'Agrave': 0x00c0, # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 + 'Alpha': 0x0391, # greek capital letter alpha, U+0391 + 'Aring': 0x00c5, # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 + 'Atilde': 0x00c3, # latin capital letter A with tilde, U+00C3 ISOlat1 + 'Auml': 0x00c4, # latin capital letter A with diaeresis, U+00C4 ISOlat1 + 'Beta': 0x0392, # greek capital letter beta, U+0392 + 'Ccedil': 0x00c7, # latin capital letter C with cedilla, U+00C7 ISOlat1 + 'Chi': 0x03a7, # greek capital letter chi, U+03A7 + 'Dagger': 0x2021, # double dagger, U+2021 ISOpub + 'Delta': 0x0394, # greek capital letter delta, U+0394 ISOgrk3 + 'ETH': 0x00d0, # latin capital letter ETH, U+00D0 ISOlat1 + 'Eacute': 0x00c9, # latin capital letter E with acute, U+00C9 ISOlat1 + 'Ecirc': 0x00ca, # latin capital letter E with circumflex, U+00CA ISOlat1 + 'Egrave': 0x00c8, # latin capital letter E with grave, U+00C8 ISOlat1 + 'Epsilon': 0x0395, # greek capital letter epsilon, U+0395 + 'Eta': 0x0397, # greek capital letter eta, U+0397 + 'Euml': 0x00cb, # latin capital letter E with diaeresis, U+00CB ISOlat1 + 'Gamma': 0x0393, # greek capital letter gamma, U+0393 ISOgrk3 + 'Iacute': 0x00cd, # latin capital letter I with acute, U+00CD ISOlat1 + 'Icirc': 0x00ce, # latin capital letter I with circumflex, U+00CE ISOlat1 + 'Igrave': 0x00cc, # latin capital letter I with grave, U+00CC ISOlat1 + 'Iota': 0x0399, # greek capital letter iota, U+0399 + 'Iuml': 0x00cf, # latin capital letter I with diaeresis, U+00CF ISOlat1 + 'Kappa': 0x039a, # greek capital letter kappa, U+039A + 'Lambda': 0x039b, # greek capital letter lambda, U+039B ISOgrk3 + 'Mu': 0x039c, # greek capital letter mu, U+039C + 'Ntilde': 0x00d1, # latin capital letter N with tilde, U+00D1 ISOlat1 + 'Nu': 0x039d, # greek capital letter nu, U+039D + 'OElig': 0x0152, # latin capital ligature OE, U+0152 ISOlat2 + 'Oacute': 0x00d3, # latin capital letter O with acute, U+00D3 ISOlat1 + 'Ocirc': 0x00d4, # latin capital letter O with circumflex, U+00D4 ISOlat1 + 'Ograve': 0x00d2, # latin capital letter O with grave, U+00D2 ISOlat1 + 'Omega': 0x03a9, # greek capital letter omega, U+03A9 ISOgrk3 + 'Omicron': 0x039f, # greek capital letter omicron, U+039F + 'Oslash': 0x00d8, # latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 + 'Otilde': 0x00d5, # latin capital letter O with tilde, U+00D5 ISOlat1 + 'Ouml': 0x00d6, # latin capital letter O with diaeresis, U+00D6 ISOlat1 + 'Phi': 0x03a6, # greek capital letter phi, U+03A6 ISOgrk3 + 'Pi': 0x03a0, # greek capital letter pi, U+03A0 ISOgrk3 + 'Prime': 0x2033, # double prime = seconds = inches, U+2033 ISOtech + 'Psi': 0x03a8, # greek capital letter psi, U+03A8 ISOgrk3 + 'Rho': 0x03a1, # greek capital letter rho, U+03A1 + 'Scaron': 0x0160, # latin capital letter S with caron, U+0160 ISOlat2 + 'Sigma': 0x03a3, # greek capital letter sigma, U+03A3 ISOgrk3 + 'THORN': 0x00de, # latin capital letter THORN, U+00DE ISOlat1 + 'Tau': 0x03a4, # greek capital letter tau, U+03A4 + 'Theta': 0x0398, # greek capital letter theta, U+0398 ISOgrk3 + 'Uacute': 0x00da, # latin capital letter U with acute, U+00DA ISOlat1 + 'Ucirc': 0x00db, # latin capital letter U with circumflex, U+00DB ISOlat1 + 'Ugrave': 0x00d9, # latin capital letter U with grave, U+00D9 ISOlat1 + 'Upsilon': 0x03a5, # greek capital letter upsilon, U+03A5 ISOgrk3 + 'Uuml': 0x00dc, # latin capital letter U with diaeresis, U+00DC ISOlat1 + 'Xi': 0x039e, # greek capital letter xi, U+039E ISOgrk3 + 'Yacute': 0x00dd, # latin capital letter Y with acute, U+00DD ISOlat1 + 'Yuml': 0x0178, # latin capital letter Y with diaeresis, U+0178 ISOlat2 + 'Zeta': 0x0396, # greek capital letter zeta, U+0396 + 'aacute': 0x00e1, # latin small letter a with acute, U+00E1 ISOlat1 + 'acirc': 0x00e2, # latin small letter a with circumflex, U+00E2 ISOlat1 + 'acute': 0x00b4, # acute accent = spacing acute, U+00B4 ISOdia + 'aelig': 0x00e6, # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 + 'agrave': 0x00e0, # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 + 'alefsym': 0x2135, # alef symbol = first transfinite cardinal, U+2135 NEW + 'alpha': 0x03b1, # greek small letter alpha, U+03B1 ISOgrk3 + 'amp': 0x0026, # ampersand, U+0026 ISOnum + 'and': 0x2227, # logical and = wedge, U+2227 ISOtech + 'ang': 0x2220, # angle, U+2220 ISOamso + 'aring': 0x00e5, # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 + 'asymp': 0x2248, # almost equal to = asymptotic to, U+2248 ISOamsr + 'atilde': 0x00e3, # latin small letter a with tilde, U+00E3 ISOlat1 + 'auml': 0x00e4, # latin small letter a with diaeresis, U+00E4 ISOlat1 + 'bdquo': 0x201e, # double low-9 quotation mark, U+201E NEW + 'beta': 0x03b2, # greek small letter beta, U+03B2 ISOgrk3 + 'brvbar': 0x00a6, # broken bar = broken vertical bar, U+00A6 ISOnum + 'bull': 0x2022, # bullet = black small circle, U+2022 ISOpub + 'cap': 0x2229, # intersection = cap, U+2229 ISOtech + 'ccedil': 0x00e7, # latin small letter c with cedilla, U+00E7 ISOlat1 + 'cedil': 0x00b8, # cedilla = spacing cedilla, U+00B8 ISOdia + 'cent': 0x00a2, # cent sign, U+00A2 ISOnum + 'chi': 0x03c7, # greek small letter chi, U+03C7 ISOgrk3 + 'circ': 0x02c6, # modifier letter circumflex accent, U+02C6 ISOpub + 'clubs': 0x2663, # black club suit = shamrock, U+2663 ISOpub + 'cong': 0x2245, # approximately equal to, U+2245 ISOtech + 'copy': 0x00a9, # copyright sign, U+00A9 ISOnum + 'crarr': 0x21b5, # downwards arrow with corner leftwards = carriage return, U+21B5 NEW + 'cup': 0x222a, # union = cup, U+222A ISOtech + 'curren': 0x00a4, # currency sign, U+00A4 ISOnum + 'dArr': 0x21d3, # downwards double arrow, U+21D3 ISOamsa + 'dagger': 0x2020, # dagger, U+2020 ISOpub + 'darr': 0x2193, # downwards arrow, U+2193 ISOnum + 'deg': 0x00b0, # degree sign, U+00B0 ISOnum + 'delta': 0x03b4, # greek small letter delta, U+03B4 ISOgrk3 + 'diams': 0x2666, # black diamond suit, U+2666 ISOpub + 'divide': 0x00f7, # division sign, U+00F7 ISOnum + 'eacute': 0x00e9, # latin small letter e with acute, U+00E9 ISOlat1 + 'ecirc': 0x00ea, # latin small letter e with circumflex, U+00EA ISOlat1 + 'egrave': 0x00e8, # latin small letter e with grave, U+00E8 ISOlat1 + 'empty': 0x2205, # empty set = null set = diameter, U+2205 ISOamso + 'emsp': 0x2003, # em space, U+2003 ISOpub + 'ensp': 0x2002, # en space, U+2002 ISOpub + 'epsilon': 0x03b5, # greek small letter epsilon, U+03B5 ISOgrk3 + 'equiv': 0x2261, # identical to, U+2261 ISOtech + 'eta': 0x03b7, # greek small letter eta, U+03B7 ISOgrk3 + 'eth': 0x00f0, # latin small letter eth, U+00F0 ISOlat1 + 'euml': 0x00eb, # latin small letter e with diaeresis, U+00EB ISOlat1 + 'euro': 0x20ac, # euro sign, U+20AC NEW + 'exist': 0x2203, # there exists, U+2203 ISOtech + 'fnof': 0x0192, # latin small f with hook = function = florin, U+0192 ISOtech + 'forall': 0x2200, # for all, U+2200 ISOtech + 'frac12': 0x00bd, # vulgar fraction one half = fraction one half, U+00BD ISOnum + 'frac14': 0x00bc, # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum + 'frac34': 0x00be, # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum + 'frasl': 0x2044, # fraction slash, U+2044 NEW + 'gamma': 0x03b3, # greek small letter gamma, U+03B3 ISOgrk3 + 'ge': 0x2265, # greater-than or equal to, U+2265 ISOtech + 'gt': 0x003e, # greater-than sign, U+003E ISOnum + 'hArr': 0x21d4, # left right double arrow, U+21D4 ISOamsa + 'harr': 0x2194, # left right arrow, U+2194 ISOamsa + 'hearts': 0x2665, # black heart suit = valentine, U+2665 ISOpub + 'hellip': 0x2026, # horizontal ellipsis = three dot leader, U+2026 ISOpub + 'iacute': 0x00ed, # latin small letter i with acute, U+00ED ISOlat1 + 'icirc': 0x00ee, # latin small letter i with circumflex, U+00EE ISOlat1 + 'iexcl': 0x00a1, # inverted exclamation mark, U+00A1 ISOnum + 'igrave': 0x00ec, # latin small letter i with grave, U+00EC ISOlat1 + 'image': 0x2111, # blackletter capital I = imaginary part, U+2111 ISOamso + 'infin': 0x221e, # infinity, U+221E ISOtech + 'int': 0x222b, # integral, U+222B ISOtech + 'iota': 0x03b9, # greek small letter iota, U+03B9 ISOgrk3 + 'iquest': 0x00bf, # inverted question mark = turned question mark, U+00BF ISOnum + 'isin': 0x2208, # element of, U+2208 ISOtech + 'iuml': 0x00ef, # latin small letter i with diaeresis, U+00EF ISOlat1 + 'kappa': 0x03ba, # greek small letter kappa, U+03BA ISOgrk3 + 'lArr': 0x21d0, # leftwards double arrow, U+21D0 ISOtech + 'lambda': 0x03bb, # greek small letter lambda, U+03BB ISOgrk3 + 'lang': 0x2329, # left-pointing angle bracket = bra, U+2329 ISOtech + 'laquo': 0x00ab, # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum + 'larr': 0x2190, # leftwards arrow, U+2190 ISOnum + 'lceil': 0x2308, # left ceiling = apl upstile, U+2308 ISOamsc + 'ldquo': 0x201c, # left double quotation mark, U+201C ISOnum + 'le': 0x2264, # less-than or equal to, U+2264 ISOtech + 'lfloor': 0x230a, # left floor = apl downstile, U+230A ISOamsc + 'lowast': 0x2217, # asterisk operator, U+2217 ISOtech + 'loz': 0x25ca, # lozenge, U+25CA ISOpub + 'lrm': 0x200e, # left-to-right mark, U+200E NEW RFC 2070 + 'lsaquo': 0x2039, # single left-pointing angle quotation mark, U+2039 ISO proposed + 'lsquo': 0x2018, # left single quotation mark, U+2018 ISOnum + 'lt': 0x003c, # less-than sign, U+003C ISOnum + 'macr': 0x00af, # macron = spacing macron = overline = APL overbar, U+00AF ISOdia + 'mdash': 0x2014, # em dash, U+2014 ISOpub + 'micro': 0x00b5, # micro sign, U+00B5 ISOnum + 'middot': 0x00b7, # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum + 'minus': 0x2212, # minus sign, U+2212 ISOtech + 'mu': 0x03bc, # greek small letter mu, U+03BC ISOgrk3 + 'nabla': 0x2207, # nabla = backward difference, U+2207 ISOtech + 'nbsp': 0x00a0, # no-break space = non-breaking space, U+00A0 ISOnum + 'ndash': 0x2013, # en dash, U+2013 ISOpub + 'ne': 0x2260, # not equal to, U+2260 ISOtech + 'ni': 0x220b, # contains as member, U+220B ISOtech + 'not': 0x00ac, # not sign, U+00AC ISOnum + 'notin': 0x2209, # not an element of, U+2209 ISOtech + 'nsub': 0x2284, # not a subset of, U+2284 ISOamsn + 'ntilde': 0x00f1, # latin small letter n with tilde, U+00F1 ISOlat1 + 'nu': 0x03bd, # greek small letter nu, U+03BD ISOgrk3 + 'oacute': 0x00f3, # latin small letter o with acute, U+00F3 ISOlat1 + 'ocirc': 0x00f4, # latin small letter o with circumflex, U+00F4 ISOlat1 + 'oelig': 0x0153, # latin small ligature oe, U+0153 ISOlat2 + 'ograve': 0x00f2, # latin small letter o with grave, U+00F2 ISOlat1 + 'oline': 0x203e, # overline = spacing overscore, U+203E NEW + 'omega': 0x03c9, # greek small letter omega, U+03C9 ISOgrk3 + 'omicron': 0x03bf, # greek small letter omicron, U+03BF NEW + 'oplus': 0x2295, # circled plus = direct sum, U+2295 ISOamsb + 'or': 0x2228, # logical or = vee, U+2228 ISOtech + 'ordf': 0x00aa, # feminine ordinal indicator, U+00AA ISOnum + 'ordm': 0x00ba, # masculine ordinal indicator, U+00BA ISOnum + 'oslash': 0x00f8, # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 + 'otilde': 0x00f5, # latin small letter o with tilde, U+00F5 ISOlat1 + 'otimes': 0x2297, # circled times = vector product, U+2297 ISOamsb + 'ouml': 0x00f6, # latin small letter o with diaeresis, U+00F6 ISOlat1 + 'para': 0x00b6, # pilcrow sign = paragraph sign, U+00B6 ISOnum + 'part': 0x2202, # partial differential, U+2202 ISOtech + 'permil': 0x2030, # per mille sign, U+2030 ISOtech + 'perp': 0x22a5, # up tack = orthogonal to = perpendicular, U+22A5 ISOtech + 'phi': 0x03c6, # greek small letter phi, U+03C6 ISOgrk3 + 'pi': 0x03c0, # greek small letter pi, U+03C0 ISOgrk3 + 'piv': 0x03d6, # greek pi symbol, U+03D6 ISOgrk3 + 'plusmn': 0x00b1, # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum + 'pound': 0x00a3, # pound sign, U+00A3 ISOnum + 'prime': 0x2032, # prime = minutes = feet, U+2032 ISOtech + 'prod': 0x220f, # n-ary product = product sign, U+220F ISOamsb + 'prop': 0x221d, # proportional to, U+221D ISOtech + 'psi': 0x03c8, # greek small letter psi, U+03C8 ISOgrk3 + 'quot': 0x0022, # quotation mark = APL quote, U+0022 ISOnum + 'rArr': 0x21d2, # rightwards double arrow, U+21D2 ISOtech + 'radic': 0x221a, # square root = radical sign, U+221A ISOtech + 'rang': 0x232a, # right-pointing angle bracket = ket, U+232A ISOtech + 'raquo': 0x00bb, # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum + 'rarr': 0x2192, # rightwards arrow, U+2192 ISOnum + 'rceil': 0x2309, # right ceiling, U+2309 ISOamsc + 'rdquo': 0x201d, # right double quotation mark, U+201D ISOnum + 'real': 0x211c, # blackletter capital R = real part symbol, U+211C ISOamso + 'reg': 0x00ae, # registered sign = registered trade mark sign, U+00AE ISOnum + 'rfloor': 0x230b, # right floor, U+230B ISOamsc + 'rho': 0x03c1, # greek small letter rho, U+03C1 ISOgrk3 + 'rlm': 0x200f, # right-to-left mark, U+200F NEW RFC 2070 + 'rsaquo': 0x203a, # single right-pointing angle quotation mark, U+203A ISO proposed + 'rsquo': 0x2019, # right single quotation mark, U+2019 ISOnum + 'sbquo': 0x201a, # single low-9 quotation mark, U+201A NEW + 'scaron': 0x0161, # latin small letter s with caron, U+0161 ISOlat2 + 'sdot': 0x22c5, # dot operator, U+22C5 ISOamsb + 'sect': 0x00a7, # section sign, U+00A7 ISOnum + 'shy': 0x00ad, # soft hyphen = discretionary hyphen, U+00AD ISOnum + 'sigma': 0x03c3, # greek small letter sigma, U+03C3 ISOgrk3 + 'sigmaf': 0x03c2, # greek small letter final sigma, U+03C2 ISOgrk3 + 'sim': 0x223c, # tilde operator = varies with = similar to, U+223C ISOtech + 'spades': 0x2660, # black spade suit, U+2660 ISOpub + 'sub': 0x2282, # subset of, U+2282 ISOtech + 'sube': 0x2286, # subset of or equal to, U+2286 ISOtech + 'sum': 0x2211, # n-ary sumation, U+2211 ISOamsb + 'sup': 0x2283, # superset of, U+2283 ISOtech + 'sup1': 0x00b9, # superscript one = superscript digit one, U+00B9 ISOnum + 'sup2': 0x00b2, # superscript two = superscript digit two = squared, U+00B2 ISOnum + 'sup3': 0x00b3, # superscript three = superscript digit three = cubed, U+00B3 ISOnum + 'supe': 0x2287, # superset of or equal to, U+2287 ISOtech + 'szlig': 0x00df, # latin small letter sharp s = ess-zed, U+00DF ISOlat1 + 'tau': 0x03c4, # greek small letter tau, U+03C4 ISOgrk3 + 'there4': 0x2234, # therefore, U+2234 ISOtech + 'theta': 0x03b8, # greek small letter theta, U+03B8 ISOgrk3 + 'thetasym': 0x03d1, # greek small letter theta symbol, U+03D1 NEW + 'thinsp': 0x2009, # thin space, U+2009 ISOpub + 'thorn': 0x00fe, # latin small letter thorn with, U+00FE ISOlat1 + 'tilde': 0x02dc, # small tilde, U+02DC ISOdia + 'times': 0x00d7, # multiplication sign, U+00D7 ISOnum + 'trade': 0x2122, # trade mark sign, U+2122 ISOnum + 'uArr': 0x21d1, # upwards double arrow, U+21D1 ISOamsa + 'uacute': 0x00fa, # latin small letter u with acute, U+00FA ISOlat1 + 'uarr': 0x2191, # upwards arrow, U+2191 ISOnum + 'ucirc': 0x00fb, # latin small letter u with circumflex, U+00FB ISOlat1 + 'ugrave': 0x00f9, # latin small letter u with grave, U+00F9 ISOlat1 + 'uml': 0x00a8, # diaeresis = spacing diaeresis, U+00A8 ISOdia + 'upsih': 0x03d2, # greek upsilon with hook symbol, U+03D2 NEW + 'upsilon': 0x03c5, # greek small letter upsilon, U+03C5 ISOgrk3 + 'uuml': 0x00fc, # latin small letter u with diaeresis, U+00FC ISOlat1 + 'weierp': 0x2118, # script capital P = power set = Weierstrass p, U+2118 ISOamso + 'xi': 0x03be, # greek small letter xi, U+03BE ISOgrk3 + 'yacute': 0x00fd, # latin small letter y with acute, U+00FD ISOlat1 + 'yen': 0x00a5, # yen sign = yuan sign, U+00A5 ISOnum + 'yuml': 0x00ff, # latin small letter y with diaeresis, U+00FF ISOlat1 + 'zeta': 0x03b6, # greek small letter zeta, U+03B6 ISOgrk3 + 'zwj': 0x200d, # zero width joiner, U+200D NEW RFC 2070 + 'zwnj': 0x200c, # zero width non-joiner, U+200C NEW RFC 2070 +} + + +# maps the HTML5 named character references to the equivalent Unicode character(s) +html5 = { + 'Aacute': '\xc1', + 'aacute': '\xe1', + 'Aacute;': '\xc1', + 'aacute;': '\xe1', + 'Abreve;': '\u0102', + 'abreve;': '\u0103', + 'ac;': '\u223e', + 'acd;': '\u223f', + 'acE;': '\u223e\u0333', + 'Acirc': '\xc2', + 'acirc': '\xe2', + 'Acirc;': '\xc2', + 'acirc;': '\xe2', + 'acute': '\xb4', + 'acute;': '\xb4', + 'Acy;': '\u0410', + 'acy;': '\u0430', + 'AElig': '\xc6', + 'aelig': '\xe6', + 'AElig;': '\xc6', + 'aelig;': '\xe6', + 'af;': '\u2061', + 'Afr;': '\U0001d504', + 'afr;': '\U0001d51e', + 'Agrave': '\xc0', + 'agrave': '\xe0', + 'Agrave;': '\xc0', + 'agrave;': '\xe0', + 'alefsym;': '\u2135', + 'aleph;': '\u2135', + 'Alpha;': '\u0391', + 'alpha;': '\u03b1', + 'Amacr;': '\u0100', + 'amacr;': '\u0101', + 'amalg;': '\u2a3f', + 'AMP': '&', + 'amp': '&', + 'AMP;': '&', + 'amp;': '&', + 'And;': '\u2a53', + 'and;': '\u2227', + 'andand;': '\u2a55', + 'andd;': '\u2a5c', + 'andslope;': '\u2a58', + 'andv;': '\u2a5a', + 'ang;': '\u2220', + 'ange;': '\u29a4', + 'angle;': '\u2220', + 'angmsd;': '\u2221', + 'angmsdaa;': '\u29a8', + 'angmsdab;': '\u29a9', + 'angmsdac;': '\u29aa', + 'angmsdad;': '\u29ab', + 'angmsdae;': '\u29ac', + 'angmsdaf;': '\u29ad', + 'angmsdag;': '\u29ae', + 'angmsdah;': '\u29af', + 'angrt;': '\u221f', + 'angrtvb;': '\u22be', + 'angrtvbd;': '\u299d', + 'angsph;': '\u2222', + 'angst;': '\xc5', + 'angzarr;': '\u237c', + 'Aogon;': '\u0104', + 'aogon;': '\u0105', + 'Aopf;': '\U0001d538', + 'aopf;': '\U0001d552', + 'ap;': '\u2248', + 'apacir;': '\u2a6f', + 'apE;': '\u2a70', + 'ape;': '\u224a', + 'apid;': '\u224b', + 'apos;': "'", + 'ApplyFunction;': '\u2061', + 'approx;': '\u2248', + 'approxeq;': '\u224a', + 'Aring': '\xc5', + 'aring': '\xe5', + 'Aring;': '\xc5', + 'aring;': '\xe5', + 'Ascr;': '\U0001d49c', + 'ascr;': '\U0001d4b6', + 'Assign;': '\u2254', + 'ast;': '*', + 'asymp;': '\u2248', + 'asympeq;': '\u224d', + 'Atilde': '\xc3', + 'atilde': '\xe3', + 'Atilde;': '\xc3', + 'atilde;': '\xe3', + 'Auml': '\xc4', + 'auml': '\xe4', + 'Auml;': '\xc4', + 'auml;': '\xe4', + 'awconint;': '\u2233', + 'awint;': '\u2a11', + 'backcong;': '\u224c', + 'backepsilon;': '\u03f6', + 'backprime;': '\u2035', + 'backsim;': '\u223d', + 'backsimeq;': '\u22cd', + 'Backslash;': '\u2216', + 'Barv;': '\u2ae7', + 'barvee;': '\u22bd', + 'Barwed;': '\u2306', + 'barwed;': '\u2305', + 'barwedge;': '\u2305', + 'bbrk;': '\u23b5', + 'bbrktbrk;': '\u23b6', + 'bcong;': '\u224c', + 'Bcy;': '\u0411', + 'bcy;': '\u0431', + 'bdquo;': '\u201e', + 'becaus;': '\u2235', + 'Because;': '\u2235', + 'because;': '\u2235', + 'bemptyv;': '\u29b0', + 'bepsi;': '\u03f6', + 'bernou;': '\u212c', + 'Bernoullis;': '\u212c', + 'Beta;': '\u0392', + 'beta;': '\u03b2', + 'beth;': '\u2136', + 'between;': '\u226c', + 'Bfr;': '\U0001d505', + 'bfr;': '\U0001d51f', + 'bigcap;': '\u22c2', + 'bigcirc;': '\u25ef', + 'bigcup;': '\u22c3', + 'bigodot;': '\u2a00', + 'bigoplus;': '\u2a01', + 'bigotimes;': '\u2a02', + 'bigsqcup;': '\u2a06', + 'bigstar;': '\u2605', + 'bigtriangledown;': '\u25bd', + 'bigtriangleup;': '\u25b3', + 'biguplus;': '\u2a04', + 'bigvee;': '\u22c1', + 'bigwedge;': '\u22c0', + 'bkarow;': '\u290d', + 'blacklozenge;': '\u29eb', + 'blacksquare;': '\u25aa', + 'blacktriangle;': '\u25b4', + 'blacktriangledown;': '\u25be', + 'blacktriangleleft;': '\u25c2', + 'blacktriangleright;': '\u25b8', + 'blank;': '\u2423', + 'blk12;': '\u2592', + 'blk14;': '\u2591', + 'blk34;': '\u2593', + 'block;': '\u2588', + 'bne;': '=\u20e5', + 'bnequiv;': '\u2261\u20e5', + 'bNot;': '\u2aed', + 'bnot;': '\u2310', + 'Bopf;': '\U0001d539', + 'bopf;': '\U0001d553', + 'bot;': '\u22a5', + 'bottom;': '\u22a5', + 'bowtie;': '\u22c8', + 'boxbox;': '\u29c9', + 'boxDL;': '\u2557', + 'boxDl;': '\u2556', + 'boxdL;': '\u2555', + 'boxdl;': '\u2510', + 'boxDR;': '\u2554', + 'boxDr;': '\u2553', + 'boxdR;': '\u2552', + 'boxdr;': '\u250c', + 'boxH;': '\u2550', + 'boxh;': '\u2500', + 'boxHD;': '\u2566', + 'boxHd;': '\u2564', + 'boxhD;': '\u2565', + 'boxhd;': '\u252c', + 'boxHU;': '\u2569', + 'boxHu;': '\u2567', + 'boxhU;': '\u2568', + 'boxhu;': '\u2534', + 'boxminus;': '\u229f', + 'boxplus;': '\u229e', + 'boxtimes;': '\u22a0', + 'boxUL;': '\u255d', + 'boxUl;': '\u255c', + 'boxuL;': '\u255b', + 'boxul;': '\u2518', + 'boxUR;': '\u255a', + 'boxUr;': '\u2559', + 'boxuR;': '\u2558', + 'boxur;': '\u2514', + 'boxV;': '\u2551', + 'boxv;': '\u2502', + 'boxVH;': '\u256c', + 'boxVh;': '\u256b', + 'boxvH;': '\u256a', + 'boxvh;': '\u253c', + 'boxVL;': '\u2563', + 'boxVl;': '\u2562', + 'boxvL;': '\u2561', + 'boxvl;': '\u2524', + 'boxVR;': '\u2560', + 'boxVr;': '\u255f', + 'boxvR;': '\u255e', + 'boxvr;': '\u251c', + 'bprime;': '\u2035', + 'Breve;': '\u02d8', + 'breve;': '\u02d8', + 'brvbar': '\xa6', + 'brvbar;': '\xa6', + 'Bscr;': '\u212c', + 'bscr;': '\U0001d4b7', + 'bsemi;': '\u204f', + 'bsim;': '\u223d', + 'bsime;': '\u22cd', + 'bsol;': '\\', + 'bsolb;': '\u29c5', + 'bsolhsub;': '\u27c8', + 'bull;': '\u2022', + 'bullet;': '\u2022', + 'bump;': '\u224e', + 'bumpE;': '\u2aae', + 'bumpe;': '\u224f', + 'Bumpeq;': '\u224e', + 'bumpeq;': '\u224f', + 'Cacute;': '\u0106', + 'cacute;': '\u0107', + 'Cap;': '\u22d2', + 'cap;': '\u2229', + 'capand;': '\u2a44', + 'capbrcup;': '\u2a49', + 'capcap;': '\u2a4b', + 'capcup;': '\u2a47', + 'capdot;': '\u2a40', + 'CapitalDifferentialD;': '\u2145', + 'caps;': '\u2229\ufe00', + 'caret;': '\u2041', + 'caron;': '\u02c7', + 'Cayleys;': '\u212d', + 'ccaps;': '\u2a4d', + 'Ccaron;': '\u010c', + 'ccaron;': '\u010d', + 'Ccedil': '\xc7', + 'ccedil': '\xe7', + 'Ccedil;': '\xc7', + 'ccedil;': '\xe7', + 'Ccirc;': '\u0108', + 'ccirc;': '\u0109', + 'Cconint;': '\u2230', + 'ccups;': '\u2a4c', + 'ccupssm;': '\u2a50', + 'Cdot;': '\u010a', + 'cdot;': '\u010b', + 'cedil': '\xb8', + 'cedil;': '\xb8', + 'Cedilla;': '\xb8', + 'cemptyv;': '\u29b2', + 'cent': '\xa2', + 'cent;': '\xa2', + 'CenterDot;': '\xb7', + 'centerdot;': '\xb7', + 'Cfr;': '\u212d', + 'cfr;': '\U0001d520', + 'CHcy;': '\u0427', + 'chcy;': '\u0447', + 'check;': '\u2713', + 'checkmark;': '\u2713', + 'Chi;': '\u03a7', + 'chi;': '\u03c7', + 'cir;': '\u25cb', + 'circ;': '\u02c6', + 'circeq;': '\u2257', + 'circlearrowleft;': '\u21ba', + 'circlearrowright;': '\u21bb', + 'circledast;': '\u229b', + 'circledcirc;': '\u229a', + 'circleddash;': '\u229d', + 'CircleDot;': '\u2299', + 'circledR;': '\xae', + 'circledS;': '\u24c8', + 'CircleMinus;': '\u2296', + 'CirclePlus;': '\u2295', + 'CircleTimes;': '\u2297', + 'cirE;': '\u29c3', + 'cire;': '\u2257', + 'cirfnint;': '\u2a10', + 'cirmid;': '\u2aef', + 'cirscir;': '\u29c2', + 'ClockwiseContourIntegral;': '\u2232', + 'CloseCurlyDoubleQuote;': '\u201d', + 'CloseCurlyQuote;': '\u2019', + 'clubs;': '\u2663', + 'clubsuit;': '\u2663', + 'Colon;': '\u2237', + 'colon;': ':', + 'Colone;': '\u2a74', + 'colone;': '\u2254', + 'coloneq;': '\u2254', + 'comma;': ',', + 'commat;': '@', + 'comp;': '\u2201', + 'compfn;': '\u2218', + 'complement;': '\u2201', + 'complexes;': '\u2102', + 'cong;': '\u2245', + 'congdot;': '\u2a6d', + 'Congruent;': '\u2261', + 'Conint;': '\u222f', + 'conint;': '\u222e', + 'ContourIntegral;': '\u222e', + 'Copf;': '\u2102', + 'copf;': '\U0001d554', + 'coprod;': '\u2210', + 'Coproduct;': '\u2210', + 'COPY': '\xa9', + 'copy': '\xa9', + 'COPY;': '\xa9', + 'copy;': '\xa9', + 'copysr;': '\u2117', + 'CounterClockwiseContourIntegral;': '\u2233', + 'crarr;': '\u21b5', + 'Cross;': '\u2a2f', + 'cross;': '\u2717', + 'Cscr;': '\U0001d49e', + 'cscr;': '\U0001d4b8', + 'csub;': '\u2acf', + 'csube;': '\u2ad1', + 'csup;': '\u2ad0', + 'csupe;': '\u2ad2', + 'ctdot;': '\u22ef', + 'cudarrl;': '\u2938', + 'cudarrr;': '\u2935', + 'cuepr;': '\u22de', + 'cuesc;': '\u22df', + 'cularr;': '\u21b6', + 'cularrp;': '\u293d', + 'Cup;': '\u22d3', + 'cup;': '\u222a', + 'cupbrcap;': '\u2a48', + 'CupCap;': '\u224d', + 'cupcap;': '\u2a46', + 'cupcup;': '\u2a4a', + 'cupdot;': '\u228d', + 'cupor;': '\u2a45', + 'cups;': '\u222a\ufe00', + 'curarr;': '\u21b7', + 'curarrm;': '\u293c', + 'curlyeqprec;': '\u22de', + 'curlyeqsucc;': '\u22df', + 'curlyvee;': '\u22ce', + 'curlywedge;': '\u22cf', + 'curren': '\xa4', + 'curren;': '\xa4', + 'curvearrowleft;': '\u21b6', + 'curvearrowright;': '\u21b7', + 'cuvee;': '\u22ce', + 'cuwed;': '\u22cf', + 'cwconint;': '\u2232', + 'cwint;': '\u2231', + 'cylcty;': '\u232d', + 'Dagger;': '\u2021', + 'dagger;': '\u2020', + 'daleth;': '\u2138', + 'Darr;': '\u21a1', + 'dArr;': '\u21d3', + 'darr;': '\u2193', + 'dash;': '\u2010', + 'Dashv;': '\u2ae4', + 'dashv;': '\u22a3', + 'dbkarow;': '\u290f', + 'dblac;': '\u02dd', + 'Dcaron;': '\u010e', + 'dcaron;': '\u010f', + 'Dcy;': '\u0414', + 'dcy;': '\u0434', + 'DD;': '\u2145', + 'dd;': '\u2146', + 'ddagger;': '\u2021', + 'ddarr;': '\u21ca', + 'DDotrahd;': '\u2911', + 'ddotseq;': '\u2a77', + 'deg': '\xb0', + 'deg;': '\xb0', + 'Del;': '\u2207', + 'Delta;': '\u0394', + 'delta;': '\u03b4', + 'demptyv;': '\u29b1', + 'dfisht;': '\u297f', + 'Dfr;': '\U0001d507', + 'dfr;': '\U0001d521', + 'dHar;': '\u2965', + 'dharl;': '\u21c3', + 'dharr;': '\u21c2', + 'DiacriticalAcute;': '\xb4', + 'DiacriticalDot;': '\u02d9', + 'DiacriticalDoubleAcute;': '\u02dd', + 'DiacriticalGrave;': '`', + 'DiacriticalTilde;': '\u02dc', + 'diam;': '\u22c4', + 'Diamond;': '\u22c4', + 'diamond;': '\u22c4', + 'diamondsuit;': '\u2666', + 'diams;': '\u2666', + 'die;': '\xa8', + 'DifferentialD;': '\u2146', + 'digamma;': '\u03dd', + 'disin;': '\u22f2', + 'div;': '\xf7', + 'divide': '\xf7', + 'divide;': '\xf7', + 'divideontimes;': '\u22c7', + 'divonx;': '\u22c7', + 'DJcy;': '\u0402', + 'djcy;': '\u0452', + 'dlcorn;': '\u231e', + 'dlcrop;': '\u230d', + 'dollar;': '$', + 'Dopf;': '\U0001d53b', + 'dopf;': '\U0001d555', + 'Dot;': '\xa8', + 'dot;': '\u02d9', + 'DotDot;': '\u20dc', + 'doteq;': '\u2250', + 'doteqdot;': '\u2251', + 'DotEqual;': '\u2250', + 'dotminus;': '\u2238', + 'dotplus;': '\u2214', + 'dotsquare;': '\u22a1', + 'doublebarwedge;': '\u2306', + 'DoubleContourIntegral;': '\u222f', + 'DoubleDot;': '\xa8', + 'DoubleDownArrow;': '\u21d3', + 'DoubleLeftArrow;': '\u21d0', + 'DoubleLeftRightArrow;': '\u21d4', + 'DoubleLeftTee;': '\u2ae4', + 'DoubleLongLeftArrow;': '\u27f8', + 'DoubleLongLeftRightArrow;': '\u27fa', + 'DoubleLongRightArrow;': '\u27f9', + 'DoubleRightArrow;': '\u21d2', + 'DoubleRightTee;': '\u22a8', + 'DoubleUpArrow;': '\u21d1', + 'DoubleUpDownArrow;': '\u21d5', + 'DoubleVerticalBar;': '\u2225', + 'DownArrow;': '\u2193', + 'Downarrow;': '\u21d3', + 'downarrow;': '\u2193', + 'DownArrowBar;': '\u2913', + 'DownArrowUpArrow;': '\u21f5', + 'DownBreve;': '\u0311', + 'downdownarrows;': '\u21ca', + 'downharpoonleft;': '\u21c3', + 'downharpoonright;': '\u21c2', + 'DownLeftRightVector;': '\u2950', + 'DownLeftTeeVector;': '\u295e', + 'DownLeftVector;': '\u21bd', + 'DownLeftVectorBar;': '\u2956', + 'DownRightTeeVector;': '\u295f', + 'DownRightVector;': '\u21c1', + 'DownRightVectorBar;': '\u2957', + 'DownTee;': '\u22a4', + 'DownTeeArrow;': '\u21a7', + 'drbkarow;': '\u2910', + 'drcorn;': '\u231f', + 'drcrop;': '\u230c', + 'Dscr;': '\U0001d49f', + 'dscr;': '\U0001d4b9', + 'DScy;': '\u0405', + 'dscy;': '\u0455', + 'dsol;': '\u29f6', + 'Dstrok;': '\u0110', + 'dstrok;': '\u0111', + 'dtdot;': '\u22f1', + 'dtri;': '\u25bf', + 'dtrif;': '\u25be', + 'duarr;': '\u21f5', + 'duhar;': '\u296f', + 'dwangle;': '\u29a6', + 'DZcy;': '\u040f', + 'dzcy;': '\u045f', + 'dzigrarr;': '\u27ff', + 'Eacute': '\xc9', + 'eacute': '\xe9', + 'Eacute;': '\xc9', + 'eacute;': '\xe9', + 'easter;': '\u2a6e', + 'Ecaron;': '\u011a', + 'ecaron;': '\u011b', + 'ecir;': '\u2256', + 'Ecirc': '\xca', + 'ecirc': '\xea', + 'Ecirc;': '\xca', + 'ecirc;': '\xea', + 'ecolon;': '\u2255', + 'Ecy;': '\u042d', + 'ecy;': '\u044d', + 'eDDot;': '\u2a77', + 'Edot;': '\u0116', + 'eDot;': '\u2251', + 'edot;': '\u0117', + 'ee;': '\u2147', + 'efDot;': '\u2252', + 'Efr;': '\U0001d508', + 'efr;': '\U0001d522', + 'eg;': '\u2a9a', + 'Egrave': '\xc8', + 'egrave': '\xe8', + 'Egrave;': '\xc8', + 'egrave;': '\xe8', + 'egs;': '\u2a96', + 'egsdot;': '\u2a98', + 'el;': '\u2a99', + 'Element;': '\u2208', + 'elinters;': '\u23e7', + 'ell;': '\u2113', + 'els;': '\u2a95', + 'elsdot;': '\u2a97', + 'Emacr;': '\u0112', + 'emacr;': '\u0113', + 'empty;': '\u2205', + 'emptyset;': '\u2205', + 'EmptySmallSquare;': '\u25fb', + 'emptyv;': '\u2205', + 'EmptyVerySmallSquare;': '\u25ab', + 'emsp13;': '\u2004', + 'emsp14;': '\u2005', + 'emsp;': '\u2003', + 'ENG;': '\u014a', + 'eng;': '\u014b', + 'ensp;': '\u2002', + 'Eogon;': '\u0118', + 'eogon;': '\u0119', + 'Eopf;': '\U0001d53c', + 'eopf;': '\U0001d556', + 'epar;': '\u22d5', + 'eparsl;': '\u29e3', + 'eplus;': '\u2a71', + 'epsi;': '\u03b5', + 'Epsilon;': '\u0395', + 'epsilon;': '\u03b5', + 'epsiv;': '\u03f5', + 'eqcirc;': '\u2256', + 'eqcolon;': '\u2255', + 'eqsim;': '\u2242', + 'eqslantgtr;': '\u2a96', + 'eqslantless;': '\u2a95', + 'Equal;': '\u2a75', + 'equals;': '=', + 'EqualTilde;': '\u2242', + 'equest;': '\u225f', + 'Equilibrium;': '\u21cc', + 'equiv;': '\u2261', + 'equivDD;': '\u2a78', + 'eqvparsl;': '\u29e5', + 'erarr;': '\u2971', + 'erDot;': '\u2253', + 'Escr;': '\u2130', + 'escr;': '\u212f', + 'esdot;': '\u2250', + 'Esim;': '\u2a73', + 'esim;': '\u2242', + 'Eta;': '\u0397', + 'eta;': '\u03b7', + 'ETH': '\xd0', + 'eth': '\xf0', + 'ETH;': '\xd0', + 'eth;': '\xf0', + 'Euml': '\xcb', + 'euml': '\xeb', + 'Euml;': '\xcb', + 'euml;': '\xeb', + 'euro;': '\u20ac', + 'excl;': '!', + 'exist;': '\u2203', + 'Exists;': '\u2203', + 'expectation;': '\u2130', + 'ExponentialE;': '\u2147', + 'exponentiale;': '\u2147', + 'fallingdotseq;': '\u2252', + 'Fcy;': '\u0424', + 'fcy;': '\u0444', + 'female;': '\u2640', + 'ffilig;': '\ufb03', + 'fflig;': '\ufb00', + 'ffllig;': '\ufb04', + 'Ffr;': '\U0001d509', + 'ffr;': '\U0001d523', + 'filig;': '\ufb01', + 'FilledSmallSquare;': '\u25fc', + 'FilledVerySmallSquare;': '\u25aa', + 'fjlig;': 'fj', + 'flat;': '\u266d', + 'fllig;': '\ufb02', + 'fltns;': '\u25b1', + 'fnof;': '\u0192', + 'Fopf;': '\U0001d53d', + 'fopf;': '\U0001d557', + 'ForAll;': '\u2200', + 'forall;': '\u2200', + 'fork;': '\u22d4', + 'forkv;': '\u2ad9', + 'Fouriertrf;': '\u2131', + 'fpartint;': '\u2a0d', + 'frac12': '\xbd', + 'frac12;': '\xbd', + 'frac13;': '\u2153', + 'frac14': '\xbc', + 'frac14;': '\xbc', + 'frac15;': '\u2155', + 'frac16;': '\u2159', + 'frac18;': '\u215b', + 'frac23;': '\u2154', + 'frac25;': '\u2156', + 'frac34': '\xbe', + 'frac34;': '\xbe', + 'frac35;': '\u2157', + 'frac38;': '\u215c', + 'frac45;': '\u2158', + 'frac56;': '\u215a', + 'frac58;': '\u215d', + 'frac78;': '\u215e', + 'frasl;': '\u2044', + 'frown;': '\u2322', + 'Fscr;': '\u2131', + 'fscr;': '\U0001d4bb', + 'gacute;': '\u01f5', + 'Gamma;': '\u0393', + 'gamma;': '\u03b3', + 'Gammad;': '\u03dc', + 'gammad;': '\u03dd', + 'gap;': '\u2a86', + 'Gbreve;': '\u011e', + 'gbreve;': '\u011f', + 'Gcedil;': '\u0122', + 'Gcirc;': '\u011c', + 'gcirc;': '\u011d', + 'Gcy;': '\u0413', + 'gcy;': '\u0433', + 'Gdot;': '\u0120', + 'gdot;': '\u0121', + 'gE;': '\u2267', + 'ge;': '\u2265', + 'gEl;': '\u2a8c', + 'gel;': '\u22db', + 'geq;': '\u2265', + 'geqq;': '\u2267', + 'geqslant;': '\u2a7e', + 'ges;': '\u2a7e', + 'gescc;': '\u2aa9', + 'gesdot;': '\u2a80', + 'gesdoto;': '\u2a82', + 'gesdotol;': '\u2a84', + 'gesl;': '\u22db\ufe00', + 'gesles;': '\u2a94', + 'Gfr;': '\U0001d50a', + 'gfr;': '\U0001d524', + 'Gg;': '\u22d9', + 'gg;': '\u226b', + 'ggg;': '\u22d9', + 'gimel;': '\u2137', + 'GJcy;': '\u0403', + 'gjcy;': '\u0453', + 'gl;': '\u2277', + 'gla;': '\u2aa5', + 'glE;': '\u2a92', + 'glj;': '\u2aa4', + 'gnap;': '\u2a8a', + 'gnapprox;': '\u2a8a', + 'gnE;': '\u2269', + 'gne;': '\u2a88', + 'gneq;': '\u2a88', + 'gneqq;': '\u2269', + 'gnsim;': '\u22e7', + 'Gopf;': '\U0001d53e', + 'gopf;': '\U0001d558', + 'grave;': '`', + 'GreaterEqual;': '\u2265', + 'GreaterEqualLess;': '\u22db', + 'GreaterFullEqual;': '\u2267', + 'GreaterGreater;': '\u2aa2', + 'GreaterLess;': '\u2277', + 'GreaterSlantEqual;': '\u2a7e', + 'GreaterTilde;': '\u2273', + 'Gscr;': '\U0001d4a2', + 'gscr;': '\u210a', + 'gsim;': '\u2273', + 'gsime;': '\u2a8e', + 'gsiml;': '\u2a90', + 'GT': '>', + 'gt': '>', + 'GT;': '>', + 'Gt;': '\u226b', + 'gt;': '>', + 'gtcc;': '\u2aa7', + 'gtcir;': '\u2a7a', + 'gtdot;': '\u22d7', + 'gtlPar;': '\u2995', + 'gtquest;': '\u2a7c', + 'gtrapprox;': '\u2a86', + 'gtrarr;': '\u2978', + 'gtrdot;': '\u22d7', + 'gtreqless;': '\u22db', + 'gtreqqless;': '\u2a8c', + 'gtrless;': '\u2277', + 'gtrsim;': '\u2273', + 'gvertneqq;': '\u2269\ufe00', + 'gvnE;': '\u2269\ufe00', + 'Hacek;': '\u02c7', + 'hairsp;': '\u200a', + 'half;': '\xbd', + 'hamilt;': '\u210b', + 'HARDcy;': '\u042a', + 'hardcy;': '\u044a', + 'hArr;': '\u21d4', + 'harr;': '\u2194', + 'harrcir;': '\u2948', + 'harrw;': '\u21ad', + 'Hat;': '^', + 'hbar;': '\u210f', + 'Hcirc;': '\u0124', + 'hcirc;': '\u0125', + 'hearts;': '\u2665', + 'heartsuit;': '\u2665', + 'hellip;': '\u2026', + 'hercon;': '\u22b9', + 'Hfr;': '\u210c', + 'hfr;': '\U0001d525', + 'HilbertSpace;': '\u210b', + 'hksearow;': '\u2925', + 'hkswarow;': '\u2926', + 'hoarr;': '\u21ff', + 'homtht;': '\u223b', + 'hookleftarrow;': '\u21a9', + 'hookrightarrow;': '\u21aa', + 'Hopf;': '\u210d', + 'hopf;': '\U0001d559', + 'horbar;': '\u2015', + 'HorizontalLine;': '\u2500', + 'Hscr;': '\u210b', + 'hscr;': '\U0001d4bd', + 'hslash;': '\u210f', + 'Hstrok;': '\u0126', + 'hstrok;': '\u0127', + 'HumpDownHump;': '\u224e', + 'HumpEqual;': '\u224f', + 'hybull;': '\u2043', + 'hyphen;': '\u2010', + 'Iacute': '\xcd', + 'iacute': '\xed', + 'Iacute;': '\xcd', + 'iacute;': '\xed', + 'ic;': '\u2063', + 'Icirc': '\xce', + 'icirc': '\xee', + 'Icirc;': '\xce', + 'icirc;': '\xee', + 'Icy;': '\u0418', + 'icy;': '\u0438', + 'Idot;': '\u0130', + 'IEcy;': '\u0415', + 'iecy;': '\u0435', + 'iexcl': '\xa1', + 'iexcl;': '\xa1', + 'iff;': '\u21d4', + 'Ifr;': '\u2111', + 'ifr;': '\U0001d526', + 'Igrave': '\xcc', + 'igrave': '\xec', + 'Igrave;': '\xcc', + 'igrave;': '\xec', + 'ii;': '\u2148', + 'iiiint;': '\u2a0c', + 'iiint;': '\u222d', + 'iinfin;': '\u29dc', + 'iiota;': '\u2129', + 'IJlig;': '\u0132', + 'ijlig;': '\u0133', + 'Im;': '\u2111', + 'Imacr;': '\u012a', + 'imacr;': '\u012b', + 'image;': '\u2111', + 'ImaginaryI;': '\u2148', + 'imagline;': '\u2110', + 'imagpart;': '\u2111', + 'imath;': '\u0131', + 'imof;': '\u22b7', + 'imped;': '\u01b5', + 'Implies;': '\u21d2', + 'in;': '\u2208', + 'incare;': '\u2105', + 'infin;': '\u221e', + 'infintie;': '\u29dd', + 'inodot;': '\u0131', + 'Int;': '\u222c', + 'int;': '\u222b', + 'intcal;': '\u22ba', + 'integers;': '\u2124', + 'Integral;': '\u222b', + 'intercal;': '\u22ba', + 'Intersection;': '\u22c2', + 'intlarhk;': '\u2a17', + 'intprod;': '\u2a3c', + 'InvisibleComma;': '\u2063', + 'InvisibleTimes;': '\u2062', + 'IOcy;': '\u0401', + 'iocy;': '\u0451', + 'Iogon;': '\u012e', + 'iogon;': '\u012f', + 'Iopf;': '\U0001d540', + 'iopf;': '\U0001d55a', + 'Iota;': '\u0399', + 'iota;': '\u03b9', + 'iprod;': '\u2a3c', + 'iquest': '\xbf', + 'iquest;': '\xbf', + 'Iscr;': '\u2110', + 'iscr;': '\U0001d4be', + 'isin;': '\u2208', + 'isindot;': '\u22f5', + 'isinE;': '\u22f9', + 'isins;': '\u22f4', + 'isinsv;': '\u22f3', + 'isinv;': '\u2208', + 'it;': '\u2062', + 'Itilde;': '\u0128', + 'itilde;': '\u0129', + 'Iukcy;': '\u0406', + 'iukcy;': '\u0456', + 'Iuml': '\xcf', + 'iuml': '\xef', + 'Iuml;': '\xcf', + 'iuml;': '\xef', + 'Jcirc;': '\u0134', + 'jcirc;': '\u0135', + 'Jcy;': '\u0419', + 'jcy;': '\u0439', + 'Jfr;': '\U0001d50d', + 'jfr;': '\U0001d527', + 'jmath;': '\u0237', + 'Jopf;': '\U0001d541', + 'jopf;': '\U0001d55b', + 'Jscr;': '\U0001d4a5', + 'jscr;': '\U0001d4bf', + 'Jsercy;': '\u0408', + 'jsercy;': '\u0458', + 'Jukcy;': '\u0404', + 'jukcy;': '\u0454', + 'Kappa;': '\u039a', + 'kappa;': '\u03ba', + 'kappav;': '\u03f0', + 'Kcedil;': '\u0136', + 'kcedil;': '\u0137', + 'Kcy;': '\u041a', + 'kcy;': '\u043a', + 'Kfr;': '\U0001d50e', + 'kfr;': '\U0001d528', + 'kgreen;': '\u0138', + 'KHcy;': '\u0425', + 'khcy;': '\u0445', + 'KJcy;': '\u040c', + 'kjcy;': '\u045c', + 'Kopf;': '\U0001d542', + 'kopf;': '\U0001d55c', + 'Kscr;': '\U0001d4a6', + 'kscr;': '\U0001d4c0', + 'lAarr;': '\u21da', + 'Lacute;': '\u0139', + 'lacute;': '\u013a', + 'laemptyv;': '\u29b4', + 'lagran;': '\u2112', + 'Lambda;': '\u039b', + 'lambda;': '\u03bb', + 'Lang;': '\u27ea', + 'lang;': '\u27e8', + 'langd;': '\u2991', + 'langle;': '\u27e8', + 'lap;': '\u2a85', + 'Laplacetrf;': '\u2112', + 'laquo': '\xab', + 'laquo;': '\xab', + 'Larr;': '\u219e', + 'lArr;': '\u21d0', + 'larr;': '\u2190', + 'larrb;': '\u21e4', + 'larrbfs;': '\u291f', + 'larrfs;': '\u291d', + 'larrhk;': '\u21a9', + 'larrlp;': '\u21ab', + 'larrpl;': '\u2939', + 'larrsim;': '\u2973', + 'larrtl;': '\u21a2', + 'lat;': '\u2aab', + 'lAtail;': '\u291b', + 'latail;': '\u2919', + 'late;': '\u2aad', + 'lates;': '\u2aad\ufe00', + 'lBarr;': '\u290e', + 'lbarr;': '\u290c', + 'lbbrk;': '\u2772', + 'lbrace;': '{', + 'lbrack;': '[', + 'lbrke;': '\u298b', + 'lbrksld;': '\u298f', + 'lbrkslu;': '\u298d', + 'Lcaron;': '\u013d', + 'lcaron;': '\u013e', + 'Lcedil;': '\u013b', + 'lcedil;': '\u013c', + 'lceil;': '\u2308', + 'lcub;': '{', + 'Lcy;': '\u041b', + 'lcy;': '\u043b', + 'ldca;': '\u2936', + 'ldquo;': '\u201c', + 'ldquor;': '\u201e', + 'ldrdhar;': '\u2967', + 'ldrushar;': '\u294b', + 'ldsh;': '\u21b2', + 'lE;': '\u2266', + 'le;': '\u2264', + 'LeftAngleBracket;': '\u27e8', + 'LeftArrow;': '\u2190', + 'Leftarrow;': '\u21d0', + 'leftarrow;': '\u2190', + 'LeftArrowBar;': '\u21e4', + 'LeftArrowRightArrow;': '\u21c6', + 'leftarrowtail;': '\u21a2', + 'LeftCeiling;': '\u2308', + 'LeftDoubleBracket;': '\u27e6', + 'LeftDownTeeVector;': '\u2961', + 'LeftDownVector;': '\u21c3', + 'LeftDownVectorBar;': '\u2959', + 'LeftFloor;': '\u230a', + 'leftharpoondown;': '\u21bd', + 'leftharpoonup;': '\u21bc', + 'leftleftarrows;': '\u21c7', + 'LeftRightArrow;': '\u2194', + 'Leftrightarrow;': '\u21d4', + 'leftrightarrow;': '\u2194', + 'leftrightarrows;': '\u21c6', + 'leftrightharpoons;': '\u21cb', + 'leftrightsquigarrow;': '\u21ad', + 'LeftRightVector;': '\u294e', + 'LeftTee;': '\u22a3', + 'LeftTeeArrow;': '\u21a4', + 'LeftTeeVector;': '\u295a', + 'leftthreetimes;': '\u22cb', + 'LeftTriangle;': '\u22b2', + 'LeftTriangleBar;': '\u29cf', + 'LeftTriangleEqual;': '\u22b4', + 'LeftUpDownVector;': '\u2951', + 'LeftUpTeeVector;': '\u2960', + 'LeftUpVector;': '\u21bf', + 'LeftUpVectorBar;': '\u2958', + 'LeftVector;': '\u21bc', + 'LeftVectorBar;': '\u2952', + 'lEg;': '\u2a8b', + 'leg;': '\u22da', + 'leq;': '\u2264', + 'leqq;': '\u2266', + 'leqslant;': '\u2a7d', + 'les;': '\u2a7d', + 'lescc;': '\u2aa8', + 'lesdot;': '\u2a7f', + 'lesdoto;': '\u2a81', + 'lesdotor;': '\u2a83', + 'lesg;': '\u22da\ufe00', + 'lesges;': '\u2a93', + 'lessapprox;': '\u2a85', + 'lessdot;': '\u22d6', + 'lesseqgtr;': '\u22da', + 'lesseqqgtr;': '\u2a8b', + 'LessEqualGreater;': '\u22da', + 'LessFullEqual;': '\u2266', + 'LessGreater;': '\u2276', + 'lessgtr;': '\u2276', + 'LessLess;': '\u2aa1', + 'lesssim;': '\u2272', + 'LessSlantEqual;': '\u2a7d', + 'LessTilde;': '\u2272', + 'lfisht;': '\u297c', + 'lfloor;': '\u230a', + 'Lfr;': '\U0001d50f', + 'lfr;': '\U0001d529', + 'lg;': '\u2276', + 'lgE;': '\u2a91', + 'lHar;': '\u2962', + 'lhard;': '\u21bd', + 'lharu;': '\u21bc', + 'lharul;': '\u296a', + 'lhblk;': '\u2584', + 'LJcy;': '\u0409', + 'ljcy;': '\u0459', + 'Ll;': '\u22d8', + 'll;': '\u226a', + 'llarr;': '\u21c7', + 'llcorner;': '\u231e', + 'Lleftarrow;': '\u21da', + 'llhard;': '\u296b', + 'lltri;': '\u25fa', + 'Lmidot;': '\u013f', + 'lmidot;': '\u0140', + 'lmoust;': '\u23b0', + 'lmoustache;': '\u23b0', + 'lnap;': '\u2a89', + 'lnapprox;': '\u2a89', + 'lnE;': '\u2268', + 'lne;': '\u2a87', + 'lneq;': '\u2a87', + 'lneqq;': '\u2268', + 'lnsim;': '\u22e6', + 'loang;': '\u27ec', + 'loarr;': '\u21fd', + 'lobrk;': '\u27e6', + 'LongLeftArrow;': '\u27f5', + 'Longleftarrow;': '\u27f8', + 'longleftarrow;': '\u27f5', + 'LongLeftRightArrow;': '\u27f7', + 'Longleftrightarrow;': '\u27fa', + 'longleftrightarrow;': '\u27f7', + 'longmapsto;': '\u27fc', + 'LongRightArrow;': '\u27f6', + 'Longrightarrow;': '\u27f9', + 'longrightarrow;': '\u27f6', + 'looparrowleft;': '\u21ab', + 'looparrowright;': '\u21ac', + 'lopar;': '\u2985', + 'Lopf;': '\U0001d543', + 'lopf;': '\U0001d55d', + 'loplus;': '\u2a2d', + 'lotimes;': '\u2a34', + 'lowast;': '\u2217', + 'lowbar;': '_', + 'LowerLeftArrow;': '\u2199', + 'LowerRightArrow;': '\u2198', + 'loz;': '\u25ca', + 'lozenge;': '\u25ca', + 'lozf;': '\u29eb', + 'lpar;': '(', + 'lparlt;': '\u2993', + 'lrarr;': '\u21c6', + 'lrcorner;': '\u231f', + 'lrhar;': '\u21cb', + 'lrhard;': '\u296d', + 'lrm;': '\u200e', + 'lrtri;': '\u22bf', + 'lsaquo;': '\u2039', + 'Lscr;': '\u2112', + 'lscr;': '\U0001d4c1', + 'Lsh;': '\u21b0', + 'lsh;': '\u21b0', + 'lsim;': '\u2272', + 'lsime;': '\u2a8d', + 'lsimg;': '\u2a8f', + 'lsqb;': '[', + 'lsquo;': '\u2018', + 'lsquor;': '\u201a', + 'Lstrok;': '\u0141', + 'lstrok;': '\u0142', + 'LT': '<', + 'lt': '<', + 'LT;': '<', + 'Lt;': '\u226a', + 'lt;': '<', + 'ltcc;': '\u2aa6', + 'ltcir;': '\u2a79', + 'ltdot;': '\u22d6', + 'lthree;': '\u22cb', + 'ltimes;': '\u22c9', + 'ltlarr;': '\u2976', + 'ltquest;': '\u2a7b', + 'ltri;': '\u25c3', + 'ltrie;': '\u22b4', + 'ltrif;': '\u25c2', + 'ltrPar;': '\u2996', + 'lurdshar;': '\u294a', + 'luruhar;': '\u2966', + 'lvertneqq;': '\u2268\ufe00', + 'lvnE;': '\u2268\ufe00', + 'macr': '\xaf', + 'macr;': '\xaf', + 'male;': '\u2642', + 'malt;': '\u2720', + 'maltese;': '\u2720', + 'Map;': '\u2905', + 'map;': '\u21a6', + 'mapsto;': '\u21a6', + 'mapstodown;': '\u21a7', + 'mapstoleft;': '\u21a4', + 'mapstoup;': '\u21a5', + 'marker;': '\u25ae', + 'mcomma;': '\u2a29', + 'Mcy;': '\u041c', + 'mcy;': '\u043c', + 'mdash;': '\u2014', + 'mDDot;': '\u223a', + 'measuredangle;': '\u2221', + 'MediumSpace;': '\u205f', + 'Mellintrf;': '\u2133', + 'Mfr;': '\U0001d510', + 'mfr;': '\U0001d52a', + 'mho;': '\u2127', + 'micro': '\xb5', + 'micro;': '\xb5', + 'mid;': '\u2223', + 'midast;': '*', + 'midcir;': '\u2af0', + 'middot': '\xb7', + 'middot;': '\xb7', + 'minus;': '\u2212', + 'minusb;': '\u229f', + 'minusd;': '\u2238', + 'minusdu;': '\u2a2a', + 'MinusPlus;': '\u2213', + 'mlcp;': '\u2adb', + 'mldr;': '\u2026', + 'mnplus;': '\u2213', + 'models;': '\u22a7', + 'Mopf;': '\U0001d544', + 'mopf;': '\U0001d55e', + 'mp;': '\u2213', + 'Mscr;': '\u2133', + 'mscr;': '\U0001d4c2', + 'mstpos;': '\u223e', + 'Mu;': '\u039c', + 'mu;': '\u03bc', + 'multimap;': '\u22b8', + 'mumap;': '\u22b8', + 'nabla;': '\u2207', + 'Nacute;': '\u0143', + 'nacute;': '\u0144', + 'nang;': '\u2220\u20d2', + 'nap;': '\u2249', + 'napE;': '\u2a70\u0338', + 'napid;': '\u224b\u0338', + 'napos;': '\u0149', + 'napprox;': '\u2249', + 'natur;': '\u266e', + 'natural;': '\u266e', + 'naturals;': '\u2115', + 'nbsp': '\xa0', + 'nbsp;': '\xa0', + 'nbump;': '\u224e\u0338', + 'nbumpe;': '\u224f\u0338', + 'ncap;': '\u2a43', + 'Ncaron;': '\u0147', + 'ncaron;': '\u0148', + 'Ncedil;': '\u0145', + 'ncedil;': '\u0146', + 'ncong;': '\u2247', + 'ncongdot;': '\u2a6d\u0338', + 'ncup;': '\u2a42', + 'Ncy;': '\u041d', + 'ncy;': '\u043d', + 'ndash;': '\u2013', + 'ne;': '\u2260', + 'nearhk;': '\u2924', + 'neArr;': '\u21d7', + 'nearr;': '\u2197', + 'nearrow;': '\u2197', + 'nedot;': '\u2250\u0338', + 'NegativeMediumSpace;': '\u200b', + 'NegativeThickSpace;': '\u200b', + 'NegativeThinSpace;': '\u200b', + 'NegativeVeryThinSpace;': '\u200b', + 'nequiv;': '\u2262', + 'nesear;': '\u2928', + 'nesim;': '\u2242\u0338', + 'NestedGreaterGreater;': '\u226b', + 'NestedLessLess;': '\u226a', + 'NewLine;': '\n', + 'nexist;': '\u2204', + 'nexists;': '\u2204', + 'Nfr;': '\U0001d511', + 'nfr;': '\U0001d52b', + 'ngE;': '\u2267\u0338', + 'nge;': '\u2271', + 'ngeq;': '\u2271', + 'ngeqq;': '\u2267\u0338', + 'ngeqslant;': '\u2a7e\u0338', + 'nges;': '\u2a7e\u0338', + 'nGg;': '\u22d9\u0338', + 'ngsim;': '\u2275', + 'nGt;': '\u226b\u20d2', + 'ngt;': '\u226f', + 'ngtr;': '\u226f', + 'nGtv;': '\u226b\u0338', + 'nhArr;': '\u21ce', + 'nharr;': '\u21ae', + 'nhpar;': '\u2af2', + 'ni;': '\u220b', + 'nis;': '\u22fc', + 'nisd;': '\u22fa', + 'niv;': '\u220b', + 'NJcy;': '\u040a', + 'njcy;': '\u045a', + 'nlArr;': '\u21cd', + 'nlarr;': '\u219a', + 'nldr;': '\u2025', + 'nlE;': '\u2266\u0338', + 'nle;': '\u2270', + 'nLeftarrow;': '\u21cd', + 'nleftarrow;': '\u219a', + 'nLeftrightarrow;': '\u21ce', + 'nleftrightarrow;': '\u21ae', + 'nleq;': '\u2270', + 'nleqq;': '\u2266\u0338', + 'nleqslant;': '\u2a7d\u0338', + 'nles;': '\u2a7d\u0338', + 'nless;': '\u226e', + 'nLl;': '\u22d8\u0338', + 'nlsim;': '\u2274', + 'nLt;': '\u226a\u20d2', + 'nlt;': '\u226e', + 'nltri;': '\u22ea', + 'nltrie;': '\u22ec', + 'nLtv;': '\u226a\u0338', + 'nmid;': '\u2224', + 'NoBreak;': '\u2060', + 'NonBreakingSpace;': '\xa0', + 'Nopf;': '\u2115', + 'nopf;': '\U0001d55f', + 'not': '\xac', + 'Not;': '\u2aec', + 'not;': '\xac', + 'NotCongruent;': '\u2262', + 'NotCupCap;': '\u226d', + 'NotDoubleVerticalBar;': '\u2226', + 'NotElement;': '\u2209', + 'NotEqual;': '\u2260', + 'NotEqualTilde;': '\u2242\u0338', + 'NotExists;': '\u2204', + 'NotGreater;': '\u226f', + 'NotGreaterEqual;': '\u2271', + 'NotGreaterFullEqual;': '\u2267\u0338', + 'NotGreaterGreater;': '\u226b\u0338', + 'NotGreaterLess;': '\u2279', + 'NotGreaterSlantEqual;': '\u2a7e\u0338', + 'NotGreaterTilde;': '\u2275', + 'NotHumpDownHump;': '\u224e\u0338', + 'NotHumpEqual;': '\u224f\u0338', + 'notin;': '\u2209', + 'notindot;': '\u22f5\u0338', + 'notinE;': '\u22f9\u0338', + 'notinva;': '\u2209', + 'notinvb;': '\u22f7', + 'notinvc;': '\u22f6', + 'NotLeftTriangle;': '\u22ea', + 'NotLeftTriangleBar;': '\u29cf\u0338', + 'NotLeftTriangleEqual;': '\u22ec', + 'NotLess;': '\u226e', + 'NotLessEqual;': '\u2270', + 'NotLessGreater;': '\u2278', + 'NotLessLess;': '\u226a\u0338', + 'NotLessSlantEqual;': '\u2a7d\u0338', + 'NotLessTilde;': '\u2274', + 'NotNestedGreaterGreater;': '\u2aa2\u0338', + 'NotNestedLessLess;': '\u2aa1\u0338', + 'notni;': '\u220c', + 'notniva;': '\u220c', + 'notnivb;': '\u22fe', + 'notnivc;': '\u22fd', + 'NotPrecedes;': '\u2280', + 'NotPrecedesEqual;': '\u2aaf\u0338', + 'NotPrecedesSlantEqual;': '\u22e0', + 'NotReverseElement;': '\u220c', + 'NotRightTriangle;': '\u22eb', + 'NotRightTriangleBar;': '\u29d0\u0338', + 'NotRightTriangleEqual;': '\u22ed', + 'NotSquareSubset;': '\u228f\u0338', + 'NotSquareSubsetEqual;': '\u22e2', + 'NotSquareSuperset;': '\u2290\u0338', + 'NotSquareSupersetEqual;': '\u22e3', + 'NotSubset;': '\u2282\u20d2', + 'NotSubsetEqual;': '\u2288', + 'NotSucceeds;': '\u2281', + 'NotSucceedsEqual;': '\u2ab0\u0338', + 'NotSucceedsSlantEqual;': '\u22e1', + 'NotSucceedsTilde;': '\u227f\u0338', + 'NotSuperset;': '\u2283\u20d2', + 'NotSupersetEqual;': '\u2289', + 'NotTilde;': '\u2241', + 'NotTildeEqual;': '\u2244', + 'NotTildeFullEqual;': '\u2247', + 'NotTildeTilde;': '\u2249', + 'NotVerticalBar;': '\u2224', + 'npar;': '\u2226', + 'nparallel;': '\u2226', + 'nparsl;': '\u2afd\u20e5', + 'npart;': '\u2202\u0338', + 'npolint;': '\u2a14', + 'npr;': '\u2280', + 'nprcue;': '\u22e0', + 'npre;': '\u2aaf\u0338', + 'nprec;': '\u2280', + 'npreceq;': '\u2aaf\u0338', + 'nrArr;': '\u21cf', + 'nrarr;': '\u219b', + 'nrarrc;': '\u2933\u0338', + 'nrarrw;': '\u219d\u0338', + 'nRightarrow;': '\u21cf', + 'nrightarrow;': '\u219b', + 'nrtri;': '\u22eb', + 'nrtrie;': '\u22ed', + 'nsc;': '\u2281', + 'nsccue;': '\u22e1', + 'nsce;': '\u2ab0\u0338', + 'Nscr;': '\U0001d4a9', + 'nscr;': '\U0001d4c3', + 'nshortmid;': '\u2224', + 'nshortparallel;': '\u2226', + 'nsim;': '\u2241', + 'nsime;': '\u2244', + 'nsimeq;': '\u2244', + 'nsmid;': '\u2224', + 'nspar;': '\u2226', + 'nsqsube;': '\u22e2', + 'nsqsupe;': '\u22e3', + 'nsub;': '\u2284', + 'nsubE;': '\u2ac5\u0338', + 'nsube;': '\u2288', + 'nsubset;': '\u2282\u20d2', + 'nsubseteq;': '\u2288', + 'nsubseteqq;': '\u2ac5\u0338', + 'nsucc;': '\u2281', + 'nsucceq;': '\u2ab0\u0338', + 'nsup;': '\u2285', + 'nsupE;': '\u2ac6\u0338', + 'nsupe;': '\u2289', + 'nsupset;': '\u2283\u20d2', + 'nsupseteq;': '\u2289', + 'nsupseteqq;': '\u2ac6\u0338', + 'ntgl;': '\u2279', + 'Ntilde': '\xd1', + 'ntilde': '\xf1', + 'Ntilde;': '\xd1', + 'ntilde;': '\xf1', + 'ntlg;': '\u2278', + 'ntriangleleft;': '\u22ea', + 'ntrianglelefteq;': '\u22ec', + 'ntriangleright;': '\u22eb', + 'ntrianglerighteq;': '\u22ed', + 'Nu;': '\u039d', + 'nu;': '\u03bd', + 'num;': '#', + 'numero;': '\u2116', + 'numsp;': '\u2007', + 'nvap;': '\u224d\u20d2', + 'nVDash;': '\u22af', + 'nVdash;': '\u22ae', + 'nvDash;': '\u22ad', + 'nvdash;': '\u22ac', + 'nvge;': '\u2265\u20d2', + 'nvgt;': '>\u20d2', + 'nvHarr;': '\u2904', + 'nvinfin;': '\u29de', + 'nvlArr;': '\u2902', + 'nvle;': '\u2264\u20d2', + 'nvlt;': '<\u20d2', + 'nvltrie;': '\u22b4\u20d2', + 'nvrArr;': '\u2903', + 'nvrtrie;': '\u22b5\u20d2', + 'nvsim;': '\u223c\u20d2', + 'nwarhk;': '\u2923', + 'nwArr;': '\u21d6', + 'nwarr;': '\u2196', + 'nwarrow;': '\u2196', + 'nwnear;': '\u2927', + 'Oacute': '\xd3', + 'oacute': '\xf3', + 'Oacute;': '\xd3', + 'oacute;': '\xf3', + 'oast;': '\u229b', + 'ocir;': '\u229a', + 'Ocirc': '\xd4', + 'ocirc': '\xf4', + 'Ocirc;': '\xd4', + 'ocirc;': '\xf4', + 'Ocy;': '\u041e', + 'ocy;': '\u043e', + 'odash;': '\u229d', + 'Odblac;': '\u0150', + 'odblac;': '\u0151', + 'odiv;': '\u2a38', + 'odot;': '\u2299', + 'odsold;': '\u29bc', + 'OElig;': '\u0152', + 'oelig;': '\u0153', + 'ofcir;': '\u29bf', + 'Ofr;': '\U0001d512', + 'ofr;': '\U0001d52c', + 'ogon;': '\u02db', + 'Ograve': '\xd2', + 'ograve': '\xf2', + 'Ograve;': '\xd2', + 'ograve;': '\xf2', + 'ogt;': '\u29c1', + 'ohbar;': '\u29b5', + 'ohm;': '\u03a9', + 'oint;': '\u222e', + 'olarr;': '\u21ba', + 'olcir;': '\u29be', + 'olcross;': '\u29bb', + 'oline;': '\u203e', + 'olt;': '\u29c0', + 'Omacr;': '\u014c', + 'omacr;': '\u014d', + 'Omega;': '\u03a9', + 'omega;': '\u03c9', + 'Omicron;': '\u039f', + 'omicron;': '\u03bf', + 'omid;': '\u29b6', + 'ominus;': '\u2296', + 'Oopf;': '\U0001d546', + 'oopf;': '\U0001d560', + 'opar;': '\u29b7', + 'OpenCurlyDoubleQuote;': '\u201c', + 'OpenCurlyQuote;': '\u2018', + 'operp;': '\u29b9', + 'oplus;': '\u2295', + 'Or;': '\u2a54', + 'or;': '\u2228', + 'orarr;': '\u21bb', + 'ord;': '\u2a5d', + 'order;': '\u2134', + 'orderof;': '\u2134', + 'ordf': '\xaa', + 'ordf;': '\xaa', + 'ordm': '\xba', + 'ordm;': '\xba', + 'origof;': '\u22b6', + 'oror;': '\u2a56', + 'orslope;': '\u2a57', + 'orv;': '\u2a5b', + 'oS;': '\u24c8', + 'Oscr;': '\U0001d4aa', + 'oscr;': '\u2134', + 'Oslash': '\xd8', + 'oslash': '\xf8', + 'Oslash;': '\xd8', + 'oslash;': '\xf8', + 'osol;': '\u2298', + 'Otilde': '\xd5', + 'otilde': '\xf5', + 'Otilde;': '\xd5', + 'otilde;': '\xf5', + 'Otimes;': '\u2a37', + 'otimes;': '\u2297', + 'otimesas;': '\u2a36', + 'Ouml': '\xd6', + 'ouml': '\xf6', + 'Ouml;': '\xd6', + 'ouml;': '\xf6', + 'ovbar;': '\u233d', + 'OverBar;': '\u203e', + 'OverBrace;': '\u23de', + 'OverBracket;': '\u23b4', + 'OverParenthesis;': '\u23dc', + 'par;': '\u2225', + 'para': '\xb6', + 'para;': '\xb6', + 'parallel;': '\u2225', + 'parsim;': '\u2af3', + 'parsl;': '\u2afd', + 'part;': '\u2202', + 'PartialD;': '\u2202', + 'Pcy;': '\u041f', + 'pcy;': '\u043f', + 'percnt;': '%', + 'period;': '.', + 'permil;': '\u2030', + 'perp;': '\u22a5', + 'pertenk;': '\u2031', + 'Pfr;': '\U0001d513', + 'pfr;': '\U0001d52d', + 'Phi;': '\u03a6', + 'phi;': '\u03c6', + 'phiv;': '\u03d5', + 'phmmat;': '\u2133', + 'phone;': '\u260e', + 'Pi;': '\u03a0', + 'pi;': '\u03c0', + 'pitchfork;': '\u22d4', + 'piv;': '\u03d6', + 'planck;': '\u210f', + 'planckh;': '\u210e', + 'plankv;': '\u210f', + 'plus;': '+', + 'plusacir;': '\u2a23', + 'plusb;': '\u229e', + 'pluscir;': '\u2a22', + 'plusdo;': '\u2214', + 'plusdu;': '\u2a25', + 'pluse;': '\u2a72', + 'PlusMinus;': '\xb1', + 'plusmn': '\xb1', + 'plusmn;': '\xb1', + 'plussim;': '\u2a26', + 'plustwo;': '\u2a27', + 'pm;': '\xb1', + 'Poincareplane;': '\u210c', + 'pointint;': '\u2a15', + 'Popf;': '\u2119', + 'popf;': '\U0001d561', + 'pound': '\xa3', + 'pound;': '\xa3', + 'Pr;': '\u2abb', + 'pr;': '\u227a', + 'prap;': '\u2ab7', + 'prcue;': '\u227c', + 'prE;': '\u2ab3', + 'pre;': '\u2aaf', + 'prec;': '\u227a', + 'precapprox;': '\u2ab7', + 'preccurlyeq;': '\u227c', + 'Precedes;': '\u227a', + 'PrecedesEqual;': '\u2aaf', + 'PrecedesSlantEqual;': '\u227c', + 'PrecedesTilde;': '\u227e', + 'preceq;': '\u2aaf', + 'precnapprox;': '\u2ab9', + 'precneqq;': '\u2ab5', + 'precnsim;': '\u22e8', + 'precsim;': '\u227e', + 'Prime;': '\u2033', + 'prime;': '\u2032', + 'primes;': '\u2119', + 'prnap;': '\u2ab9', + 'prnE;': '\u2ab5', + 'prnsim;': '\u22e8', + 'prod;': '\u220f', + 'Product;': '\u220f', + 'profalar;': '\u232e', + 'profline;': '\u2312', + 'profsurf;': '\u2313', + 'prop;': '\u221d', + 'Proportion;': '\u2237', + 'Proportional;': '\u221d', + 'propto;': '\u221d', + 'prsim;': '\u227e', + 'prurel;': '\u22b0', + 'Pscr;': '\U0001d4ab', + 'pscr;': '\U0001d4c5', + 'Psi;': '\u03a8', + 'psi;': '\u03c8', + 'puncsp;': '\u2008', + 'Qfr;': '\U0001d514', + 'qfr;': '\U0001d52e', + 'qint;': '\u2a0c', + 'Qopf;': '\u211a', + 'qopf;': '\U0001d562', + 'qprime;': '\u2057', + 'Qscr;': '\U0001d4ac', + 'qscr;': '\U0001d4c6', + 'quaternions;': '\u210d', + 'quatint;': '\u2a16', + 'quest;': '?', + 'questeq;': '\u225f', + 'QUOT': '"', + 'quot': '"', + 'QUOT;': '"', + 'quot;': '"', + 'rAarr;': '\u21db', + 'race;': '\u223d\u0331', + 'Racute;': '\u0154', + 'racute;': '\u0155', + 'radic;': '\u221a', + 'raemptyv;': '\u29b3', + 'Rang;': '\u27eb', + 'rang;': '\u27e9', + 'rangd;': '\u2992', + 'range;': '\u29a5', + 'rangle;': '\u27e9', + 'raquo': '\xbb', + 'raquo;': '\xbb', + 'Rarr;': '\u21a0', + 'rArr;': '\u21d2', + 'rarr;': '\u2192', + 'rarrap;': '\u2975', + 'rarrb;': '\u21e5', + 'rarrbfs;': '\u2920', + 'rarrc;': '\u2933', + 'rarrfs;': '\u291e', + 'rarrhk;': '\u21aa', + 'rarrlp;': '\u21ac', + 'rarrpl;': '\u2945', + 'rarrsim;': '\u2974', + 'Rarrtl;': '\u2916', + 'rarrtl;': '\u21a3', + 'rarrw;': '\u219d', + 'rAtail;': '\u291c', + 'ratail;': '\u291a', + 'ratio;': '\u2236', + 'rationals;': '\u211a', + 'RBarr;': '\u2910', + 'rBarr;': '\u290f', + 'rbarr;': '\u290d', + 'rbbrk;': '\u2773', + 'rbrace;': '}', + 'rbrack;': ']', + 'rbrke;': '\u298c', + 'rbrksld;': '\u298e', + 'rbrkslu;': '\u2990', + 'Rcaron;': '\u0158', + 'rcaron;': '\u0159', + 'Rcedil;': '\u0156', + 'rcedil;': '\u0157', + 'rceil;': '\u2309', + 'rcub;': '}', + 'Rcy;': '\u0420', + 'rcy;': '\u0440', + 'rdca;': '\u2937', + 'rdldhar;': '\u2969', + 'rdquo;': '\u201d', + 'rdquor;': '\u201d', + 'rdsh;': '\u21b3', + 'Re;': '\u211c', + 'real;': '\u211c', + 'realine;': '\u211b', + 'realpart;': '\u211c', + 'reals;': '\u211d', + 'rect;': '\u25ad', + 'REG': '\xae', + 'reg': '\xae', + 'REG;': '\xae', + 'reg;': '\xae', + 'ReverseElement;': '\u220b', + 'ReverseEquilibrium;': '\u21cb', + 'ReverseUpEquilibrium;': '\u296f', + 'rfisht;': '\u297d', + 'rfloor;': '\u230b', + 'Rfr;': '\u211c', + 'rfr;': '\U0001d52f', + 'rHar;': '\u2964', + 'rhard;': '\u21c1', + 'rharu;': '\u21c0', + 'rharul;': '\u296c', + 'Rho;': '\u03a1', + 'rho;': '\u03c1', + 'rhov;': '\u03f1', + 'RightAngleBracket;': '\u27e9', + 'RightArrow;': '\u2192', + 'Rightarrow;': '\u21d2', + 'rightarrow;': '\u2192', + 'RightArrowBar;': '\u21e5', + 'RightArrowLeftArrow;': '\u21c4', + 'rightarrowtail;': '\u21a3', + 'RightCeiling;': '\u2309', + 'RightDoubleBracket;': '\u27e7', + 'RightDownTeeVector;': '\u295d', + 'RightDownVector;': '\u21c2', + 'RightDownVectorBar;': '\u2955', + 'RightFloor;': '\u230b', + 'rightharpoondown;': '\u21c1', + 'rightharpoonup;': '\u21c0', + 'rightleftarrows;': '\u21c4', + 'rightleftharpoons;': '\u21cc', + 'rightrightarrows;': '\u21c9', + 'rightsquigarrow;': '\u219d', + 'RightTee;': '\u22a2', + 'RightTeeArrow;': '\u21a6', + 'RightTeeVector;': '\u295b', + 'rightthreetimes;': '\u22cc', + 'RightTriangle;': '\u22b3', + 'RightTriangleBar;': '\u29d0', + 'RightTriangleEqual;': '\u22b5', + 'RightUpDownVector;': '\u294f', + 'RightUpTeeVector;': '\u295c', + 'RightUpVector;': '\u21be', + 'RightUpVectorBar;': '\u2954', + 'RightVector;': '\u21c0', + 'RightVectorBar;': '\u2953', + 'ring;': '\u02da', + 'risingdotseq;': '\u2253', + 'rlarr;': '\u21c4', + 'rlhar;': '\u21cc', + 'rlm;': '\u200f', + 'rmoust;': '\u23b1', + 'rmoustache;': '\u23b1', + 'rnmid;': '\u2aee', + 'roang;': '\u27ed', + 'roarr;': '\u21fe', + 'robrk;': '\u27e7', + 'ropar;': '\u2986', + 'Ropf;': '\u211d', + 'ropf;': '\U0001d563', + 'roplus;': '\u2a2e', + 'rotimes;': '\u2a35', + 'RoundImplies;': '\u2970', + 'rpar;': ')', + 'rpargt;': '\u2994', + 'rppolint;': '\u2a12', + 'rrarr;': '\u21c9', + 'Rrightarrow;': '\u21db', + 'rsaquo;': '\u203a', + 'Rscr;': '\u211b', + 'rscr;': '\U0001d4c7', + 'Rsh;': '\u21b1', + 'rsh;': '\u21b1', + 'rsqb;': ']', + 'rsquo;': '\u2019', + 'rsquor;': '\u2019', + 'rthree;': '\u22cc', + 'rtimes;': '\u22ca', + 'rtri;': '\u25b9', + 'rtrie;': '\u22b5', + 'rtrif;': '\u25b8', + 'rtriltri;': '\u29ce', + 'RuleDelayed;': '\u29f4', + 'ruluhar;': '\u2968', + 'rx;': '\u211e', + 'Sacute;': '\u015a', + 'sacute;': '\u015b', + 'sbquo;': '\u201a', + 'Sc;': '\u2abc', + 'sc;': '\u227b', + 'scap;': '\u2ab8', + 'Scaron;': '\u0160', + 'scaron;': '\u0161', + 'sccue;': '\u227d', + 'scE;': '\u2ab4', + 'sce;': '\u2ab0', + 'Scedil;': '\u015e', + 'scedil;': '\u015f', + 'Scirc;': '\u015c', + 'scirc;': '\u015d', + 'scnap;': '\u2aba', + 'scnE;': '\u2ab6', + 'scnsim;': '\u22e9', + 'scpolint;': '\u2a13', + 'scsim;': '\u227f', + 'Scy;': '\u0421', + 'scy;': '\u0441', + 'sdot;': '\u22c5', + 'sdotb;': '\u22a1', + 'sdote;': '\u2a66', + 'searhk;': '\u2925', + 'seArr;': '\u21d8', + 'searr;': '\u2198', + 'searrow;': '\u2198', + 'sect': '\xa7', + 'sect;': '\xa7', + 'semi;': ';', + 'seswar;': '\u2929', + 'setminus;': '\u2216', + 'setmn;': '\u2216', + 'sext;': '\u2736', + 'Sfr;': '\U0001d516', + 'sfr;': '\U0001d530', + 'sfrown;': '\u2322', + 'sharp;': '\u266f', + 'SHCHcy;': '\u0429', + 'shchcy;': '\u0449', + 'SHcy;': '\u0428', + 'shcy;': '\u0448', + 'ShortDownArrow;': '\u2193', + 'ShortLeftArrow;': '\u2190', + 'shortmid;': '\u2223', + 'shortparallel;': '\u2225', + 'ShortRightArrow;': '\u2192', + 'ShortUpArrow;': '\u2191', + 'shy': '\xad', + 'shy;': '\xad', + 'Sigma;': '\u03a3', + 'sigma;': '\u03c3', + 'sigmaf;': '\u03c2', + 'sigmav;': '\u03c2', + 'sim;': '\u223c', + 'simdot;': '\u2a6a', + 'sime;': '\u2243', + 'simeq;': '\u2243', + 'simg;': '\u2a9e', + 'simgE;': '\u2aa0', + 'siml;': '\u2a9d', + 'simlE;': '\u2a9f', + 'simne;': '\u2246', + 'simplus;': '\u2a24', + 'simrarr;': '\u2972', + 'slarr;': '\u2190', + 'SmallCircle;': '\u2218', + 'smallsetminus;': '\u2216', + 'smashp;': '\u2a33', + 'smeparsl;': '\u29e4', + 'smid;': '\u2223', + 'smile;': '\u2323', + 'smt;': '\u2aaa', + 'smte;': '\u2aac', + 'smtes;': '\u2aac\ufe00', + 'SOFTcy;': '\u042c', + 'softcy;': '\u044c', + 'sol;': '/', + 'solb;': '\u29c4', + 'solbar;': '\u233f', + 'Sopf;': '\U0001d54a', + 'sopf;': '\U0001d564', + 'spades;': '\u2660', + 'spadesuit;': '\u2660', + 'spar;': '\u2225', + 'sqcap;': '\u2293', + 'sqcaps;': '\u2293\ufe00', + 'sqcup;': '\u2294', + 'sqcups;': '\u2294\ufe00', + 'Sqrt;': '\u221a', + 'sqsub;': '\u228f', + 'sqsube;': '\u2291', + 'sqsubset;': '\u228f', + 'sqsubseteq;': '\u2291', + 'sqsup;': '\u2290', + 'sqsupe;': '\u2292', + 'sqsupset;': '\u2290', + 'sqsupseteq;': '\u2292', + 'squ;': '\u25a1', + 'Square;': '\u25a1', + 'square;': '\u25a1', + 'SquareIntersection;': '\u2293', + 'SquareSubset;': '\u228f', + 'SquareSubsetEqual;': '\u2291', + 'SquareSuperset;': '\u2290', + 'SquareSupersetEqual;': '\u2292', + 'SquareUnion;': '\u2294', + 'squarf;': '\u25aa', + 'squf;': '\u25aa', + 'srarr;': '\u2192', + 'Sscr;': '\U0001d4ae', + 'sscr;': '\U0001d4c8', + 'ssetmn;': '\u2216', + 'ssmile;': '\u2323', + 'sstarf;': '\u22c6', + 'Star;': '\u22c6', + 'star;': '\u2606', + 'starf;': '\u2605', + 'straightepsilon;': '\u03f5', + 'straightphi;': '\u03d5', + 'strns;': '\xaf', + 'Sub;': '\u22d0', + 'sub;': '\u2282', + 'subdot;': '\u2abd', + 'subE;': '\u2ac5', + 'sube;': '\u2286', + 'subedot;': '\u2ac3', + 'submult;': '\u2ac1', + 'subnE;': '\u2acb', + 'subne;': '\u228a', + 'subplus;': '\u2abf', + 'subrarr;': '\u2979', + 'Subset;': '\u22d0', + 'subset;': '\u2282', + 'subseteq;': '\u2286', + 'subseteqq;': '\u2ac5', + 'SubsetEqual;': '\u2286', + 'subsetneq;': '\u228a', + 'subsetneqq;': '\u2acb', + 'subsim;': '\u2ac7', + 'subsub;': '\u2ad5', + 'subsup;': '\u2ad3', + 'succ;': '\u227b', + 'succapprox;': '\u2ab8', + 'succcurlyeq;': '\u227d', + 'Succeeds;': '\u227b', + 'SucceedsEqual;': '\u2ab0', + 'SucceedsSlantEqual;': '\u227d', + 'SucceedsTilde;': '\u227f', + 'succeq;': '\u2ab0', + 'succnapprox;': '\u2aba', + 'succneqq;': '\u2ab6', + 'succnsim;': '\u22e9', + 'succsim;': '\u227f', + 'SuchThat;': '\u220b', + 'Sum;': '\u2211', + 'sum;': '\u2211', + 'sung;': '\u266a', + 'sup1': '\xb9', + 'sup1;': '\xb9', + 'sup2': '\xb2', + 'sup2;': '\xb2', + 'sup3': '\xb3', + 'sup3;': '\xb3', + 'Sup;': '\u22d1', + 'sup;': '\u2283', + 'supdot;': '\u2abe', + 'supdsub;': '\u2ad8', + 'supE;': '\u2ac6', + 'supe;': '\u2287', + 'supedot;': '\u2ac4', + 'Superset;': '\u2283', + 'SupersetEqual;': '\u2287', + 'suphsol;': '\u27c9', + 'suphsub;': '\u2ad7', + 'suplarr;': '\u297b', + 'supmult;': '\u2ac2', + 'supnE;': '\u2acc', + 'supne;': '\u228b', + 'supplus;': '\u2ac0', + 'Supset;': '\u22d1', + 'supset;': '\u2283', + 'supseteq;': '\u2287', + 'supseteqq;': '\u2ac6', + 'supsetneq;': '\u228b', + 'supsetneqq;': '\u2acc', + 'supsim;': '\u2ac8', + 'supsub;': '\u2ad4', + 'supsup;': '\u2ad6', + 'swarhk;': '\u2926', + 'swArr;': '\u21d9', + 'swarr;': '\u2199', + 'swarrow;': '\u2199', + 'swnwar;': '\u292a', + 'szlig': '\xdf', + 'szlig;': '\xdf', + 'Tab;': '\t', + 'target;': '\u2316', + 'Tau;': '\u03a4', + 'tau;': '\u03c4', + 'tbrk;': '\u23b4', + 'Tcaron;': '\u0164', + 'tcaron;': '\u0165', + 'Tcedil;': '\u0162', + 'tcedil;': '\u0163', + 'Tcy;': '\u0422', + 'tcy;': '\u0442', + 'tdot;': '\u20db', + 'telrec;': '\u2315', + 'Tfr;': '\U0001d517', + 'tfr;': '\U0001d531', + 'there4;': '\u2234', + 'Therefore;': '\u2234', + 'therefore;': '\u2234', + 'Theta;': '\u0398', + 'theta;': '\u03b8', + 'thetasym;': '\u03d1', + 'thetav;': '\u03d1', + 'thickapprox;': '\u2248', + 'thicksim;': '\u223c', + 'ThickSpace;': '\u205f\u200a', + 'thinsp;': '\u2009', + 'ThinSpace;': '\u2009', + 'thkap;': '\u2248', + 'thksim;': '\u223c', + 'THORN': '\xde', + 'thorn': '\xfe', + 'THORN;': '\xde', + 'thorn;': '\xfe', + 'Tilde;': '\u223c', + 'tilde;': '\u02dc', + 'TildeEqual;': '\u2243', + 'TildeFullEqual;': '\u2245', + 'TildeTilde;': '\u2248', + 'times': '\xd7', + 'times;': '\xd7', + 'timesb;': '\u22a0', + 'timesbar;': '\u2a31', + 'timesd;': '\u2a30', + 'tint;': '\u222d', + 'toea;': '\u2928', + 'top;': '\u22a4', + 'topbot;': '\u2336', + 'topcir;': '\u2af1', + 'Topf;': '\U0001d54b', + 'topf;': '\U0001d565', + 'topfork;': '\u2ada', + 'tosa;': '\u2929', + 'tprime;': '\u2034', + 'TRADE;': '\u2122', + 'trade;': '\u2122', + 'triangle;': '\u25b5', + 'triangledown;': '\u25bf', + 'triangleleft;': '\u25c3', + 'trianglelefteq;': '\u22b4', + 'triangleq;': '\u225c', + 'triangleright;': '\u25b9', + 'trianglerighteq;': '\u22b5', + 'tridot;': '\u25ec', + 'trie;': '\u225c', + 'triminus;': '\u2a3a', + 'TripleDot;': '\u20db', + 'triplus;': '\u2a39', + 'trisb;': '\u29cd', + 'tritime;': '\u2a3b', + 'trpezium;': '\u23e2', + 'Tscr;': '\U0001d4af', + 'tscr;': '\U0001d4c9', + 'TScy;': '\u0426', + 'tscy;': '\u0446', + 'TSHcy;': '\u040b', + 'tshcy;': '\u045b', + 'Tstrok;': '\u0166', + 'tstrok;': '\u0167', + 'twixt;': '\u226c', + 'twoheadleftarrow;': '\u219e', + 'twoheadrightarrow;': '\u21a0', + 'Uacute': '\xda', + 'uacute': '\xfa', + 'Uacute;': '\xda', + 'uacute;': '\xfa', + 'Uarr;': '\u219f', + 'uArr;': '\u21d1', + 'uarr;': '\u2191', + 'Uarrocir;': '\u2949', + 'Ubrcy;': '\u040e', + 'ubrcy;': '\u045e', + 'Ubreve;': '\u016c', + 'ubreve;': '\u016d', + 'Ucirc': '\xdb', + 'ucirc': '\xfb', + 'Ucirc;': '\xdb', + 'ucirc;': '\xfb', + 'Ucy;': '\u0423', + 'ucy;': '\u0443', + 'udarr;': '\u21c5', + 'Udblac;': '\u0170', + 'udblac;': '\u0171', + 'udhar;': '\u296e', + 'ufisht;': '\u297e', + 'Ufr;': '\U0001d518', + 'ufr;': '\U0001d532', + 'Ugrave': '\xd9', + 'ugrave': '\xf9', + 'Ugrave;': '\xd9', + 'ugrave;': '\xf9', + 'uHar;': '\u2963', + 'uharl;': '\u21bf', + 'uharr;': '\u21be', + 'uhblk;': '\u2580', + 'ulcorn;': '\u231c', + 'ulcorner;': '\u231c', + 'ulcrop;': '\u230f', + 'ultri;': '\u25f8', + 'Umacr;': '\u016a', + 'umacr;': '\u016b', + 'uml': '\xa8', + 'uml;': '\xa8', + 'UnderBar;': '_', + 'UnderBrace;': '\u23df', + 'UnderBracket;': '\u23b5', + 'UnderParenthesis;': '\u23dd', + 'Union;': '\u22c3', + 'UnionPlus;': '\u228e', + 'Uogon;': '\u0172', + 'uogon;': '\u0173', + 'Uopf;': '\U0001d54c', + 'uopf;': '\U0001d566', + 'UpArrow;': '\u2191', + 'Uparrow;': '\u21d1', + 'uparrow;': '\u2191', + 'UpArrowBar;': '\u2912', + 'UpArrowDownArrow;': '\u21c5', + 'UpDownArrow;': '\u2195', + 'Updownarrow;': '\u21d5', + 'updownarrow;': '\u2195', + 'UpEquilibrium;': '\u296e', + 'upharpoonleft;': '\u21bf', + 'upharpoonright;': '\u21be', + 'uplus;': '\u228e', + 'UpperLeftArrow;': '\u2196', + 'UpperRightArrow;': '\u2197', + 'Upsi;': '\u03d2', + 'upsi;': '\u03c5', + 'upsih;': '\u03d2', + 'Upsilon;': '\u03a5', + 'upsilon;': '\u03c5', + 'UpTee;': '\u22a5', + 'UpTeeArrow;': '\u21a5', + 'upuparrows;': '\u21c8', + 'urcorn;': '\u231d', + 'urcorner;': '\u231d', + 'urcrop;': '\u230e', + 'Uring;': '\u016e', + 'uring;': '\u016f', + 'urtri;': '\u25f9', + 'Uscr;': '\U0001d4b0', + 'uscr;': '\U0001d4ca', + 'utdot;': '\u22f0', + 'Utilde;': '\u0168', + 'utilde;': '\u0169', + 'utri;': '\u25b5', + 'utrif;': '\u25b4', + 'uuarr;': '\u21c8', + 'Uuml': '\xdc', + 'uuml': '\xfc', + 'Uuml;': '\xdc', + 'uuml;': '\xfc', + 'uwangle;': '\u29a7', + 'vangrt;': '\u299c', + 'varepsilon;': '\u03f5', + 'varkappa;': '\u03f0', + 'varnothing;': '\u2205', + 'varphi;': '\u03d5', + 'varpi;': '\u03d6', + 'varpropto;': '\u221d', + 'vArr;': '\u21d5', + 'varr;': '\u2195', + 'varrho;': '\u03f1', + 'varsigma;': '\u03c2', + 'varsubsetneq;': '\u228a\ufe00', + 'varsubsetneqq;': '\u2acb\ufe00', + 'varsupsetneq;': '\u228b\ufe00', + 'varsupsetneqq;': '\u2acc\ufe00', + 'vartheta;': '\u03d1', + 'vartriangleleft;': '\u22b2', + 'vartriangleright;': '\u22b3', + 'Vbar;': '\u2aeb', + 'vBar;': '\u2ae8', + 'vBarv;': '\u2ae9', + 'Vcy;': '\u0412', + 'vcy;': '\u0432', + 'VDash;': '\u22ab', + 'Vdash;': '\u22a9', + 'vDash;': '\u22a8', + 'vdash;': '\u22a2', + 'Vdashl;': '\u2ae6', + 'Vee;': '\u22c1', + 'vee;': '\u2228', + 'veebar;': '\u22bb', + 'veeeq;': '\u225a', + 'vellip;': '\u22ee', + 'Verbar;': '\u2016', + 'verbar;': '|', + 'Vert;': '\u2016', + 'vert;': '|', + 'VerticalBar;': '\u2223', + 'VerticalLine;': '|', + 'VerticalSeparator;': '\u2758', + 'VerticalTilde;': '\u2240', + 'VeryThinSpace;': '\u200a', + 'Vfr;': '\U0001d519', + 'vfr;': '\U0001d533', + 'vltri;': '\u22b2', + 'vnsub;': '\u2282\u20d2', + 'vnsup;': '\u2283\u20d2', + 'Vopf;': '\U0001d54d', + 'vopf;': '\U0001d567', + 'vprop;': '\u221d', + 'vrtri;': '\u22b3', + 'Vscr;': '\U0001d4b1', + 'vscr;': '\U0001d4cb', + 'vsubnE;': '\u2acb\ufe00', + 'vsubne;': '\u228a\ufe00', + 'vsupnE;': '\u2acc\ufe00', + 'vsupne;': '\u228b\ufe00', + 'Vvdash;': '\u22aa', + 'vzigzag;': '\u299a', + 'Wcirc;': '\u0174', + 'wcirc;': '\u0175', + 'wedbar;': '\u2a5f', + 'Wedge;': '\u22c0', + 'wedge;': '\u2227', + 'wedgeq;': '\u2259', + 'weierp;': '\u2118', + 'Wfr;': '\U0001d51a', + 'wfr;': '\U0001d534', + 'Wopf;': '\U0001d54e', + 'wopf;': '\U0001d568', + 'wp;': '\u2118', + 'wr;': '\u2240', + 'wreath;': '\u2240', + 'Wscr;': '\U0001d4b2', + 'wscr;': '\U0001d4cc', + 'xcap;': '\u22c2', + 'xcirc;': '\u25ef', + 'xcup;': '\u22c3', + 'xdtri;': '\u25bd', + 'Xfr;': '\U0001d51b', + 'xfr;': '\U0001d535', + 'xhArr;': '\u27fa', + 'xharr;': '\u27f7', + 'Xi;': '\u039e', + 'xi;': '\u03be', + 'xlArr;': '\u27f8', + 'xlarr;': '\u27f5', + 'xmap;': '\u27fc', + 'xnis;': '\u22fb', + 'xodot;': '\u2a00', + 'Xopf;': '\U0001d54f', + 'xopf;': '\U0001d569', + 'xoplus;': '\u2a01', + 'xotime;': '\u2a02', + 'xrArr;': '\u27f9', + 'xrarr;': '\u27f6', + 'Xscr;': '\U0001d4b3', + 'xscr;': '\U0001d4cd', + 'xsqcup;': '\u2a06', + 'xuplus;': '\u2a04', + 'xutri;': '\u25b3', + 'xvee;': '\u22c1', + 'xwedge;': '\u22c0', + 'Yacute': '\xdd', + 'yacute': '\xfd', + 'Yacute;': '\xdd', + 'yacute;': '\xfd', + 'YAcy;': '\u042f', + 'yacy;': '\u044f', + 'Ycirc;': '\u0176', + 'ycirc;': '\u0177', + 'Ycy;': '\u042b', + 'ycy;': '\u044b', + 'yen': '\xa5', + 'yen;': '\xa5', + 'Yfr;': '\U0001d51c', + 'yfr;': '\U0001d536', + 'YIcy;': '\u0407', + 'yicy;': '\u0457', + 'Yopf;': '\U0001d550', + 'yopf;': '\U0001d56a', + 'Yscr;': '\U0001d4b4', + 'yscr;': '\U0001d4ce', + 'YUcy;': '\u042e', + 'yucy;': '\u044e', + 'yuml': '\xff', + 'Yuml;': '\u0178', + 'yuml;': '\xff', + 'Zacute;': '\u0179', + 'zacute;': '\u017a', + 'Zcaron;': '\u017d', + 'zcaron;': '\u017e', + 'Zcy;': '\u0417', + 'zcy;': '\u0437', + 'Zdot;': '\u017b', + 'zdot;': '\u017c', + 'zeetrf;': '\u2128', + 'ZeroWidthSpace;': '\u200b', + 'Zeta;': '\u0396', + 'zeta;': '\u03b6', + 'Zfr;': '\u2128', + 'zfr;': '\U0001d537', + 'ZHcy;': '\u0416', + 'zhcy;': '\u0436', + 'zigrarr;': '\u21dd', + 'Zopf;': '\u2124', + 'zopf;': '\U0001d56b', + 'Zscr;': '\U0001d4b5', + 'zscr;': '\U0001d4cf', + 'zwj;': '\u200d', + 'zwnj;': '\u200c', +} + +# maps the Unicode codepoint to the HTML entity name +codepoint2name = {} + +# maps the HTML entity name to the character +# (or a character reference if the character is outside the Latin-1 range) +entitydefs = {} + +for (name, codepoint) in name2codepoint.items(): + codepoint2name[codepoint] = name + entitydefs[name] = chr(codepoint) + +del name, codepoint diff --git a/lib/future/backports/html/parser.py b/lib/future/backports/html/parser.py new file mode 100644 index 00000000..fb652636 --- /dev/null +++ b/lib/future/backports/html/parser.py @@ -0,0 +1,536 @@ +"""A parser for HTML and XHTML. + +Backported for python-future from Python 3.3. +""" + +# This file is based on sgmllib.py, but the API is slightly different. + +# XXX There should be a way to distinguish between PCDATA (parsed +# character data -- the normal case), RCDATA (replaceable character +# data -- only char and entity references and end tags are special) +# and CDATA (character data -- only end tags are special). + +from __future__ import (absolute_import, division, + print_function, unicode_literals) +from future.builtins import * +from future.backports import _markupbase +import re +import warnings + +# Regular expressions used for parsing + +interesting_normal = re.compile('[&<]') +incomplete = re.compile('&[a-zA-Z#]') + +entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') +charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') + +starttagopen = re.compile('<[a-zA-Z]') +piclose = re.compile('>') +commentclose = re.compile(r'--\s*>') +tagfind = re.compile('([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*') +# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state +# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state +tagfind_tolerant = re.compile('[a-zA-Z][^\t\n\r\f />\x00]*') +# Note: +# 1) the strict attrfind isn't really strict, but we can't make it +# correctly strict without breaking backward compatibility; +# 2) if you change attrfind remember to update locatestarttagend too; +# 3) if you change attrfind and/or locatestarttagend the parser will +# explode, so don't do it. +attrfind = re.compile( + r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*' + r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?') +attrfind_tolerant = re.compile( + r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' + r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +locatestarttagend = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:\s+ # whitespace before attribute name + (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name + (?:\s*=\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |\"[^\"]*\" # LIT-enclosed value + |[^'\">\s]+ # bare value + ) + )? + ) + )* + \s* # trailing whitespace +""", re.VERBOSE) +locatestarttagend_tolerant = re.compile(r""" + <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name + (?:[\s/]* # optional whitespace before attribute name + (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name + (?:\s*=+\s* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\s]* # bare value + ) + (?:\s*,)* # possibly followed by a comma + )?(?:\s|/(?!>))* + )* + )? + \s* # trailing whitespace +""", re.VERBOSE) +endendtag = re.compile('>') +# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between +# ') + + +class HTMLParseError(Exception): + """Exception raised for all parse errors.""" + + def __init__(self, msg, position=(None, None)): + assert msg + self.msg = msg + self.lineno = position[0] + self.offset = position[1] + + def __str__(self): + result = self.msg + if self.lineno is not None: + result = result + ", at line %d" % self.lineno + if self.offset is not None: + result = result + ", column %d" % (self.offset + 1) + return result + + +class HTMLParser(_markupbase.ParserBase): + """Find tags and other markup and call handler functions. + + Usage: + p = HTMLParser() + p.feed(data) + ... + p.close() + + Start tags are handled by calling self.handle_starttag() or + self.handle_startendtag(); end tags by self.handle_endtag(). The + data between tags is passed from the parser to the derived class + by calling self.handle_data() with the data as argument (the data + may be split up in arbitrary chunks). Entity references are + passed by calling self.handle_entityref() with the entity + reference as the argument. Numeric character references are + passed to self.handle_charref() with the string containing the + reference as the argument. + """ + + CDATA_CONTENT_ELEMENTS = ("script", "style") + + def __init__(self, strict=False): + """Initialize and reset this instance. + + If strict is set to False (the default) the parser will parse invalid + markup, otherwise it will raise an error. Note that the strict mode + is deprecated. + """ + if strict: + warnings.warn("The strict mode is deprecated.", + DeprecationWarning, stacklevel=2) + self.strict = strict + self.reset() + + def reset(self): + """Reset this instance. Loses all unprocessed data.""" + self.rawdata = '' + self.lasttag = '???' + self.interesting = interesting_normal + self.cdata_elem = None + _markupbase.ParserBase.reset(self) + + def feed(self, data): + r"""Feed data to the parser. + + Call this as often as you want, with as little or as much text + as you want (may include '\n'). + """ + self.rawdata = self.rawdata + data + self.goahead(0) + + def close(self): + """Handle any buffered data.""" + self.goahead(1) + + def error(self, message): + raise HTMLParseError(message, self.getpos()) + + __starttag_text = None + + def get_starttag_text(self): + """Return full source of start tag: '<...>'.""" + return self.__starttag_text + + def set_cdata_mode(self, elem): + self.cdata_elem = elem.lower() + self.interesting = re.compile(r'' % self.cdata_elem, re.I) + + def clear_cdata_mode(self): + self.interesting = interesting_normal + self.cdata_elem = None + + # Internal -- handle data as far as reasonable. May leave state + # and data to be processed by a subsequent call. If 'end' is + # true, force handling all data as if followed by EOF marker. + def goahead(self, end): + rawdata = self.rawdata + i = 0 + n = len(rawdata) + while i < n: + match = self.interesting.search(rawdata, i) # < or & + if match: + j = match.start() + else: + if self.cdata_elem: + break + j = n + if i < j: self.handle_data(rawdata[i:j]) + i = self.updatepos(i, j) + if i == n: break + startswith = rawdata.startswith + if startswith('<', i): + if starttagopen.match(rawdata, i): # < + letter + k = self.parse_starttag(i) + elif startswith("', i + 1) + if k < 0: + k = rawdata.find('<', i + 1) + if k < 0: + k = i + 1 + else: + k += 1 + self.handle_data(rawdata[i:k]) + i = self.updatepos(i, k) + elif startswith("&#", i): + match = charref.match(rawdata, i) + if match: + name = match.group()[2:-1] + self.handle_charref(name) + k = match.end() + if not startswith(';', k-1): + k = k - 1 + i = self.updatepos(i, k) + continue + else: + if ";" in rawdata[i:]: #bail by consuming &# + self.handle_data(rawdata[0:2]) + i = self.updatepos(i, 2) + break + elif startswith('&', i): + match = entityref.match(rawdata, i) + if match: + name = match.group(1) + self.handle_entityref(name) + k = match.end() + if not startswith(';', k-1): + k = k - 1 + i = self.updatepos(i, k) + continue + match = incomplete.match(rawdata, i) + if match: + # match.group() will contain at least 2 chars + if end and match.group() == rawdata[i:]: + if self.strict: + self.error("EOF in middle of entity or char ref") + else: + if k <= i: + k = n + i = self.updatepos(i, i + 1) + # incomplete + break + elif (i + 1) < n: + # not the end of the buffer, and can't be confused + # with some other construct + self.handle_data("&") + i = self.updatepos(i, i + 1) + else: + break + else: + assert 0, "interesting.search() lied" + # end while + if end and i < n and not self.cdata_elem: + self.handle_data(rawdata[i:n]) + i = self.updatepos(i, n) + self.rawdata = rawdata[i:] + + # Internal -- parse html declarations, return length or -1 if not terminated + # See w3.org/TR/html5/tokenization.html#markup-declaration-open-state + # See also parse_declaration in _markupbase + def parse_html_declaration(self, i): + rawdata = self.rawdata + assert rawdata[i:i+2] == ' + gtpos = rawdata.find('>', i+9) + if gtpos == -1: + return -1 + self.handle_decl(rawdata[i+2:gtpos]) + return gtpos+1 + else: + return self.parse_bogus_comment(i) + + # Internal -- parse bogus comment, return length or -1 if not terminated + # see http://www.w3.org/TR/html5/tokenization.html#bogus-comment-state + def parse_bogus_comment(self, i, report=1): + rawdata = self.rawdata + assert rawdata[i:i+2] in ('', i+2) + if pos == -1: + return -1 + if report: + self.handle_comment(rawdata[i+2:pos]) + return pos + 1 + + # Internal -- parse processing instr, return end or -1 if not terminated + def parse_pi(self, i): + rawdata = self.rawdata + assert rawdata[i:i+2] == ' + if not match: + return -1 + j = match.start() + self.handle_pi(rawdata[i+2: j]) + j = match.end() + return j + + # Internal -- handle starttag, return end or -1 if not terminated + def parse_starttag(self, i): + self.__starttag_text = None + endpos = self.check_for_whole_start_tag(i) + if endpos < 0: + return endpos + rawdata = self.rawdata + self.__starttag_text = rawdata[i:endpos] + + # Now parse the data between i+1 and j into a tag and attrs + attrs = [] + match = tagfind.match(rawdata, i+1) + assert match, 'unexpected call to parse_starttag()' + k = match.end() + self.lasttag = tag = match.group(1).lower() + while k < endpos: + if self.strict: + m = attrfind.match(rawdata, k) + else: + m = attrfind_tolerant.match(rawdata, k) + if not m: + break + attrname, rest, attrvalue = m.group(1, 2, 3) + if not rest: + attrvalue = None + elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ + attrvalue[:1] == '"' == attrvalue[-1:]: + attrvalue = attrvalue[1:-1] + if attrvalue: + attrvalue = self.unescape(attrvalue) + attrs.append((attrname.lower(), attrvalue)) + k = m.end() + + end = rawdata[k:endpos].strip() + if end not in (">", "/>"): + lineno, offset = self.getpos() + if "\n" in self.__starttag_text: + lineno = lineno + self.__starttag_text.count("\n") + offset = len(self.__starttag_text) \ + - self.__starttag_text.rfind("\n") + else: + offset = offset + len(self.__starttag_text) + if self.strict: + self.error("junk characters in start tag: %r" + % (rawdata[k:endpos][:20],)) + self.handle_data(rawdata[i:endpos]) + return endpos + if end.endswith('/>'): + # XHTML-style empty tag: + self.handle_startendtag(tag, attrs) + else: + self.handle_starttag(tag, attrs) + if tag in self.CDATA_CONTENT_ELEMENTS: + self.set_cdata_mode(tag) + return endpos + + # Internal -- check to see if we have a complete starttag; return end + # or -1 if incomplete. + def check_for_whole_start_tag(self, i): + rawdata = self.rawdata + if self.strict: + m = locatestarttagend.match(rawdata, i) + else: + m = locatestarttagend_tolerant.match(rawdata, i) + if m: + j = m.end() + next = rawdata[j:j+1] + if next == ">": + return j + 1 + if next == "/": + if rawdata.startswith("/>", j): + return j + 2 + if rawdata.startswith("/", j): + # buffer boundary + return -1 + # else bogus input + if self.strict: + self.updatepos(i, j + 1) + self.error("malformed empty start tag") + if j > i: + return j + else: + return i + 1 + if next == "": + # end of input + return -1 + if next in ("abcdefghijklmnopqrstuvwxyz=/" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"): + # end of input in or before attribute value, or we have the + # '/' from a '/>' ending + return -1 + if self.strict: + self.updatepos(i, j) + self.error("malformed start tag") + if j > i: + return j + else: + return i + 1 + raise AssertionError("we should not get here!") + + # Internal -- parse endtag, return end or -1 if incomplete + def parse_endtag(self, i): + rawdata = self.rawdata + assert rawdata[i:i+2] == " + if not match: + return -1 + gtpos = match.end() + match = endtagfind.match(rawdata, i) # + if not match: + if self.cdata_elem is not None: + self.handle_data(rawdata[i:gtpos]) + return gtpos + if self.strict: + self.error("bad end tag: %r" % (rawdata[i:gtpos],)) + # find the name: w3.org/TR/html5/tokenization.html#tag-name-state + namematch = tagfind_tolerant.match(rawdata, i+2) + if not namematch: + # w3.org/TR/html5/tokenization.html#end-tag-open-state + if rawdata[i:i+3] == '': + return i+3 + else: + return self.parse_bogus_comment(i) + tagname = namematch.group().lower() + # consume and ignore other stuff between the name and the > + # Note: this is not 100% correct, since we might have things like + # , but looking for > after tha name should cover + # most of the cases and is much simpler + gtpos = rawdata.find('>', namematch.end()) + self.handle_endtag(tagname) + return gtpos+1 + + elem = match.group(1).lower() # script or style + if self.cdata_elem is not None: + if elem != self.cdata_elem: + self.handle_data(rawdata[i:gtpos]) + return gtpos + + self.handle_endtag(elem.lower()) + self.clear_cdata_mode() + return gtpos + + # Overridable -- finish processing of start+end tag: + def handle_startendtag(self, tag, attrs): + self.handle_starttag(tag, attrs) + self.handle_endtag(tag) + + # Overridable -- handle start tag + def handle_starttag(self, tag, attrs): + pass + + # Overridable -- handle end tag + def handle_endtag(self, tag): + pass + + # Overridable -- handle character reference + def handle_charref(self, name): + pass + + # Overridable -- handle entity reference + def handle_entityref(self, name): + pass + + # Overridable -- handle data + def handle_data(self, data): + pass + + # Overridable -- handle comment + def handle_comment(self, data): + pass + + # Overridable -- handle declaration + def handle_decl(self, decl): + pass + + # Overridable -- handle processing instruction + def handle_pi(self, data): + pass + + def unknown_decl(self, data): + if self.strict: + self.error("unknown declaration: %r" % (data,)) + + # Internal -- helper to remove special character quoting + def unescape(self, s): + if '&' not in s: + return s + def replaceEntities(s): + s = s.groups()[0] + try: + if s[0] == "#": + s = s[1:] + if s[0] in ['x','X']: + c = int(s[1:].rstrip(';'), 16) + else: + c = int(s.rstrip(';')) + return chr(c) + except ValueError: + return '&#' + s + else: + from future.backports.html.entities import html5 + if s in html5: + return html5[s] + elif s.endswith(';'): + return '&' + s + for x in range(2, len(s)): + if s[:x] in html5: + return html5[s[:x]] + s[x:] + else: + return '&' + s + + return re.sub(r"&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?))", + replaceEntities, s) diff --git a/lib/future/backports/http/__init__.py b/lib/future/backports/http/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/backports/http/client.py b/lib/future/backports/http/client.py new file mode 100644 index 00000000..e663d125 --- /dev/null +++ b/lib/future/backports/http/client.py @@ -0,0 +1,1346 @@ +"""HTTP/1.1 client library + +A backport of the Python 3.3 http/client.py module for python-future. + + + + +HTTPConnection goes through a number of "states", which define when a client +may legally make another request or fetch the response for a particular +request. This diagram details these state transitions: + + (null) + | + | HTTPConnection() + v + Idle + | + | putrequest() + v + Request-started + | + | ( putheader() )* endheaders() + v + Request-sent + | + | response = getresponse() + v + Unread-response [Response-headers-read] + |\____________________ + | | + | response.read() | putrequest() + v v + Idle Req-started-unread-response + ______/| + / | + response.read() | | ( putheader() )* endheaders() + v v + Request-started Req-sent-unread-response + | + | response.read() + v + Request-sent + +This diagram presents the following rules: + -- a second request may not be started until {response-headers-read} + -- a response [object] cannot be retrieved until {request-sent} + -- there is no differentiation between an unread response body and a + partially read response body + +Note: this enforcement is applied by the HTTPConnection class. The + HTTPResponse class does not enforce this state machine, which + implies sophisticated clients may accelerate the request/response + pipeline. Caution should be taken, though: accelerating the states + beyond the above pattern may imply knowledge of the server's + connection-close behavior for certain requests. For example, it + is impossible to tell whether the server will close the connection + UNTIL the response headers have been read; this means that further + requests cannot be placed into the pipeline until it is known that + the server will NOT be closing the connection. + +Logical State __state __response +------------- ------- ---------- +Idle _CS_IDLE None +Request-started _CS_REQ_STARTED None +Request-sent _CS_REQ_SENT None +Unread-response _CS_IDLE +Req-started-unread-response _CS_REQ_STARTED +Req-sent-unread-response _CS_REQ_SENT +""" + +from __future__ import (absolute_import, division, + print_function, unicode_literals) +from future.builtins import bytes, int, str, super +from future.utils import PY2 + +from future.backports.email import parser as email_parser +from future.backports.email import message as email_message +from future.backports.misc import create_connection as socket_create_connection +import io +import os +import socket +from future.backports.urllib.parse import urlsplit +import warnings +from array import array + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +__all__ = ["HTTPResponse", "HTTPConnection", + "HTTPException", "NotConnected", "UnknownProtocol", + "UnknownTransferEncoding", "UnimplementedFileMode", + "IncompleteRead", "InvalidURL", "ImproperConnectionState", + "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", + "BadStatusLine", "error", "responses"] + +HTTP_PORT = 80 +HTTPS_PORT = 443 + +_UNKNOWN = 'UNKNOWN' + +# connection states +_CS_IDLE = 'Idle' +_CS_REQ_STARTED = 'Request-started' +_CS_REQ_SENT = 'Request-sent' + +# status codes +# informational +CONTINUE = 100 +SWITCHING_PROTOCOLS = 101 +PROCESSING = 102 + +# successful +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 +IM_USED = 226 + +# redirection +MULTIPLE_CHOICES = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 + +# client error +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +METHOD_NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTHENTICATION_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 +REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 +UNPROCESSABLE_ENTITY = 422 +LOCKED = 423 +FAILED_DEPENDENCY = 424 +UPGRADE_REQUIRED = 426 +PRECONDITION_REQUIRED = 428 +TOO_MANY_REQUESTS = 429 +REQUEST_HEADER_FIELDS_TOO_LARGE = 431 + +# server error +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE = 507 +NOT_EXTENDED = 510 +NETWORK_AUTHENTICATION_REQUIRED = 511 + +# Mapping status codes to official W3C names +responses = { + 100: 'Continue', + 101: 'Switching Protocols', + + 200: 'OK', + 201: 'Created', + 202: 'Accepted', + 203: 'Non-Authoritative Information', + 204: 'No Content', + 205: 'Reset Content', + 206: 'Partial Content', + + 300: 'Multiple Choices', + 301: 'Moved Permanently', + 302: 'Found', + 303: 'See Other', + 304: 'Not Modified', + 305: 'Use Proxy', + 306: '(Unused)', + 307: 'Temporary Redirect', + + 400: 'Bad Request', + 401: 'Unauthorized', + 402: 'Payment Required', + 403: 'Forbidden', + 404: 'Not Found', + 405: 'Method Not Allowed', + 406: 'Not Acceptable', + 407: 'Proxy Authentication Required', + 408: 'Request Timeout', + 409: 'Conflict', + 410: 'Gone', + 411: 'Length Required', + 412: 'Precondition Failed', + 413: 'Request Entity Too Large', + 414: 'Request-URI Too Long', + 415: 'Unsupported Media Type', + 416: 'Requested Range Not Satisfiable', + 417: 'Expectation Failed', + 428: 'Precondition Required', + 429: 'Too Many Requests', + 431: 'Request Header Fields Too Large', + + 500: 'Internal Server Error', + 501: 'Not Implemented', + 502: 'Bad Gateway', + 503: 'Service Unavailable', + 504: 'Gateway Timeout', + 505: 'HTTP Version Not Supported', + 511: 'Network Authentication Required', +} + +# maximal amount of data to read at one time in _safe_read +MAXAMOUNT = 1048576 + +# maximal line length when calling readline(). +_MAXLINE = 65536 +_MAXHEADERS = 100 + + +class HTTPMessage(email_message.Message): + # XXX The only usage of this method is in + # http.server.CGIHTTPRequestHandler. Maybe move the code there so + # that it doesn't need to be part of the public API. The API has + # never been defined so this could cause backwards compatibility + # issues. + + def getallmatchingheaders(self, name): + """Find all header lines matching a given header name. + + Look through the list of headers and find all lines matching a given + header name (and their continuation lines). A list of the lines is + returned, without interpretation. If the header does not occur, an + empty list is returned. If the header occurs multiple times, all + occurrences are returned. Case is not important in the header name. + + """ + name = name.lower() + ':' + n = len(name) + lst = [] + hit = 0 + for line in self.keys(): + if line[:n].lower() == name: + hit = 1 + elif not line[:1].isspace(): + hit = 0 + if hit: + lst.append(line) + return lst + +def parse_headers(fp, _class=HTTPMessage): + """Parses only RFC2822 headers from a file pointer. + + email Parser wants to see strings rather than bytes. + But a TextIOWrapper around self.rfile would buffer too many bytes + from the stream, bytes which we later need to read as bytes. + So we read the correct bytes here, as bytes, for email Parser + to parse. + + """ + headers = [] + while True: + line = fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("header line") + headers.append(line) + if len(headers) > _MAXHEADERS: + raise HTTPException("got more than %d headers" % _MAXHEADERS) + if line in (b'\r\n', b'\n', b''): + break + hstring = bytes(b'').join(headers).decode('iso-8859-1') + return email_parser.Parser(_class=_class).parsestr(hstring) + + +_strict_sentinel = object() + +class HTTPResponse(io.RawIOBase): + + # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. + + # The bytes from the socket object are iso-8859-1 strings. + # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded + # text following RFC 2047. The basic status line parsing only + # accepts iso-8859-1. + + def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): + # If the response includes a content-length header, we need to + # make sure that the client doesn't read more than the + # specified number of bytes. If it does, it will block until + # the server times out and closes the connection. This will + # happen if a self.fp.read() is done (without a size) whether + # self.fp is buffered or not. So, no self.fp.read() by + # clients unless they know what they are doing. + self.fp = sock.makefile("rb") + self.debuglevel = debuglevel + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) + self._method = method + + # The HTTPResponse object is returned via urllib. The clients + # of http and urllib expect different attributes for the + # headers. headers is used here and supports urllib. msg is + # provided as a backwards compatibility layer for http + # clients. + + self.headers = self.msg = None + + # from the Status-Line of the response + self.version = _UNKNOWN # HTTP-Version + self.status = _UNKNOWN # Status-Code + self.reason = _UNKNOWN # Reason-Phrase + + self.chunked = _UNKNOWN # is "chunked" being used? + self.chunk_left = _UNKNOWN # bytes left to read in current chunk + self.length = _UNKNOWN # number of bytes left in response + self.will_close = _UNKNOWN # conn will close at end of response + + def _read_status(self): + line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") + if len(line) > _MAXLINE: + raise LineTooLong("status line") + if self.debuglevel > 0: + print("reply:", repr(line)) + if not line: + # Presumably, the server closed the connection before + # sending a valid response. + raise BadStatusLine(line) + try: + version, status, reason = line.split(None, 2) + except ValueError: + try: + version, status = line.split(None, 1) + reason = "" + except ValueError: + # empty version will cause next test to fail. + version = "" + if not version.startswith("HTTP/"): + self._close_conn() + raise BadStatusLine(line) + + # The status code is a three-digit number + try: + status = int(status) + if status < 100 or status > 999: + raise BadStatusLine(line) + except ValueError: + raise BadStatusLine(line) + return version, status, reason + + def begin(self): + if self.headers is not None: + # we've already started reading the response + return + + # read until we get a non-100 response + while True: + version, status, reason = self._read_status() + if status != CONTINUE: + break + # skip the header from the 100 response + while True: + skip = self.fp.readline(_MAXLINE + 1) + if len(skip) > _MAXLINE: + raise LineTooLong("header line") + skip = skip.strip() + if not skip: + break + if self.debuglevel > 0: + print("header:", skip) + + self.code = self.status = status + self.reason = reason.strip() + if version in ("HTTP/1.0", "HTTP/0.9"): + # Some servers might still return "0.9", treat it as 1.0 anyway + self.version = 10 + elif version.startswith("HTTP/1."): + self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 + else: + raise UnknownProtocol(version) + + self.headers = self.msg = parse_headers(self.fp) + + if self.debuglevel > 0: + for hdr in self.headers: + print("header:", hdr, end=" ") + + # are we using the chunked-style of transfer encoding? + tr_enc = self.headers.get("transfer-encoding") + if tr_enc and tr_enc.lower() == "chunked": + self.chunked = True + self.chunk_left = None + else: + self.chunked = False + + # will the connection close at the end of the response? + self.will_close = self._check_close() + + # do we have a Content-Length? + # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" + self.length = None + length = self.headers.get("content-length") + + # are we using the chunked-style of transfer encoding? + tr_enc = self.headers.get("transfer-encoding") + if length and not self.chunked: + try: + self.length = int(length) + except ValueError: + self.length = None + else: + if self.length < 0: # ignore nonsensical negative lengths + self.length = None + else: + self.length = None + + # does the body have a fixed length? (of zero) + if (status == NO_CONTENT or status == NOT_MODIFIED or + 100 <= status < 200 or # 1xx codes + self._method == "HEAD"): + self.length = 0 + + # if the connection remains open, and we aren't using chunked, and + # a content-length was not provided, then assume that the connection + # WILL close. + if (not self.will_close and + not self.chunked and + self.length is None): + self.will_close = True + + def _check_close(self): + conn = self.headers.get("connection") + if self.version == 11: + # An HTTP/1.1 proxy is assumed to stay open unless + # explicitly closed. + conn = self.headers.get("connection") + if conn and "close" in conn.lower(): + return True + return False + + # Some HTTP/1.0 implementations have support for persistent + # connections, using rules different than HTTP/1.1. + + # For older HTTP, Keep-Alive indicates persistent connection. + if self.headers.get("keep-alive"): + return False + + # At least Akamai returns a "Connection: Keep-Alive" header, + # which was supposed to be sent by the client. + if conn and "keep-alive" in conn.lower(): + return False + + # Proxy-Connection is a netscape hack. + pconn = self.headers.get("proxy-connection") + if pconn and "keep-alive" in pconn.lower(): + return False + + # otherwise, assume it will close + return True + + def _close_conn(self): + fp = self.fp + self.fp = None + fp.close() + + def close(self): + super().close() # set "closed" flag + if self.fp: + self._close_conn() + + # These implementations are for the benefit of io.BufferedReader. + + # XXX This class should probably be revised to act more like + # the "raw stream" that BufferedReader expects. + + def flush(self): + super().flush() + if self.fp: + self.fp.flush() + + def readable(self): + return True + + # End of "raw stream" methods + + def isclosed(self): + """True if the connection is closed.""" + # NOTE: it is possible that we will not ever call self.close(). This + # case occurs when will_close is TRUE, length is None, and we + # read up to the last byte, but NOT past it. + # + # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be + # called, meaning self.isclosed() is meaningful. + return self.fp is None + + def read(self, amt=None): + if self.fp is None: + return bytes(b"") + + if self._method == "HEAD": + self._close_conn() + return bytes(b"") + + if amt is not None: + # Amount is given, so call base class version + # (which is implemented in terms of self.readinto) + return bytes(super(HTTPResponse, self).read(amt)) + else: + # Amount is not given (unbounded read) so we must check self.length + # and self.chunked + + if self.chunked: + return self._readall_chunked() + + if self.length is None: + s = self.fp.read() + else: + try: + s = self._safe_read(self.length) + except IncompleteRead: + self._close_conn() + raise + self.length = 0 + self._close_conn() # we read everything + return bytes(s) + + def readinto(self, b): + if self.fp is None: + return 0 + + if self._method == "HEAD": + self._close_conn() + return 0 + + if self.chunked: + return self._readinto_chunked(b) + + if self.length is not None: + if len(b) > self.length: + # clip the read to the "end of response" + b = memoryview(b)[0:self.length] + + # we do not use _safe_read() here because this may be a .will_close + # connection, and the user is reading more bytes than will be provided + # (for example, reading in 1k chunks) + + if PY2: + data = self.fp.read(len(b)) + n = len(data) + b[:n] = data + else: + n = self.fp.readinto(b) + + if not n and b: + # Ideally, we would raise IncompleteRead if the content-length + # wasn't satisfied, but it might break compatibility. + self._close_conn() + elif self.length is not None: + self.length -= n + if not self.length: + self._close_conn() + return n + + def _read_next_chunk_size(self): + # Read the next chunk size from the file + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("chunk size") + i = line.find(b";") + if i >= 0: + line = line[:i] # strip chunk-extensions + try: + return int(line, 16) + except ValueError: + # close the connection as protocol synchronisation is + # probably lost + self._close_conn() + raise + + def _read_and_discard_trailer(self): + # read and discard trailer up to the CRLF terminator + ### note: we shouldn't have any trailers! + while True: + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("trailer line") + if not line: + # a vanishingly small number of sites EOF without + # sending the trailer + break + if line in (b'\r\n', b'\n', b''): + break + + def _readall_chunked(self): + assert self.chunked != _UNKNOWN + chunk_left = self.chunk_left + value = [] + while True: + if chunk_left is None: + try: + chunk_left = self._read_next_chunk_size() + if chunk_left == 0: + break + except ValueError: + raise IncompleteRead(bytes(b'').join(value)) + value.append(self._safe_read(chunk_left)) + + # we read the whole chunk, get another + self._safe_read(2) # toss the CRLF at the end of the chunk + chunk_left = None + + self._read_and_discard_trailer() + + # we read everything; close the "file" + self._close_conn() + + return bytes(b'').join(value) + + def _readinto_chunked(self, b): + assert self.chunked != _UNKNOWN + chunk_left = self.chunk_left + + total_bytes = 0 + mvb = memoryview(b) + while True: + if chunk_left is None: + try: + chunk_left = self._read_next_chunk_size() + if chunk_left == 0: + break + except ValueError: + raise IncompleteRead(bytes(b[0:total_bytes])) + + if len(mvb) < chunk_left: + n = self._safe_readinto(mvb) + self.chunk_left = chunk_left - n + return total_bytes + n + elif len(mvb) == chunk_left: + n = self._safe_readinto(mvb) + self._safe_read(2) # toss the CRLF at the end of the chunk + self.chunk_left = None + return total_bytes + n + else: + temp_mvb = mvb[0:chunk_left] + n = self._safe_readinto(temp_mvb) + mvb = mvb[n:] + total_bytes += n + + # we read the whole chunk, get another + self._safe_read(2) # toss the CRLF at the end of the chunk + chunk_left = None + + self._read_and_discard_trailer() + + # we read everything; close the "file" + self._close_conn() + + return total_bytes + + def _safe_read(self, amt): + """Read the number of bytes requested, compensating for partial reads. + + Normally, we have a blocking socket, but a read() can be interrupted + by a signal (resulting in a partial read). + + Note that we cannot distinguish between EOF and an interrupt when zero + bytes have been read. IncompleteRead() will be raised in this + situation. + + This function should be used when bytes "should" be present for + reading. If the bytes are truly not available (due to EOF), then the + IncompleteRead exception can be used to detect the problem. + """ + s = [] + while amt > 0: + chunk = self.fp.read(min(amt, MAXAMOUNT)) + if not chunk: + raise IncompleteRead(bytes(b'').join(s), amt) + s.append(chunk) + amt -= len(chunk) + return bytes(b"").join(s) + + def _safe_readinto(self, b): + """Same as _safe_read, but for reading into a buffer.""" + total_bytes = 0 + mvb = memoryview(b) + while total_bytes < len(b): + if MAXAMOUNT < len(mvb): + temp_mvb = mvb[0:MAXAMOUNT] + if PY2: + data = self.fp.read(len(temp_mvb)) + n = len(data) + temp_mvb[:n] = data + else: + n = self.fp.readinto(temp_mvb) + else: + if PY2: + data = self.fp.read(len(mvb)) + n = len(data) + mvb[:n] = data + else: + n = self.fp.readinto(mvb) + if not n: + raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) + mvb = mvb[n:] + total_bytes += n + return total_bytes + + def fileno(self): + return self.fp.fileno() + + def getheader(self, name, default=None): + if self.headers is None: + raise ResponseNotReady() + headers = self.headers.get_all(name) or default + if isinstance(headers, str) or not hasattr(headers, '__iter__'): + return headers + else: + return ', '.join(headers) + + def getheaders(self): + """Return list of (header, value) tuples.""" + if self.headers is None: + raise ResponseNotReady() + return list(self.headers.items()) + + # We override IOBase.__iter__ so that it doesn't check for closed-ness + + def __iter__(self): + return self + + # For compatibility with old-style urllib responses. + + def info(self): + return self.headers + + def geturl(self): + return self.url + + def getcode(self): + return self.status + +class HTTPConnection(object): + + _http_vsn = 11 + _http_vsn_str = 'HTTP/1.1' + + response_class = HTTPResponse + default_port = HTTP_PORT + auto_open = 1 + debuglevel = 0 + + def __init__(self, host, port=None, strict=_strict_sentinel, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) + self.timeout = timeout + self.source_address = source_address + self.sock = None + self._buffer = [] + self.__response = None + self.__state = _CS_IDLE + self._method = None + self._tunnel_host = None + self._tunnel_port = None + self._tunnel_headers = {} + + self._set_hostport(host, port) + + def set_tunnel(self, host, port=None, headers=None): + """ Sets up the host and the port for the HTTP CONNECT Tunnelling. + + The headers argument should be a mapping of extra HTTP headers + to send with the CONNECT request. + """ + self._tunnel_host = host + self._tunnel_port = port + if headers: + self._tunnel_headers = headers + else: + self._tunnel_headers.clear() + + def _set_hostport(self, host, port): + if port is None: + i = host.rfind(':') + j = host.rfind(']') # ipv6 addresses have [...] + if i > j: + try: + port = int(host[i+1:]) + except ValueError: + if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ + port = self.default_port + else: + raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) + host = host[:i] + else: + port = self.default_port + if host and host[0] == '[' and host[-1] == ']': + host = host[1:-1] + self.host = host + self.port = port + + def set_debuglevel(self, level): + self.debuglevel = level + + def _tunnel(self): + self._set_hostport(self._tunnel_host, self._tunnel_port) + connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) + connect_bytes = connect_str.encode("ascii") + self.send(connect_bytes) + for header, value in self._tunnel_headers.items(): + header_str = "%s: %s\r\n" % (header, value) + header_bytes = header_str.encode("latin-1") + self.send(header_bytes) + self.send(bytes(b'\r\n')) + + response = self.response_class(self.sock, method=self._method) + (version, code, message) = response._read_status() + + if code != 200: + self.close() + raise socket.error("Tunnel connection failed: %d %s" % (code, + message.strip())) + while True: + line = response.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("header line") + if not line: + # for sites which EOF without sending a trailer + break + if line in (b'\r\n', b'\n', b''): + break + + def connect(self): + """Connect to the host and port specified in __init__.""" + self.sock = socket_create_connection((self.host,self.port), + self.timeout, self.source_address) + if self._tunnel_host: + self._tunnel() + + def close(self): + """Close the connection to the HTTP server.""" + if self.sock: + self.sock.close() # close it manually... there may be other refs + self.sock = None + if self.__response: + self.__response.close() + self.__response = None + self.__state = _CS_IDLE + + def send(self, data): + """Send `data' to the server. + ``data`` can be a string object, a bytes object, an array object, a + file-like object that supports a .read() method, or an iterable object. + """ + + if self.sock is None: + if self.auto_open: + self.connect() + else: + raise NotConnected() + + if self.debuglevel > 0: + print("send:", repr(data)) + blocksize = 8192 + # Python 2.7 array objects have a read method which is incompatible + # with the 2-arg calling syntax below. + if hasattr(data, "read") and not isinstance(data, array): + if self.debuglevel > 0: + print("sendIng a read()able") + encode = False + try: + mode = data.mode + except AttributeError: + # io.BytesIO and other file-like objects don't have a `mode` + # attribute. + pass + else: + if "b" not in mode: + encode = True + if self.debuglevel > 0: + print("encoding file using iso-8859-1") + while 1: + datablock = data.read(blocksize) + if not datablock: + break + if encode: + datablock = datablock.encode("iso-8859-1") + self.sock.sendall(datablock) + return + try: + self.sock.sendall(data) + except TypeError: + if isinstance(data, Iterable): + for d in data: + self.sock.sendall(d) + else: + raise TypeError("data should be a bytes-like object " + "or an iterable, got %r" % type(data)) + + def _output(self, s): + """Add a line of output to the current request buffer. + + Assumes that the line does *not* end with \\r\\n. + """ + self._buffer.append(s) + + def _send_output(self, message_body=None): + """Send the currently buffered request and clear the buffer. + + Appends an extra \\r\\n to the buffer. + A message_body may be specified, to be appended to the request. + """ + self._buffer.extend((bytes(b""), bytes(b""))) + msg = bytes(b"\r\n").join(self._buffer) + del self._buffer[:] + # If msg and message_body are sent in a single send() call, + # it will avoid performance problems caused by the interaction + # between delayed ack and the Nagle algorithm. + if isinstance(message_body, bytes): + msg += message_body + message_body = None + self.send(msg) + if message_body is not None: + # message_body was not a string (i.e. it is a file), and + # we must run the risk of Nagle. + self.send(message_body) + + def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): + """Send a request to the server. + + `method' specifies an HTTP request method, e.g. 'GET'. + `url' specifies the object being requested, e.g. '/index.html'. + `skip_host' if True does not add automatically a 'Host:' header + `skip_accept_encoding' if True does not add automatically an + 'Accept-Encoding:' header + """ + + # if a prior response has been completed, then forget about it. + if self.__response and self.__response.isclosed(): + self.__response = None + + + # in certain cases, we cannot issue another request on this connection. + # this occurs when: + # 1) we are in the process of sending a request. (_CS_REQ_STARTED) + # 2) a response to a previous request has signalled that it is going + # to close the connection upon completion. + # 3) the headers for the previous response have not been read, thus + # we cannot determine whether point (2) is true. (_CS_REQ_SENT) + # + # if there is no prior response, then we can request at will. + # + # if point (2) is true, then we will have passed the socket to the + # response (effectively meaning, "there is no prior response"), and + # will open a new one when a new request is made. + # + # Note: if a prior response exists, then we *can* start a new request. + # We are not allowed to begin fetching the response to this new + # request, however, until that prior response is complete. + # + if self.__state == _CS_IDLE: + self.__state = _CS_REQ_STARTED + else: + raise CannotSendRequest(self.__state) + + # Save the method we use, we need it later in the response phase + self._method = method + if not url: + url = '/' + request = '%s %s %s' % (method, url, self._http_vsn_str) + + # Non-ASCII characters should have been eliminated earlier + self._output(request.encode('ascii')) + + if self._http_vsn == 11: + # Issue some standard headers for better HTTP/1.1 compliance + + if not skip_host: + # this header is issued *only* for HTTP/1.1 + # connections. more specifically, this means it is + # only issued when the client uses the new + # HTTPConnection() class. backwards-compat clients + # will be using HTTP/1.0 and those clients may be + # issuing this header themselves. we should NOT issue + # it twice; some web servers (such as Apache) barf + # when they see two Host: headers + + # If we need a non-standard port,include it in the + # header. If the request is going through a proxy, + # but the host of the actual URL, not the host of the + # proxy. + + netloc = '' + if url.startswith('http'): + nil, netloc, nil, nil, nil = urlsplit(url) + + if netloc: + try: + netloc_enc = netloc.encode("ascii") + except UnicodeEncodeError: + netloc_enc = netloc.encode("idna") + self.putheader('Host', netloc_enc) + else: + try: + host_enc = self.host.encode("ascii") + except UnicodeEncodeError: + host_enc = self.host.encode("idna") + + # As per RFC 273, IPv6 address should be wrapped with [] + # when used as Host header + + if self.host.find(':') >= 0: + host_enc = bytes(b'[' + host_enc + b']') + + if self.port == self.default_port: + self.putheader('Host', host_enc) + else: + host_enc = host_enc.decode("ascii") + self.putheader('Host', "%s:%s" % (host_enc, self.port)) + + # note: we are assuming that clients will not attempt to set these + # headers since *this* library must deal with the + # consequences. this also means that when the supporting + # libraries are updated to recognize other forms, then this + # code should be changed (removed or updated). + + # we only want a Content-Encoding of "identity" since we don't + # support encodings such as x-gzip or x-deflate. + if not skip_accept_encoding: + self.putheader('Accept-Encoding', 'identity') + + # we can accept "chunked" Transfer-Encodings, but no others + # NOTE: no TE header implies *only* "chunked" + #self.putheader('TE', 'chunked') + + # if TE is supplied in the header, then it must appear in a + # Connection header. + #self.putheader('Connection', 'TE') + + else: + # For HTTP/1.0, the server will assume "not chunked" + pass + + def putheader(self, header, *values): + """Send a request header line to the server. + + For example: h.putheader('Accept', 'text/html') + """ + if self.__state != _CS_REQ_STARTED: + raise CannotSendHeader() + + if hasattr(header, 'encode'): + header = header.encode('ascii') + values = list(values) + for i, one_value in enumerate(values): + if hasattr(one_value, 'encode'): + values[i] = one_value.encode('latin-1') + elif isinstance(one_value, int): + values[i] = str(one_value).encode('ascii') + value = bytes(b'\r\n\t').join(values) + header = header + bytes(b': ') + value + self._output(header) + + def endheaders(self, message_body=None): + """Indicate that the last header line has been sent to the server. + + This method sends the request to the server. The optional message_body + argument can be used to pass a message body associated with the + request. The message body will be sent in the same packet as the + message headers if it is a string, otherwise it is sent as a separate + packet. + """ + if self.__state == _CS_REQ_STARTED: + self.__state = _CS_REQ_SENT + else: + raise CannotSendHeader() + self._send_output(message_body) + + def request(self, method, url, body=None, headers={}): + """Send a complete request to the server.""" + self._send_request(method, url, body, headers) + + def _set_content_length(self, body): + # Set the content-length based on the body. + thelen = None + try: + thelen = str(len(body)) + except TypeError as te: + # If this is a file-like object, try to + # fstat its file descriptor + try: + thelen = str(os.fstat(body.fileno()).st_size) + except (AttributeError, OSError): + # Don't send a length if this failed + if self.debuglevel > 0: print("Cannot stat!!") + + if thelen is not None: + self.putheader('Content-Length', thelen) + + def _send_request(self, method, url, body, headers): + # Honor explicitly requested Host: and Accept-Encoding: headers. + header_names = dict.fromkeys([k.lower() for k in headers]) + skips = {} + if 'host' in header_names: + skips['skip_host'] = 1 + if 'accept-encoding' in header_names: + skips['skip_accept_encoding'] = 1 + + self.putrequest(method, url, **skips) + + if body is not None and ('content-length' not in header_names): + self._set_content_length(body) + for hdr, value in headers.items(): + self.putheader(hdr, value) + if isinstance(body, str): + # RFC 2616 Section 3.7.1 says that text default has a + # default charset of iso-8859-1. + body = body.encode('iso-8859-1') + self.endheaders(body) + + def getresponse(self): + """Get the response from the server. + + If the HTTPConnection is in the correct state, returns an + instance of HTTPResponse or of whatever object is returned by + class the response_class variable. + + If a request has not been sent or if a previous response has + not be handled, ResponseNotReady is raised. If the HTTP + response indicates that the connection should be closed, then + it will be closed before the response is returned. When the + connection is closed, the underlying socket is closed. + """ + + # if a prior response has been completed, then forget about it. + if self.__response and self.__response.isclosed(): + self.__response = None + + # if a prior response exists, then it must be completed (otherwise, we + # cannot read this response's header to determine the connection-close + # behavior) + # + # note: if a prior response existed, but was connection-close, then the + # socket and response were made independent of this HTTPConnection + # object since a new request requires that we open a whole new + # connection + # + # this means the prior response had one of two states: + # 1) will_close: this connection was reset and the prior socket and + # response operate independently + # 2) persistent: the response was retained and we await its + # isclosed() status to become true. + # + if self.__state != _CS_REQ_SENT or self.__response: + raise ResponseNotReady(self.__state) + + if self.debuglevel > 0: + response = self.response_class(self.sock, self.debuglevel, + method=self._method) + else: + response = self.response_class(self.sock, method=self._method) + + response.begin() + assert response.will_close != _UNKNOWN + self.__state = _CS_IDLE + + if response.will_close: + # this effectively passes the connection to the response + self.close() + else: + # remember this, so we can tell when it is complete + self.__response = response + + return response + +try: + import ssl + from ssl import SSLContext +except ImportError: + pass +else: + class HTTPSConnection(HTTPConnection): + "This class allows communication via SSL." + + default_port = HTTPS_PORT + + # XXX Should key_file and cert_file be deprecated in favour of context? + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, **_3to2kwargs): + if 'check_hostname' in _3to2kwargs: check_hostname = _3to2kwargs['check_hostname']; del _3to2kwargs['check_hostname'] + else: check_hostname = None + if 'context' in _3to2kwargs: context = _3to2kwargs['context']; del _3to2kwargs['context'] + else: context = None + super(HTTPSConnection, self).__init__(host, port, strict, timeout, + source_address) + self.key_file = key_file + self.cert_file = cert_file + if context is None: + # Some reasonable defaults + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + will_verify = context.verify_mode != ssl.CERT_NONE + if check_hostname is None: + check_hostname = will_verify + elif check_hostname and not will_verify: + raise ValueError("check_hostname needs a SSL context with " + "either CERT_OPTIONAL or CERT_REQUIRED") + if key_file or cert_file: + context.load_cert_chain(cert_file, key_file) + self._context = context + self._check_hostname = check_hostname + + def connect(self): + "Connect to a host on a given (SSL) port." + + sock = socket_create_connection((self.host, self.port), + self.timeout, self.source_address) + + if self._tunnel_host: + self.sock = sock + self._tunnel() + + server_hostname = self.host if ssl.HAS_SNI else None + self.sock = self._context.wrap_socket(sock, + server_hostname=server_hostname) + try: + if self._check_hostname: + ssl.match_hostname(self.sock.getpeercert(), self.host) + except Exception: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise + + __all__.append("HTTPSConnection") + + + # ###################################### + # # We use the old HTTPSConnection class from Py2.7, because ssl.SSLContext + # # doesn't exist in the Py2.7 stdlib + # class HTTPSConnection(HTTPConnection): + # "This class allows communication via SSL." + + # default_port = HTTPS_PORT + + # def __init__(self, host, port=None, key_file=None, cert_file=None, + # strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + # source_address=None): + # HTTPConnection.__init__(self, host, port, strict, timeout, + # source_address) + # self.key_file = key_file + # self.cert_file = cert_file + + # def connect(self): + # "Connect to a host on a given (SSL) port." + + # sock = socket_create_connection((self.host, self.port), + # self.timeout, self.source_address) + # if self._tunnel_host: + # self.sock = sock + # self._tunnel() + # self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) + + # __all__.append("HTTPSConnection") + # ###################################### + + +class HTTPException(Exception): + # Subclasses that define an __init__ must call Exception.__init__ + # or define self.args. Otherwise, str() will fail. + pass + +class NotConnected(HTTPException): + pass + +class InvalidURL(HTTPException): + pass + +class UnknownProtocol(HTTPException): + def __init__(self, version): + self.args = version, + self.version = version + +class UnknownTransferEncoding(HTTPException): + pass + +class UnimplementedFileMode(HTTPException): + pass + +class IncompleteRead(HTTPException): + def __init__(self, partial, expected=None): + self.args = partial, + self.partial = partial + self.expected = expected + def __repr__(self): + if self.expected is not None: + e = ', %i more expected' % self.expected + else: + e = '' + return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) + def __str__(self): + return repr(self) + +class ImproperConnectionState(HTTPException): + pass + +class CannotSendRequest(ImproperConnectionState): + pass + +class CannotSendHeader(ImproperConnectionState): + pass + +class ResponseNotReady(ImproperConnectionState): + pass + +class BadStatusLine(HTTPException): + def __init__(self, line): + if not line: + line = repr(line) + self.args = line, + self.line = line + +class LineTooLong(HTTPException): + def __init__(self, line_type): + HTTPException.__init__(self, "got more than %d bytes when reading %s" + % (_MAXLINE, line_type)) + +# for backwards compatibility +error = HTTPException diff --git a/lib/future/backports/http/cookiejar.py b/lib/future/backports/http/cookiejar.py new file mode 100644 index 00000000..af3ef415 --- /dev/null +++ b/lib/future/backports/http/cookiejar.py @@ -0,0 +1,2110 @@ +r"""HTTP cookie handling for web clients. + +This is a backport of the Py3.3 ``http.cookiejar`` module for +python-future. + +This module has (now fairly distant) origins in Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. + +Class diagram (note that BSDDBCookieJar and the MSIE* classes are not +distributed with the Python standard library, but are available from +http://wwwsearch.sf.net/): + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +""" + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import filter, int, map, open, str +from future.utils import as_native_str, PY2 + +__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', + 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] + +import copy +import datetime +import re +if PY2: + re.ASCII = 0 +import time +from future.backports.urllib.parse import urlparse, urlsplit, quote +from future.backports.http.client import HTTP_PORT +try: + import threading as _threading +except ImportError: + import dummy_threading as _threading +from calendar import timegm + +debug = False # set to True to enable debugging via the logging module +logger = None + +def _debug(*args): + if not debug: + return + global logger + if not logger: + import logging + logger = logging.getLogger("http.cookiejar") + return logger.debug(*args) + + +DEFAULT_HTTP_PORT = str(HTTP_PORT) +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") + +def _warn_unhandled_exception(): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. Warn if any + # exceptions are caught there. + import io, warnings, traceback + f = io.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) + + +# Date/time conversion +# ----------------------------------------------------------------------------- + +EPOCH_YEAR = 1970 +def _timegm(tt): + year, month, mday, hour, min, sec = tt[:6] + if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and + (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): + return timegm(tt) + else: + return None + +DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +MONTHS_LOWER = [] +for month in MONTHS: MONTHS_LOWER.append(month.lower()) + +def time2isoz(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", + representing Universal Time (UTC, aka GMT). An example of this format is: + + 1994-11-24 08:49:37Z + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( + dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) + +def time2netscape(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like this: + + Wed, DD-Mon-YYYY HH:MM:SS GMT + + """ + if t is None: + dt = datetime.datetime.utcnow() + else: + dt = datetime.datetime.utcfromtimestamp(t) + return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( + DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1], + dt.year, dt.hour, dt.minute, dt.second) + + +UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} + +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) +def offset_from_tz_string(tz): + offset = None + if tz in UTC_ZONES: + offset = 0 + else: + m = TIMEZONE_RE.search(tz) + if m: + offset = 3600 * int(m.group(2)) + if m.group(3): + offset = offset + 60 * int(m.group(3)) + if m.group(1) == '-': + offset = -offset + return offset + +def _str2time(day, mon, yr, hr, min, sec, tz): + # translate month name to number + # month numbers start with 1 (January) + try: + mon = MONTHS_LOWER.index(mon.lower())+1 + except ValueError: + # maybe it's already a number + try: + imon = int(mon) + except ValueError: + return None + if 1 <= imon <= 12: + mon = imon + else: + return None + + # make sure clock elements are defined + if hr is None: hr = 0 + if min is None: min = 0 + if sec is None: sec = 0 + + yr = int(yr) + day = int(day) + hr = int(hr) + min = int(min) + sec = int(sec) + + if yr < 1000: + # find "obvious" year + cur_yr = time.localtime(time.time())[0] + m = cur_yr % 100 + tmp = yr + yr = yr + cur_yr - m + m = m - tmp + if abs(m) > 50: + if m > 0: yr = yr + 100 + else: yr = yr - 100 + + # convert UTC time tuple to seconds since epoch (not timezone-adjusted) + t = _timegm((yr, mon, day, hr, min, sec, tz)) + + if t is not None: + # adjust time using timezone string, to get absolute time since epoch + if tz is None: + tz = "UTC" + tz = tz.upper() + offset = offset_from_tz_string(tz) + if offset is None: + return None + t = t - offset + + return t + +STRICT_DATE_RE = re.compile( + r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) +WEEKDAY_RE = re.compile( + r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) +LOOSE_HTTP_DATE_RE = re.compile( + r"""^ + (\d\d?) # day + (?:\s+|[-\/]) + (\w+) # month + (?:\s+|[-\/]) + (\d+) # year + (?: + (?:\s+|:) # separator before clock + (\d\d?):(\d\d) # hour:min + (?::(\d\d))? # optional seconds + )? # optional clock + \s* + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone + \s* + (?:\(\w+\))? # ASCII representation of timezone in parens. + \s*$""", re.X | re.ASCII) +def http2time(text): + """Returns time in seconds since epoch of time represented by a string. + + Return value is an integer. + + None is returned if the format of str is unrecognized, the time is outside + the representable range, or the timezone string is not recognized. If the + string contains no timezone, UTC is assumed. + + The timezone in the string may be numerical (like "-0800" or "+0100") or a + string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the + timezone strings equivalent to UTC (zero offset) are known to the function. + + The function loosely parses the following formats: + + Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format + Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format + Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format + 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) + 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) + 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) + + The parser ignores leading and trailing whitespace. The time may be + absent. + + If the year is given with only 2 digits, the function will select the + century that makes the year closest to the current date. + + """ + # fast exit for strictly conforming string + m = STRICT_DATE_RE.search(text) + if m: + g = m.groups() + mon = MONTHS_LOWER.index(g[1].lower()) + 1 + tt = (int(g[2]), mon, int(g[0]), + int(g[3]), int(g[4]), float(g[5])) + return _timegm(tt) + + # No, we need some messy parsing... + + # clean up + text = text.lstrip() + text = WEEKDAY_RE.sub("", text, 1) # Useless weekday + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = LOOSE_HTTP_DATE_RE.search(text) + if m is not None: + day, mon, yr, hr, min, sec, tz = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + +ISO_DATE_RE = re.compile( + """^ + (\d{4}) # year + [-\/]? + (\d\d?) # numerical month + [-\/]? + (\d\d?) # day + (?: + (?:\s+|[-:Tt]) # separator before clock + (\d\d?):?(\d\d) # hour:min + (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) + )? # optional clock + \s* + ([-+]?\d\d?:?(:?\d\d)? + |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) + \s*$""", re.X | re. ASCII) +def iso2time(text): + """ + As for http2time, but parses the ISO 8601 formats: + + 1994-02-03 14:15:29 -0100 -- ISO 8601 format + 1994-02-03 14:15:29 -- zone is optional + 1994-02-03 -- only date + 1994-02-03T14:15:29 -- Use T as separator + 19940203T141529Z -- ISO 8601 compact format + 19940203 -- only date + + """ + # clean up + text = text.lstrip() + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = ISO_DATE_RE.search(text) + if m is not None: + # XXX there's an extra bit of the timezone I'm ignoring here: is + # this the right thing to do? + yr, mon, day, hr, min, sec, tz, _ = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + + +# Header parsing +# ----------------------------------------------------------------------------- + +def unmatched(match): + """Return unmatched part of re.Match object.""" + start, end = match.span(0) + return match.string[:start]+match.string[end:] + +HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") +HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") +HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") +HEADER_ESCAPE_RE = re.compile(r"\\(.)") +def split_header_words(header_values): + r"""Parse header values into a list of lists containing key,value pairs. + + The function knows how to deal with ",", ";" and "=" as well as quoted + values after "=". A list of space separated tokens are parsed as if they + were separated by ";". + + If the header_values passed as argument contains multiple values, then they + are treated as if they were a single value separated by comma ",". + + This means that this function is useful for parsing header fields that + follow this syntax (BNF as from the HTTP/1.1 specification, but we relax + the requirement for tokens). + + headers = #header + header = (token | parameter) *( [";"] (token | parameter)) + + token = 1* + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = > + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + Each header is represented by a list of key/value pairs. The value for a + simple token (not part of a parameter) is None. Syntactically incorrect + headers will not necessarily be parsed as you would want. + + This is easier to describe with some examples: + + >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) + [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] + >>> split_header_words(['text/html; charset="iso-8859-1"']) + [[('text/html', None), ('charset', 'iso-8859-1')]] + >>> split_header_words([r'Basic realm="\"foo\bar\""']) + [[('Basic', None), ('realm', '"foobar"')]] + + """ + assert not isinstance(header_values, str) + result = [] + for text in header_values: + orig_text = text + pairs = [] + while text: + m = HEADER_TOKEN_RE.search(text) + if m: + text = unmatched(m) + name = m.group(1) + m = HEADER_QUOTED_VALUE_RE.search(text) + if m: # quoted value + text = unmatched(m) + value = m.group(1) + value = HEADER_ESCAPE_RE.sub(r"\1", value) + else: + m = HEADER_VALUE_RE.search(text) + if m: # unquoted value + text = unmatched(m) + value = m.group(1) + value = value.rstrip() + else: + # no value, a lone token + value = None + pairs.append((name, value)) + elif text.lstrip().startswith(","): + # concatenated headers, as per RFC 2616 section 4.2 + text = text.lstrip()[1:] + if pairs: result.append(pairs) + pairs = [] + else: + # skip junk + non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) + assert nr_junk_chars > 0, ( + "split_header_words bug: '%s', '%s', %s" % + (orig_text, text, pairs)) + text = non_junk + if pairs: result.append(pairs) + return result + +HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") +def join_header_words(lists): + """Do the inverse (almost) of the conversion done by split_header_words. + + Takes a list of lists of (key, value) pairs and produces a single header + value. Attribute values are quoted if needed. + + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) + 'text/plain; charset="iso-8859/1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) + 'text/plain, charset="iso-8859/1"' + + """ + headers = [] + for pairs in lists: + attr = [] + for k, v in pairs: + if v is not None: + if not re.search(r"^\w+$", v): + v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ + v = '"%s"' % v + k = "%s=%s" % (k, v) + attr.append(k) + if attr: headers.append("; ".join(attr)) + return ", ".join(headers) + +def strip_quotes(text): + if text.startswith('"'): + text = text[1:] + if text.endswith('"'): + text = text[:-1] + return text + +def parse_ns_headers(ns_headers): + """Ad-hoc parser for Netscape protocol cookie-attributes. + + The old Netscape cookie format for Set-Cookie can for instance contain + an unquoted "," in the expires field, so we have to use this ad-hoc + parser instead of split_header_words. + + XXX This may not make the best possible effort to parse all the crap + that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient + parser is probably better, so could do worse than following that if + this ever gives any trouble. + + Currently, this is also used for parsing RFC 2109 cookies. + + """ + known_attrs = ("expires", "domain", "path", "secure", + # RFC 2109 attrs (may turn up in Netscape cookies, too) + "version", "port", "max-age") + + result = [] + for ns_header in ns_headers: + pairs = [] + version_set = False + for ii, param in enumerate(re.split(r";\s*", ns_header)): + param = param.rstrip() + if param == "": continue + if "=" not in param: + k, v = param, None + else: + k, v = re.split(r"\s*=\s*", param, 1) + k = k.lstrip() + if ii != 0: + lc = k.lower() + if lc in known_attrs: + k = lc + if k == "version": + # This is an RFC 2109 cookie. + v = strip_quotes(v) + version_set = True + if k == "expires": + # convert expires date to seconds since epoch + v = http2time(strip_quotes(v)) # None if invalid + pairs.append((k, v)) + + if pairs: + if not version_set: + pairs.append(("version", "0")) + result.append(pairs) + + return result + + +IPV4_RE = re.compile(r"\.\d+$", re.ASCII) +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + if IPV4_RE.search(text): + return False + if text == "": + return False + if text[0] == "." or text[-1] == ".": + return False + return True + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. + A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + if i == -1 or i == 0: + # A does not have form NB, or N is the empty string + return False + if not B.startswith("."): + return False + if not is_HDN(B[1:]): + return False + return True + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + if IPV4_RE.search(text): + return False + return True + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. + + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$", re.ASCII) +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = cut_port_re.sub("", host, 1) + return host.lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name). + + As defined by RFC 2965, except both are lowercased. + + """ + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def request_path(request): + """Path component of request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + parts = urlsplit(url) + path = escape_path(parts.path) + if not path.startswith("/"): + # fix bad RFC 2396 absoluteURI + path = "/" + path + return path + +def request_port(request): + host = request.host + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + _debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). +HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + path = quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. + + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + """ + req_host = request_host(request) + if not domain_match(req_host, reach(request.get_origin_req_host())): + return True + else: + return False + + +class Cookie(object): + """HTTP Cookie. + + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + Note that the port may be present in the headers, but unspecified ("Port" + rather than"Port=80", for example); if this is the case, port is None. + + """ + + def __init__(self, version, name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest, + rfc2109=False, + ): + + if version is not None: version = int(version) + if expires is not None: expires = int(expires) + if port is None and port_specified is True: + raise ValueError("if port is None, port_specified must be false") + + self.version = version + self.name = name + self.value = value + self.port = port + self.port_specified = port_specified + # normalise case, as per RFC 2965 section 3.3.3 + self.domain = domain.lower() + self.domain_specified = domain_specified + # Sigh. We need to know whether the domain given in the + # cookie-attribute had an initial dot, in order to follow RFC 2965 + # (as clarified in draft errata). Needed for the returned $Domain + # value. + self.domain_initial_dot = domain_initial_dot + self.path = path + self.path_specified = path_specified + self.secure = secure + self.expires = expires + self.discard = discard + self.comment = comment + self.comment_url = comment_url + self.rfc2109 = rfc2109 + + self._rest = copy.copy(rest) + + def has_nonstandard_attr(self, name): + return name in self._rest + def get_nonstandard_attr(self, name, default=None): + return self._rest.get(name, default) + def set_nonstandard_attr(self, name, value): + self._rest[name] = value + + def is_expired(self, now=None): + if now is None: now = time.time() + if (self.expires is not None) and (self.expires <= now): + return True + return False + + def __str__(self): + if self.port is None: p = "" + else: p = ":"+self.port + limit = self.domain + p + self.path + if self.value is not None: + namevalue = "%s=%s" % (self.name, self.value) + else: + namevalue = self.name + return "" % (namevalue, limit) + + @as_native_str() + def __repr__(self): + args = [] + for name in ("version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + ): + attr = getattr(self, name) + ### Python-Future: + # Avoid u'...' prefixes for unicode strings: + if isinstance(attr, str): + attr = str(attr) + ### + args.append(str("%s=%s") % (name, repr(attr))) + args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) + return "Cookie(%s)" % ", ".join(args) + + +class CookiePolicy(object): + """Defines which cookies get accepted from and returned to server. + + May also modify cookies, though this is probably a bad idea. + + The subclass DefaultCookiePolicy defines the standard rules for Netscape + and RFC 2965 cookies -- override that if you want a customised policy. + + """ + def set_ok(self, cookie, request): + """Return true if (and only if) cookie should be accepted from server. + + Currently, pre-expired cookies never get this far -- the CookieJar + class deletes such cookies itself. + + """ + raise NotImplementedError() + + def return_ok(self, cookie, request): + """Return true if (and only if) cookie should be returned to server.""" + raise NotImplementedError() + + def domain_return_ok(self, domain, request): + """Return false if cookies should not be returned, given cookie domain. + """ + return True + + def path_return_ok(self, path, request): + """Return false if cookies should not be returned, given cookie path. + """ + return True + + +class DefaultCookiePolicy(CookiePolicy): + """Implements the standard rules for accepting and returning cookies.""" + + DomainStrictNoDots = 1 + DomainStrictNonDomain = 2 + DomainRFC2965Match = 4 + + DomainLiberal = 0 + DomainStrict = DomainStrictNoDots|DomainStrictNonDomain + + def __init__(self, + blocked_domains=None, allowed_domains=None, + netscape=True, rfc2965=False, + rfc2109_as_netscape=None, + hide_cookie2=False, + strict_domain=False, + strict_rfc2965_unverifiable=True, + strict_ns_unverifiable=False, + strict_ns_domain=DomainLiberal, + strict_ns_set_initial_dollar=False, + strict_ns_set_path=False, + ): + """Constructor arguments should be passed as keyword arguments only.""" + self.netscape = netscape + self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape + self.hide_cookie2 = hide_cookie2 + self.strict_domain = strict_domain + self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable + self.strict_ns_unverifiable = strict_ns_unverifiable + self.strict_ns_domain = strict_ns_domain + self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar + self.strict_ns_set_path = strict_ns_set_path + + if blocked_domains is not None: + self._blocked_domains = tuple(blocked_domains) + else: + self._blocked_domains = () + + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def blocked_domains(self): + """Return the sequence of blocked domains (as a tuple).""" + return self._blocked_domains + def set_blocked_domains(self, blocked_domains): + """Set the sequence of blocked domains.""" + self._blocked_domains = tuple(blocked_domains) + + def is_blocked(self, domain): + for blocked_domain in self._blocked_domains: + if user_domain_match(domain, blocked_domain): + return True + return False + + def allowed_domains(self): + """Return None, or the sequence of allowed domains (as a tuple).""" + return self._allowed_domains + def set_allowed_domains(self, allowed_domains): + """Set the sequence of allowed domains, or None.""" + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def is_not_allowed(self, domain): + if self._allowed_domains is None: + return False + for allowed_domain in self._allowed_domains: + if user_domain_match(domain, allowed_domain): + return False + return True + + def set_ok(self, cookie, request): + """ + If you override .set_ok(), be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to accept). + + """ + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. + _debug(" Set-Cookie2 without version attribute (%s=%s)", + cookie.name, cookie.value) + return False + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. + if (cookie.version == 0 and self.strict_ns_set_initial_dollar and + cookie.name.startswith("$")): + _debug(" illegal name (starts with '$'): '%s'", cookie.name) + return False + return True + + def set_ok_path(self, cookie, request): + if cookie.path_specified: + req_path = request_path(request) + if ((cookie.version > 0 or + (cookie.version == 0 and self.strict_ns_set_path)) and + not req_path.startswith(cookie.path)): + _debug(" path attribute %s is not a prefix of request " + "path %s", cookie.path, req_path) + return False + return True + + def set_ok_domain(self, cookie, request): + if self.is_blocked(cookie.domain): + _debug(" domain %s is in user block-list", cookie.domain) + return False + if self.is_not_allowed(cookie.domain): + _debug(" domain %s is not in user allow-list", cookie.domain) + return False + if cookie.domain_specified: + req_host, erhn = eff_request_host(request) + domain = cookie.domain + if self.strict_domain and (domain.count(".") >= 2): + # XXX This should probably be compared with the Konqueror + # (kcookiejar.cpp) and Mozilla implementations, but it's a + # losing battle. + i = domain.rfind(".") + j = domain.rfind(".", 0, i) + if j == 0: # domain like .foo.bar + tld = domain[i+1:] + sld = domain[j+1:i] + if sld.lower() in ("co", "ac", "com", "edu", "org", "net", + "gov", "mil", "int", "aero", "biz", "cat", "coop", + "info", "jobs", "mobi", "museum", "name", "pro", + "travel", "eu") and len(tld) == 2: + # domain like .co.uk + _debug(" country-code second level domain %s", domain) + return False + if domain.startswith("."): + undotted_domain = domain[1:] + else: + undotted_domain = domain + embedded_dots = (undotted_domain.find(".") >= 0) + if not embedded_dots and domain != ".local": + _debug(" non-local domain %s contains no embedded dot", + domain) + return False + if cookie.version == 0: + if (not erhn.endswith(domain) and + (not erhn.startswith(".") and + not ("."+erhn).endswith(domain))): + _debug(" effective request-host %s (even with added " + "initial dot) does not end with %s", + erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainRFC2965Match)): + if not domain_match(erhn, domain): + _debug(" effective request-host %s does not domain-match " + "%s", erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainStrictNoDots)): + host_prefix = req_host[:-len(domain)] + if (host_prefix.find(".") >= 0 and + not IPV4_RE.search(req_host)): + _debug(" host prefix %s for domain %s contains a dot", + host_prefix, domain) + return False + return True + + def set_ok_port(self, cookie, request): + if cookie.port_specified: + req_port = request_port(request) + if req_port is None: + req_port = "80" + else: + req_port = str(req_port) + for p in cookie.port.split(","): + try: + int(p) + except ValueError: + _debug(" bad port %s (not numeric)", p) + return False + if p == req_port: + break + else: + _debug(" request port (%s) not found in %s", + req_port, cookie.port) + return False + return True + + def return_ok(self, cookie, request): + """ + If you override .return_ok(), be sure to call this method. If it + returns false, so should your subclass (assuming your subclass wants to + be more strict about which cookies to return). + + """ + # Path has already been checked by .path_return_ok(), and domain + # blocking done by .domain_return_ok(). + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + for n in "version", "verifiability", "secure", "expires", "port", "domain": + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request.unverifiable and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.type != "https": + _debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + _debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + _debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + _debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + _debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + _debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. + req_host, erhn = eff_request_host(request) + if not req_host.startswith("."): + req_host = "."+req_host + if not erhn.startswith("."): + erhn = "."+erhn + if not (req_host.endswith(domain) or erhn.endswith(domain)): + #_debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + _debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + _debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + _debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + _debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = sorted(adict.keys()) + return map(adict.get, keys) + +def deepvalues(mapping): + """Iterates over nested mapping, depth-first, in sorted order by key.""" + values = vals_sorted_by_key(mapping) + for obj in values: + mapping = False + try: + obj.items + except AttributeError: + pass + else: + mapping = True + for subobj in deepvalues(obj): + yield subobj + if not mapping: + yield obj + + +# Used as second parameter to dict.get() method, to distinguish absent +# dict key from one with a None value. +class Absent(object): pass + +class CookieJar(object): + """Collection of HTTP cookies. + + You may not need to know about this class: try + urllib.request.build_opener(HTTPCookieProcessor).open(url). + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies_lock = _threading.RLock() + self._cookies = {} + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + _debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + _debug(" not returning cookie") + continue + _debug(" it's a match") + cookies.append(cookie) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + like ['foo="bar"; $Path="/"', ...] + + The $Version attribute is also added when appropriate (currently only + once per request). + + """ + # add cookies in order of most specific (ie. longest) path first + cookies.sort(key=lambda a: len(a.path), reverse=True) + + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... + version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib.request.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. + + """ + _debug("add_cookie_header") + self._cookies_lock.acquire() + try: + + self._policy._now = self._now = int(time.time()) + + cookies = self._cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + if attrs: + if not request.has_header("Cookie"): + request.add_unredirected_header( + "Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if (self._policy.rfc2965 and not self._policy.hide_cookie2 and + not request.has_header("Cookie2")): + for cookie in cookies: + if cookie.version != 1: + request.add_unredirected_header("Cookie2", '$Version="1"') + break + + finally: + self._cookies_lock.release() + + self.clear_expired_cookies() + + def _normalized_cookie_tuples(self, attrs_set): + """Return list of tuples containing normalised cookie information. + + attrs_set is the list of lists of key,value pairs extracted from + the Set-Cookie or Set-Cookie2 headers. + + Tuples are name, value, standard, rest, where name and value are the + cookie name and value, standard is a dictionary containing the standard + cookie-attributes (discard, secure, version, expires or max-age, + domain, path and port) and rest is a dictionary containing the rest of + the cookie-attributes. + + """ + cookie_tuples = [] + + boolean_attrs = "discard", "secure" + value_attrs = ("version", + "expires", "max-age", + "domain", "path", "port", + "comment", "commenturl") + + for cookie_attrs in attrs_set: + name, value = cookie_attrs[0] + + # Build dictionary of standard cookie-attributes (standard) and + # dictionary of other cookie-attributes (rest). + + # Note: expiry time is normalised to seconds since epoch. V0 + # cookies should have the Expires cookie-attribute, and V1 cookies + # should have Max-Age, but since V1 includes RFC 2109 cookies (and + # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we + # accept either (but prefer Max-Age). + max_age_set = False + + bad_cookie = False + + standard = {} + rest = {} + for k, v in cookie_attrs[1:]: + lc = k.lower() + # don't lose case distinction for unknown fields + if lc in value_attrs or lc in boolean_attrs: + k = lc + if k in boolean_attrs and v is None: + # boolean cookie-attribute is present, but has no value + # (like "discard", rather than "port=80") + v = True + if k in standard: + # only first value is significant + continue + if k == "domain": + if v is None: + _debug(" missing value for domain attribute") + bad_cookie = True + break + # RFC 2965 section 3.3.3 + v = v.lower() + if k == "expires": + if max_age_set: + # Prefer max-age to expires (like Mozilla) + continue + if v is None: + _debug(" missing or invalid value for expires " + "attribute: treating as session cookie") + continue + if k == "max-age": + max_age_set = True + try: + v = int(v) + except ValueError: + _debug(" missing or invalid (non-numeric) value for " + "max-age attribute") + bad_cookie = True + break + # convert RFC 2965 Max-Age to seconds since epoch + # XXX Strictly you're supposed to follow RFC 2616 + # age-calculation rules. Remember that zero Max-Age is a + # is a request to discard (old and new) cookie, though. + k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ("port", "comment", "commenturl")): + _debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: + try: + version = int(version) + except ValueError: + return None # invalid version, ignore cookie + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but has no value: default to request port. + # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + elif expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. + try: + self.clear(domain, path, name) + except KeyError: + pass + _debug("Expiring cookie, domain='%s', path='%s', name='%s'", + domain, path, name) + return None + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) + if rfc2109_as_ns is None: + rfc2109_as_ns = not self._policy.rfc2965 + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_ns: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object.""" + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.get_all("Set-Cookie2", []) + ns_hdrs = headers.get_all("Set-Cookie", []) + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except Exception: + _warn_unhandled_exception() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except Exception: + _warn_unhandled_exception() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return key not in lookup + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so.""" + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + + finally: + self._cookies_lock.release() + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set.""" + c = self._cookies + self._cookies_lock.acquire() + try: + if cookie.domain not in c: c[cookie.domain] = {} + c2 = c[cookie.domain] + if cookie.path not in c2: c2[cookie.path] = {} + c3 = c2[cookie.path] + c3[cookie.name] = cookie + finally: + self._cookies_lock.release() + + def extract_cookies(self, response, request): + """Extract cookies from response, where allowable given the request.""" + _debug("extract_cookies: %s", response.info()) + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + for cookie in self.make_cookies(response, request): + if self._policy.set_ok(cookie, request): + _debug(" setting cookie: %s", cookie) + self.set_cookie(cookie) + finally: + self._cookies_lock.release() + + def clear(self, domain=None, path=None, name=None): + """Clear some cookies. + + Invoking this method without arguments will clear all cookies. If + given a single argument, only cookies belonging to that domain will be + removed. If given two arguments, cookies belonging to the specified + path within that domain are removed. If given three arguments, then + the cookie with the specified name, path and domain is removed. + + Raises KeyError if no matching cookie exists. + + """ + if name is not None: + if (domain is None) or (path is None): + raise ValueError( + "domain and path must be given to remove a cookie by name") + del self._cookies[domain][path][name] + elif path is not None: + if domain is None: + raise ValueError( + "domain must be given to remove cookies by path") + del self._cookies[domain][path] + elif domain is not None: + del self._cookies[domain] + else: + self._cookies = {} + + def clear_session_cookies(self): + """Discard all session cookies. + + Note that the .save() method won't save session cookies anyway, unless + you ask otherwise by passing a true ignore_discard argument. + + """ + self._cookies_lock.acquire() + try: + for cookie in self: + if cookie.discard: + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def clear_expired_cookies(self): + """Discard all expired cookies. + + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the + .save() method won't save expired cookies anyway (unless you ask + otherwise by passing a true ignore_expires argument). + + """ + self._cookies_lock.acquire() + try: + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def __iter__(self): + return deepvalues(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + @as_native_str() + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + +# derives from IOError for backwards-compatibility with Python 2.4.0 +class LoadError(IOError): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file.""" + + def __init__(self, filename=None, delayload=False, policy=None): + """ + Cookies are NOT loaded from the named file until either the .load() or + .revert() method is called. + + """ + CookieJar.__init__(self, policy) + if filename is not None: + try: + filename+"" + except: + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file.""" + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename) + try: + self._really_load(f, filename, ignore_discard, ignore_expires) + finally: + f.close() + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or IOError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + self._cookies_lock.acquire() + try: + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except (LoadError, IOError): + self._cookies = old_state + raise + + finally: + self._cookies_lock.release() + + +def lwp_cookie_str(cookie): + """Return string representation of Cookie in an the LWP cookie file format. + + Actually, the format is extended a bit -- see module docstring. + + """ + h = [(cookie.name, cookie.value), + ("path", cookie.path), + ("domain", cookie.domain)] + if cookie.port is not None: h.append(("port", cookie.port)) + if cookie.path_specified: h.append(("path_spec", None)) + if cookie.port_specified: h.append(("port_spec", None)) + if cookie.domain_initial_dot: h.append(("domain_dot", None)) + if cookie.secure: h.append(("secure", None)) + if cookie.expires: h.append(("expires", + time2isoz(float(cookie.expires)))) + if cookie.discard: h.append(("discard", None)) + if cookie.comment: h.append(("comment", cookie.comment)) + if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) + + keys = sorted(cookie._rest.keys()) + for k in keys: + h.append((k, str(cookie._rest[k]))) + + h.append(("version", str(cookie.version))) + + return join_header_words([h]) + +class LWPCookieJar(FileCookieJar): + """ + The LWPCookieJar saves a sequence of "Set-Cookie3" lines. + "Set-Cookie3" is the format used by the libwww-perl libary, not known + to be compatible with any browser, but which is easy to read and + doesn't lose information about RFC 2965 cookies. + + Additional methods + + as_lwp_str(ignore_discard=True, ignore_expired=True) + + """ + + def as_lwp_str(self, ignore_discard=True, ignore_expires=True): + """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. + + ignore_discard and ignore_expires: see docstring for FileCookieJar.save + + """ + now = time.time() + r = [] + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie)) + return "\n".join(r+[""]) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename, "w") + try: + # There really isn't an LWP Cookies 2.0 format, but this indicates + # that there is extra information in here (domain_dot and + # port_spec) while still being compatible with libwww-perl, I hope. + f.write("#LWP-Cookies-2.0\n") + f.write(self.as_lwp_str(ignore_discard, ignore_expires)) + finally: + f.close() + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + magic = f.readline() + if not self.magic_re.search(magic): + msg = ("%r does not look like a Set-Cookie3 (LWP) format " + "file" % filename) + raise LoadError(msg) + + now = time.time() + + header = "Set-Cookie3:" + boolean_attrs = ("port_spec", "path_spec", "domain_dot", + "secure", "discard") + value_attrs = ("version", + "port", "path", "domain", + "expires", + "comment", "commenturl") + + try: + while 1: + line = f.readline() + if line == "": break + if not line.startswith(header): + continue + line = line[len(header):].strip() + + for data in split_header_words([line]): + name, value = data[0] + standard = {} + rest = {} + for k in boolean_attrs: + standard[k] = False + for k, v in data[1:]: + if k is not None: + lc = k.lower() + else: + lc = None + # don't lose case distinction for unknown fields + if (lc in value_attrs) or (lc in boolean_attrs): + k = lc + if k in boolean_attrs: + if v is None: v = True + standard[k] = v + elif k in value_attrs: + standard[k] = v + else: + rest[k] = v + + h = standard.get + expires = h("expires") + discard = h("discard") + if expires is not None: + expires = iso2time(expires) + if expires is None: + discard = True + domain = h("domain") + domain_specified = domain.startswith(".") + c = Cookie(h("version"), name, value, + h("port"), h("port_spec"), + domain, domain_specified, h("domain_dot"), + h("path"), h("path_spec"), + h("secure"), + expires, + discard, + h("comment"), + h("commenturl"), + rest) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except IOError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Set-Cookie3 format file %r: %r" % + (filename, line)) + + +class MozillaCookieJar(FileCookieJar): + """ + + WARNING: you may want to backup your browser's cookies file if you use + this class to save cookies. I *think* it works, but there have been + bugs in the past! + + This class differs from CookieJar only in the format it uses to save and + load cookies to and from a file. This class uses the Mozilla/Netscape + `cookies.txt' format. lynx uses this file format, too. + + Don't expect cookies saved while the browser is running to be noticed by + the browser (in fact, Mozilla on unix will overwrite your saved cookies if + you change them on disk while it's running; on Windows, you probably can't + save at all while the browser is running). + + Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to + Netscape cookies on saving. + + In particular, the cookie version and port number information is lost, + together with information about whether or not Path, Port and Discard were + specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the + domain as set in the HTTP header started with a dot (yes, I'm aware some + domains in Netscape files start with a dot and some don't -- trust me, you + really don't want to know any more about this). + + Note that though Mozilla and Netscape use the same format, they use + slightly different headers. The class saves cookies using the Netscape + header by default (Mozilla can cope with that). + + """ + magic_re = re.compile("#( Netscape)? HTTP Cookie File") + header = """\ +# Netscape HTTP Cookie File +# http://www.netscape.com/newsref/std/cookie_spec.html +# This is a generated file! Do not edit. + +""" + + def _really_load(self, f, filename, ignore_discard, ignore_expires): + now = time.time() + + magic = f.readline() + if not self.magic_re.search(magic): + f.close() + raise LoadError( + "%r does not look like a Netscape format cookies file" % + filename) + + try: + while 1: + line = f.readline() + if line == "": break + + # last field may be absent, so keep any trailing tab + if line.endswith("\n"): line = line[:-1] + + # skip comments and blank lines XXX what is $ for? + if (line.strip().startswith(("#", "$")) or + line.strip() == ""): + continue + + domain, domain_specified, path, secure, expires, name, value = \ + line.split("\t") + secure = (secure == "TRUE") + domain_specified = (domain_specified == "TRUE") + if name == "": + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = value + value = None + + initial_dot = domain.startswith(".") + assert domain_specified == initial_dot + + discard = False + if expires == "": + expires = None + discard = True + + # assume path_specified is false + c = Cookie(0, name, value, + None, False, + domain, domain_specified, initial_dot, + path, False, + secure, + expires, + discard, + None, + None, + {}) + if not ignore_discard and c.discard: + continue + if not ignore_expires and c.is_expired(now): + continue + self.set_cookie(c) + + except IOError: + raise + except Exception: + _warn_unhandled_exception() + raise LoadError("invalid Netscape format cookies file %r: %r" % + (filename, line)) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename, "w") + try: + f.write(self.header) + now = time.time() + for cookie in self: + if not ignore_discard and cookie.discard: + continue + if not ignore_expires and cookie.is_expired(now): + continue + if cookie.secure: secure = "TRUE" + else: secure = "FALSE" + if cookie.domain.startswith("."): initial_dot = "TRUE" + else: initial_dot = "FALSE" + if cookie.expires is not None: + expires = str(cookie.expires) + else: + expires = "" + if cookie.value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name = "" + value = cookie.name + else: + name = cookie.name + value = cookie.value + f.write( + "\t".join([cookie.domain, initial_dot, cookie.path, + secure, expires, name, value])+ + "\n") + finally: + f.close() diff --git a/lib/future/backports/http/cookies.py b/lib/future/backports/http/cookies.py new file mode 100644 index 00000000..8bb61e22 --- /dev/null +++ b/lib/future/backports/http/cookies.py @@ -0,0 +1,598 @@ +#### +# Copyright 2000 by Timothy O'Malley +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +http.cookies module ported to python-future from Py3.3 + +Here's a sample session to show how to use this module. +At the moment, this is the only documentation. + +The Basics +---------- + +Importing is easy... + + >>> from http import cookies + +Most of the time you start by creating a cookie. + + >>> C = cookies.SimpleCookie() + +Once you've created your Cookie, you can add values just as if it were +a dictionary. + + >>> C = cookies.SimpleCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = cookies.SimpleCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. + + >>> C = cookies.SimpleCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = cookies.SimpleCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. + + >>> C = cookies.SimpleCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = cookies.SimpleCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + +Finis. +""" +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import chr, dict, int, str +from future.utils import PY2, as_native_str + +# +# Import our required modules +# +import re +if PY2: + re.ASCII = 0 # for py2 compatibility +import string + +__all__ = ["CookieError", "BaseCookie", "SimpleCookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceeding '\' slash. +# +# These are taken from RFC2068 and RFC2109. +# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" +_Translator = { + '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', + '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', + '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', + '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', + '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', + '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', + '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', + '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', + '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', + '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', + '\036' : '\\036', '\037' : '\\037', + + # Because of the way browsers really handle cookies (as opposed + # to what the RFC says) we also encode , and ; + + ',' : '\\054', ';' : '\\073', + + '"' : '\\"', '\\' : '\\\\', + + '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', + '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', + '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', + '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', + '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', + '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', + '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', + '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', + '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', + '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', + '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', + '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', + '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', + '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', + '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', + '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', + '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', + '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', + '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', + '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', + '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', + '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', + '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', + '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', + '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', + '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', + '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', + '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', + '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', + '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', + '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', + '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', + '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', + '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', + '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', + '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', + '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', + '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', + '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', + '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', + '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', + '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', + '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' + } + +def _quote(str, LegalChars=_LegalChars): + r"""Quote a string for use in a cookie header. + + If the string does not need to be double-quoted, then just return the + string. Otherwise, surround the string in doublequotes and quote + (with a \) special characters. + """ + if all(c in LegalChars for c in str): + return str + else: + return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(mystr): + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if len(mystr) < 2: + return mystr + if mystr[0] != '"' or mystr[-1] != '"': + return mystr + + # We have to assume that we must decode this string. + # Down to work. + + # Remove the "s + mystr = mystr[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(mystr) + res = [] + while 0 <= i < n: + o_match = _OctalPatt.search(mystr, i) + q_match = _QuotePatt.search(mystr, i) + if not o_match and not q_match: # Neither matched + res.append(mystr[i:]) + break + # else: + j = k = -1 + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched + res.append(mystr[i:k]) + res.append(mystr[k+1]) + i = k + 2 + else: # OctalPatt matched + res.append(mystr[i:j]) + res.append(chr(int(mystr[j+1:j+4], 8))) + i = j + 4 + return _nulljoin(res) + +# The _getdate() routine is used to set the expiration time in the cookie's HTTP +# header. By default, _getdate() returns the current time in the appropriate +# "expires" format for a Set-Cookie header. The one optional argument is an +# offset from now, in seconds. For example, an offset of -3600 means "one hour +# ago". The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +class Morsel(dict): + """A class to hold ONE (key, value) pair. + + In a cookie, each such pair may have several attributes, so this class is + used to keep the attributes associated with the appropriate key,value pair. + This class also includes a coded_value attribute, which is used to hold + the network representation of the value. This is most useful when Python + objects are pickled for network transit. + """ + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This is an extension from Microsoft: + # httponly + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { + "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "secure", + "httponly" : "httponly", + "version" : "Version", + } + + _flags = set(['secure', 'httponly']) + + def __init__(self): + # Set defaults + self.key = self.value = self.coded_value = None + + # Set default attributes + for key in self._reserved: + dict.__setitem__(self, key, "") + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid Attribute %s" % K) + dict.__setitem__(self, K, V) + + def isReservedKey(self, K): + return K.lower() in self._reserved + + def set(self, key, val, coded_val, LegalChars=_LegalChars): + # First we verify that the key isn't a reserved word + # Second we make sure it only contains legal characters + if key.lower() in self._reserved: + raise CookieError("Attempt to set a reserved key: %s" % key) + if any(c not in LegalChars for c in key): + raise CookieError("Illegal key value: %s" % key) + + # It's a good key, so save it. + self.key = key + self.value = val + self.coded_value = coded_val + + def output(self, attrs=None, header="Set-Cookie:"): + return "%s %s" % (header, self.OutputString(attrs)) + + __str__ = output + + @as_native_str() + def __repr__(self): + if PY2 and isinstance(self.value, unicode): + val = str(self.value) # make it a newstr to remove the u prefix + else: + val = self.value + return '<%s: %s=%s>' % (self.__class__.__name__, + str(self.key), repr(val)) + + def js_output(self, attrs=None): + # Print javascript + return """ + + """ % (self.OutputString(attrs).replace('"', r'\"')) + + def OutputString(self, attrs=None): + # Build up our result + # + result = [] + append = result.append + + # First, the key=value pair + append("%s=%s" % (self.key, self.coded_value)) + + # Now add any defined attributes + if attrs is None: + attrs = self._reserved + items = sorted(self.items()) + for key, value in items: + if value == "": + continue + if key not in attrs: + continue + if key == "expires" and isinstance(value, int): + append("%s=%s" % (self._reserved[key], _getdate(value))) + elif key == "max-age" and isinstance(value, int): + append("%s=%d" % (self._reserved[key], value)) + elif key == "secure": + append(str(self._reserved[key])) + elif key == "httponly": + append(str(self._reserved[key])) + else: + append("%s=%s" % (self._reserved[key], value)) + + # Return the result + return _semispacejoin(result) + + +# +# Pattern for finding cookie +# +# This used to be strict parsing based on the RFC2109 and RFC2068 +# specifications. I have since discovered that MSIE 3.0x doesn't +# follow the character rules outlined in those specs. As a +# result, the parsing rules here are less strict. +# + +_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" +_CookiePattern = re.compile(r""" + (?x) # This is a verbose pattern + (?P # Start of group 'key' + """ + _LegalCharsPatt + r"""+? # Any word of at least one letter + ) # End of group 'key' + ( # Optional group: there may not be a value. + \s*=\s* # Equal Sign + (?P # Start of group 'val' + "(?:[^\\"]|\\.)*" # Any doublequoted string + | # or + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + | # or + """ + _LegalCharsPatt + r"""* # Any word or empty string + ) # End of group 'val' + )? # End of optional value group + \s* # Any number of spaces. + (\s+|;|$) # Ending either at space, semicolon, or EOS. + """, re.ASCII) # May be removed if safe. + + +# At long last, here is the cookie class. Using this class is almost just like +# using a dictionary. See this module's docstring for example usage. +# +class BaseCookie(dict): + """A container class for a set of Morsels.""" + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. + """ + strval = str(val) + return strval, strval + + def __init__(self, input=None): + if input: + self.load(input) + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.output(attrs, header)) + return sep.join(result) + + __str__ = output + + @as_native_str() + def __repr__(self): + l = [] + items = sorted(self.items()) + for key, value in items: + if PY2 and isinstance(value.value, unicode): + val = str(value.value) # make it a newstr to remove the u prefix + else: + val = value.value + l.append('%s=%s' % (str(key), repr(val))) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = sorted(self.items()) + for key, value in items: + result.append(value.js_output(attrs)) + return _nulljoin(result) + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if isinstance(rawdata, str): + self.__parse_string(rawdata) + else: + # self.update() wouldn't call our custom __setitem__ + for key, value in rawdata.items(): + self[key] = value + return + + def __parse_string(self, mystr, patt=_CookiePattern): + i = 0 # Our starting point + n = len(mystr) # Length of string + M = None # current morsel + + while 0 <= i < n: + # Start looking for a cookie + match = patt.search(mystr, i) + if not match: + # No more cookies + break + + key, value = match.group("key"), match.group("val") + + i = match.end(0) + + # Parse the key, value in case it's metainfo + if key[0] == "$": + # We ignore attributes which pertain to the cookie + # mechanism as a whole. See RFC 2109. + # (Does anyone care?) + if M: + M[key[1:]] = value + elif key.lower() in Morsel._reserved: + if M: + if value is None: + if key.lower() in Morsel._flags: + M[key] = True + else: + M[key] = _unquote(value) + elif value is not None: + rval, cval = self.value_decode(value) + self.__set(key, rval, cval) + M = self[key] + + +class SimpleCookie(BaseCookie): + """ + SimpleCookie supports strings as cookie values. When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. + """ + def value_decode(self, val): + return _unquote(val), val + + def value_encode(self, val): + strval = str(val) + return strval, _quote(strval) diff --git a/lib/future/backports/http/server.py b/lib/future/backports/http/server.py new file mode 100644 index 00000000..b1c11e0c --- /dev/null +++ b/lib/future/backports/http/server.py @@ -0,0 +1,1226 @@ +"""HTTP server classes. + +From Python 3.3 + +Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see +SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, +and CGIHTTPRequestHandler for CGI scripts. + +It does, however, optionally implement HTTP/1.1 persistent connections, +as of version 0.3. + +Notes on CGIHTTPRequestHandler +------------------------------ + +This class implements GET and POST requests to cgi-bin scripts. + +If the os.fork() function is not present (e.g. on Windows), +subprocess.Popen() is used as a fallback, with slightly altered semantics. + +In all cases, the implementation is intentionally naive -- all +requests are executed synchronously. + +SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL +-- it may execute arbitrary Python code or external programs. + +Note that status code 200 is sent prior to execution of a CGI script, so +scripts cannot send other status codes such as 302 (redirect). + +XXX To do: + +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +""" + +from __future__ import (absolute_import, division, + print_function, unicode_literals) +from future import utils +from future.builtins import * + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# H. Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt +# +# and +# +# Network Working Group R. Fielding +# Request for Comments: 2616 et al +# Obsoletes: 2068 June 1999 +# Category: Standards Track +# +# URL: http://www.faqs.org/rfcs/rfc2616.html + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + +__version__ = "0.6" + +__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] + +from future.backports import html +from future.backports.http import client as http_client +from future.backports.urllib import parse as urllib_parse +from future.backports import socketserver + +import io +import mimetypes +import os +import posixpath +import select +import shutil +import socket # For gethostbyaddr() +import sys +import time +import copy +import argparse + + +# Default error message template +DEFAULT_ERROR_MESSAGE = """\ + + + + + Error response + + +

Error response

+

Error code: %(code)d

+

Message: %(message)s.

+

Error code explanation: %(code)s - %(explain)s.

+ + +""" + +DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" + +def _quote_html(html): + return html.replace("&", "&").replace("<", "<").replace(">", ">") + +class HTTPServer(socketserver.TCPServer): + + allow_reuse_address = 1 # Seems to make sense in testing environment + + def server_bind(self): + """Override server_bind to store the server name.""" + socketserver.TCPServer.server_bind(self) + host, port = self.socket.getsockname()[:2] + self.server_name = socket.getfqdn(host) + self.server_port = port + + +class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + + + where is a (case-sensitive) keyword such as GET or POST, + is a string containing path information for the request, + and should be the string "HTTP/1.0" or "HTTP/1.1". + is encoded using the URL encoding scheme (using %xx to signify + the ASCII character with hex code xx). + + The specification specifies that lines are separated by CRLF but + for compatibility with the widest range of clients recommends + servers also handle LF. Similarly, whitespace in the request line + is treated sensibly (allowing multiple spaces between components + and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. + + If the first line of the request has the form + + + + (i.e. is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.x protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + + + where is the protocol version ("HTTP/1.0" or "HTTP/1.1"), + is a 3-digit response code indicating success or + failure of the request, and is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of email.message.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: / + + where and should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + sys.version.split()[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + error_message_format = DEFAULT_ERROR_MESSAGE + error_content_type = DEFAULT_ERROR_CONTENT_TYPE + + # The default request version. This only affects responses up until + # the point where the request line is parsed, so it mainly decides what + # the client gets back when sending a malformed request line. + # Most web servers default to HTTP 0.9, i.e. don't send a status line. + default_request_version = "HTTP/0.9" + + def parse_request(self): + """Parse a request (internal). + + The request should be stored in self.raw_requestline; the results + are in self.command, self.path, self.request_version and + self.headers. + + Return True for success, False for failure; on failure, an + error is sent back. + + """ + self.command = None # set in case of error on the first line + self.request_version = version = self.default_request_version + self.close_connection = 1 + requestline = str(self.raw_requestline, 'iso-8859-1') + requestline = requestline.rstrip('\r\n') + self.requestline = requestline + words = requestline.split() + if len(words) == 3: + command, path, version = words + if version[:5] != 'HTTP/': + self.send_error(400, "Bad request version (%r)" % version) + return False + try: + base_version_number = version.split('/', 1)[1] + version_number = base_version_number.split(".") + # RFC 2145 section 3.1 says there can be only one "." and + # - major and minor numbers MUST be treated as + # separate integers; + # - HTTP/2.4 is a lower version than HTTP/2.13, which in + # turn is lower than HTTP/12.3; + # - Leading zeros MUST be ignored by recipients. + if len(version_number) != 2: + raise ValueError + version_number = int(version_number[0]), int(version_number[1]) + except (ValueError, IndexError): + self.send_error(400, "Bad request version (%r)" % version) + return False + if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": + self.close_connection = 0 + if version_number >= (2, 0): + self.send_error(505, + "Invalid HTTP Version (%s)" % base_version_number) + return False + elif len(words) == 2: + command, path = words + self.close_connection = 1 + if command != 'GET': + self.send_error(400, + "Bad HTTP/0.9 request type (%r)" % command) + return False + elif not words: + return False + else: + self.send_error(400, "Bad request syntax (%r)" % requestline) + return False + self.command, self.path, self.request_version = command, path, version + + # Examine the headers and look for a Connection directive. + try: + self.headers = http_client.parse_headers(self.rfile, + _class=self.MessageClass) + except http_client.LineTooLong: + self.send_error(400, "Line too long") + return False + + conntype = self.headers.get('Connection', "") + if conntype.lower() == 'close': + self.close_connection = 1 + elif (conntype.lower() == 'keep-alive' and + self.protocol_version >= "HTTP/1.1"): + self.close_connection = 0 + # Examine the headers and look for an Expect directive + expect = self.headers.get('Expect', "") + if (expect.lower() == "100-continue" and + self.protocol_version >= "HTTP/1.1" and + self.request_version >= "HTTP/1.1"): + if not self.handle_expect_100(): + return False + return True + + def handle_expect_100(self): + """Decide what to do with an "Expect: 100-continue" header. + + If the client is expecting a 100 Continue response, we must + respond with either a 100 Continue or a final response before + waiting for the request body. The default is to always respond + with a 100 Continue. You can behave differently (for example, + reject unauthorized requests) by overriding this method. + + This method should either return True (possibly after sending + a 100 Continue response) or send an error response and return + False. + + """ + self.send_response_only(100) + self.flush_headers() + return True + + def handle_one_request(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. + + """ + try: + self.raw_requestline = self.rfile.readline(65537) + if len(self.raw_requestline) > 65536: + self.requestline = '' + self.request_version = '' + self.command = '' + self.send_error(414) + return + if not self.raw_requestline: + self.close_connection = 1 + return + if not self.parse_request(): + # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + self.wfile.flush() #actually send the response if not already done. + except socket.timeout as e: + #a read or a write timed out. Discard this connection + self.log_error("Request timed out: %r", e) + self.close_connection = 1 + return + + def handle(self): + """Handle multiple requests if necessary.""" + self.close_connection = 1 + + self.handle_one_request() + while not self.close_connection: + self.handle_one_request() + + def send_error(self, code, message=None): + """Send and log an error reply. + + Arguments are the error code, and a detailed message. + The detailed message defaults to the short entry matching the + response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. + + """ + + try: + shortmsg, longmsg = self.responses[code] + except KeyError: + shortmsg, longmsg = '???', '???' + if message is None: + message = shortmsg + explain = longmsg + self.log_error("code %d, message %s", code, message) + # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) + content = (self.error_message_format % + {'code': code, 'message': _quote_html(message), 'explain': explain}) + self.send_response(code, message) + self.send_header("Content-Type", self.error_content_type) + self.send_header('Connection', 'close') + self.end_headers() + if self.command != 'HEAD' and code >= 200 and code not in (204, 304): + self.wfile.write(content.encode('UTF-8', 'replace')) + + def send_response(self, code, message=None): + """Add the response header to the headers buffer and log the + response code. + + Also send two standard headers with the server software + version and the current date. + + """ + self.log_request(code) + self.send_response_only(code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_response_only(self, code, message=None): + """Send the response header only.""" + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' + if self.request_version != 'HTTP/0.9': + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append(("%s %d %s\r\n" % + (self.protocol_version, code, message)).encode( + 'latin-1', 'strict')) + + def send_header(self, keyword, value): + """Send a MIME header to the headers buffer.""" + if self.request_version != 'HTTP/0.9': + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append( + ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) + + if keyword.lower() == 'connection': + if value.lower() == 'close': + self.close_connection = 1 + elif value.lower() == 'keep-alive': + self.close_connection = 0 + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self._headers_buffer.append(b"\r\n") + self.flush_headers() + + def flush_headers(self): + if hasattr(self, '_headers_buffer'): + self.wfile.write(b"".join(self._headers_buffer)) + self._headers_buffer = [] + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_response(). + + """ + + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, format, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + self.log_message(format, *args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client ip and current date/time are prefixed to + every message. + + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self, timestamp=None): + """Return the current date and time formatted for a message header.""" + if timestamp is None: + timestamp = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + self.weekdayname[wd], + day, self.monthname[month], year, + hh, mm, ss) + return s + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return the client address.""" + + return self.client_address[0] + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Set this to HTTP/1.1 to enable automatic keepalive + protocol_version = "HTTP/1.0" + + # MessageClass used to parse headers + MessageClass = http_client.HTTPMessage + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. + # See RFC 2616 and 6585. + responses = { + 100: ('Continue', 'Request received, please continue'), + 101: ('Switching Protocols', + 'Switching to new protocol; obey Upgrade header'), + + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), + 204: ('No Content', 'Request fulfilled, nothing follows'), + 205: ('Reset Content', 'Clear input form for further input.'), + 206: ('Partial Content', 'Partial content follows.'), + + 300: ('Multiple Choices', + 'Object has several resources -- see URI list'), + 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('See Other', 'Object moved -- see Method and URL list'), + 304: ('Not Modified', + 'Document has not changed since given time'), + 305: ('Use Proxy', + 'You must use proxy specified in Location to access this ' + 'resource.'), + 307: ('Temporary Redirect', + 'Object moved temporarily -- see URI list'), + + 400: ('Bad Request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment Required', + 'No payment -- see charging schemes'), + 403: ('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not Found', 'Nothing matches the given URI'), + 405: ('Method Not Allowed', + 'Specified method is invalid for this resource.'), + 406: ('Not Acceptable', 'URI not available in preferred format.'), + 407: ('Proxy Authentication Required', 'You must authenticate with ' + 'this proxy before proceeding.'), + 408: ('Request Timeout', 'Request timed out; try again later.'), + 409: ('Conflict', 'Request conflict.'), + 410: ('Gone', + 'URI no longer exists and has been permanently removed.'), + 411: ('Length Required', 'Client must specify Content-Length.'), + 412: ('Precondition Failed', 'Precondition in headers is false.'), + 413: ('Request Entity Too Large', 'Entity is too large.'), + 414: ('Request-URI Too Long', 'URI is too long.'), + 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), + 416: ('Requested Range Not Satisfiable', + 'Cannot satisfy request range.'), + 417: ('Expectation Failed', + 'Expect condition could not be satisfied.'), + 428: ('Precondition Required', + 'The origin server requires the request to be conditional.'), + 429: ('Too Many Requests', 'The user has sent too many requests ' + 'in a given amount of time ("rate limiting").'), + 431: ('Request Header Fields Too Large', 'The server is unwilling to ' + 'process the request because its header fields are too large.'), + + 500: ('Internal Server Error', 'Server got itself in trouble'), + 501: ('Not Implemented', + 'Server does not support this operation'), + 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), + 503: ('Service Unavailable', + 'The server cannot process the request due to a high load'), + 504: ('Gateway Timeout', + 'The gateway server did not receive a timely response'), + 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + 511: ('Network Authentication Required', + 'The client needs to authenticate to gain network access.'), + } + + +class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): + + """Simple HTTP request handler with GET and HEAD commands. + + This serves files from the current directory and any of its + subdirectories. The MIME type for files is determined by + calling the .guess_type() method. + + The GET and HEAD requests are identical except that the HEAD + request omits the actual contents of the file. + + """ + + server_version = "SimpleHTTP/" + __version__ + + def do_GET(self): + """Serve a GET request.""" + f = self.send_head() + if f: + self.copyfile(f, self.wfile) + f.close() + + def do_HEAD(self): + """Serve a HEAD request.""" + f = self.send_head() + if f: + f.close() + + def send_head(self): + """Common code for GET and HEAD commands. + + This sends the response code and MIME headers. + + Return value is either a file object (which has to be copied + to the outputfile by the caller unless the command was HEAD, + and must be closed by the caller under all circumstances), or + None, in which case the caller has nothing further to do. + + """ + path = self.translate_path(self.path) + f = None + if os.path.isdir(path): + if not self.path.endswith('/'): + # redirect browser - doing basically what apache does + self.send_response(301) + self.send_header("Location", self.path + "/") + self.end_headers() + return None + for index in "index.html", "index.htm": + index = os.path.join(path, index) + if os.path.exists(index): + path = index + break + else: + return self.list_directory(path) + ctype = self.guess_type(path) + try: + f = open(path, 'rb') + except IOError: + self.send_error(404, "File not found") + return None + self.send_response(200) + self.send_header("Content-type", ctype) + fs = os.fstat(f.fileno()) + self.send_header("Content-Length", str(fs[6])) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + + def list_directory(self, path): + """Helper to produce a directory listing (absent index.html). + + Return value is either a file object, or None (indicating an + error). In either case, the headers are sent, making the + interface the same as for send_head(). + + """ + try: + list = os.listdir(path) + except os.error: + self.send_error(404, "No permission to list directory") + return None + list.sort(key=lambda a: a.lower()) + r = [] + displaypath = html.escape(urllib_parse.unquote(self.path)) + enc = sys.getfilesystemencoding() + title = 'Directory listing for %s' % displaypath + r.append('') + r.append('\n') + r.append('' % enc) + r.append('%s\n' % title) + r.append('\n

%s

' % title) + r.append('
\n
    ') + for name in list: + fullname = os.path.join(path, name) + displayname = linkname = name + # Append / for directories or @ for symbolic links + if os.path.isdir(fullname): + displayname = name + "/" + linkname = name + "/" + if os.path.islink(fullname): + displayname = name + "@" + # Note: a link to a directory displays with @ and links with / + r.append('
  • %s
  • ' + % (urllib_parse.quote(linkname), html.escape(displayname))) + # # Use this instead: + # r.append('
  • %s
  • ' + # % (urllib.quote(linkname), cgi.escape(displayname))) + r.append('
\n
\n\n\n') + encoded = '\n'.join(r).encode(enc) + f = io.BytesIO() + f.write(encoded) + f.seek(0) + self.send_response(200) + self.send_header("Content-type", "text/html; charset=%s" % enc) + self.send_header("Content-Length", str(len(encoded))) + self.end_headers() + return f + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = path.split('?',1)[0] + path = path.split('#',1)[0] + path = posixpath.normpath(urllib_parse.unquote(path)) + words = path.split('/') + words = filter(None, words) + path = os.getcwd() + for word in words: + drive, word = os.path.splitdrive(word) + head, word = os.path.split(word) + if word in (os.curdir, os.pardir): continue + path = os.path.join(path, word) + return path + + def copyfile(self, source, outputfile): + """Copy all data between two file objects. + + The SOURCE argument is a file object open for reading + (or anything with a read() method) and the DESTINATION + argument is a file object open for writing (or + anything with a write() method). + + The only reason for overriding this would be to change + the block size or perhaps to replace newlines by CRLF + -- note however that this the default server uses this + to copy binary data as well. + + """ + shutil.copyfileobj(source, outputfile) + + def guess_type(self, path): + """Guess the type of a file. + + Argument is a PATH (a filename). + + Return value is a string of the form type/subtype, + usable for a MIME Content-type header. + + The default implementation looks the file's extension + up in the table self.extensions_map, using application/octet-stream + as a default; however it would be permissible (if + slow) to look inside the data to make a better guess. + + """ + + base, ext = posixpath.splitext(path) + if ext in self.extensions_map: + return self.extensions_map[ext] + ext = ext.lower() + if ext in self.extensions_map: + return self.extensions_map[ext] + else: + return self.extensions_map[''] + + if not mimetypes.inited: + mimetypes.init() # try to read system mime.types + extensions_map = mimetypes.types_map.copy() + extensions_map.update({ + '': 'application/octet-stream', # Default + '.py': 'text/plain', + '.c': 'text/plain', + '.h': 'text/plain', + }) + + +# Utilities for CGIHTTPRequestHandler + +def _url_collapse_path(path): + """ + Given a URL path, remove extra '/'s and '.' path elements and collapse + any '..' references and returns a colllapsed path. + + Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. + The utility of this function is limited to is_cgi method and helps + preventing some security attacks. + + Returns: A tuple of (head, tail) where tail is everything after the final / + and head is everything before it. Head will always start with a '/' and, + if it contains anything else, never have a trailing '/'. + + Raises: IndexError if too many '..' occur within the path. + + """ + # Similar to os.path.split(os.path.normpath(path)) but specific to URL + # path semantics rather than local operating system semantics. + path_parts = path.split('/') + head_parts = [] + for part in path_parts[:-1]: + if part == '..': + head_parts.pop() # IndexError if more '..' than prior parts + elif part and part != '.': + head_parts.append( part ) + if path_parts: + tail_part = path_parts.pop() + if tail_part: + if tail_part == '..': + head_parts.pop() + tail_part = '' + elif tail_part == '.': + tail_part = '' + else: + tail_part = '' + + splitpath = ('/' + '/'.join(head_parts), tail_part) + collapsed_path = "/".join(splitpath) + + return collapsed_path + + + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + try: + import pwd + except ImportError: + return -1 + try: + nobody = pwd.getpwnam('nobody')[2] + except KeyError: + nobody = 1 + max(x[2] for x in pwd.getpwall()) + return nobody + + +def executable(path): + """Test for executable file.""" + return os.access(path, os.X_OK) + + +class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + # Determine platform specifics + have_fork = hasattr(os, 'fork') + + # Make rfile unbuffered -- we need to read one line and then pass + # the rest to a subprocess, so we can't use buffered input. + rbufsize = 0 + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. + + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error(501, "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script. + + Returns True and updates the cgi_info attribute to the tuple + (dir, rest) if self.path requires running a CGI script. + Returns False otherwise. + + If any exception is raised, the caller should assume that + self.path was rejected as invalid and act accordingly. + + The default implementation tests whether the normalized url + path begins with one of the strings in self.cgi_directories + (and the next character is a '/' or the end of the string). + + """ + collapsed_path = _url_collapse_path(self.path) + dir_sep = collapsed_path.find('/', 1) + head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] + if head in self.cgi_directories: + self.cgi_info = head, tail + return True + return False + + + cgi_directories = ['/cgi-bin', '/htbin'] + + def is_executable(self, path): + """Test whether argument path is an executable file.""" + return executable(path) + + def is_python(self, path): + """Test whether argument path is a Python script.""" + head, tail = os.path.splitext(path) + return tail.lower() in (".py", ".pyw") + + def run_cgi(self): + """Execute a CGI script.""" + path = self.path + dir, rest = self.cgi_info + + i = path.find('/', len(dir) + 1) + while i >= 0: + nextdir = path[:i] + nextrest = path[i+1:] + + scriptdir = self.translate_path(nextdir) + if os.path.isdir(scriptdir): + dir, rest = nextdir, nextrest + i = path.find('/', len(dir) + 1) + else: + break + + # find an explicit query string, if present. + i = rest.rfind('?') + if i >= 0: + rest, query = rest[:i], rest[i+1:] + else: + query = '' + + # dissect the part after the directory name into a script name & + # a possible additional path, to be stored in PATH_INFO. + i = rest.find('/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error(404, "No such CGI script (%r)" % scriptname) + return + if not os.path.isfile(scriptfile): + self.send_error(403, "CGI script is not a plain file (%r)" % + scriptname) + return + ispy = self.is_python(scriptname) + if self.have_fork or not ispy: + if not self.is_executable(scriptfile): + self.send_error(403, "CGI script is not executable (%r)" % + scriptname) + return + + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! + env = copy.deepcopy(os.environ) + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib_parse.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + env['REMOTE_ADDR'] = self.client_address[0] + authorization = self.headers.get("authorization") + if authorization: + authorization = authorization.split() + if len(authorization) == 2: + import base64, binascii + env['AUTH_TYPE'] = authorization[0] + if authorization[0].lower() == "basic": + try: + authorization = authorization[1].encode('ascii') + if utils.PY3: + # In Py3.3, was: + authorization = base64.decodebytes(authorization).\ + decode('ascii') + else: + # Backport to Py2.7: + authorization = base64.decodestring(authorization).\ + decode('ascii') + except (binascii.Error, UnicodeError): + pass + else: + authorization = authorization.split(':') + if len(authorization) == 2: + env['REMOTE_USER'] = authorization[0] + # XXX REMOTE_IDENT + if self.headers.get('content-type') is None: + env['CONTENT_TYPE'] = self.headers.get_content_type() + else: + env['CONTENT_TYPE'] = self.headers['content-type'] + length = self.headers.get('content-length') + if length: + env['CONTENT_LENGTH'] = length + referer = self.headers.get('referer') + if referer: + env['HTTP_REFERER'] = referer + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in "\t\n\r ": + accept.append(line.strip()) + else: + accept = accept + line[7:].split(',') + env['HTTP_ACCEPT'] = ','.join(accept) + ua = self.headers.get('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.get_all('cookie', [])) + cookie_str = ', '.join(co) + if cookie_str: + env['HTTP_COOKIE'] = cookie_str + # XXX Other HTTP_* headers + # Since we're setting the env in the parent, provide empty + # values to override previously set values + for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', + 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): + env.setdefault(k, "") + + self.send_response(200, "Script output follows") + self.flush_headers() + + decoded_query = query.replace('+', ' ') + + if self.have_fork: + # Unix -- fork as we should + args = [script] + if '=' not in decoded_query: + args.append(decoded_query) + nobody = nobody_uid() + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + # throw away additional data [see bug #427345] + while select.select([self.rfile], [], [], 0)[0]: + if not self.rfile.read(1): + break + if sts: + self.log_error("CGI script exit status %#x", sts) + return + # Child + try: + try: + os.setuid(nobody) + except os.error: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + os.execve(scriptfile, args, env) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + else: + # Non-Unix -- use subprocess + import subprocess + cmdline = [scriptfile] + if self.is_python(scriptfile): + interp = sys.executable + if interp.lower().endswith("w.exe"): + # On Windows, use python.exe, not pythonw.exe + interp = interp[:-5] + interp[-4:] + cmdline = [interp, '-u'] + cmdline + if '=' not in query: + cmdline.append(query) + self.log_message("command: %s", subprocess.list2cmdline(cmdline)) + try: + nbytes = int(length) + except (TypeError, ValueError): + nbytes = 0 + p = subprocess.Popen(cmdline, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env = env + ) + if self.command.lower() == "post" and nbytes > 0: + data = self.rfile.read(nbytes) + else: + data = None + # throw away additional data [see bug #427345] + while select.select([self.rfile._sock], [], [], 0)[0]: + if not self.rfile._sock.recv(1): + break + stdout, stderr = p.communicate(data) + self.wfile.write(stdout) + if stderr: + self.log_error('%s', stderr) + p.stderr.close() + p.stdout.close() + status = p.returncode + if status: + self.log_error("CGI script exit status %#x", status) + else: + self.log_message("CGI script exited OK") + + +def test(HandlerClass = BaseHTTPRequestHandler, + ServerClass = HTTPServer, protocol="HTTP/1.0", port=8000): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the first command line + argument). + + """ + server_address = ('', port) + + HandlerClass.protocol_version = protocol + httpd = ServerClass(server_address, HandlerClass) + + sa = httpd.socket.getsockname() + print("Serving HTTP on", sa[0], "port", sa[1], "...") + try: + httpd.serve_forever() + except KeyboardInterrupt: + print("\nKeyboard interrupt received, exiting.") + httpd.server_close() + sys.exit(0) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--cgi', action='store_true', + help='Run as CGI Server') + parser.add_argument('port', action='store', + default=8000, type=int, + nargs='?', + help='Specify alternate port [default: 8000]') + args = parser.parse_args() + if args.cgi: + test(HandlerClass=CGIHTTPRequestHandler, port=args.port) + else: + test(HandlerClass=SimpleHTTPRequestHandler, port=args.port) diff --git a/lib/future/backports/misc.py b/lib/future/backports/misc.py new file mode 100644 index 00000000..098a0667 --- /dev/null +++ b/lib/future/backports/misc.py @@ -0,0 +1,944 @@ +""" +Miscellaneous function (re)definitions from the Py3.4+ standard library +for Python 2.6/2.7. + +- math.ceil (for Python 2.7) +- collections.OrderedDict (for Python 2.6) +- collections.Counter (for Python 2.6) +- collections.ChainMap (for all versions prior to Python 3.3) +- itertools.count (for Python 2.6, with step parameter) +- subprocess.check_output (for Python 2.6) +- reprlib.recursive_repr (for Python 2.6+) +- functools.cmp_to_key (for Python 2.6) +""" + +from __future__ import absolute_import + +import subprocess +from math import ceil as oldceil + +from operator import itemgetter as _itemgetter, eq as _eq +import sys +import heapq as _heapq +from _weakref import proxy as _proxy +from itertools import repeat as _repeat, chain as _chain, starmap as _starmap +from socket import getaddrinfo, SOCK_STREAM, error, socket + +from future.utils import iteritems, itervalues, PY2, PY26, PY3 + +if PY2: + from collections import Mapping, MutableMapping +else: + from collections.abc import Mapping, MutableMapping + + +def ceil(x): + """ + Return the ceiling of x as an int. + This is the smallest integral value >= x. + """ + return int(oldceil(x)) + + +######################################################################## +### reprlib.recursive_repr decorator from Py3.4 +######################################################################## + +from itertools import islice + +if PY3: + try: + from _thread import get_ident + except ImportError: + from _dummy_thread import get_ident +else: + try: + from thread import get_ident + except ImportError: + from dummy_thread import get_ident + + +def recursive_repr(fillvalue='...'): + 'Decorator to make a repr function return fillvalue for a recursive call' + + def decorating_function(user_function): + repr_running = set() + + def wrapper(self): + key = id(self), get_ident() + if key in repr_running: + return fillvalue + repr_running.add(key) + try: + result = user_function(self) + finally: + repr_running.discard(key) + return result + + # Can't use functools.wraps() here because of bootstrap issues + wrapper.__module__ = getattr(user_function, '__module__') + wrapper.__doc__ = getattr(user_function, '__doc__') + wrapper.__name__ = getattr(user_function, '__name__') + wrapper.__annotations__ = getattr(user_function, '__annotations__', {}) + return wrapper + + return decorating_function + + +################################################################################ +### OrderedDict +################################################################################ + +class _Link(object): + __slots__ = 'prev', 'next', 'key', '__weakref__' + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as regular dictionaries. + + # The internal self.__map dict maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # The sentinel is in self.__hardroot with a weakref proxy in self.__root. + # The prev links are weakref proxies (to prevent circular references). + # Individual links are kept alive by the hard reference in self.__map. + # Those hard references disappear when a key is deleted from an OrderedDict. + + def __init__(*args, **kwds): + '''Initialize an ordered dictionary. The signature is the same as + regular dictionaries, but keyword arguments are not recommended because + their insertion order is arbitrary. + + ''' + if not args: + raise TypeError("descriptor '__init__' of 'OrderedDict' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__hardroot = _Link() + self.__root = root = _proxy(self.__hardroot) + root.prev = root.next = root + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, + dict_setitem=dict.__setitem__, proxy=_proxy, Link=_Link): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link at the end of the linked list, + # and the inherited dictionary is updated with the new key/value pair. + if key not in self: + self.__map[key] = link = Link() + root = self.__root + last = root.prev + link.prev, link.next, link.key = last, root, key + last.next = link + root.prev = proxy(link) + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which gets + # removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link = self.__map.pop(key) + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + # Traverse the linked list in order. + root = self.__root + curr = root.next + while curr is not root: + yield curr.key + curr = curr.next + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + # Traverse the linked list in reverse order. + root = self.__root + curr = root.prev + while curr is not root: + yield curr.key + curr = curr.prev + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + root = self.__root + root.prev = root.next = root + self.__map.clear() + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root.prev + link_prev = link.prev + link_prev.next = root + root.prev = link_prev + else: + link = root.next + link_next = link.next + root.next = link_next + link_next.prev = root + key = link.key + del self.__map[key] + value = dict.pop(self, key) + return key, value + + def move_to_end(self, key, last=True): + '''Move an existing element to the end (or beginning if last==False). + + Raises KeyError if the element does not exist. + When last=True, acts like a fast version of self[key]=self.pop(key). + + ''' + link = self.__map[key] + link_prev = link.prev + link_next = link.next + link_prev.next = link_next + link_next.prev = link_prev + root = self.__root + if last: + last = root.prev + link.prev = last + link.next = root + last.next = root.prev = link + else: + first = root.next + link.prev = root + link.next = first + root.next = first.prev = link + + def __sizeof__(self): + sizeof = sys.getsizeof + n = len(self) + 1 # number of links including root + size = sizeof(self.__dict__) # instance dictionary + size += sizeof(self.__map) * 2 # internal dict and inherited dict + size += sizeof(self.__hardroot) * n # link objects + size += sizeof(self.__root) * n # proxy objects + return size + + update = __update = MutableMapping.update + keys = MutableMapping.keys + values = MutableMapping.values + items = MutableMapping.items + __ne__ = MutableMapping.__ne__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding + value. If key is not found, d is returned if given, otherwise KeyError + is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + @recursive_repr() + def __repr__(self): + 'od.__repr__() <==> repr(od)' + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + + def __reduce__(self): + 'Return state information for pickling' + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + return self.__class__, (), inst_dict or None, None, iter(self.items()) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. + If not specified, the value defaults to None. + + ''' + self = cls() + for key in iterable: + self[key] = value + return self + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return dict.__eq__(self, other) and all(map(_eq, self, other)) + return dict.__eq__(self, other) + + +# {{{ http://code.activestate.com/recipes/576611/ (r11) + +try: + from operator import itemgetter + from heapq import nlargest +except ImportError: + pass + +######################################################################## +### Counter +######################################################################## + +def _count_elements(mapping, iterable): + 'Tally elements from the iterable.' + mapping_get = mapping.get + for elem in iterable: + mapping[elem] = mapping_get(elem, 0) + 1 + +class Counter(dict): + '''Dict subclass for counting hashable items. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> c = Counter('abcdeabcdabcaba') # count elements from a string + + >>> c.most_common(3) # three most common elements + [('a', 5), ('b', 4), ('c', 3)] + >>> sorted(c) # list all unique elements + ['a', 'b', 'c', 'd', 'e'] + >>> ''.join(sorted(c.elements())) # list elements with repetitions + 'aaaaabbbbcccdde' + >>> sum(c.values()) # total of all counts + 15 + + >>> c['a'] # count of letter 'a' + 5 + >>> for elem in 'shazam': # update counts from an iterable + ... c[elem] += 1 # by adding 1 to each element's count + >>> c['a'] # now there are seven 'a' + 7 + >>> del c['b'] # remove all 'b' + >>> c['b'] # now there are zero 'b' + 0 + + >>> d = Counter('simsalabim') # make another counter + >>> c.update(d) # add in the second counter + >>> c['a'] # now there are nine 'a' + 9 + + >>> c.clear() # empty the counter + >>> c + Counter() + + Note: If a count is set to zero or reduced to zero, it will remain + in the counter until the entry is deleted or the counter is cleared: + + >>> c = Counter('aaabbc') + >>> c['b'] -= 2 # reduce the count of 'b' by two + >>> c.most_common() # 'b' is still in, but its count is zero + [('a', 3), ('c', 1), ('b', 0)] + + ''' + # References: + # http://en.wikipedia.org/wiki/Multiset + # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html + # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm + # http://code.activestate.com/recipes/259174/ + # Knuth, TAOCP Vol. II section 4.6.3 + + def __init__(*args, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + if not args: + raise TypeError("descriptor '__init__' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + super(Counter, self).__init__() + self.update(*args, **kwds) + + def __missing__(self, key): + 'The count of elements not in the Counter is zero.' + # Needed so that self[missing_item] does not raise KeyError + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abcdeabcdabcaba').most_common(3) + [('a', 5), ('b', 4), ('c', 3)] + + ''' + # Emulate Bag.sortedByCount from Smalltalk + if n is None: + return sorted(self.items(), key=_itemgetter(1), reverse=True) + return _heapq.nlargest(n, self.items(), key=_itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) + >>> product = 1 + >>> for factor in prime_factors.elements(): # loop over factors + ... product *= factor # and multiply them + >>> product + 1836 + + Note, if an element's count has been set to zero or is a negative + number, elements() will ignore it. + + ''' + # Emulate Bag.do from Smalltalk and Multiset.begin from C++. + return _chain.from_iterable(_starmap(_repeat, self.items())) + + # Override dict methods where necessary + + @classmethod + def fromkeys(cls, iterable, v=None): + # There is no equivalent method for counters because setting v=1 + # means that no element can have a count greater than one. + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(*args, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + # The regular dict.update() operation makes no sense here because the + # replace behavior results in the some of original untouched counts + # being mixed-in with all of the other counts for a mismash that + # doesn't have a straight-forward interpretation in most counting + # contexts. Instead, we implement straight-addition. Both the inputs + # and outputs are allowed to contain zero and negative counts. + + if not args: + raise TypeError("descriptor 'update' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + iterable = args[0] if args else None + if iterable is not None: + if isinstance(iterable, Mapping): + if self: + self_get = self.get + for elem, count in iterable.items(): + self[elem] = count + self_get(elem, 0) + else: + super(Counter, self).update(iterable) # fast path when counter is empty + else: + _count_elements(self, iterable) + if kwds: + self.update(kwds) + + def subtract(*args, **kwds): + '''Like dict.update() but subtracts counts instead of replacing them. + Counts can be reduced below zero. Both the inputs and outputs are + allowed to contain zero and negative counts. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.subtract('witch') # subtract elements from another iterable + >>> c.subtract(Counter('watch')) # subtract elements from another counter + >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch + 0 + >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch + -1 + + ''' + if not args: + raise TypeError("descriptor 'subtract' of 'Counter' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + iterable = args[0] if args else None + if iterable is not None: + self_get = self.get + if isinstance(iterable, Mapping): + for elem, count in iterable.items(): + self[elem] = self_get(elem, 0) - count + else: + for elem in iterable: + self[elem] = self_get(elem, 0) - 1 + if kwds: + self.subtract(kwds) + + def copy(self): + 'Return a shallow copy.' + return self.__class__(self) + + def __reduce__(self): + return self.__class__, (dict(self),) + + def __delitem__(self, elem): + 'Like dict.__delitem__() but does not raise KeyError for missing values.' + if elem in self: + super(Counter, self).__delitem__(elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + try: + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + except TypeError: + # handle case where values are not orderable + return '{0}({1!r})'.format(self.__class__.__name__, dict(self)) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count + other[elem] + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count > 0: + result[elem] = count + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + newcount = count - other[elem] + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count < 0: + result[elem] = 0 - count + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = other_count if count < other_count else count + if newcount > 0: + result[elem] = newcount + for elem, count in other.items(): + if elem not in self and count > 0: + result[elem] = count + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem, count in self.items(): + other_count = other[elem] + newcount = count if count < other_count else other_count + if newcount > 0: + result[elem] = newcount + return result + + def __pos__(self): + 'Adds an empty counter, effectively stripping negative and zero counts' + return self + Counter() + + def __neg__(self): + '''Subtracts from an empty counter. Strips positive and zero counts, + and flips the sign on negative counts. + + ''' + return Counter() - self + + def _keep_positive(self): + '''Internal method to strip elements with a negative or zero count''' + nonpositive = [elem for elem, count in self.items() if not count > 0] + for elem in nonpositive: + del self[elem] + return self + + def __iadd__(self, other): + '''Inplace add from another counter, keeping only positive counts. + + >>> c = Counter('abbb') + >>> c += Counter('bcc') + >>> c + Counter({'b': 4, 'c': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] += count + return self._keep_positive() + + def __isub__(self, other): + '''Inplace subtract counter, but keep only results with positive counts. + + >>> c = Counter('abbbc') + >>> c -= Counter('bccd') + >>> c + Counter({'b': 2, 'a': 1}) + + ''' + for elem, count in other.items(): + self[elem] -= count + return self._keep_positive() + + def __ior__(self, other): + '''Inplace union is the maximum of value from either counter. + + >>> c = Counter('abbb') + >>> c |= Counter('bcc') + >>> c + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + for elem, other_count in other.items(): + count = self[elem] + if other_count > count: + self[elem] = other_count + return self._keep_positive() + + def __iand__(self, other): + '''Inplace intersection is the minimum of corresponding counts. + + >>> c = Counter('abbb') + >>> c &= Counter('bcc') + >>> c + Counter({'b': 1}) + + ''' + for elem, count in self.items(): + other_count = other[elem] + if other_count < count: + self[elem] = other_count + return self._keep_positive() + + +def check_output(*popenargs, **kwargs): + """ + For Python 2.6 compatibility: see + http://stackoverflow.com/questions/4814970/ + """ + + if 'stdout' in kwargs: + raise ValueError('stdout argument not allowed, it will be overridden.') + process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs) + output, unused_err = process.communicate() + retcode = process.poll() + if retcode: + cmd = kwargs.get("args") + if cmd is None: + cmd = popenargs[0] + raise subprocess.CalledProcessError(retcode, cmd) + return output + + +def count(start=0, step=1): + """ + ``itertools.count`` in Py 2.6 doesn't accept a step + parameter. This is an enhanced version of ``itertools.count`` + for Py2.6 equivalent to ``itertools.count`` in Python 2.7+. + """ + while True: + yield start + start += step + + +######################################################################## +### ChainMap (helper for configparser and string.Template) +### From the Py3.4 source code. See also: +### https://github.com/kkxue/Py2ChainMap/blob/master/py2chainmap.py +######################################################################## + +class ChainMap(MutableMapping): + ''' A ChainMap groups multiple dicts (or other mappings) together + to create a single, updateable view. + + The underlying mappings are stored in a list. That list is public and can + accessed or updated using the *maps* attribute. There is no other state. + + Lookups search the underlying mappings successively until a key is found. + In contrast, writes, updates, and deletions only operate on the first + mapping. + + ''' + + def __init__(self, *maps): + '''Initialize a ChainMap by setting *maps* to the given mappings. + If no mappings are provided, a single empty dictionary is used. + + ''' + self.maps = list(maps) or [{}] # always at least one map + + def __missing__(self, key): + raise KeyError(key) + + def __getitem__(self, key): + for mapping in self.maps: + try: + return mapping[key] # can't use 'key in mapping' with defaultdict + except KeyError: + pass + return self.__missing__(key) # support subclasses that define __missing__ + + def get(self, key, default=None): + return self[key] if key in self else default + + def __len__(self): + return len(set().union(*self.maps)) # reuses stored hash values if possible + + def __iter__(self): + return iter(set().union(*self.maps)) + + def __contains__(self, key): + return any(key in m for m in self.maps) + + def __bool__(self): + return any(self.maps) + + # Py2 compatibility: + __nonzero__ = __bool__ + + @recursive_repr() + def __repr__(self): + return '{0.__class__.__name__}({1})'.format( + self, ', '.join(map(repr, self.maps))) + + @classmethod + def fromkeys(cls, iterable, *args): + 'Create a ChainMap with a single dict created from the iterable.' + return cls(dict.fromkeys(iterable, *args)) + + def copy(self): + 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' + return self.__class__(self.maps[0].copy(), *self.maps[1:]) + + __copy__ = copy + + def new_child(self, m=None): # like Django's Context.push() + ''' + New ChainMap with a new map followed by all previous maps. If no + map is provided, an empty dict is used. + ''' + if m is None: + m = {} + return self.__class__(m, *self.maps) + + @property + def parents(self): # like Django's Context.pop() + 'New ChainMap from maps[1:].' + return self.__class__(*self.maps[1:]) + + def __setitem__(self, key, value): + self.maps[0][key] = value + + def __delitem__(self, key): + try: + del self.maps[0][key] + except KeyError: + raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) + + def popitem(self): + 'Remove and return an item pair from maps[0]. Raise KeyError is maps[0] is empty.' + try: + return self.maps[0].popitem() + except KeyError: + raise KeyError('No keys found in the first mapping.') + + def pop(self, key, *args): + 'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].' + try: + return self.maps[0].pop(key, *args) + except KeyError: + raise KeyError('Key not found in the first mapping: {0!r}'.format(key)) + + def clear(self): + 'Clear maps[0], leaving maps[1:] intact.' + self.maps[0].clear() + + +# Re-use the same sentinel as in the Python stdlib socket module: +from socket import _GLOBAL_DEFAULT_TIMEOUT +# Was: _GLOBAL_DEFAULT_TIMEOUT = object() + + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Backport of 3-argument create_connection() for Py2.6. + + Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") + +# Backport from Py2.7 for Py2.6: +def cmp_to_key(mycmp): + """Convert a cmp= function into a key= function""" + class K(object): + __slots__ = ['obj'] + def __init__(self, obj, *args): + self.obj = obj + def __lt__(self, other): + return mycmp(self.obj, other.obj) < 0 + def __gt__(self, other): + return mycmp(self.obj, other.obj) > 0 + def __eq__(self, other): + return mycmp(self.obj, other.obj) == 0 + def __le__(self, other): + return mycmp(self.obj, other.obj) <= 0 + def __ge__(self, other): + return mycmp(self.obj, other.obj) >= 0 + def __ne__(self, other): + return mycmp(self.obj, other.obj) != 0 + def __hash__(self): + raise TypeError('hash not implemented') + return K + +# Back up our definitions above in case they're useful +_OrderedDict = OrderedDict +_Counter = Counter +_check_output = check_output +_count = count +_ceil = ceil +__count_elements = _count_elements +_recursive_repr = recursive_repr +_ChainMap = ChainMap +_create_connection = create_connection +_cmp_to_key = cmp_to_key + +# Overwrite the definitions above with the usual ones +# from the standard library: +if sys.version_info >= (2, 7): + from collections import OrderedDict, Counter + from itertools import count + from functools import cmp_to_key + try: + from subprocess import check_output + except ImportError: + # Not available. This happens with Google App Engine: see issue #231 + pass + from socket import create_connection + +if sys.version_info >= (3, 0): + from math import ceil + from collections import _count_elements + +if sys.version_info >= (3, 3): + from reprlib import recursive_repr + from collections import ChainMap diff --git a/lib/future/backports/socket.py b/lib/future/backports/socket.py new file mode 100644 index 00000000..930e1dae --- /dev/null +++ b/lib/future/backports/socket.py @@ -0,0 +1,454 @@ +# Wrapper module for _socket, providing some additional facilities +# implemented in Python. + +"""\ +This module provides socket operations and some related functions. +On Unix, it supports IP (Internet Protocol) and Unix domain sockets. +On other systems, it only supports IP. Functions specific for a +socket are available as methods of the socket object. + +Functions: + +socket() -- create a new socket object +socketpair() -- create a pair of new socket objects [*] +fromfd() -- create a socket object from an open file descriptor [*] +fromshare() -- create a socket object from data received from socket.share() [*] +gethostname() -- return the current hostname +gethostbyname() -- map a hostname to its IP number +gethostbyaddr() -- map an IP number or hostname to DNS info +getservbyname() -- map a service name and a protocol name to a port number +getprotobyname() -- map a protocol name (e.g. 'tcp') to a number +ntohs(), ntohl() -- convert 16, 32 bit int from network to host byte order +htons(), htonl() -- convert 16, 32 bit int from host to network byte order +inet_aton() -- convert IP addr string (123.45.67.89) to 32-bit packed format +inet_ntoa() -- convert 32-bit packed format IP to string (123.45.67.89) +socket.getdefaulttimeout() -- get the default timeout value +socket.setdefaulttimeout() -- set the default timeout value +create_connection() -- connects to an address, with an optional timeout and + optional source address. + + [*] not available on all platforms! + +Special objects: + +SocketType -- type object for socket objects +error -- exception raised for I/O errors +has_ipv6 -- boolean value indicating if IPv6 is supported + +Integer constants: + +AF_INET, AF_UNIX -- socket domains (first argument to socket() call) +SOCK_STREAM, SOCK_DGRAM, SOCK_RAW -- socket types (second argument) + +Many other constants may be defined; these may be used in calls to +the setsockopt() and getsockopt() methods. +""" + +from __future__ import unicode_literals +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from future.builtins import super + +import _socket +from _socket import * + +import os, sys, io + +try: + import errno +except ImportError: + errno = None +EBADF = getattr(errno, 'EBADF', 9) +EAGAIN = getattr(errno, 'EAGAIN', 11) +EWOULDBLOCK = getattr(errno, 'EWOULDBLOCK', 11) + +__all__ = ["getfqdn", "create_connection"] +__all__.extend(os._get_exports_list(_socket)) + + +_realsocket = socket + +# WSA error codes +if sys.platform.lower().startswith("win"): + errorTab = {} + errorTab[10004] = "The operation was interrupted." + errorTab[10009] = "A bad file handle was passed." + errorTab[10013] = "Permission denied." + errorTab[10014] = "A fault occurred on the network??" # WSAEFAULT + errorTab[10022] = "An invalid operation was attempted." + errorTab[10035] = "The socket operation would block" + errorTab[10036] = "A blocking operation is already in progress." + errorTab[10048] = "The network address is in use." + errorTab[10054] = "The connection has been reset." + errorTab[10058] = "The network has been shut down." + errorTab[10060] = "The operation timed out." + errorTab[10061] = "Connection refused." + errorTab[10063] = "The name is too long." + errorTab[10064] = "The host is down." + errorTab[10065] = "The host is unreachable." + __all__.append("errorTab") + + +class socket(_socket.socket): + + """A subclass of _socket.socket adding the makefile() method.""" + + __slots__ = ["__weakref__", "_io_refs", "_closed"] + + def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0, fileno=None): + if fileno is None: + _socket.socket.__init__(self, family, type, proto) + else: + _socket.socket.__init__(self, family, type, proto, fileno) + self._io_refs = 0 + self._closed = False + + def __enter__(self): + return self + + def __exit__(self, *args): + if not self._closed: + self.close() + + def __repr__(self): + """Wrap __repr__() to reveal the real class name.""" + s = _socket.socket.__repr__(self) + if s.startswith(" socket object + + Return a new socket object connected to the same system resource. + """ + fd = dup(self.fileno()) + sock = self.__class__(self.family, self.type, self.proto, fileno=fd) + sock.settimeout(self.gettimeout()) + return sock + + def accept(self): + """accept() -> (socket object, address info) + + Wait for an incoming connection. Return a new socket + representing the connection, and the address of the client. + For IP sockets, the address info is a pair (hostaddr, port). + """ + fd, addr = self._accept() + sock = socket(self.family, self.type, self.proto, fileno=fd) + # Issue #7995: if no default timeout is set and the listening + # socket had a (non-zero) timeout, force the new socket in blocking + # mode to override platform-specific socket flags inheritance. + if getdefaulttimeout() is None and self.gettimeout(): + sock.setblocking(True) + return sock, addr + + def makefile(self, mode="r", buffering=None, **_3to2kwargs): + """makefile(...) -> an I/O stream connected to the socket + + The arguments are as for io.open() after the filename, + except the only mode characters supported are 'r', 'w' and 'b'. + The semantics are similar too. (XXX refactor to share code?) + """ + if 'newline' in _3to2kwargs: newline = _3to2kwargs['newline']; del _3to2kwargs['newline'] + else: newline = None + if 'errors' in _3to2kwargs: errors = _3to2kwargs['errors']; del _3to2kwargs['errors'] + else: errors = None + if 'encoding' in _3to2kwargs: encoding = _3to2kwargs['encoding']; del _3to2kwargs['encoding'] + else: encoding = None + for c in mode: + if c not in ("r", "w", "b"): + raise ValueError("invalid mode %r (only r, w, b allowed)") + writing = "w" in mode + reading = "r" in mode or not writing + assert reading or writing + binary = "b" in mode + rawmode = "" + if reading: + rawmode += "r" + if writing: + rawmode += "w" + raw = SocketIO(self, rawmode) + self._io_refs += 1 + if buffering is None: + buffering = -1 + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE + if buffering == 0: + if not binary: + raise ValueError("unbuffered streams must be binary") + return raw + if reading and writing: + buffer = io.BufferedRWPair(raw, raw, buffering) + elif reading: + buffer = io.BufferedReader(raw, buffering) + else: + assert writing + buffer = io.BufferedWriter(raw, buffering) + if binary: + return buffer + text = io.TextIOWrapper(buffer, encoding, errors, newline) + text.mode = mode + return text + + def _decref_socketios(self): + if self._io_refs > 0: + self._io_refs -= 1 + if self._closed: + self.close() + + def _real_close(self, _ss=_socket.socket): + # This function should not reference any globals. See issue #808164. + _ss.close(self) + + def close(self): + # This function should not reference any globals. See issue #808164. + self._closed = True + if self._io_refs <= 0: + self._real_close() + + def detach(self): + """detach() -> file descriptor + + Close the socket object without closing the underlying file descriptor. + The object cannot be used after this call, but the file descriptor + can be reused for other purposes. The file descriptor is returned. + """ + self._closed = True + return super().detach() + +def fromfd(fd, family, type, proto=0): + """ fromfd(fd, family, type[, proto]) -> socket object + + Create a socket object from a duplicate of the given file + descriptor. The remaining arguments are the same as for socket(). + """ + nfd = dup(fd) + return socket(family, type, proto, nfd) + +if hasattr(_socket.socket, "share"): + def fromshare(info): + """ fromshare(info) -> socket object + + Create a socket object from a the bytes object returned by + socket.share(pid). + """ + return socket(0, 0, 0, info) + +if hasattr(_socket, "socketpair"): + + def socketpair(family=None, type=SOCK_STREAM, proto=0): + """socketpair([family[, type[, proto]]]) -> (socket object, socket object) + + Create a pair of socket objects from the sockets returned by the platform + socketpair() function. + The arguments are the same as for socket() except the default family is + AF_UNIX if defined on the platform; otherwise, the default is AF_INET. + """ + if family is None: + try: + family = AF_UNIX + except NameError: + family = AF_INET + a, b = _socket.socketpair(family, type, proto) + a = socket(family, type, proto, a.detach()) + b = socket(family, type, proto, b.detach()) + return a, b + + +_blocking_errnos = set([EAGAIN, EWOULDBLOCK]) + +class SocketIO(io.RawIOBase): + + """Raw I/O implementation for stream sockets. + + This class supports the makefile() method on sockets. It provides + the raw I/O interface on top of a socket object. + """ + + # One might wonder why not let FileIO do the job instead. There are two + # main reasons why FileIO is not adapted: + # - it wouldn't work under Windows (where you can't used read() and + # write() on a socket handle) + # - it wouldn't work with socket timeouts (FileIO would ignore the + # timeout and consider the socket non-blocking) + + # XXX More docs + + def __init__(self, sock, mode): + if mode not in ("r", "w", "rw", "rb", "wb", "rwb"): + raise ValueError("invalid mode: %r" % mode) + io.RawIOBase.__init__(self) + self._sock = sock + if "b" not in mode: + mode += "b" + self._mode = mode + self._reading = "r" in mode + self._writing = "w" in mode + self._timeout_occurred = False + + def readinto(self, b): + """Read up to len(b) bytes into the writable buffer *b* and return + the number of bytes read. If the socket is non-blocking and no bytes + are available, None is returned. + + If *b* is non-empty, a 0 return value indicates that the connection + was shutdown at the other end. + """ + self._checkClosed() + self._checkReadable() + if self._timeout_occurred: + raise IOError("cannot read from timed out object") + while True: + try: + return self._sock.recv_into(b) + except timeout: + self._timeout_occurred = True + raise + # except InterruptedError: + # continue + except error as e: + if e.args[0] in _blocking_errnos: + return None + raise + + def write(self, b): + """Write the given bytes or bytearray object *b* to the socket + and return the number of bytes written. This can be less than + len(b) if not all data could be written. If the socket is + non-blocking and no bytes could be written None is returned. + """ + self._checkClosed() + self._checkWritable() + try: + return self._sock.send(b) + except error as e: + # XXX what about EINTR? + if e.args[0] in _blocking_errnos: + return None + raise + + def readable(self): + """True if the SocketIO is open for reading. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._reading + + def writable(self): + """True if the SocketIO is open for writing. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return self._writing + + def seekable(self): + """True if the SocketIO is open for seeking. + """ + if self.closed: + raise ValueError("I/O operation on closed socket.") + return super().seekable() + + def fileno(self): + """Return the file descriptor of the underlying socket. + """ + self._checkClosed() + return self._sock.fileno() + + @property + def name(self): + if not self.closed: + return self.fileno() + else: + return -1 + + @property + def mode(self): + return self._mode + + def close(self): + """Close the SocketIO object. This doesn't close the underlying + socket, except if all references to it have disappeared. + """ + if self.closed: + return + io.RawIOBase.close(self) + self._sock._decref_socketios() + self._sock = None + + +def getfqdn(name=''): + """Get fully qualified domain name from name. + + An empty argument is interpreted as meaning the local host. + + First the hostname returned by gethostbyaddr() is checked, then + possibly existing aliases. In case no FQDN is available, hostname + from gethostname() is returned. + """ + name = name.strip() + if not name or name == '0.0.0.0': + name = gethostname() + try: + hostname, aliases, ipaddrs = gethostbyaddr(name) + except error: + pass + else: + aliases.insert(0, hostname) + for name in aliases: + if '.' in name: + break + else: + name = hostname + return name + + +# Re-use the same sentinel as in the Python stdlib socket module: +from socket import _GLOBAL_DEFAULT_TIMEOUT +# Was: _GLOBAL_DEFAULT_TIMEOUT = object() + + +def create_connection(address, timeout=_GLOBAL_DEFAULT_TIMEOUT, + source_address=None): + """Connect to *address* and return the socket object. + + Convenience function. Connect to *address* (a 2-tuple ``(host, + port)``) and return the socket object. Passing the optional + *timeout* parameter will set the timeout on the socket instance + before attempting to connect. If no *timeout* is supplied, the + global default timeout setting returned by :func:`getdefaulttimeout` + is used. If *source_address* is set it must be a tuple of (host, port) + for the socket to bind as a source address before making the connection. + An host of '' or port 0 tells the OS to use the default. + """ + + host, port = address + err = None + for res in getaddrinfo(host, port, 0, SOCK_STREAM): + af, socktype, proto, canonname, sa = res + sock = None + try: + sock = socket(af, socktype, proto) + if timeout is not _GLOBAL_DEFAULT_TIMEOUT: + sock.settimeout(timeout) + if source_address: + sock.bind(source_address) + sock.connect(sa) + return sock + + except error as _: + err = _ + if sock is not None: + sock.close() + + if err is not None: + raise err + else: + raise error("getaddrinfo returns an empty list") diff --git a/lib/future/backports/socketserver.py b/lib/future/backports/socketserver.py new file mode 100644 index 00000000..d1e24a6d --- /dev/null +++ b/lib/future/backports/socketserver.py @@ -0,0 +1,747 @@ +"""Generic socket server classes. + +This module tries to capture the various aspects of defining a server: + +For socket-based servers: + +- address family: + - AF_INET{,6}: IP (Internet Protocol) sockets (default) + - AF_UNIX: Unix domain sockets + - others, e.g. AF_DECNET are conceivable (see +- socket type: + - SOCK_STREAM (reliable stream, e.g. TCP) + - SOCK_DGRAM (datagrams, e.g. UDP) + +For request-based servers (including socket-based): + +- client address verification before further looking at the request + (This is actually a hook for any processing that needs to look + at the request before anything else, e.g. logging) +- how to handle multiple requests: + - synchronous (one request is handled at a time) + - forking (each request is handled by a new process) + - threading (each request is handled by a new thread) + +The classes in this module favor the server type that is simplest to +write: a synchronous TCP/IP server. This is bad class design, but +save some typing. (There's also the issue that a deep class hierarchy +slows down method lookups.) + +There are five classes in an inheritance diagram, four of which represent +synchronous servers of four types: + + +------------+ + | BaseServer | + +------------+ + | + v + +-----------+ +------------------+ + | TCPServer |------->| UnixStreamServer | + +-----------+ +------------------+ + | + v + +-----------+ +--------------------+ + | UDPServer |------->| UnixDatagramServer | + +-----------+ +--------------------+ + +Note that UnixDatagramServer derives from UDPServer, not from +UnixStreamServer -- the only difference between an IP and a Unix +stream server is the address family, which is simply repeated in both +unix server classes. + +Forking and threading versions of each type of server can be created +using the ForkingMixIn and ThreadingMixIn mix-in classes. For +instance, a threading UDP server class is created as follows: + + class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass + +The Mix-in class must come first, since it overrides a method defined +in UDPServer! Setting the various member variables also changes +the behavior of the underlying server mechanism. + +To implement a service, you must derive a class from +BaseRequestHandler and redefine its handle() method. You can then run +various versions of the service by combining one of the server classes +with your request handler class. + +The request handler class must be different for datagram or stream +services. This can be hidden by using the request handler +subclasses StreamRequestHandler or DatagramRequestHandler. + +Of course, you still have to use your head! + +For instance, it makes no sense to use a forking server if the service +contains state in memory that can be modified by requests (since the +modifications in the child process would never reach the initial state +kept in the parent process and passed to each child). In this case, +you can use a threading server, but you will probably have to use +locks to avoid two requests that come in nearly simultaneous to apply +conflicting changes to the server state. + +On the other hand, if you are building e.g. an HTTP server, where all +data is stored externally (e.g. in the file system), a synchronous +class will essentially render the service "deaf" while one request is +being handled -- which may be for a very long time if a client is slow +to read all the data it has requested. Here a threading or forking +server is appropriate. + +In some cases, it may be appropriate to process part of a request +synchronously, but to finish processing in a forked child depending on +the request data. This can be implemented by using a synchronous +server and doing an explicit fork in the request handler class +handle() method. + +Another approach to handling multiple simultaneous requests in an +environment that supports neither threads nor fork (or where these are +too expensive or inappropriate for the service) is to maintain an +explicit table of partially finished requests and to use select() to +decide which request to work on next (or whether to handle a new +incoming request). This is particularly important for stream services +where each client can potentially be connected for a long time (if +threads or subprocesses cannot be used). + +Future work: +- Standard classes for Sun RPC (which uses either UDP or TCP) +- Standard mix-in classes to implement various authentication + and encryption schemes +- Standard framework for select-based multiplexing + +XXX Open problems: +- What to do with out-of-band data? + +BaseServer: +- split generic "request" functionality out into BaseServer class. + Copyright (C) 2000 Luke Kenneth Casson Leighton + + example: read entries from a SQL database (requires overriding + get_request() to return a table entry from the database). + entry is processed by a RequestHandlerClass. + +""" + +# Author of the BaseServer patch: Luke Kenneth Casson Leighton + +# XXX Warning! +# There is a test suite for this module, but it cannot be run by the +# standard regression test. +# To run it manually, run Lib/test/test_socketserver.py. + +from __future__ import (absolute_import, print_function) + +__version__ = "0.4" + + +import socket +import select +import sys +import os +import errno +try: + import threading +except ImportError: + import dummy_threading as threading + +__all__ = ["TCPServer","UDPServer","ForkingUDPServer","ForkingTCPServer", + "ThreadingUDPServer","ThreadingTCPServer","BaseRequestHandler", + "StreamRequestHandler","DatagramRequestHandler", + "ThreadingMixIn", "ForkingMixIn"] +if hasattr(socket, "AF_UNIX"): + __all__.extend(["UnixStreamServer","UnixDatagramServer", + "ThreadingUnixStreamServer", + "ThreadingUnixDatagramServer"]) + +def _eintr_retry(func, *args): + """restart a system call interrupted by EINTR""" + while True: + try: + return func(*args) + except OSError as e: + if e.errno != errno.EINTR: + raise + +class BaseServer(object): + + """Base class for server classes. + + Methods for the caller: + + - __init__(server_address, RequestHandlerClass) + - serve_forever(poll_interval=0.5) + - shutdown() + - handle_request() # if you do not use serve_forever() + - fileno() -> int # for select() + + Methods that may be overridden: + + - server_bind() + - server_activate() + - get_request() -> request, client_address + - handle_timeout() + - verify_request(request, client_address) + - server_close() + - process_request(request, client_address) + - shutdown_request(request) + - close_request(request) + - service_actions() + - handle_error() + + Methods for derived classes: + + - finish_request(request, client_address) + + Class variables that may be overridden by derived classes or + instances: + + - timeout + - address_family + - socket_type + - allow_reuse_address + + Instance variables: + + - RequestHandlerClass + - socket + + """ + + timeout = None + + def __init__(self, server_address, RequestHandlerClass): + """Constructor. May be extended, do not override.""" + self.server_address = server_address + self.RequestHandlerClass = RequestHandlerClass + self.__is_shut_down = threading.Event() + self.__shutdown_request = False + + def server_activate(self): + """Called by constructor to activate the server. + + May be overridden. + + """ + pass + + def serve_forever(self, poll_interval=0.5): + """Handle one request at a time until shutdown. + + Polls for shutdown every poll_interval seconds. Ignores + self.timeout. If you need to do periodic tasks, do them in + another thread. + """ + self.__is_shut_down.clear() + try: + while not self.__shutdown_request: + # XXX: Consider using another file descriptor or + # connecting to the socket to wake this up instead of + # polling. Polling reduces our responsiveness to a + # shutdown request and wastes cpu at all other times. + r, w, e = _eintr_retry(select.select, [self], [], [], + poll_interval) + if self in r: + self._handle_request_noblock() + + self.service_actions() + finally: + self.__shutdown_request = False + self.__is_shut_down.set() + + def shutdown(self): + """Stops the serve_forever loop. + + Blocks until the loop has finished. This must be called while + serve_forever() is running in another thread, or it will + deadlock. + """ + self.__shutdown_request = True + self.__is_shut_down.wait() + + def service_actions(self): + """Called by the serve_forever() loop. + + May be overridden by a subclass / Mixin to implement any code that + needs to be run during the loop. + """ + pass + + # The distinction between handling, getting, processing and + # finishing a request is fairly arbitrary. Remember: + # + # - handle_request() is the top-level call. It calls + # select, get_request(), verify_request() and process_request() + # - get_request() is different for stream or datagram sockets + # - process_request() is the place that may fork a new process + # or create a new thread to finish the request + # - finish_request() instantiates the request handler class; + # this constructor will handle the request all by itself + + def handle_request(self): + """Handle one request, possibly blocking. + + Respects self.timeout. + """ + # Support people who used socket.settimeout() to escape + # handle_request before self.timeout was available. + timeout = self.socket.gettimeout() + if timeout is None: + timeout = self.timeout + elif self.timeout is not None: + timeout = min(timeout, self.timeout) + fd_sets = _eintr_retry(select.select, [self], [], [], timeout) + if not fd_sets[0]: + self.handle_timeout() + return + self._handle_request_noblock() + + def _handle_request_noblock(self): + """Handle one request, without blocking. + + I assume that select.select has returned that the socket is + readable before this function was called, so there should be + no risk of blocking in get_request(). + """ + try: + request, client_address = self.get_request() + except socket.error: + return + if self.verify_request(request, client_address): + try: + self.process_request(request, client_address) + except: + self.handle_error(request, client_address) + self.shutdown_request(request) + + def handle_timeout(self): + """Called if no new request arrives within self.timeout. + + Overridden by ForkingMixIn. + """ + pass + + def verify_request(self, request, client_address): + """Verify the request. May be overridden. + + Return True if we should proceed with this request. + + """ + return True + + def process_request(self, request, client_address): + """Call finish_request. + + Overridden by ForkingMixIn and ThreadingMixIn. + + """ + self.finish_request(request, client_address) + self.shutdown_request(request) + + def server_close(self): + """Called to clean-up the server. + + May be overridden. + + """ + pass + + def finish_request(self, request, client_address): + """Finish one request by instantiating RequestHandlerClass.""" + self.RequestHandlerClass(request, client_address, self) + + def shutdown_request(self, request): + """Called to shutdown and close an individual request.""" + self.close_request(request) + + def close_request(self, request): + """Called to clean up an individual request.""" + pass + + def handle_error(self, request, client_address): + """Handle an error gracefully. May be overridden. + + The default is to print a traceback and continue. + + """ + print('-'*40) + print('Exception happened during processing of request from', end=' ') + print(client_address) + import traceback + traceback.print_exc() # XXX But this goes to stderr! + print('-'*40) + + +class TCPServer(BaseServer): + + """Base class for various socket-based server classes. + + Defaults to synchronous IP stream (i.e., TCP). + + Methods for the caller: + + - __init__(server_address, RequestHandlerClass, bind_and_activate=True) + - serve_forever(poll_interval=0.5) + - shutdown() + - handle_request() # if you don't use serve_forever() + - fileno() -> int # for select() + + Methods that may be overridden: + + - server_bind() + - server_activate() + - get_request() -> request, client_address + - handle_timeout() + - verify_request(request, client_address) + - process_request(request, client_address) + - shutdown_request(request) + - close_request(request) + - handle_error() + + Methods for derived classes: + + - finish_request(request, client_address) + + Class variables that may be overridden by derived classes or + instances: + + - timeout + - address_family + - socket_type + - request_queue_size (only for stream sockets) + - allow_reuse_address + + Instance variables: + + - server_address + - RequestHandlerClass + - socket + + """ + + address_family = socket.AF_INET + + socket_type = socket.SOCK_STREAM + + request_queue_size = 5 + + allow_reuse_address = False + + def __init__(self, server_address, RequestHandlerClass, bind_and_activate=True): + """Constructor. May be extended, do not override.""" + BaseServer.__init__(self, server_address, RequestHandlerClass) + self.socket = socket.socket(self.address_family, + self.socket_type) + if bind_and_activate: + self.server_bind() + self.server_activate() + + def server_bind(self): + """Called by constructor to bind the socket. + + May be overridden. + + """ + if self.allow_reuse_address: + self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + self.socket.bind(self.server_address) + self.server_address = self.socket.getsockname() + + def server_activate(self): + """Called by constructor to activate the server. + + May be overridden. + + """ + self.socket.listen(self.request_queue_size) + + def server_close(self): + """Called to clean-up the server. + + May be overridden. + + """ + self.socket.close() + + def fileno(self): + """Return socket file number. + + Interface required by select(). + + """ + return self.socket.fileno() + + def get_request(self): + """Get the request and client address from the socket. + + May be overridden. + + """ + return self.socket.accept() + + def shutdown_request(self, request): + """Called to shutdown and close an individual request.""" + try: + #explicitly shutdown. socket.close() merely releases + #the socket and waits for GC to perform the actual close. + request.shutdown(socket.SHUT_WR) + except socket.error: + pass #some platforms may raise ENOTCONN here + self.close_request(request) + + def close_request(self, request): + """Called to clean up an individual request.""" + request.close() + + +class UDPServer(TCPServer): + + """UDP server class.""" + + allow_reuse_address = False + + socket_type = socket.SOCK_DGRAM + + max_packet_size = 8192 + + def get_request(self): + data, client_addr = self.socket.recvfrom(self.max_packet_size) + return (data, self.socket), client_addr + + def server_activate(self): + # No need to call listen() for UDP. + pass + + def shutdown_request(self, request): + # No need to shutdown anything. + self.close_request(request) + + def close_request(self, request): + # No need to close anything. + pass + +class ForkingMixIn(object): + + """Mix-in class to handle each request in a new process.""" + + timeout = 300 + active_children = None + max_children = 40 + + def collect_children(self): + """Internal routine to wait for children that have exited.""" + if self.active_children is None: return + while len(self.active_children) >= self.max_children: + # XXX: This will wait for any child process, not just ones + # spawned by this library. This could confuse other + # libraries that expect to be able to wait for their own + # children. + try: + pid, status = os.waitpid(0, 0) + except os.error: + pid = None + if pid not in self.active_children: continue + self.active_children.remove(pid) + + # XXX: This loop runs more system calls than it ought + # to. There should be a way to put the active_children into a + # process group and then use os.waitpid(-pgid) to wait for any + # of that set, but I couldn't find a way to allocate pgids + # that couldn't collide. + for child in self.active_children: + try: + pid, status = os.waitpid(child, os.WNOHANG) + except os.error: + pid = None + if not pid: continue + try: + self.active_children.remove(pid) + except ValueError as e: + raise ValueError('%s. x=%d and list=%r' % (e.message, pid, + self.active_children)) + + def handle_timeout(self): + """Wait for zombies after self.timeout seconds of inactivity. + + May be extended, do not override. + """ + self.collect_children() + + def service_actions(self): + """Collect the zombie child processes regularly in the ForkingMixIn. + + service_actions is called in the BaseServer's serve_forver loop. + """ + self.collect_children() + + def process_request(self, request, client_address): + """Fork a new subprocess to process the request.""" + pid = os.fork() + if pid: + # Parent process + if self.active_children is None: + self.active_children = [] + self.active_children.append(pid) + self.close_request(request) + return + else: + # Child process. + # This must never return, hence os._exit()! + try: + self.finish_request(request, client_address) + self.shutdown_request(request) + os._exit(0) + except: + try: + self.handle_error(request, client_address) + self.shutdown_request(request) + finally: + os._exit(1) + + +class ThreadingMixIn(object): + """Mix-in class to handle each request in a new thread.""" + + # Decides how threads will act upon termination of the + # main process + daemon_threads = False + + def process_request_thread(self, request, client_address): + """Same as in BaseServer but as a thread. + + In addition, exception handling is done here. + + """ + try: + self.finish_request(request, client_address) + self.shutdown_request(request) + except: + self.handle_error(request, client_address) + self.shutdown_request(request) + + def process_request(self, request, client_address): + """Start a new thread to process the request.""" + t = threading.Thread(target = self.process_request_thread, + args = (request, client_address)) + t.daemon = self.daemon_threads + t.start() + + +class ForkingUDPServer(ForkingMixIn, UDPServer): pass +class ForkingTCPServer(ForkingMixIn, TCPServer): pass + +class ThreadingUDPServer(ThreadingMixIn, UDPServer): pass +class ThreadingTCPServer(ThreadingMixIn, TCPServer): pass + +if hasattr(socket, 'AF_UNIX'): + + class UnixStreamServer(TCPServer): + address_family = socket.AF_UNIX + + class UnixDatagramServer(UDPServer): + address_family = socket.AF_UNIX + + class ThreadingUnixStreamServer(ThreadingMixIn, UnixStreamServer): pass + + class ThreadingUnixDatagramServer(ThreadingMixIn, UnixDatagramServer): pass + +class BaseRequestHandler(object): + + """Base class for request handler classes. + + This class is instantiated for each request to be handled. The + constructor sets the instance variables request, client_address + and server, and then calls the handle() method. To implement a + specific service, all you need to do is to derive a class which + defines a handle() method. + + The handle() method can find the request as self.request, the + client address as self.client_address, and the server (in case it + needs access to per-server information) as self.server. Since a + separate instance is created for each request, the handle() method + can define arbitrary other instance variariables. + + """ + + def __init__(self, request, client_address, server): + self.request = request + self.client_address = client_address + self.server = server + self.setup() + try: + self.handle() + finally: + self.finish() + + def setup(self): + pass + + def handle(self): + pass + + def finish(self): + pass + + +# The following two classes make it possible to use the same service +# class for stream or datagram servers. +# Each class sets up these instance variables: +# - rfile: a file object from which receives the request is read +# - wfile: a file object to which the reply is written +# When the handle() method returns, wfile is flushed properly + + +class StreamRequestHandler(BaseRequestHandler): + + """Define self.rfile and self.wfile for stream sockets.""" + + # Default buffer sizes for rfile, wfile. + # We default rfile to buffered because otherwise it could be + # really slow for large data (a getc() call per byte); we make + # wfile unbuffered because (a) often after a write() we want to + # read and we need to flush the line; (b) big writes to unbuffered + # files are typically optimized by stdio even when big reads + # aren't. + rbufsize = -1 + wbufsize = 0 + + # A timeout to apply to the request socket, if not None. + timeout = None + + # Disable nagle algorithm for this socket, if True. + # Use only when wbufsize != 0, to avoid small packets. + disable_nagle_algorithm = False + + def setup(self): + self.connection = self.request + if self.timeout is not None: + self.connection.settimeout(self.timeout) + if self.disable_nagle_algorithm: + self.connection.setsockopt(socket.IPPROTO_TCP, + socket.TCP_NODELAY, True) + self.rfile = self.connection.makefile('rb', self.rbufsize) + self.wfile = self.connection.makefile('wb', self.wbufsize) + + def finish(self): + if not self.wfile.closed: + try: + self.wfile.flush() + except socket.error: + # An final socket error may have occurred here, such as + # the local error ECONNABORTED. + pass + self.wfile.close() + self.rfile.close() + + +class DatagramRequestHandler(BaseRequestHandler): + + # XXX Regrettably, I cannot get this working on Linux; + # s.recvfrom() doesn't return a meaningful client address. + + """Define self.rfile and self.wfile for datagram sockets.""" + + def setup(self): + from io import BytesIO + self.packet, self.socket = self.request + self.rfile = BytesIO(self.packet) + self.wfile = BytesIO() + + def finish(self): + self.socket.sendto(self.wfile.getvalue(), self.client_address) diff --git a/lib/future/backports/test/__init__.py b/lib/future/backports/test/__init__.py new file mode 100644 index 00000000..0bba5e69 --- /dev/null +++ b/lib/future/backports/test/__init__.py @@ -0,0 +1,9 @@ +""" +test package backported for python-future. + +Its primary purpose is to allow use of "import test.support" for running +the Python standard library unit tests using the new Python 3 stdlib +import location. + +Python 3 renamed test.test_support to test.support. +""" diff --git a/lib/future/backports/test/badcert.pem b/lib/future/backports/test/badcert.pem new file mode 100644 index 00000000..c4191460 --- /dev/null +++ b/lib/future/backports/test/badcert.pem @@ -0,0 +1,36 @@ +-----BEGIN RSA PRIVATE KEY----- +MIICXwIBAAKBgQC8ddrhm+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9L +opdJhTvbGfEj0DQs1IE8M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVH +fhi/VwovESJlaBOp+WMnfhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQAB +AoGBAK0FZpaKj6WnJZN0RqhhK+ggtBWwBnc0U/ozgKz2j1s3fsShYeiGtW6CK5nU +D1dZ5wzhbGThI7LiOXDvRucc9n7vUgi0alqPQ/PFodPxAN/eEYkmXQ7W2k7zwsDA +IUK0KUhktQbLu8qF/m8qM86ba9y9/9YkXuQbZ3COl5ahTZrhAkEA301P08RKv3KM +oXnGU2UHTuJ1MAD2hOrPxjD4/wxA/39EWG9bZczbJyggB4RHu0I3NOSFjAm3HQm0 +ANOu5QK9owJBANgOeLfNNcF4pp+UikRFqxk5hULqRAWzVxVrWe85FlPm0VVmHbb/ +loif7mqjU8o1jTd/LM7RD9f2usZyE2psaw8CQQCNLhkpX3KO5kKJmS9N7JMZSc4j +oog58yeYO8BBqKKzpug0LXuQultYv2K4veaIO04iL9VLe5z9S/Q1jaCHBBuXAkEA +z8gjGoi1AOp6PBBLZNsncCvcV/0aC+1se4HxTNo2+duKSDnbq+ljqOM+E7odU+Nq +ewvIWOG//e8fssd0mq3HywJBAJ8l/c8GVmrpFTx8r/nZ2Pyyjt3dH1widooDXYSV +q6Gbf41Llo5sYAtmxdndTLASuHKecacTgZVhy0FryZpLKrU= +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +Just bad cert data +-----END CERTIFICATE----- +-----BEGIN RSA PRIVATE KEY----- +MIICXwIBAAKBgQC8ddrhm+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9L +opdJhTvbGfEj0DQs1IE8M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVH +fhi/VwovESJlaBOp+WMnfhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQAB +AoGBAK0FZpaKj6WnJZN0RqhhK+ggtBWwBnc0U/ozgKz2j1s3fsShYeiGtW6CK5nU +D1dZ5wzhbGThI7LiOXDvRucc9n7vUgi0alqPQ/PFodPxAN/eEYkmXQ7W2k7zwsDA +IUK0KUhktQbLu8qF/m8qM86ba9y9/9YkXuQbZ3COl5ahTZrhAkEA301P08RKv3KM +oXnGU2UHTuJ1MAD2hOrPxjD4/wxA/39EWG9bZczbJyggB4RHu0I3NOSFjAm3HQm0 +ANOu5QK9owJBANgOeLfNNcF4pp+UikRFqxk5hULqRAWzVxVrWe85FlPm0VVmHbb/ +loif7mqjU8o1jTd/LM7RD9f2usZyE2psaw8CQQCNLhkpX3KO5kKJmS9N7JMZSc4j +oog58yeYO8BBqKKzpug0LXuQultYv2K4veaIO04iL9VLe5z9S/Q1jaCHBBuXAkEA +z8gjGoi1AOp6PBBLZNsncCvcV/0aC+1se4HxTNo2+duKSDnbq+ljqOM+E7odU+Nq +ewvIWOG//e8fssd0mq3HywJBAJ8l/c8GVmrpFTx8r/nZ2Pyyjt3dH1widooDXYSV +q6Gbf41Llo5sYAtmxdndTLASuHKecacTgZVhy0FryZpLKrU= +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +Just bad cert data +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/badkey.pem b/lib/future/backports/test/badkey.pem new file mode 100644 index 00000000..1c8a9557 --- /dev/null +++ b/lib/future/backports/test/badkey.pem @@ -0,0 +1,40 @@ +-----BEGIN RSA PRIVATE KEY----- +Bad Key, though the cert should be OK +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICpzCCAhCgAwIBAgIJAP+qStv1cIGNMA0GCSqGSIb3DQEBBQUAMIGJMQswCQYD +VQQGEwJVUzERMA8GA1UECBMIRGVsYXdhcmUxEzARBgNVBAcTCldpbG1pbmd0b24x +IzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9uMQwwCgYDVQQLEwNT +U0wxHzAdBgNVBAMTFnNvbWVtYWNoaW5lLnB5dGhvbi5vcmcwHhcNMDcwODI3MTY1 +NDUwWhcNMTMwMjE2MTY1NDUwWjCBiTELMAkGA1UEBhMCVVMxETAPBgNVBAgTCERl +bGF3YXJlMRMwEQYDVQQHEwpXaWxtaW5ndG9uMSMwIQYDVQQKExpQeXRob24gU29m +dHdhcmUgRm91bmRhdGlvbjEMMAoGA1UECxMDU1NMMR8wHQYDVQQDExZzb21lbWFj +aGluZS5weXRob24ub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8ddrh +m+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9LopdJhTvbGfEj0DQs1IE8 +M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVHfhi/VwovESJlaBOp+WMn +fhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQABoxUwEzARBglghkgBhvhC +AQEEBAMCBkAwDQYJKoZIhvcNAQEFBQADgYEAF4Q5BVqmCOLv1n8je/Jw9K669VXb +08hyGzQhkemEBYQd6fzQ9A/1ZzHkJKb1P6yreOLSEh4KcxYPyrLRC1ll8nr5OlCx +CMhKkTnR6qBsdNV0XtdU2+N25hqW+Ma4ZeqsN/iiJVCGNOZGnvQuvCAGWF8+J/f/ +iHkC6gGdBJhogs4= +-----END CERTIFICATE----- +-----BEGIN RSA PRIVATE KEY----- +Bad Key, though the cert should be OK +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICpzCCAhCgAwIBAgIJAP+qStv1cIGNMA0GCSqGSIb3DQEBBQUAMIGJMQswCQYD +VQQGEwJVUzERMA8GA1UECBMIRGVsYXdhcmUxEzARBgNVBAcTCldpbG1pbmd0b24x +IzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9uMQwwCgYDVQQLEwNT +U0wxHzAdBgNVBAMTFnNvbWVtYWNoaW5lLnB5dGhvbi5vcmcwHhcNMDcwODI3MTY1 +NDUwWhcNMTMwMjE2MTY1NDUwWjCBiTELMAkGA1UEBhMCVVMxETAPBgNVBAgTCERl +bGF3YXJlMRMwEQYDVQQHEwpXaWxtaW5ndG9uMSMwIQYDVQQKExpQeXRob24gU29m +dHdhcmUgRm91bmRhdGlvbjEMMAoGA1UECxMDU1NMMR8wHQYDVQQDExZzb21lbWFj +aGluZS5weXRob24ub3JnMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC8ddrh +m+LutBvjYcQlnH21PPIseJ1JVG2HMmN2CmZk2YukO+9LopdJhTvbGfEj0DQs1IE8 +M+kTUyOmuKfVrFMKwtVeCJphrAnhoz7TYOuLBSqt7lVHfhi/VwovESJlaBOp+WMn +fhcduPEYHYx/6cnVapIkZnLt30zu2um+DzA9jQIDAQABoxUwEzARBglghkgBhvhC +AQEEBAMCBkAwDQYJKoZIhvcNAQEFBQADgYEAF4Q5BVqmCOLv1n8je/Jw9K669VXb +08hyGzQhkemEBYQd6fzQ9A/1ZzHkJKb1P6yreOLSEh4KcxYPyrLRC1ll8nr5OlCx +CMhKkTnR6qBsdNV0XtdU2+N25hqW+Ma4ZeqsN/iiJVCGNOZGnvQuvCAGWF8+J/f/ +iHkC6gGdBJhogs4= +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/dh512.pem b/lib/future/backports/test/dh512.pem new file mode 100644 index 00000000..200d16cd --- /dev/null +++ b/lib/future/backports/test/dh512.pem @@ -0,0 +1,9 @@ +-----BEGIN DH PARAMETERS----- +MEYCQQD1Kv884bEpQBgRjXyEpwpy1obEAxnIByl6ypUM2Zafq9AKUJsCRtMIPWak +XUGfnHy9iUsiGSa6q6Jew1XpKgVfAgEC +-----END DH PARAMETERS----- + +These are the 512 bit DH parameters from "Assigned Number for SKIP Protocols" +(http://www.skip-vpn.org/spec/numbers.html). +See there for how they were generated. +Note that g is not a generator, but this is not a problem since p is a safe prime. diff --git a/lib/future/backports/test/https_svn_python_org_root.pem b/lib/future/backports/test/https_svn_python_org_root.pem new file mode 100644 index 00000000..e7dfc829 --- /dev/null +++ b/lib/future/backports/test/https_svn_python_org_root.pem @@ -0,0 +1,41 @@ +-----BEGIN CERTIFICATE----- +MIIHPTCCBSWgAwIBAgIBADANBgkqhkiG9w0BAQQFADB5MRAwDgYDVQQKEwdSb290 +IENBMR4wHAYDVQQLExVodHRwOi8vd3d3LmNhY2VydC5vcmcxIjAgBgNVBAMTGUNB +IENlcnQgU2lnbmluZyBBdXRob3JpdHkxITAfBgkqhkiG9w0BCQEWEnN1cHBvcnRA +Y2FjZXJ0Lm9yZzAeFw0wMzAzMzAxMjI5NDlaFw0zMzAzMjkxMjI5NDlaMHkxEDAO +BgNVBAoTB1Jvb3QgQ0ExHjAcBgNVBAsTFWh0dHA6Ly93d3cuY2FjZXJ0Lm9yZzEi +MCAGA1UEAxMZQ0EgQ2VydCBTaWduaW5nIEF1dGhvcml0eTEhMB8GCSqGSIb3DQEJ +ARYSc3VwcG9ydEBjYWNlcnQub3JnMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEAziLA4kZ97DYoB1CW8qAzQIxL8TtmPzHlawI229Z89vGIj053NgVBlfkJ +8BLPRoZzYLdufujAWGSuzbCtRRcMY/pnCujW0r8+55jE8Ez64AO7NV1sId6eINm6 +zWYyN3L69wj1x81YyY7nDl7qPv4coRQKFWyGhFtkZip6qUtTefWIonvuLwphK42y +fk1WpRPs6tqSnqxEQR5YYGUFZvjARL3LlPdCfgv3ZWiYUQXw8wWRBB0bF4LsyFe7 +w2t6iPGwcswlWyCR7BYCEo8y6RcYSNDHBS4CMEK4JZwFaz+qOqfrU0j36NK2B5jc +G8Y0f3/JHIJ6BVgrCFvzOKKrF11myZjXnhCLotLddJr3cQxyYN/Nb5gznZY0dj4k +epKwDpUeb+agRThHqtdB7Uq3EvbXG4OKDy7YCbZZ16oE/9KTfWgu3YtLq1i6L43q +laegw1SJpfvbi1EinbLDvhG+LJGGi5Z4rSDTii8aP8bQUWWHIbEZAWV/RRyH9XzQ +QUxPKZgh/TMfdQwEUfoZd9vUFBzugcMd9Zi3aQaRIt0AUMyBMawSB3s42mhb5ivU +fslfrejrckzzAeVLIL+aplfKkQABi6F1ITe1Yw1nPkZPcCBnzsXWWdsC4PDSy826 +YreQQejdIOQpvGQpQsgi3Hia/0PsmBsJUUtaWsJx8cTLc6nloQsCAwEAAaOCAc4w +ggHKMB0GA1UdDgQWBBQWtTIb1Mfz4OaO873SsDrusjkY0TCBowYDVR0jBIGbMIGY +gBQWtTIb1Mfz4OaO873SsDrusjkY0aF9pHsweTEQMA4GA1UEChMHUm9vdCBDQTEe +MBwGA1UECxMVaHR0cDovL3d3dy5jYWNlcnQub3JnMSIwIAYDVQQDExlDQSBDZXJ0 +IFNpZ25pbmcgQXV0aG9yaXR5MSEwHwYJKoZIhvcNAQkBFhJzdXBwb3J0QGNhY2Vy +dC5vcmeCAQAwDwYDVR0TAQH/BAUwAwEB/zAyBgNVHR8EKzApMCegJaAjhiFodHRw +czovL3d3dy5jYWNlcnQub3JnL3Jldm9rZS5jcmwwMAYJYIZIAYb4QgEEBCMWIWh0 +dHBzOi8vd3d3LmNhY2VydC5vcmcvcmV2b2tlLmNybDA0BglghkgBhvhCAQgEJxYl +aHR0cDovL3d3dy5jYWNlcnQub3JnL2luZGV4LnBocD9pZD0xMDBWBglghkgBhvhC +AQ0ESRZHVG8gZ2V0IHlvdXIgb3duIGNlcnRpZmljYXRlIGZvciBGUkVFIGhlYWQg +b3ZlciB0byBodHRwOi8vd3d3LmNhY2VydC5vcmcwDQYJKoZIhvcNAQEEBQADggIB +ACjH7pyCArpcgBLKNQodgW+JapnM8mgPf6fhjViVPr3yBsOQWqy1YPaZQwGjiHCc +nWKdpIevZ1gNMDY75q1I08t0AoZxPuIrA2jxNGJARjtT6ij0rPtmlVOKTV39O9lg +18p5aTuxZZKmxoGCXJzN600BiqXfEVWqFcofN8CCmHBh22p8lqOOLlQ+TyGpkO/c +gr/c6EWtTZBzCDyUZbAEmXZ/4rzCahWqlwQ3JNgelE5tDlG+1sSPypZt90Pf6DBl +Jzt7u0NDY8RD97LsaMzhGY4i+5jhe1o+ATc7iwiwovOVThrLm82asduycPAtStvY +sONvRUgzEv/+PDIqVPfE94rwiCPCR/5kenHA0R6mY7AHfqQv0wGP3J8rtsYIqQ+T +SCX8Ev2fQtzzxD72V7DX3WnRBnc0CkvSyqD/HMaMyRa+xMwyN2hzXwj7UfdJUzYF +CpUCTPJ5GhD22Dp1nPMd8aINcGeGG7MW9S/lpOt5hvk9C8JzC6WZrG/8Z7jlLwum +GCSNe9FINSkYQKyTYOGWhlC0elnYjyELn8+CkcY7v2vcB5G5l1YjqrZslMZIBjzk +zk6q5PYvCdxTby78dOs6Y5nCpqyJvKeyRKANihDjbPIky/qbn3BHLt4Ui9SyIAmW +omTxJBzcoTWcFbLUvFUufQb1nA5V9FrWk9p2rSVzTMVD +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/keycert.passwd.pem b/lib/future/backports/test/keycert.passwd.pem new file mode 100644 index 00000000..e9057488 --- /dev/null +++ b/lib/future/backports/test/keycert.passwd.pem @@ -0,0 +1,33 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A + +kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c +u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA +AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr +Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+ +YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P +6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+ +noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1 +94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l +7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo +cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO +zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt +L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo +2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ== +-----END RSA PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw +FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/keycert.pem b/lib/future/backports/test/keycert.pem new file mode 100644 index 00000000..64318aa2 --- /dev/null +++ b/lib/future/backports/test/keycert.pem @@ -0,0 +1,31 @@ +-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANtb0+YrKuxevGpm +LrjaUhZSgz6zFAmuGFmKmUbdjmfv9zSmmdsQIksK++jK0Be9LeZy20j6ahOfuVa0 +ufEmPoP7Fy4hXegKZR9cCWcIe/A6H2xWF1IIJLRTLaU8ol/I7T+um5HD5AwAwNPP +USNU0Eegmvp+xxWu3NX2m1Veot85AgMBAAECgYA3ZdZ673X0oexFlq7AAmrutkHt +CL7LvwrpOiaBjhyTxTeSNWzvtQBkIU8DOI0bIazA4UreAFffwtvEuPmonDb3F+Iq +SMAu42XcGyVZEl+gHlTPU9XRX7nTOXVt+MlRRRxL6t9GkGfUAXI3XxJDXW3c0vBK +UL9xqD8cORXOfE06rQJBAP8mEX1ERkR64Ptsoe4281vjTlNfIbs7NMPkUnrn9N/Y +BLhjNIfQ3HFZG8BTMLfX7kCS9D593DW5tV4Z9BP/c6cCQQDcFzCcVArNh2JSywOQ +ZfTfRbJg/Z5Lt9Fkngv1meeGNPgIMLN8Sg679pAOOWmzdMO3V706rNPzSVMME7E5 +oPIfAkEA8pDddarP5tCvTTgUpmTFbakm0KoTZm2+FzHcnA4jRh+XNTjTOv98Y6Ik +eO5d1ZnKXseWvkZncQgxfdnMqqpj5wJAcNq/RVne1DbYlwWchT2Si65MYmmJ8t+F +0mcsULqjOnEMwf5e+ptq5LzwbyrHZYq5FNk7ocufPv/ZQrcSSC+cFwJBAKvOJByS +x56qyGeZLOQlWS2JS3KJo59XuLFGqcbgN9Om9xFa41Yb4N9NvplFivsvZdw3m1Q/ +SPIXQuT8RMPDVNQ= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw +FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/keycert2.pem b/lib/future/backports/test/keycert2.pem new file mode 100644 index 00000000..e8a9e082 --- /dev/null +++ b/lib/future/backports/test/keycert2.pem @@ -0,0 +1,31 @@ +-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAJnsJZVrppL+W5I9 +zGQrrawWwE5QJpBK9nWw17mXrZ03R1cD9BamLGivVISbPlRlAVnZBEyh1ATpsB7d +CUQ+WHEvALquvx4+Yw5l+fXeiYRjrLRBYZuVy8yNtXzU3iWcGObcYRkUdiXdOyP7 +sLF2YZHRvQZpzgDBKkrraeQ81w21AgMBAAECgYBEm7n07FMHWlE+0kT0sXNsLYfy +YE+QKZnJw9WkaDN+zFEEPELkhZVt5BjsMraJr6v2fIEqF0gGGJPkbenffVq2B5dC +lWUOxvJHufMK4sM3Cp6s/gOp3LP+QkzVnvJSfAyZU6l+4PGX5pLdUsXYjPxgzjzL +S36tF7/2Uv1WePyLUQJBAMsPhYzUXOPRgmbhcJiqi9A9c3GO8kvSDYTCKt3VMnqz +HBn6MQ4VQasCD1F+7jWTI0FU/3vdw8non/Fj8hhYqZcCQQDCDRdvmZqDiZnpMqDq +L6ZSrLTVtMvZXZbgwForaAD9uHj51TME7+eYT7EG2YCgJTXJ4YvRJEnPNyskwdKt +vTSTAkEAtaaN/vyemEJ82BIGStwONNw0ILsSr5cZ9tBHzqiA/tipY+e36HRFiXhP +QcU9zXlxyWkDH8iz9DSAmE2jbfoqwwJANlMJ65E543cjIlitGcKLMnvtCCLcKpb7 +xSG0XJB6Lo11OKPJ66jp0gcFTSCY1Lx2CXVd+gfJrfwI1Pp562+bhwJBAJ9IfDPU +R8OpO9v1SGd8x33Owm7uXOpB9d63/T70AD1QOXjKUC4eXYbt0WWfWuny/RNPRuyh +w7DXSfUF+kPKolU= +-----END PRIVATE KEY----- +-----BEGIN CERTIFICATE----- +MIICXTCCAcagAwIBAgIJAIO3upAG445fMA0GCSqGSIb3DQEBBQUAMGIxCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xFTATBgNVBAMTDGZha2Vob3N0bmFtZTAeFw0x +MDEwMDkxNTAxMDBaFw0yMDEwMDYxNTAxMDBaMGIxCzAJBgNVBAYTAlhZMRcwFQYD +VQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZv +dW5kYXRpb24xFTATBgNVBAMTDGZha2Vob3N0bmFtZTCBnzANBgkqhkiG9w0BAQEF +AAOBjQAwgYkCgYEAmewllWumkv5bkj3MZCutrBbATlAmkEr2dbDXuZetnTdHVwP0 +FqYsaK9UhJs+VGUBWdkETKHUBOmwHt0JRD5YcS8Auq6/Hj5jDmX59d6JhGOstEFh +m5XLzI21fNTeJZwY5txhGRR2Jd07I/uwsXZhkdG9BmnOAMEqSutp5DzXDbUCAwEA +AaMbMBkwFwYDVR0RBBAwDoIMZmFrZWhvc3RuYW1lMA0GCSqGSIb3DQEBBQUAA4GB +AH+iMClLLGSaKWgwXsmdVo4FhTZZHo8Uprrtg3N9FxEeE50btpDVQysgRt5ias3K +m+bME9zbKwvbVWD5zZdjus4pDgzwF/iHyccL8JyYhxOvS/9zmvAtFXj/APIIbZFp +IT75d9f88ScIGEtknZQejnrdhB64tYki/EqluiuKBqKD +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/nokia.pem b/lib/future/backports/test/nokia.pem new file mode 100644 index 00000000..0d044df4 --- /dev/null +++ b/lib/future/backports/test/nokia.pem @@ -0,0 +1,31 @@ +# Certificate for projects.developer.nokia.com:443 (see issue 13034) +-----BEGIN CERTIFICATE----- +MIIFLDCCBBSgAwIBAgIQLubqdkCgdc7lAF9NfHlUmjANBgkqhkiG9w0BAQUFADCB +vDELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL +ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTswOQYDVQQLEzJUZXJtcyBvZiB1c2Ug +YXQgaHR0cHM6Ly93d3cudmVyaXNpZ24uY29tL3JwYSAoYykxMDE2MDQGA1UEAxMt +VmVyaVNpZ24gQ2xhc3MgMyBJbnRlcm5hdGlvbmFsIFNlcnZlciBDQSAtIEczMB4X +DTExMDkyMTAwMDAwMFoXDTEyMDkyMDIzNTk1OVowcTELMAkGA1UEBhMCRkkxDjAM +BgNVBAgTBUVzcG9vMQ4wDAYDVQQHFAVFc3BvbzEOMAwGA1UEChQFTm9raWExCzAJ +BgNVBAsUAkJJMSUwIwYDVQQDFBxwcm9qZWN0cy5kZXZlbG9wZXIubm9raWEuY29t +MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCr92w1bpHYSYxUEx8N/8Iddda2 +lYi+aXNtQfV/l2Fw9Ykv3Ipw4nLeGTj18FFlAZgMdPRlgrzF/NNXGw/9l3/qKdow +CypkQf8lLaxb9Ze1E/KKmkRJa48QTOqvo6GqKuTI6HCeGlG1RxDb8YSKcQWLiytn +yj3Wp4MgRQO266xmMQIDAQABo4IB9jCCAfIwQQYDVR0RBDowOIIccHJvamVjdHMu +ZGV2ZWxvcGVyLm5va2lhLmNvbYIYcHJvamVjdHMuZm9ydW0ubm9raWEuY29tMAkG +A1UdEwQCMAAwCwYDVR0PBAQDAgWgMEEGA1UdHwQ6MDgwNqA0oDKGMGh0dHA6Ly9T +VlJJbnRsLUczLWNybC52ZXJpc2lnbi5jb20vU1ZSSW50bEczLmNybDBEBgNVHSAE +PTA7MDkGC2CGSAGG+EUBBxcDMCowKAYIKwYBBQUHAgEWHGh0dHBzOi8vd3d3LnZl +cmlzaWduLmNvbS9ycGEwKAYDVR0lBCEwHwYJYIZIAYb4QgQBBggrBgEFBQcDAQYI +KwYBBQUHAwIwcgYIKwYBBQUHAQEEZjBkMCQGCCsGAQUFBzABhhhodHRwOi8vb2Nz +cC52ZXJpc2lnbi5jb20wPAYIKwYBBQUHMAKGMGh0dHA6Ly9TVlJJbnRsLUczLWFp +YS52ZXJpc2lnbi5jb20vU1ZSSW50bEczLmNlcjBuBggrBgEFBQcBDARiMGChXqBc +MFowWDBWFglpbWFnZS9naWYwITAfMAcGBSsOAwIaBBRLa7kolgYMu9BSOJsprEsH +iyEFGDAmFiRodHRwOi8vbG9nby52ZXJpc2lnbi5jb20vdnNsb2dvMS5naWYwDQYJ +KoZIhvcNAQEFBQADggEBACQuPyIJqXwUyFRWw9x5yDXgMW4zYFopQYOw/ItRY522 +O5BsySTh56BWS6mQB07XVfxmYUGAvRQDA5QHpmY8jIlNwSmN3s8RKo+fAtiNRlcL +x/mWSfuMs3D/S6ev3D6+dpEMZtjrhOdctsarMKp8n/hPbwhAbg5hVjpkW5n8vz2y +0KxvvkA1AxpLwpVv7OlK17ttzIHw8bp9HTlHBU5s8bKz4a565V/a5HI0CSEv/+0y +ko4/ghTnZc1CkmUngKKeFMSah/mT/xAh8XnE2l1AazFa8UKuYki1e+ArHaGZc4ix +UYOtiRphwfuYQhRZ7qX9q2MMkCMI65XNK/SaFrAbbG0= +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/nullbytecert.pem b/lib/future/backports/test/nullbytecert.pem new file mode 100644 index 00000000..447186c9 --- /dev/null +++ b/lib/future/backports/test/nullbytecert.pem @@ -0,0 +1,90 @@ +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 0 (0x0) + Signature Algorithm: sha1WithRSAEncryption + Issuer: C=US, ST=Oregon, L=Beaverton, O=Python Software Foundation, OU=Python Core Development, CN=null.python.org\x00example.org/emailAddress=python-dev@python.org + Validity + Not Before: Aug 7 13:11:52 2013 GMT + Not After : Aug 7 13:12:52 2013 GMT + Subject: C=US, ST=Oregon, L=Beaverton, O=Python Software Foundation, OU=Python Core Development, CN=null.python.org\x00example.org/emailAddress=python-dev@python.org + Subject Public Key Info: + Public Key Algorithm: rsaEncryption + Public-Key: (2048 bit) + Modulus: + 00:b5:ea:ed:c9:fb:46:7d:6f:3b:76:80:dd:3a:f3: + 03:94:0b:a7:a6:db:ec:1d:df:ff:23:74:08:9d:97: + 16:3f:a3:a4:7b:3e:1b:0e:96:59:25:03:a7:26:e2: + 88:a9:cf:79:cd:f7:04:56:b0:ab:79:32:6e:59:c1: + 32:30:54:eb:58:a8:cb:91:f0:42:a5:64:27:cb:d4: + 56:31:88:52:ad:cf:bd:7f:f0:06:64:1f:cc:27:b8: + a3:8b:8c:f3:d8:29:1f:25:0b:f5:46:06:1b:ca:02: + 45:ad:7b:76:0a:9c:bf:bb:b9:ae:0d:16:ab:60:75: + ae:06:3e:9c:7c:31:dc:92:2f:29:1a:e0:4b:0c:91: + 90:6c:e9:37:c5:90:d7:2a:d7:97:15:a3:80:8f:5d: + 7b:49:8f:54:30:d4:97:2c:1c:5b:37:b5:ab:69:30: + 68:43:d3:33:78:4b:02:60:f5:3c:44:80:a1:8f:e7: + f0:0f:d1:5e:87:9e:46:cf:62:fc:f9:bf:0c:65:12: + f1:93:c8:35:79:3f:c8:ec:ec:47:f5:ef:be:44:d5: + ae:82:1e:2d:9a:9f:98:5a:67:65:e1:74:70:7c:cb: + d3:c2:ce:0e:45:49:27:dc:e3:2d:d4:fb:48:0e:2f: + 9e:77:b8:14:46:c0:c4:36:ca:02:ae:6a:91:8c:da: + 2f:85 + Exponent: 65537 (0x10001) + X509v3 extensions: + X509v3 Basic Constraints: critical + CA:FALSE + X509v3 Subject Key Identifier: + 88:5A:55:C0:52:FF:61:CD:52:A3:35:0F:EA:5A:9C:24:38:22:F7:5C + X509v3 Key Usage: + Digital Signature, Non Repudiation, Key Encipherment + X509v3 Subject Alternative Name: + ************************************************************* + WARNING: The values for DNS, email and URI are WRONG. OpenSSL + doesn't print the text after a NULL byte. + ************************************************************* + DNS:altnull.python.org, email:null@python.org, URI:http://null.python.org, IP Address:192.0.2.1, IP Address:2001:DB8:0:0:0:0:0:1 + Signature Algorithm: sha1WithRSAEncryption + ac:4f:45:ef:7d:49:a8:21:70:8e:88:59:3e:d4:36:42:70:f5: + a3:bd:8b:d7:a8:d0:58:f6:31:4a:b1:a4:a6:dd:6f:d9:e8:44: + 3c:b6:0a:71:d6:7f:b1:08:61:9d:60:ce:75:cf:77:0c:d2:37: + 86:02:8d:5e:5d:f9:0f:71:b4:16:a8:c1:3d:23:1c:f1:11:b3: + 56:6e:ca:d0:8d:34:94:e6:87:2a:99:f2:ae:ae:cc:c2:e8:86: + de:08:a8:7f:c5:05:fa:6f:81:a7:82:e6:d0:53:9d:34:f4:ac: + 3e:40:fe:89:57:7a:29:a4:91:7e:0b:c6:51:31:e5:10:2f:a4: + 60:76:cd:95:51:1a:be:8b:a1:b0:fd:ad:52:bd:d7:1b:87:60: + d2:31:c7:17:c4:18:4f:2d:08:25:a3:a7:4f:b7:92:ca:e2:f5: + 25:f1:54:75:81:9d:b3:3d:61:a2:f7:da:ed:e1:c6:6f:2c:60: + 1f:d8:6f:c5:92:05:ab:c9:09:62:49:a9:14:ad:55:11:cc:d6: + 4a:19:94:99:97:37:1d:81:5f:8b:cf:a3:a8:96:44:51:08:3d: + 0b:05:65:12:eb:b6:70:80:88:48:72:4f:c6:c2:da:cf:cd:8e: + 5b:ba:97:2f:60:b4:96:56:49:5e:3a:43:76:63:04:be:2a:f6: + c1:ca:a9:94 +-----BEGIN CERTIFICATE----- +MIIE2DCCA8CgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBxTELMAkGA1UEBhMCVVMx +DzANBgNVBAgMBk9yZWdvbjESMBAGA1UEBwwJQmVhdmVydG9uMSMwIQYDVQQKDBpQ +eXRob24gU29mdHdhcmUgRm91bmRhdGlvbjEgMB4GA1UECwwXUHl0aG9uIENvcmUg +RGV2ZWxvcG1lbnQxJDAiBgNVBAMMG251bGwucHl0aG9uLm9yZwBleGFtcGxlLm9y +ZzEkMCIGCSqGSIb3DQEJARYVcHl0aG9uLWRldkBweXRob24ub3JnMB4XDTEzMDgw +NzEzMTE1MloXDTEzMDgwNzEzMTI1MlowgcUxCzAJBgNVBAYTAlVTMQ8wDQYDVQQI +DAZPcmVnb24xEjAQBgNVBAcMCUJlYXZlcnRvbjEjMCEGA1UECgwaUHl0aG9uIFNv +ZnR3YXJlIEZvdW5kYXRpb24xIDAeBgNVBAsMF1B5dGhvbiBDb3JlIERldmVsb3Bt +ZW50MSQwIgYDVQQDDBtudWxsLnB5dGhvbi5vcmcAZXhhbXBsZS5vcmcxJDAiBgkq +hkiG9w0BCQEWFXB5dGhvbi1kZXZAcHl0aG9uLm9yZzCCASIwDQYJKoZIhvcNAQEB +BQADggEPADCCAQoCggEBALXq7cn7Rn1vO3aA3TrzA5QLp6bb7B3f/yN0CJ2XFj+j +pHs+Gw6WWSUDpybiiKnPec33BFawq3kyblnBMjBU61ioy5HwQqVkJ8vUVjGIUq3P +vX/wBmQfzCe4o4uM89gpHyUL9UYGG8oCRa17dgqcv7u5rg0Wq2B1rgY+nHwx3JIv +KRrgSwyRkGzpN8WQ1yrXlxWjgI9de0mPVDDUlywcWze1q2kwaEPTM3hLAmD1PESA +oY/n8A/RXoeeRs9i/Pm/DGUS8ZPINXk/yOzsR/XvvkTVroIeLZqfmFpnZeF0cHzL +08LODkVJJ9zjLdT7SA4vnne4FEbAxDbKAq5qkYzaL4UCAwEAAaOB0DCBzTAMBgNV +HRMBAf8EAjAAMB0GA1UdDgQWBBSIWlXAUv9hzVKjNQ/qWpwkOCL3XDALBgNVHQ8E +BAMCBeAwgZAGA1UdEQSBiDCBhYIeYWx0bnVsbC5weXRob24ub3JnAGV4YW1wbGUu +Y29tgSBudWxsQHB5dGhvbi5vcmcAdXNlckBleGFtcGxlLm9yZ4YpaHR0cDovL251 +bGwucHl0aG9uLm9yZwBodHRwOi8vZXhhbXBsZS5vcmeHBMAAAgGHECABDbgAAAAA +AAAAAAAAAAEwDQYJKoZIhvcNAQEFBQADggEBAKxPRe99SaghcI6IWT7UNkJw9aO9 +i9eo0Fj2MUqxpKbdb9noRDy2CnHWf7EIYZ1gznXPdwzSN4YCjV5d+Q9xtBaowT0j +HPERs1ZuytCNNJTmhyqZ8q6uzMLoht4IqH/FBfpvgaeC5tBTnTT0rD5A/olXeimk +kX4LxlEx5RAvpGB2zZVRGr6LobD9rVK91xuHYNIxxxfEGE8tCCWjp0+3ksri9SXx +VHWBnbM9YaL32u3hxm8sYB/Yb8WSBavJCWJJqRStVRHM1koZlJmXNx2BX4vPo6iW +RFEIPQsFZRLrtnCAiEhyT8bC2s/Njlu6ly9gtJZWSV46Q3ZjBL4q9sHKqZQ= +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/nullcert.pem b/lib/future/backports/test/nullcert.pem new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/backports/test/pystone.py b/lib/future/backports/test/pystone.py new file mode 100644 index 00000000..7652027b --- /dev/null +++ b/lib/future/backports/test/pystone.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python3 + +""" +"PYSTONE" Benchmark Program + +Version: Python/1.1 (corresponds to C/1.1 plus 2 Pystone fixes) + +Author: Reinhold P. Weicker, CACM Vol 27, No 10, 10/84 pg. 1013. + + Translated from ADA to C by Rick Richardson. + Every method to preserve ADA-likeness has been used, + at the expense of C-ness. + + Translated from C to Python by Guido van Rossum. + +Version History: + + Version 1.1 corrects two bugs in version 1.0: + + First, it leaked memory: in Proc1(), NextRecord ends + up having a pointer to itself. I have corrected this + by zapping NextRecord.PtrComp at the end of Proc1(). + + Second, Proc3() used the operator != to compare a + record to None. This is rather inefficient and not + true to the intention of the original benchmark (where + a pointer comparison to None is intended; the != + operator attempts to find a method __cmp__ to do value + comparison of the record). Version 1.1 runs 5-10 + percent faster than version 1.0, so benchmark figures + of different versions can't be compared directly. + +""" + +from __future__ import print_function + +from time import clock + +LOOPS = 50000 + +__version__ = "1.1" + +[Ident1, Ident2, Ident3, Ident4, Ident5] = range(1, 6) + +class Record(object): + + def __init__(self, PtrComp = None, Discr = 0, EnumComp = 0, + IntComp = 0, StringComp = 0): + self.PtrComp = PtrComp + self.Discr = Discr + self.EnumComp = EnumComp + self.IntComp = IntComp + self.StringComp = StringComp + + def copy(self): + return Record(self.PtrComp, self.Discr, self.EnumComp, + self.IntComp, self.StringComp) + +TRUE = 1 +FALSE = 0 + +def main(loops=LOOPS): + benchtime, stones = pystones(loops) + print("Pystone(%s) time for %d passes = %g" % \ + (__version__, loops, benchtime)) + print("This machine benchmarks at %g pystones/second" % stones) + + +def pystones(loops=LOOPS): + return Proc0(loops) + +IntGlob = 0 +BoolGlob = FALSE +Char1Glob = '\0' +Char2Glob = '\0' +Array1Glob = [0]*51 +Array2Glob = [x[:] for x in [Array1Glob]*51] +PtrGlb = None +PtrGlbNext = None + +def Proc0(loops=LOOPS): + global IntGlob + global BoolGlob + global Char1Glob + global Char2Glob + global Array1Glob + global Array2Glob + global PtrGlb + global PtrGlbNext + + starttime = clock() + for i in range(loops): + pass + nulltime = clock() - starttime + + PtrGlbNext = Record() + PtrGlb = Record() + PtrGlb.PtrComp = PtrGlbNext + PtrGlb.Discr = Ident1 + PtrGlb.EnumComp = Ident3 + PtrGlb.IntComp = 40 + PtrGlb.StringComp = "DHRYSTONE PROGRAM, SOME STRING" + String1Loc = "DHRYSTONE PROGRAM, 1'ST STRING" + Array2Glob[8][7] = 10 + + starttime = clock() + + for i in range(loops): + Proc5() + Proc4() + IntLoc1 = 2 + IntLoc2 = 3 + String2Loc = "DHRYSTONE PROGRAM, 2'ND STRING" + EnumLoc = Ident2 + BoolGlob = not Func2(String1Loc, String2Loc) + while IntLoc1 < IntLoc2: + IntLoc3 = 5 * IntLoc1 - IntLoc2 + IntLoc3 = Proc7(IntLoc1, IntLoc2) + IntLoc1 = IntLoc1 + 1 + Proc8(Array1Glob, Array2Glob, IntLoc1, IntLoc3) + PtrGlb = Proc1(PtrGlb) + CharIndex = 'A' + while CharIndex <= Char2Glob: + if EnumLoc == Func1(CharIndex, 'C'): + EnumLoc = Proc6(Ident1) + CharIndex = chr(ord(CharIndex)+1) + IntLoc3 = IntLoc2 * IntLoc1 + IntLoc2 = IntLoc3 / IntLoc1 + IntLoc2 = 7 * (IntLoc3 - IntLoc2) - IntLoc1 + IntLoc1 = Proc2(IntLoc1) + + benchtime = clock() - starttime - nulltime + if benchtime == 0.0: + loopsPerBenchtime = 0.0 + else: + loopsPerBenchtime = (loops / benchtime) + return benchtime, loopsPerBenchtime + +def Proc1(PtrParIn): + PtrParIn.PtrComp = NextRecord = PtrGlb.copy() + PtrParIn.IntComp = 5 + NextRecord.IntComp = PtrParIn.IntComp + NextRecord.PtrComp = PtrParIn.PtrComp + NextRecord.PtrComp = Proc3(NextRecord.PtrComp) + if NextRecord.Discr == Ident1: + NextRecord.IntComp = 6 + NextRecord.EnumComp = Proc6(PtrParIn.EnumComp) + NextRecord.PtrComp = PtrGlb.PtrComp + NextRecord.IntComp = Proc7(NextRecord.IntComp, 10) + else: + PtrParIn = NextRecord.copy() + NextRecord.PtrComp = None + return PtrParIn + +def Proc2(IntParIO): + IntLoc = IntParIO + 10 + while 1: + if Char1Glob == 'A': + IntLoc = IntLoc - 1 + IntParIO = IntLoc - IntGlob + EnumLoc = Ident1 + if EnumLoc == Ident1: + break + return IntParIO + +def Proc3(PtrParOut): + global IntGlob + + if PtrGlb is not None: + PtrParOut = PtrGlb.PtrComp + else: + IntGlob = 100 + PtrGlb.IntComp = Proc7(10, IntGlob) + return PtrParOut + +def Proc4(): + global Char2Glob + + BoolLoc = Char1Glob == 'A' + BoolLoc = BoolLoc or BoolGlob + Char2Glob = 'B' + +def Proc5(): + global Char1Glob + global BoolGlob + + Char1Glob = 'A' + BoolGlob = FALSE + +def Proc6(EnumParIn): + EnumParOut = EnumParIn + if not Func3(EnumParIn): + EnumParOut = Ident4 + if EnumParIn == Ident1: + EnumParOut = Ident1 + elif EnumParIn == Ident2: + if IntGlob > 100: + EnumParOut = Ident1 + else: + EnumParOut = Ident4 + elif EnumParIn == Ident3: + EnumParOut = Ident2 + elif EnumParIn == Ident4: + pass + elif EnumParIn == Ident5: + EnumParOut = Ident3 + return EnumParOut + +def Proc7(IntParI1, IntParI2): + IntLoc = IntParI1 + 2 + IntParOut = IntParI2 + IntLoc + return IntParOut + +def Proc8(Array1Par, Array2Par, IntParI1, IntParI2): + global IntGlob + + IntLoc = IntParI1 + 5 + Array1Par[IntLoc] = IntParI2 + Array1Par[IntLoc+1] = Array1Par[IntLoc] + Array1Par[IntLoc+30] = IntLoc + for IntIndex in range(IntLoc, IntLoc+2): + Array2Par[IntLoc][IntIndex] = IntLoc + Array2Par[IntLoc][IntLoc-1] = Array2Par[IntLoc][IntLoc-1] + 1 + Array2Par[IntLoc+20][IntLoc] = Array1Par[IntLoc] + IntGlob = 5 + +def Func1(CharPar1, CharPar2): + CharLoc1 = CharPar1 + CharLoc2 = CharLoc1 + if CharLoc2 != CharPar2: + return Ident1 + else: + return Ident2 + +def Func2(StrParI1, StrParI2): + IntLoc = 1 + while IntLoc <= 1: + if Func1(StrParI1[IntLoc], StrParI2[IntLoc+1]) == Ident1: + CharLoc = 'A' + IntLoc = IntLoc + 1 + if CharLoc >= 'W' and CharLoc <= 'Z': + IntLoc = 7 + if CharLoc == 'X': + return TRUE + else: + if StrParI1 > StrParI2: + IntLoc = IntLoc + 7 + return TRUE + else: + return FALSE + +def Func3(EnumParIn): + EnumLoc = EnumParIn + if EnumLoc == Ident3: return TRUE + return FALSE + +if __name__ == '__main__': + import sys + def error(msg): + print(msg, end=' ', file=sys.stderr) + print("usage: %s [number_of_loops]" % sys.argv[0], file=sys.stderr) + sys.exit(100) + nargs = len(sys.argv) - 1 + if nargs > 1: + error("%d arguments are too many;" % nargs) + elif nargs == 1: + try: loops = int(sys.argv[1]) + except ValueError: + error("Invalid argument %r;" % sys.argv[1]) + else: + loops = LOOPS + main(loops) diff --git a/lib/future/backports/test/sha256.pem b/lib/future/backports/test/sha256.pem new file mode 100644 index 00000000..d3db4b85 --- /dev/null +++ b/lib/future/backports/test/sha256.pem @@ -0,0 +1,128 @@ +# Certificate chain for https://sha256.tbs-internet.com + 0 s:/C=FR/postalCode=14000/ST=Calvados/L=CAEN/street=22 rue de Bretagne/O=TBS INTERNET/OU=0002 440443810/OU=sha-256 production/CN=sha256.tbs-internet.com + i:/C=FR/ST=Calvados/L=Caen/O=TBS INTERNET/OU=Terms and Conditions: http://www.tbs-internet.com/CA/repository/OU=TBS INTERNET CA/CN=TBS X509 CA SGC +-----BEGIN CERTIFICATE----- +MIIGXDCCBUSgAwIBAgIRAKpVmHgg9nfCodAVwcP4siwwDQYJKoZIhvcNAQELBQAw +gcQxCzAJBgNVBAYTAkZSMREwDwYDVQQIEwhDYWx2YWRvczENMAsGA1UEBxMEQ2Fl +bjEVMBMGA1UEChMMVEJTIElOVEVSTkVUMUgwRgYDVQQLEz9UZXJtcyBhbmQgQ29u +ZGl0aW9uczogaHR0cDovL3d3dy50YnMtaW50ZXJuZXQuY29tL0NBL3JlcG9zaXRv +cnkxGDAWBgNVBAsTD1RCUyBJTlRFUk5FVCBDQTEYMBYGA1UEAxMPVEJTIFg1MDkg +Q0EgU0dDMB4XDTEyMDEwNDAwMDAwMFoXDTE0MDIxNzIzNTk1OVowgcsxCzAJBgNV +BAYTAkZSMQ4wDAYDVQQREwUxNDAwMDERMA8GA1UECBMIQ2FsdmFkb3MxDTALBgNV +BAcTBENBRU4xGzAZBgNVBAkTEjIyIHJ1ZSBkZSBCcmV0YWduZTEVMBMGA1UEChMM +VEJTIElOVEVSTkVUMRcwFQYDVQQLEw4wMDAyIDQ0MDQ0MzgxMDEbMBkGA1UECxMS +c2hhLTI1NiBwcm9kdWN0aW9uMSAwHgYDVQQDExdzaGEyNTYudGJzLWludGVybmV0 +LmNvbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKQIX/zdJcyxty0m +PM1XQSoSSifueS3AVcgqMsaIKS/u+rYzsv4hQ/qA6vLn5m5/ewUcZDj7zdi6rBVf +PaVNXJ6YinLX0tkaW8TEjeVuZG5yksGZlhCt1CJ1Ho9XLiLaP4uJ7MCoNUntpJ+E +LfrOdgsIj91kPmwjDJeztVcQCvKzhjVJA/KxdInc0JvOATn7rpaSmQI5bvIjufgo +qVsTPwVFzuUYULXBk7KxRT7MiEqnd5HvviNh0285QC478zl3v0I0Fb5El4yD3p49 +IthcRnxzMKc0UhU5ogi0SbONyBfm/mzONVfSxpM+MlyvZmJqrbuuLoEDzJD+t8PU +xSuzgbcCAwEAAaOCAj4wggI6MB8GA1UdIwQYMBaAFAdEdoWTKLx/bXjSCuv6TEvf +2YIfMB0GA1UdDgQWBBT/qTGYdaj+f61c2IRFL/B1eEsM8DAOBgNVHQ8BAf8EBAMC +BaAwDAYDVR0TAQH/BAIwADA0BgNVHSUELTArBggrBgEFBQcDAQYIKwYBBQUHAwIG +CisGAQQBgjcKAwMGCWCGSAGG+EIEATBLBgNVHSAERDBCMEAGCisGAQQB5TcCBAEw +MjAwBggrBgEFBQcCARYkaHR0cHM6Ly93d3cudGJzLWludGVybmV0LmNvbS9DQS9D +UFM0MG0GA1UdHwRmMGQwMqAwoC6GLGh0dHA6Ly9jcmwudGJzLWludGVybmV0LmNv +bS9UQlNYNTA5Q0FTR0MuY3JsMC6gLKAqhihodHRwOi8vY3JsLnRicy14NTA5LmNv +bS9UQlNYNTA5Q0FTR0MuY3JsMIGmBggrBgEFBQcBAQSBmTCBljA4BggrBgEFBQcw +AoYsaHR0cDovL2NydC50YnMtaW50ZXJuZXQuY29tL1RCU1g1MDlDQVNHQy5jcnQw +NAYIKwYBBQUHMAKGKGh0dHA6Ly9jcnQudGJzLXg1MDkuY29tL1RCU1g1MDlDQVNH +Qy5jcnQwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLnRicy14NTA5LmNvbTA/BgNV +HREEODA2ghdzaGEyNTYudGJzLWludGVybmV0LmNvbYIbd3d3LnNoYTI1Ni50YnMt +aW50ZXJuZXQuY29tMA0GCSqGSIb3DQEBCwUAA4IBAQA0pOuL8QvAa5yksTbGShzX +ABApagunUGoEydv4YJT1MXy9tTp7DrWaozZSlsqBxrYAXP1d9r2fuKbEniYHxaQ0 +UYaf1VSIlDo1yuC8wE7wxbHDIpQ/E5KAyxiaJ8obtDhFstWAPAH+UoGXq0kj2teN +21sFQ5dXgA95nldvVFsFhrRUNB6xXAcaj0VZFhttI0ZfQZmQwEI/P+N9Jr40OGun +aa+Dn0TMeUH4U20YntfLbu2nDcJcYfyurm+8/0Tr4HznLnedXu9pCPYj0TaddrgT +XO0oFiyy7qGaY6+qKh71yD64Y3ycCJ/HR9Wm39mjZYc9ezYwT4noP6r7Lk8YO7/q +-----END CERTIFICATE----- + 1 s:/C=FR/ST=Calvados/L=Caen/O=TBS INTERNET/OU=Terms and Conditions: http://www.tbs-internet.com/CA/repository/OU=TBS INTERNET CA/CN=TBS X509 CA SGC + i:/C=SE/O=AddTrust AB/OU=AddTrust External TTP Network/CN=AddTrust External CA Root +-----BEGIN CERTIFICATE----- +MIIFVjCCBD6gAwIBAgIQXpDZ0ETJMV02WTx3GTnhhTANBgkqhkiG9w0BAQUFADBv +MQswCQYDVQQGEwJTRTEUMBIGA1UEChMLQWRkVHJ1c3QgQUIxJjAkBgNVBAsTHUFk +ZFRydXN0IEV4dGVybmFsIFRUUCBOZXR3b3JrMSIwIAYDVQQDExlBZGRUcnVzdCBF +eHRlcm5hbCBDQSBSb290MB4XDTA1MTIwMTAwMDAwMFoXDTE5MDYyNDE5MDYzMFow +gcQxCzAJBgNVBAYTAkZSMREwDwYDVQQIEwhDYWx2YWRvczENMAsGA1UEBxMEQ2Fl +bjEVMBMGA1UEChMMVEJTIElOVEVSTkVUMUgwRgYDVQQLEz9UZXJtcyBhbmQgQ29u +ZGl0aW9uczogaHR0cDovL3d3dy50YnMtaW50ZXJuZXQuY29tL0NBL3JlcG9zaXRv +cnkxGDAWBgNVBAsTD1RCUyBJTlRFUk5FVCBDQTEYMBYGA1UEAxMPVEJTIFg1MDkg +Q0EgU0dDMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsgOkO3f7wzN6 +rOjg45tR5vjBfzK7qmV9IBxb/QW9EEXxG+E7FNhZqQLtwGBKoSsHTnQqV75wWMk0 +9tinWvftBkSpj5sTi/8cbzJfUvTSVYh3Qxv6AVVjMMH/ruLjE6y+4PoaPs8WoYAQ +ts5R4Z1g8c/WnTepLst2x0/Wv7GmuoQi+gXvHU6YrBiu7XkeYhzc95QdviWSJRDk +owhb5K43qhcvjRmBfO/paGlCliDGZp8mHwrI21mwobWpVjTxZRwYO3bd4+TGcI4G +Ie5wmHwE8F7SK1tgSqbBacKjDa93j7txKkfz/Yd2n7TGqOXiHPsJpG655vrKtnXk +9vs1zoDeJQIDAQABo4IBljCCAZIwHQYDVR0OBBYEFAdEdoWTKLx/bXjSCuv6TEvf +2YIfMA4GA1UdDwEB/wQEAwIBBjASBgNVHRMBAf8ECDAGAQH/AgEAMCAGA1UdJQQZ +MBcGCisGAQQBgjcKAwMGCWCGSAGG+EIEATAYBgNVHSAEETAPMA0GCysGAQQBgOU3 +AgQBMHsGA1UdHwR0MHIwOKA2oDSGMmh0dHA6Ly9jcmwuY29tb2RvY2EuY29tL0Fk +ZFRydXN0RXh0ZXJuYWxDQVJvb3QuY3JsMDagNKAyhjBodHRwOi8vY3JsLmNvbW9k +by5uZXQvQWRkVHJ1c3RFeHRlcm5hbENBUm9vdC5jcmwwgYAGCCsGAQUFBwEBBHQw +cjA4BggrBgEFBQcwAoYsaHR0cDovL2NydC5jb21vZG9jYS5jb20vQWRkVHJ1c3RV +VE5TR0NDQS5jcnQwNgYIKwYBBQUHMAKGKmh0dHA6Ly9jcnQuY29tb2RvLm5ldC9B +ZGRUcnVzdFVUTlNHQ0NBLmNydDARBglghkgBhvhCAQEEBAMCAgQwDQYJKoZIhvcN +AQEFBQADggEBAK2zEzs+jcIrVK9oDkdDZNvhuBYTdCfpxfFs+OAujW0bIfJAy232 +euVsnJm6u/+OrqKudD2tad2BbejLLXhMZViaCmK7D9nrXHx4te5EP8rL19SUVqLY +1pTnv5dhNgEgvA7n5lIzDSYs7yRLsr7HJsYPr6SeYSuZizyX1SNz7ooJ32/F3X98 +RB0Mlc/E0OyOrkQ9/y5IrnpnaSora8CnUrV5XNOg+kyCz9edCyx4D5wXYcwZPVWz +8aDqquESrezPyjtfi4WRO4s/VD3HLZvOxzMrWAVYCDG9FxaOhF0QGuuG1F7F3GKV +v6prNyCl016kRl2j1UT+a7gLd8fA25A4C9E= +-----END CERTIFICATE----- + 2 s:/C=SE/O=AddTrust AB/OU=AddTrust External TTP Network/CN=AddTrust External CA Root + i:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC +-----BEGIN CERTIFICATE----- +MIIEZjCCA06gAwIBAgIQUSYKkxzif5zDpV954HKugjANBgkqhkiG9w0BAQUFADCB +kzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2Ug +Q2l0eTEeMBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExho +dHRwOi8vd3d3LnVzZXJ0cnVzdC5jb20xGzAZBgNVBAMTElVUTiAtIERBVEFDb3Jw +IFNHQzAeFw0wNTA2MDcwODA5MTBaFw0xOTA2MjQxOTA2MzBaMG8xCzAJBgNVBAYT +AlNFMRQwEgYDVQQKEwtBZGRUcnVzdCBBQjEmMCQGA1UECxMdQWRkVHJ1c3QgRXh0 +ZXJuYWwgVFRQIE5ldHdvcmsxIjAgBgNVBAMTGUFkZFRydXN0IEV4dGVybmFsIENB +IFJvb3QwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQC39xoz5vIABC05 +4E5b7R+8bA/Ntfojts7emxEzl6QpTH2Tn71KvJPtAxrjj8/lbVBa1pcplFqAsEl6 +2y6V/bjKvzc4LR4+kUGtcFbH8E8/6DKedMrIkFTpxl8PeJ2aQDwOrGGqXhSPnoeh +alDc15pOrwWzpnGUnHGzUGAKxxOdOAeGAqjpqGkmGJCrTLBPI6s6T4TY386f4Wlv +u9dC12tE5Met7m1BX3JacQg3s3llpFmglDf3AC8NwpJy2tA4ctsUqEXEXSp9t7TW +xO6szRNEt8kr3UMAJfphuWlqWCMRt6czj1Z1WfXNKddGtworZbbTQm8Vsrh7++/p +XVPVNFonAgMBAAGjgdgwgdUwHwYDVR0jBBgwFoAUUzLRs89/+uDxoF2FTpLSnkUd +tE8wHQYDVR0OBBYEFK29mHo0tCb3+sQmVO8DveAky1QaMA4GA1UdDwEB/wQEAwIB +BjAPBgNVHRMBAf8EBTADAQH/MBEGCWCGSAGG+EIBAQQEAwIBAjAgBgNVHSUEGTAX +BgorBgEEAYI3CgMDBglghkgBhvhCBAEwPQYDVR0fBDYwNDAyoDCgLoYsaHR0cDov +L2NybC51c2VydHJ1c3QuY29tL1VUTi1EQVRBQ29ycFNHQy5jcmwwDQYJKoZIhvcN +AQEFBQADggEBAMbuUxdoFLJRIh6QWA2U/b3xcOWGLcM2MY9USEbnLQg3vGwKYOEO +rVE04BKT6b64q7gmtOmWPSiPrmQH/uAB7MXjkesYoPF1ftsK5p+R26+udd8jkWjd +FwBaS/9kbHDrARrQkNnHptZt9hPk/7XJ0h4qy7ElQyZ42TCbTg0evmnv3+r+LbPM ++bDdtRTKkdSytaX7ARmjR3mfnYyVhzT4HziS2jamEfpr62vp3EV4FTkG101B5CHI +3C+H0be/SGB1pWLLJN47YaApIKa+xWycxOkKaSLvkTr6Jq/RW0GnOuL4OAdCq8Fb ++M5tug8EPzI0rNwEKNdwMBQmBsTkm5jVz3g= +-----END CERTIFICATE----- + 3 s:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC + i:/C=US/ST=UT/L=Salt Lake City/O=The USERTRUST Network/OU=http://www.usertrust.com/CN=UTN - DATACorp SGC +-----BEGIN CERTIFICATE----- +MIIEXjCCA0agAwIBAgIQRL4Mi1AAIbQR0ypoBqmtaTANBgkqhkiG9w0BAQUFADCB +kzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAlVUMRcwFQYDVQQHEw5TYWx0IExha2Ug +Q2l0eTEeMBwGA1UEChMVVGhlIFVTRVJUUlVTVCBOZXR3b3JrMSEwHwYDVQQLExho +dHRwOi8vd3d3LnVzZXJ0cnVzdC5jb20xGzAZBgNVBAMTElVUTiAtIERBVEFDb3Jw +IFNHQzAeFw05OTA2MjQxODU3MjFaFw0xOTA2MjQxOTA2MzBaMIGTMQswCQYDVQQG +EwJVUzELMAkGA1UECBMCVVQxFzAVBgNVBAcTDlNhbHQgTGFrZSBDaXR5MR4wHAYD +VQQKExVUaGUgVVNFUlRSVVNUIE5ldHdvcmsxITAfBgNVBAsTGGh0dHA6Ly93d3cu +dXNlcnRydXN0LmNvbTEbMBkGA1UEAxMSVVROIC0gREFUQUNvcnAgU0dDMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA3+5YEKIrblXEjr8uRgnn4AgPLit6 +E5Qbvfa2gI5lBZMAHryv4g+OGQ0SR+ysraP6LnD43m77VkIVni5c7yPeIbkFdicZ +D0/Ww5y0vpQZY/KmEQrrU0icvvIpOxboGqBMpsn0GFlowHDyUwDAXlCCpVZvNvlK +4ESGoE1O1kduSUrLZ9emxAW5jh70/P/N5zbgnAVssjMiFdC04MwXwLLA9P4yPykq +lXvY8qdOD1R8oQ2AswkDwf9c3V6aPryuvEeKaq5xyh+xKrhfQgUL7EYw0XILyulW +bfXv33i+Ybqypa4ETLyorGkVl73v67SMvzX41MPRKA5cOp9wGDMgd8SirwIDAQAB +o4GrMIGoMAsGA1UdDwQEAwIBxjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBRT +MtGzz3/64PGgXYVOktKeRR20TzA9BgNVHR8ENjA0MDKgMKAuhixodHRwOi8vY3Js +LnVzZXJ0cnVzdC5jb20vVVROLURBVEFDb3JwU0dDLmNybDAqBgNVHSUEIzAhBggr +BgEFBQcDAQYKKwYBBAGCNwoDAwYJYIZIAYb4QgQBMA0GCSqGSIb3DQEBBQUAA4IB +AQAnNZcAiosovcYzMB4p/OL31ZjUQLtgyr+rFywJNn9Q+kHcrpY6CiM+iVnJowft +Gzet/Hy+UUla3joKVAgWRcKZsYfNjGjgaQPpxE6YsjuMFrMOoAyYUJuTqXAJyCyj +j98C5OBxOvG0I3KgqgHf35g+FFCgMSa9KOlaMCZ1+XtgHI3zzVAmbQQnmt/VDUVH +KWss5nbZqSl9Mt3JNjy9rjXxEZ4du5A/EkdOjtd+D2JzHVImOBwYSf0wdJrE5SIv +2MCN7ZF6TACPcn9d2t0bi0Vr591pl6jFVkwPDPafepE39peC4N1xaf92P2BNPM/3 +mfnGV/TJVTl4uix5yaaIK/QI +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/ssl_cert.pem b/lib/future/backports/test/ssl_cert.pem new file mode 100644 index 00000000..47a7d7e3 --- /dev/null +++ b/lib/future/backports/test/ssl_cert.pem @@ -0,0 +1,15 @@ +-----BEGIN CERTIFICATE----- +MIICVDCCAb2gAwIBAgIJANfHOBkZr8JOMA0GCSqGSIb3DQEBBQUAMF8xCzAJBgNV +BAYTAlhZMRcwFQYDVQQHEw5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9u +IFNvZnR3YXJlIEZvdW5kYXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDAeFw0xMDEw +MDgyMzAxNTZaFw0yMDEwMDUyMzAxNTZaMF8xCzAJBgNVBAYTAlhZMRcwFQYDVQQH +Ew5DYXN0bGUgQW50aHJheDEjMCEGA1UEChMaUHl0aG9uIFNvZnR3YXJlIEZvdW5k +YXRpb24xEjAQBgNVBAMTCWxvY2FsaG9zdDCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEA21vT5isq7F68amYuuNpSFlKDPrMUCa4YWYqZRt2OZ+/3NKaZ2xAiSwr7 +6MrQF70t5nLbSPpqE5+5VrS58SY+g/sXLiFd6AplH1wJZwh78DofbFYXUggktFMt +pTyiX8jtP66bkcPkDADA089RI1TQR6Ca+n7HFa7c1fabVV6i3zkCAwEAAaMYMBYw +FAYDVR0RBA0wC4IJbG9jYWxob3N0MA0GCSqGSIb3DQEBBQUAA4GBAHPctQBEQ4wd +BJ6+JcpIraopLn8BGhbjNWj40mmRqWB/NAWF6M5ne7KpGAu7tLeG4hb1zLaldK8G +lxy2GPSRF6LFS48dpEj2HbMv2nvv6xxalDMJ9+DicWgAKTQ6bcX2j3GUkCR0g/T1 +CRlNBAAlvhKzO7Clpf9l0YKBEfraJByX +-----END CERTIFICATE----- diff --git a/lib/future/backports/test/ssl_key.passwd.pem b/lib/future/backports/test/ssl_key.passwd.pem new file mode 100644 index 00000000..2524672e --- /dev/null +++ b/lib/future/backports/test/ssl_key.passwd.pem @@ -0,0 +1,18 @@ +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,1A8D9D2A02EC698A + +kJYbfZ8L0sfe9Oty3gw0aloNnY5E8fegRfQLZlNoxTl6jNt0nIwI8kDJ36CZgR9c +u3FDJm/KqrfUoz8vW+qEnWhSG7QPX2wWGPHd4K94Yz/FgrRzZ0DoK7XxXq9gOtVA +AVGQhnz32p+6WhfGsCr9ArXEwRZrTk/FvzEPaU5fHcoSkrNVAGX8IpSVkSDwEDQr +Gv17+cfk99UV1OCza6yKHoFkTtrC+PZU71LomBabivS2Oc4B9hYuSR2hF01wTHP+ +YlWNagZOOVtNz4oKK9x9eNQpmfQXQvPPTfusexKIbKfZrMvJoxcm1gfcZ0H/wK6P +6wmXSG35qMOOztCZNtperjs1wzEBXznyK8QmLcAJBjkfarABJX9vBEzZV0OUKhy+ +noORFwHTllphbmydLhu6ehLUZMHPhzAS5UN7srtpSN81eerDMy0RMUAwA7/PofX1 +94Me85Q8jP0PC9ETdsJcPqLzAPETEYu0ELewKRcrdyWi+tlLFrpE5KT/s5ecbl9l +7B61U4Kfd1PIXc/siINhU3A3bYK+845YyUArUOnKf1kEox7p1RpD7yFqVT04lRTo +cibNKATBusXSuBrp2G6GNuhWEOSafWCKJQAzgCYIp6ZTV2khhMUGppc/2H3CF6cO +zX0KtlPVZC7hLkB6HT8SxYUwF1zqWY7+/XPPdc37MeEZ87Q3UuZwqORLY+Z0hpgt +L5JXBCoklZhCAaN2GqwFLXtGiRSRFGY7xXIhbDTlE65Wv1WGGgDLMKGE1gOz3yAo +2jjG1+yAHJUdE69XTFHSqSkvaloA1W03LdMXZ9VuQJ/ySXCie6ABAQ== +-----END RSA PRIVATE KEY----- diff --git a/lib/future/backports/test/ssl_key.pem b/lib/future/backports/test/ssl_key.pem new file mode 100644 index 00000000..3fd3bbd5 --- /dev/null +++ b/lib/future/backports/test/ssl_key.pem @@ -0,0 +1,16 @@ +-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBANtb0+YrKuxevGpm +LrjaUhZSgz6zFAmuGFmKmUbdjmfv9zSmmdsQIksK++jK0Be9LeZy20j6ahOfuVa0 +ufEmPoP7Fy4hXegKZR9cCWcIe/A6H2xWF1IIJLRTLaU8ol/I7T+um5HD5AwAwNPP +USNU0Eegmvp+xxWu3NX2m1Veot85AgMBAAECgYA3ZdZ673X0oexFlq7AAmrutkHt +CL7LvwrpOiaBjhyTxTeSNWzvtQBkIU8DOI0bIazA4UreAFffwtvEuPmonDb3F+Iq +SMAu42XcGyVZEl+gHlTPU9XRX7nTOXVt+MlRRRxL6t9GkGfUAXI3XxJDXW3c0vBK +UL9xqD8cORXOfE06rQJBAP8mEX1ERkR64Ptsoe4281vjTlNfIbs7NMPkUnrn9N/Y +BLhjNIfQ3HFZG8BTMLfX7kCS9D593DW5tV4Z9BP/c6cCQQDcFzCcVArNh2JSywOQ +ZfTfRbJg/Z5Lt9Fkngv1meeGNPgIMLN8Sg679pAOOWmzdMO3V706rNPzSVMME7E5 +oPIfAkEA8pDddarP5tCvTTgUpmTFbakm0KoTZm2+FzHcnA4jRh+XNTjTOv98Y6Ik +eO5d1ZnKXseWvkZncQgxfdnMqqpj5wJAcNq/RVne1DbYlwWchT2Si65MYmmJ8t+F +0mcsULqjOnEMwf5e+ptq5LzwbyrHZYq5FNk7ocufPv/ZQrcSSC+cFwJBAKvOJByS +x56qyGeZLOQlWS2JS3KJo59XuLFGqcbgN9Om9xFa41Yb4N9NvplFivsvZdw3m1Q/ +SPIXQuT8RMPDVNQ= +-----END PRIVATE KEY----- diff --git a/lib/future/backports/test/ssl_servers.py b/lib/future/backports/test/ssl_servers.py new file mode 100644 index 00000000..87a3fb85 --- /dev/null +++ b/lib/future/backports/test/ssl_servers.py @@ -0,0 +1,207 @@ +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import filter, str +from future import utils +import os +import sys +import ssl +import pprint +import socket +from future.backports.urllib import parse as urllib_parse +from future.backports.http.server import (HTTPServer as _HTTPServer, + SimpleHTTPRequestHandler, BaseHTTPRequestHandler) +from future.backports.test import support +threading = support.import_module("threading") + +here = os.path.dirname(__file__) + +HOST = support.HOST +CERTFILE = os.path.join(here, 'keycert.pem') + +# This one's based on HTTPServer, which is based on SocketServer + +class HTTPSServer(_HTTPServer): + + def __init__(self, server_address, handler_class, context): + _HTTPServer.__init__(self, server_address, handler_class) + self.context = context + + def __str__(self): + return ('<%s %s:%s>' % + (self.__class__.__name__, + self.server_name, + self.server_port)) + + def get_request(self): + # override this to wrap socket with SSL + try: + sock, addr = self.socket.accept() + sslconn = self.context.wrap_socket(sock, server_side=True) + except socket.error as e: + # socket errors are silenced by the caller, print them here + if support.verbose: + sys.stderr.write("Got an error:\n%s\n" % e) + raise + return sslconn, addr + +class RootedHTTPRequestHandler(SimpleHTTPRequestHandler): + # need to override translate_path to get a known root, + # instead of using os.curdir, since the test could be + # run from anywhere + + server_version = "TestHTTPS/1.0" + root = here + # Avoid hanging when a request gets interrupted by the client + timeout = 5 + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = urllib.parse.urlparse(path)[2] + path = os.path.normpath(urllib.parse.unquote(path)) + words = path.split('/') + words = filter(None, words) + path = self.root + for word in words: + drive, word = os.path.splitdrive(word) + head, word = os.path.split(word) + path = os.path.join(path, word) + return path + + def log_message(self, format, *args): + # we override this to suppress logging unless "verbose" + if support.verbose: + sys.stdout.write(" server (%s:%d %s):\n [%s] %s\n" % + (self.server.server_address, + self.server.server_port, + self.request.cipher(), + self.log_date_time_string(), + format%args)) + + +class StatsRequestHandler(BaseHTTPRequestHandler): + """Example HTTP request handler which returns SSL statistics on GET + requests. + """ + + server_version = "StatsHTTPS/1.0" + + def do_GET(self, send_body=True): + """Serve a GET request.""" + sock = self.rfile.raw._sock + context = sock.context + stats = { + 'session_cache': context.session_stats(), + 'cipher': sock.cipher(), + 'compression': sock.compression(), + } + body = pprint.pformat(stats) + body = body.encode('utf-8') + self.send_response(200) + self.send_header("Content-type", "text/plain; charset=utf-8") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + if send_body: + self.wfile.write(body) + + def do_HEAD(self): + """Serve a HEAD request.""" + self.do_GET(send_body=False) + + def log_request(self, format, *args): + if support.verbose: + BaseHTTPRequestHandler.log_request(self, format, *args) + + +class HTTPSServerThread(threading.Thread): + + def __init__(self, context, host=HOST, handler_class=None): + self.flag = None + self.server = HTTPSServer((host, 0), + handler_class or RootedHTTPRequestHandler, + context) + self.port = self.server.server_port + threading.Thread.__init__(self) + self.daemon = True + + def __str__(self): + return "<%s %s>" % (self.__class__.__name__, self.server) + + def start(self, flag=None): + self.flag = flag + threading.Thread.start(self) + + def run(self): + if self.flag: + self.flag.set() + try: + self.server.serve_forever(0.05) + finally: + self.server.server_close() + + def stop(self): + self.server.shutdown() + + +def make_https_server(case, certfile=CERTFILE, host=HOST, handler_class=None): + # we assume the certfile contains both private key and certificate + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.load_cert_chain(certfile) + server = HTTPSServerThread(context, host, handler_class) + flag = threading.Event() + server.start(flag) + flag.wait() + def cleanup(): + if support.verbose: + sys.stdout.write('stopping HTTPS server\n') + server.stop() + if support.verbose: + sys.stdout.write('joining HTTPS thread\n') + server.join() + case.addCleanup(cleanup) + return server + + +if __name__ == "__main__": + import argparse + parser = argparse.ArgumentParser( + description='Run a test HTTPS server. ' + 'By default, the current directory is served.') + parser.add_argument('-p', '--port', type=int, default=4433, + help='port to listen on (default: %(default)s)') + parser.add_argument('-q', '--quiet', dest='verbose', default=True, + action='store_false', help='be less verbose') + parser.add_argument('-s', '--stats', dest='use_stats_handler', default=False, + action='store_true', help='always return stats page') + parser.add_argument('--curve-name', dest='curve_name', type=str, + action='store', + help='curve name for EC-based Diffie-Hellman') + parser.add_argument('--dh', dest='dh_file', type=str, action='store', + help='PEM file containing DH parameters') + args = parser.parse_args() + + support.verbose = args.verbose + if args.use_stats_handler: + handler_class = StatsRequestHandler + else: + handler_class = RootedHTTPRequestHandler + if utils.PY2: + handler_class.root = os.getcwdu() + else: + handler_class.root = os.getcwd() + context = ssl.SSLContext(ssl.PROTOCOL_TLSv1) + context.load_cert_chain(CERTFILE) + if args.curve_name: + context.set_ecdh_curve(args.curve_name) + if args.dh_file: + context.load_dh_params(args.dh_file) + + server = HTTPSServer(("", args.port), handler_class, context) + if args.verbose: + print("Listening on https://localhost:{0.port}".format(args)) + server.serve_forever(0.1) diff --git a/lib/future/backports/test/support.py b/lib/future/backports/test/support.py new file mode 100644 index 00000000..1999e208 --- /dev/null +++ b/lib/future/backports/test/support.py @@ -0,0 +1,2048 @@ +# -*- coding: utf-8 -*- +"""Supporting definitions for the Python regression tests. + +Backported for python-future from Python 3.3 test/support.py. +""" + +from __future__ import (absolute_import, division, + print_function, unicode_literals) +from future import utils +from future.builtins import str, range, open, int, map, list + +import contextlib +import errno +import functools +import gc +import socket +import sys +import os +import platform +import shutil +import warnings +import unittest +# For Python 2.6 compatibility: +if not hasattr(unittest, 'skip'): + import unittest2 as unittest + +import importlib +# import collections.abc # not present on Py2.7 +import re +import subprocess +import imp +import time +try: + import sysconfig +except ImportError: + # sysconfig is not available on Python 2.6. Try using distutils.sysconfig instead: + from distutils import sysconfig +import fnmatch +import logging.handlers +import struct +import tempfile + +try: + if utils.PY3: + import _thread, threading + else: + import thread as _thread, threading +except ImportError: + _thread = None + threading = None +try: + import multiprocessing.process +except ImportError: + multiprocessing = None + +try: + import zlib +except ImportError: + zlib = None + +try: + import gzip +except ImportError: + gzip = None + +try: + import bz2 +except ImportError: + bz2 = None + +try: + import lzma +except ImportError: + lzma = None + +__all__ = [ + "Error", "TestFailed", "ResourceDenied", "import_module", "verbose", + "use_resources", "max_memuse", "record_original_stdout", + "get_original_stdout", "unload", "unlink", "rmtree", "forget", + "is_resource_enabled", "requires", "requires_freebsd_version", + "requires_linux_version", "requires_mac_ver", "find_unused_port", + "bind_port", "IPV6_ENABLED", "is_jython", "TESTFN", "HOST", "SAVEDCWD", + "temp_cwd", "findfile", "create_empty_file", "sortdict", + "check_syntax_error", "open_urlresource", "check_warnings", "CleanImport", + "EnvironmentVarGuard", "TransientResource", "captured_stdout", + "captured_stdin", "captured_stderr", "time_out", "socket_peer_reset", + "ioerror_peer_reset", "run_with_locale", 'temp_umask', + "transient_internet", "set_memlimit", "bigmemtest", "bigaddrspacetest", + "BasicTestRunner", "run_unittest", "run_doctest", "threading_setup", + "threading_cleanup", "reap_children", "cpython_only", "check_impl_detail", + "get_attribute", "swap_item", "swap_attr", "requires_IEEE_754", + "TestHandler", "Matcher", "can_symlink", "skip_unless_symlink", + "skip_unless_xattr", "import_fresh_module", "requires_zlib", + "PIPE_MAX_SIZE", "failfast", "anticipate_failure", "run_with_tz", + "requires_gzip", "requires_bz2", "requires_lzma", "suppress_crash_popup", + ] + +class Error(Exception): + """Base class for regression test exceptions.""" + +class TestFailed(Error): + """Test failed.""" + +class ResourceDenied(unittest.SkipTest): + """Test skipped because it requested a disallowed resource. + + This is raised when a test calls requires() for a resource that + has not be enabled. It is used to distinguish between expected + and unexpected skips. + """ + +@contextlib.contextmanager +def _ignore_deprecated_imports(ignore=True): + """Context manager to suppress package and module deprecation + warnings when importing them. + + If ignore is False, this context manager has no effect.""" + if ignore: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", ".+ (module|package)", + DeprecationWarning) + yield + else: + yield + + +def import_module(name, deprecated=False): + """Import and return the module to be tested, raising SkipTest if + it is not available. + + If deprecated is True, any module or package deprecation messages + will be suppressed.""" + with _ignore_deprecated_imports(deprecated): + try: + return importlib.import_module(name) + except ImportError as msg: + raise unittest.SkipTest(str(msg)) + + +def _save_and_remove_module(name, orig_modules): + """Helper function to save and remove a module from sys.modules + + Raise ImportError if the module can't be imported. + """ + # try to import the module and raise an error if it can't be imported + if name not in sys.modules: + __import__(name) + del sys.modules[name] + for modname in list(sys.modules): + if modname == name or modname.startswith(name + '.'): + orig_modules[modname] = sys.modules[modname] + del sys.modules[modname] + +def _save_and_block_module(name, orig_modules): + """Helper function to save and block a module in sys.modules + + Return True if the module was in sys.modules, False otherwise. + """ + saved = True + try: + orig_modules[name] = sys.modules[name] + except KeyError: + saved = False + sys.modules[name] = None + return saved + + +def anticipate_failure(condition): + """Decorator to mark a test that is known to be broken in some cases + + Any use of this decorator should have a comment identifying the + associated tracker issue. + """ + if condition: + return unittest.expectedFailure + return lambda f: f + + +def import_fresh_module(name, fresh=(), blocked=(), deprecated=False): + """Import and return a module, deliberately bypassing sys.modules. + This function imports and returns a fresh copy of the named Python module + by removing the named module from sys.modules before doing the import. + Note that unlike reload, the original module is not affected by + this operation. + + *fresh* is an iterable of additional module names that are also removed + from the sys.modules cache before doing the import. + + *blocked* is an iterable of module names that are replaced with None + in the module cache during the import to ensure that attempts to import + them raise ImportError. + + The named module and any modules named in the *fresh* and *blocked* + parameters are saved before starting the import and then reinserted into + sys.modules when the fresh import is complete. + + Module and package deprecation messages are suppressed during this import + if *deprecated* is True. + + This function will raise ImportError if the named module cannot be + imported. + + If deprecated is True, any module or package deprecation messages + will be suppressed. + """ + # NOTE: test_heapq, test_json and test_warnings include extra sanity checks + # to make sure that this utility function is working as expected + with _ignore_deprecated_imports(deprecated): + # Keep track of modules saved for later restoration as well + # as those which just need a blocking entry removed + orig_modules = {} + names_to_remove = [] + _save_and_remove_module(name, orig_modules) + try: + for fresh_name in fresh: + _save_and_remove_module(fresh_name, orig_modules) + for blocked_name in blocked: + if not _save_and_block_module(blocked_name, orig_modules): + names_to_remove.append(blocked_name) + fresh_module = importlib.import_module(name) + except ImportError: + fresh_module = None + finally: + for orig_name, module in orig_modules.items(): + sys.modules[orig_name] = module + for name_to_remove in names_to_remove: + del sys.modules[name_to_remove] + return fresh_module + + +def get_attribute(obj, name): + """Get an attribute, raising SkipTest if AttributeError is raised.""" + try: + attribute = getattr(obj, name) + except AttributeError: + raise unittest.SkipTest("object %r has no attribute %r" % (obj, name)) + else: + return attribute + +verbose = 1 # Flag set to 0 by regrtest.py +use_resources = None # Flag set to [] by regrtest.py +max_memuse = 0 # Disable bigmem tests (they will still be run with + # small sizes, to make sure they work.) +real_max_memuse = 0 +failfast = False +match_tests = None + +# _original_stdout is meant to hold stdout at the time regrtest began. +# This may be "the real" stdout, or IDLE's emulation of stdout, or whatever. +# The point is to have some flavor of stdout the user can actually see. +_original_stdout = None +def record_original_stdout(stdout): + global _original_stdout + _original_stdout = stdout + +def get_original_stdout(): + return _original_stdout or sys.stdout + +def unload(name): + try: + del sys.modules[name] + except KeyError: + pass + +if sys.platform.startswith("win"): + def _waitfor(func, pathname, waitall=False): + # Perform the operation + func(pathname) + # Now setup the wait loop + if waitall: + dirname = pathname + else: + dirname, name = os.path.split(pathname) + dirname = dirname or '.' + # Check for `pathname` to be removed from the filesystem. + # The exponential backoff of the timeout amounts to a total + # of ~1 second after which the deletion is probably an error + # anyway. + # Testing on a i7@4.3GHz shows that usually only 1 iteration is + # required when contention occurs. + timeout = 0.001 + while timeout < 1.0: + # Note we are only testing for the existence of the file(s) in + # the contents of the directory regardless of any security or + # access rights. If we have made it this far, we have sufficient + # permissions to do that much using Python's equivalent of the + # Windows API FindFirstFile. + # Other Windows APIs can fail or give incorrect results when + # dealing with files that are pending deletion. + L = os.listdir(dirname) + if not (L if waitall else name in L): + return + # Increase the timeout and try again + time.sleep(timeout) + timeout *= 2 + warnings.warn('tests may fail, delete still pending for ' + pathname, + RuntimeWarning, stacklevel=4) + + def _unlink(filename): + _waitfor(os.unlink, filename) + + def _rmdir(dirname): + _waitfor(os.rmdir, dirname) + + def _rmtree(path): + def _rmtree_inner(path): + for name in os.listdir(path): + fullname = os.path.join(path, name) + if os.path.isdir(fullname): + _waitfor(_rmtree_inner, fullname, waitall=True) + os.rmdir(fullname) + else: + os.unlink(fullname) + _waitfor(_rmtree_inner, path, waitall=True) + _waitfor(os.rmdir, path) +else: + _unlink = os.unlink + _rmdir = os.rmdir + _rmtree = shutil.rmtree + +def unlink(filename): + try: + _unlink(filename) + except OSError as error: + # The filename need not exist. + if error.errno not in (errno.ENOENT, errno.ENOTDIR): + raise + +def rmdir(dirname): + try: + _rmdir(dirname) + except OSError as error: + # The directory need not exist. + if error.errno != errno.ENOENT: + raise + +def rmtree(path): + try: + _rmtree(path) + except OSError as error: + if error.errno != errno.ENOENT: + raise + +def make_legacy_pyc(source): + """Move a PEP 3147 pyc/pyo file to its legacy pyc/pyo location. + + The choice of .pyc or .pyo extension is done based on the __debug__ flag + value. + + :param source: The file system path to the source file. The source file + does not need to exist, however the PEP 3147 pyc file must exist. + :return: The file system path to the legacy pyc file. + """ + pyc_file = imp.cache_from_source(source) + up_one = os.path.dirname(os.path.abspath(source)) + legacy_pyc = os.path.join(up_one, source + ('c' if __debug__ else 'o')) + os.rename(pyc_file, legacy_pyc) + return legacy_pyc + +def forget(modname): + """'Forget' a module was ever imported. + + This removes the module from sys.modules and deletes any PEP 3147 or + legacy .pyc and .pyo files. + """ + unload(modname) + for dirname in sys.path: + source = os.path.join(dirname, modname + '.py') + # It doesn't matter if they exist or not, unlink all possible + # combinations of PEP 3147 and legacy pyc and pyo files. + unlink(source + 'c') + unlink(source + 'o') + unlink(imp.cache_from_source(source, debug_override=True)) + unlink(imp.cache_from_source(source, debug_override=False)) + +# On some platforms, should not run gui test even if it is allowed +# in `use_resources'. +if sys.platform.startswith('win'): + import ctypes + import ctypes.wintypes + def _is_gui_available(): + UOI_FLAGS = 1 + WSF_VISIBLE = 0x0001 + class USEROBJECTFLAGS(ctypes.Structure): + _fields_ = [("fInherit", ctypes.wintypes.BOOL), + ("fReserved", ctypes.wintypes.BOOL), + ("dwFlags", ctypes.wintypes.DWORD)] + dll = ctypes.windll.user32 + h = dll.GetProcessWindowStation() + if not h: + raise ctypes.WinError() + uof = USEROBJECTFLAGS() + needed = ctypes.wintypes.DWORD() + res = dll.GetUserObjectInformationW(h, + UOI_FLAGS, + ctypes.byref(uof), + ctypes.sizeof(uof), + ctypes.byref(needed)) + if not res: + raise ctypes.WinError() + return bool(uof.dwFlags & WSF_VISIBLE) +else: + def _is_gui_available(): + return True + +def is_resource_enabled(resource): + """Test whether a resource is enabled. Known resources are set by + regrtest.py.""" + return use_resources is not None and resource in use_resources + +def requires(resource, msg=None): + """Raise ResourceDenied if the specified resource is not available. + + If the caller's module is __main__ then automatically return True. The + possibility of False being returned occurs when regrtest.py is + executing. + """ + if resource == 'gui' and not _is_gui_available(): + raise unittest.SkipTest("Cannot use the 'gui' resource") + # see if the caller's module is __main__ - if so, treat as if + # the resource was set + if sys._getframe(1).f_globals.get("__name__") == "__main__": + return + if not is_resource_enabled(resource): + if msg is None: + msg = "Use of the %r resource not enabled" % resource + raise ResourceDenied(msg) + +def _requires_unix_version(sysname, min_version): + """Decorator raising SkipTest if the OS is `sysname` and the version is less + than `min_version`. + + For example, @_requires_unix_version('FreeBSD', (7, 2)) raises SkipTest if + the FreeBSD version is less than 7.2. + """ + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kw): + if platform.system() == sysname: + version_txt = platform.release().split('-', 1)[0] + try: + version = tuple(map(int, version_txt.split('.'))) + except ValueError: + pass + else: + if version < min_version: + min_version_txt = '.'.join(map(str, min_version)) + raise unittest.SkipTest( + "%s version %s or higher required, not %s" + % (sysname, min_version_txt, version_txt)) + return func(*args, **kw) + wrapper.min_version = min_version + return wrapper + return decorator + +def requires_freebsd_version(*min_version): + """Decorator raising SkipTest if the OS is FreeBSD and the FreeBSD version is + less than `min_version`. + + For example, @requires_freebsd_version(7, 2) raises SkipTest if the FreeBSD + version is less than 7.2. + """ + return _requires_unix_version('FreeBSD', min_version) + +def requires_linux_version(*min_version): + """Decorator raising SkipTest if the OS is Linux and the Linux version is + less than `min_version`. + + For example, @requires_linux_version(2, 6, 32) raises SkipTest if the Linux + version is less than 2.6.32. + """ + return _requires_unix_version('Linux', min_version) + +def requires_mac_ver(*min_version): + """Decorator raising SkipTest if the OS is Mac OS X and the OS X + version if less than min_version. + + For example, @requires_mac_ver(10, 5) raises SkipTest if the OS X version + is lesser than 10.5. + """ + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kw): + if sys.platform == 'darwin': + version_txt = platform.mac_ver()[0] + try: + version = tuple(map(int, version_txt.split('.'))) + except ValueError: + pass + else: + if version < min_version: + min_version_txt = '.'.join(map(str, min_version)) + raise unittest.SkipTest( + "Mac OS X %s or higher required, not %s" + % (min_version_txt, version_txt)) + return func(*args, **kw) + wrapper.min_version = min_version + return wrapper + return decorator + +# Don't use "localhost", since resolving it uses the DNS under recent +# Windows versions (see issue #18792). +HOST = "127.0.0.1" +HOSTv6 = "::1" + + +def find_unused_port(family=socket.AF_INET, socktype=socket.SOCK_STREAM): + """Returns an unused port that should be suitable for binding. This is + achieved by creating a temporary socket with the same family and type as + the 'sock' parameter (default is AF_INET, SOCK_STREAM), and binding it to + the specified host address (defaults to 0.0.0.0) with the port set to 0, + eliciting an unused ephemeral port from the OS. The temporary socket is + then closed and deleted, and the ephemeral port is returned. + + Either this method or bind_port() should be used for any tests where a + server socket needs to be bound to a particular port for the duration of + the test. Which one to use depends on whether the calling code is creating + a python socket, or if an unused port needs to be provided in a constructor + or passed to an external program (i.e. the -accept argument to openssl's + s_server mode). Always prefer bind_port() over find_unused_port() where + possible. Hard coded ports should *NEVER* be used. As soon as a server + socket is bound to a hard coded port, the ability to run multiple instances + of the test simultaneously on the same host is compromised, which makes the + test a ticking time bomb in a buildbot environment. On Unix buildbots, this + may simply manifest as a failed test, which can be recovered from without + intervention in most cases, but on Windows, the entire python process can + completely and utterly wedge, requiring someone to log in to the buildbot + and manually kill the affected process. + + (This is easy to reproduce on Windows, unfortunately, and can be traced to + the SO_REUSEADDR socket option having different semantics on Windows versus + Unix/Linux. On Unix, you can't have two AF_INET SOCK_STREAM sockets bind, + listen and then accept connections on identical host/ports. An EADDRINUSE + socket.error will be raised at some point (depending on the platform and + the order bind and listen were called on each socket). + + However, on Windows, if SO_REUSEADDR is set on the sockets, no EADDRINUSE + will ever be raised when attempting to bind two identical host/ports. When + accept() is called on each socket, the second caller's process will steal + the port from the first caller, leaving them both in an awkwardly wedged + state where they'll no longer respond to any signals or graceful kills, and + must be forcibly killed via OpenProcess()/TerminateProcess(). + + The solution on Windows is to use the SO_EXCLUSIVEADDRUSE socket option + instead of SO_REUSEADDR, which effectively affords the same semantics as + SO_REUSEADDR on Unix. Given the propensity of Unix developers in the Open + Source world compared to Windows ones, this is a common mistake. A quick + look over OpenSSL's 0.9.8g source shows that they use SO_REUSEADDR when + openssl.exe is called with the 's_server' option, for example. See + http://bugs.python.org/issue2550 for more info. The following site also + has a very thorough description about the implications of both REUSEADDR + and EXCLUSIVEADDRUSE on Windows: + http://msdn2.microsoft.com/en-us/library/ms740621(VS.85).aspx) + + XXX: although this approach is a vast improvement on previous attempts to + elicit unused ports, it rests heavily on the assumption that the ephemeral + port returned to us by the OS won't immediately be dished back out to some + other process when we close and delete our temporary socket but before our + calling code has a chance to bind the returned port. We can deal with this + issue if/when we come across it. + """ + + tempsock = socket.socket(family, socktype) + port = bind_port(tempsock) + tempsock.close() + del tempsock + return port + +def bind_port(sock, host=HOST): + """Bind the socket to a free port and return the port number. Relies on + ephemeral ports in order to ensure we are using an unbound port. This is + important as many tests may be running simultaneously, especially in a + buildbot environment. This method raises an exception if the sock.family + is AF_INET and sock.type is SOCK_STREAM, *and* the socket has SO_REUSEADDR + or SO_REUSEPORT set on it. Tests should *never* set these socket options + for TCP/IP sockets. The only case for setting these options is testing + multicasting via multiple UDP sockets. + + Additionally, if the SO_EXCLUSIVEADDRUSE socket option is available (i.e. + on Windows), it will be set on the socket. This will prevent anyone else + from bind()'ing to our host/port for the duration of the test. + """ + + if sock.family == socket.AF_INET and sock.type == socket.SOCK_STREAM: + if hasattr(socket, 'SO_REUSEADDR'): + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR) == 1: + raise TestFailed("tests should never set the SO_REUSEADDR " \ + "socket option on TCP/IP sockets!") + if hasattr(socket, 'SO_REUSEPORT'): + try: + if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 1: + raise TestFailed("tests should never set the SO_REUSEPORT " \ + "socket option on TCP/IP sockets!") + except socket.error: + # Python's socket module was compiled using modern headers + # thus defining SO_REUSEPORT but this process is running + # under an older kernel that does not support SO_REUSEPORT. + pass + if hasattr(socket, 'SO_EXCLUSIVEADDRUSE'): + sock.setsockopt(socket.SOL_SOCKET, socket.SO_EXCLUSIVEADDRUSE, 1) + + sock.bind((host, 0)) + port = sock.getsockname()[1] + return port + +def _is_ipv6_enabled(): + """Check whether IPv6 is enabled on this host.""" + if socket.has_ipv6: + sock = None + try: + sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM) + sock.bind(('::1', 0)) + return True + except (socket.error, socket.gaierror): + pass + finally: + if sock: + sock.close() + return False + +IPV6_ENABLED = _is_ipv6_enabled() + + +# A constant likely larger than the underlying OS pipe buffer size, to +# make writes blocking. +# Windows limit seems to be around 512 B, and many Unix kernels have a +# 64 KiB pipe buffer size or 16 * PAGE_SIZE: take a few megs to be sure. +# (see issue #17835 for a discussion of this number). +PIPE_MAX_SIZE = 4 * 1024 * 1024 + 1 + +# A constant likely larger than the underlying OS socket buffer size, to make +# writes blocking. +# The socket buffer sizes can usually be tuned system-wide (e.g. through sysctl +# on Linux), or on a per-socket basis (SO_SNDBUF/SO_RCVBUF). See issue #18643 +# for a discussion of this number). +SOCK_MAX_SIZE = 16 * 1024 * 1024 + 1 + +# # decorator for skipping tests on non-IEEE 754 platforms +# requires_IEEE_754 = unittest.skipUnless( +# float.__getformat__("double").startswith("IEEE"), +# "test requires IEEE 754 doubles") + +requires_zlib = unittest.skipUnless(zlib, 'requires zlib') + +requires_bz2 = unittest.skipUnless(bz2, 'requires bz2') + +requires_lzma = unittest.skipUnless(lzma, 'requires lzma') + +is_jython = sys.platform.startswith('java') + +# Filename used for testing +if os.name == 'java': + # Jython disallows @ in module names + TESTFN = '$test' +else: + TESTFN = '@test' + +# Disambiguate TESTFN for parallel testing, while letting it remain a valid +# module name. +TESTFN = "{0}_{1}_tmp".format(TESTFN, os.getpid()) + +# # FS_NONASCII: non-ASCII character encodable by os.fsencode(), +# # or None if there is no such character. +# FS_NONASCII = None +# for character in ( +# # First try printable and common characters to have a readable filename. +# # For each character, the encoding list are just example of encodings able +# # to encode the character (the list is not exhaustive). +# +# # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1 +# '\u00E6', +# # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3 +# '\u0130', +# # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257 +# '\u0141', +# # U+03C6 (Greek Small Letter Phi): cp1253 +# '\u03C6', +# # U+041A (Cyrillic Capital Letter Ka): cp1251 +# '\u041A', +# # U+05D0 (Hebrew Letter Alef): Encodable to cp424 +# '\u05D0', +# # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic +# '\u060C', +# # U+062A (Arabic Letter Teh): cp720 +# '\u062A', +# # U+0E01 (Thai Character Ko Kai): cp874 +# '\u0E01', +# +# # Then try more "special" characters. "special" because they may be +# # interpreted or displayed differently depending on the exact locale +# # encoding and the font. +# +# # U+00A0 (No-Break Space) +# '\u00A0', +# # U+20AC (Euro Sign) +# '\u20AC', +# ): +# try: +# os.fsdecode(os.fsencode(character)) +# except UnicodeError: +# pass +# else: +# FS_NONASCII = character +# break +# +# # TESTFN_UNICODE is a non-ascii filename +# TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" +# if sys.platform == 'darwin': +# # In Mac OS X's VFS API file names are, by definition, canonically +# # decomposed Unicode, encoded using UTF-8. See QA1173: +# # http://developer.apple.com/mac/library/qa/qa2001/qa1173.html +# import unicodedata +# TESTFN_UNICODE = unicodedata.normalize('NFD', TESTFN_UNICODE) +# TESTFN_ENCODING = sys.getfilesystemencoding() +# +# # TESTFN_UNENCODABLE is a filename (str type) that should *not* be able to be +# # encoded by the filesystem encoding (in strict mode). It can be None if we +# # cannot generate such filename. +# TESTFN_UNENCODABLE = None +# if os.name in ('nt', 'ce'): +# # skip win32s (0) or Windows 9x/ME (1) +# if sys.getwindowsversion().platform >= 2: +# # Different kinds of characters from various languages to minimize the +# # probability that the whole name is encodable to MBCS (issue #9819) +# TESTFN_UNENCODABLE = TESTFN + "-\u5171\u0141\u2661\u0363\uDC80" +# try: +# TESTFN_UNENCODABLE.encode(TESTFN_ENCODING) +# except UnicodeEncodeError: +# pass +# else: +# print('WARNING: The filename %r CAN be encoded by the filesystem encoding (%s). ' +# 'Unicode filename tests may not be effective' +# % (TESTFN_UNENCODABLE, TESTFN_ENCODING)) +# TESTFN_UNENCODABLE = None +# # Mac OS X denies unencodable filenames (invalid utf-8) +# elif sys.platform != 'darwin': +# try: +# # ascii and utf-8 cannot encode the byte 0xff +# b'\xff'.decode(TESTFN_ENCODING) +# except UnicodeDecodeError: +# # 0xff will be encoded using the surrogate character u+DCFF +# TESTFN_UNENCODABLE = TESTFN \ +# + b'-\xff'.decode(TESTFN_ENCODING, 'surrogateescape') +# else: +# # File system encoding (eg. ISO-8859-* encodings) can encode +# # the byte 0xff. Skip some unicode filename tests. +# pass +# +# # TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be +# # decoded from the filesystem encoding (in strict mode). It can be None if we +# # cannot generate such filename (ex: the latin1 encoding can decode any byte +# # sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks +# # to the surrogateescape error handler (PEP 383), but not from the filesystem +# # encoding in strict mode. +# TESTFN_UNDECODABLE = None +# for name in ( +# # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows +# # accepts it to create a file or a directory, or don't accept to enter to +# # such directory (when the bytes name is used). So test b'\xe7' first: it is +# # not decodable from cp932. +# b'\xe7w\xf0', +# # undecodable from ASCII, UTF-8 +# b'\xff', +# # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856 +# # and cp857 +# b'\xae\xd5' +# # undecodable from UTF-8 (UNIX and Mac OS X) +# b'\xed\xb2\x80', b'\xed\xb4\x80', +# # undecodable from shift_jis, cp869, cp874, cp932, cp1250, cp1251, cp1252, +# # cp1253, cp1254, cp1255, cp1257, cp1258 +# b'\x81\x98', +# ): +# try: +# name.decode(TESTFN_ENCODING) +# except UnicodeDecodeError: +# TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name +# break +# +# if FS_NONASCII: +# TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII +# else: +# TESTFN_NONASCII = None + +# Save the initial cwd +SAVEDCWD = os.getcwd() + +@contextlib.contextmanager +def temp_cwd(name='tempcwd', quiet=False, path=None): + """ + Context manager that temporarily changes the CWD. + + An existing path may be provided as *path*, in which case this + function makes no changes to the file system. + + Otherwise, the new CWD is created in the current directory and it's + named *name*. If *quiet* is False (default) and it's not possible to + create or change the CWD, an error is raised. If it's True, only a + warning is raised and the original CWD is used. + """ + saved_dir = os.getcwd() + is_temporary = False + if path is None: + path = name + try: + os.mkdir(name) + is_temporary = True + except OSError: + if not quiet: + raise + warnings.warn('tests may fail, unable to create temp CWD ' + name, + RuntimeWarning, stacklevel=3) + try: + os.chdir(path) + except OSError: + if not quiet: + raise + warnings.warn('tests may fail, unable to change the CWD to ' + path, + RuntimeWarning, stacklevel=3) + try: + yield os.getcwd() + finally: + os.chdir(saved_dir) + if is_temporary: + rmtree(name) + + +if hasattr(os, "umask"): + @contextlib.contextmanager + def temp_umask(umask): + """Context manager that temporarily sets the process umask.""" + oldmask = os.umask(umask) + try: + yield + finally: + os.umask(oldmask) + + +def findfile(file, here=__file__, subdir=None): + """Try to find a file on sys.path and the working directory. If it is not + found the argument passed to the function is returned (this does not + necessarily signal failure; could still be the legitimate path).""" + if os.path.isabs(file): + return file + if subdir is not None: + file = os.path.join(subdir, file) + path = sys.path + path = [os.path.dirname(here)] + path + for dn in path: + fn = os.path.join(dn, file) + if os.path.exists(fn): return fn + return file + +def create_empty_file(filename): + """Create an empty file. If the file already exists, truncate it.""" + fd = os.open(filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC) + os.close(fd) + +def sortdict(dict): + "Like repr(dict), but in sorted order." + items = sorted(dict.items()) + reprpairs = ["%r: %r" % pair for pair in items] + withcommas = ", ".join(reprpairs) + return "{%s}" % withcommas + +def make_bad_fd(): + """ + Create an invalid file descriptor by opening and closing a file and return + its fd. + """ + file = open(TESTFN, "wb") + try: + return file.fileno() + finally: + file.close() + unlink(TESTFN) + +def check_syntax_error(testcase, statement): + testcase.assertRaises(SyntaxError, compile, statement, + '', 'exec') + +def open_urlresource(url, *args, **kw): + from future.backports.urllib import (request as urllib_request, + parse as urllib_parse) + + check = kw.pop('check', None) + + filename = urllib_parse.urlparse(url)[2].split('/')[-1] # '/': it's URL! + + fn = os.path.join(os.path.dirname(__file__), "data", filename) + + def check_valid_file(fn): + f = open(fn, *args, **kw) + if check is None: + return f + elif check(f): + f.seek(0) + return f + f.close() + + if os.path.exists(fn): + f = check_valid_file(fn) + if f is not None: + return f + unlink(fn) + + # Verify the requirement before downloading the file + requires('urlfetch') + + print('\tfetching %s ...' % url, file=get_original_stdout()) + f = urllib_request.urlopen(url, timeout=15) + try: + with open(fn, "wb") as out: + s = f.read() + while s: + out.write(s) + s = f.read() + finally: + f.close() + + f = check_valid_file(fn) + if f is not None: + return f + raise TestFailed('invalid resource %r' % fn) + + +class WarningsRecorder(object): + """Convenience wrapper for the warnings list returned on + entry to the warnings.catch_warnings() context manager. + """ + def __init__(self, warnings_list): + self._warnings = warnings_list + self._last = 0 + + def __getattr__(self, attr): + if len(self._warnings) > self._last: + return getattr(self._warnings[-1], attr) + elif attr in warnings.WarningMessage._WARNING_DETAILS: + return None + raise AttributeError("%r has no attribute %r" % (self, attr)) + + @property + def warnings(self): + return self._warnings[self._last:] + + def reset(self): + self._last = len(self._warnings) + + +def _filterwarnings(filters, quiet=False): + """Catch the warnings, then check if all the expected + warnings have been raised and re-raise unexpected warnings. + If 'quiet' is True, only re-raise the unexpected warnings. + """ + # Clear the warning registry of the calling module + # in order to re-raise the warnings. + frame = sys._getframe(2) + registry = frame.f_globals.get('__warningregistry__') + if registry: + if utils.PY3: + registry.clear() + else: + # Py2-compatible: + for i in range(len(registry)): + registry.pop() + with warnings.catch_warnings(record=True) as w: + # Set filter "always" to record all warnings. Because + # test_warnings swap the module, we need to look up in + # the sys.modules dictionary. + sys.modules['warnings'].simplefilter("always") + yield WarningsRecorder(w) + # Filter the recorded warnings + reraise = list(w) + missing = [] + for msg, cat in filters: + seen = False + for w in reraise[:]: + warning = w.message + # Filter out the matching messages + if (re.match(msg, str(warning), re.I) and + issubclass(warning.__class__, cat)): + seen = True + reraise.remove(w) + if not seen and not quiet: + # This filter caught nothing + missing.append((msg, cat.__name__)) + if reraise: + raise AssertionError("unhandled warning %s" % reraise[0]) + if missing: + raise AssertionError("filter (%r, %s) did not catch any warning" % + missing[0]) + + +@contextlib.contextmanager +def check_warnings(*filters, **kwargs): + """Context manager to silence warnings. + + Accept 2-tuples as positional arguments: + ("message regexp", WarningCategory) + + Optional argument: + - if 'quiet' is True, it does not fail if a filter catches nothing + (default True without argument, + default False if some filters are defined) + + Without argument, it defaults to: + check_warnings(("", Warning), quiet=True) + """ + quiet = kwargs.get('quiet') + if not filters: + filters = (("", Warning),) + # Preserve backward compatibility + if quiet is None: + quiet = True + return _filterwarnings(filters, quiet) + + +class CleanImport(object): + """Context manager to force import to return a new module reference. + + This is useful for testing module-level behaviours, such as + the emission of a DeprecationWarning on import. + + Use like this: + + with CleanImport("foo"): + importlib.import_module("foo") # new reference + """ + + def __init__(self, *module_names): + self.original_modules = sys.modules.copy() + for module_name in module_names: + if module_name in sys.modules: + module = sys.modules[module_name] + # It is possible that module_name is just an alias for + # another module (e.g. stub for modules renamed in 3.x). + # In that case, we also need delete the real module to clear + # the import cache. + if module.__name__ != module_name: + del sys.modules[module.__name__] + del sys.modules[module_name] + + def __enter__(self): + return self + + def __exit__(self, *ignore_exc): + sys.modules.update(self.original_modules) + +### Added for python-future: +if utils.PY3: + import collections.abc + mybase = collections.abc.MutableMapping +else: + import UserDict + mybase = UserDict.DictMixin +### + +class EnvironmentVarGuard(mybase): + + """Class to help protect the environment variable properly. Can be used as + a context manager.""" + + def __init__(self): + self._environ = os.environ + self._changed = {} + + def __getitem__(self, envvar): + return self._environ[envvar] + + def __setitem__(self, envvar, value): + # Remember the initial value on the first access + if envvar not in self._changed: + self._changed[envvar] = self._environ.get(envvar) + self._environ[envvar] = value + + def __delitem__(self, envvar): + # Remember the initial value on the first access + if envvar not in self._changed: + self._changed[envvar] = self._environ.get(envvar) + if envvar in self._environ: + del self._environ[envvar] + + def keys(self): + return self._environ.keys() + + def __iter__(self): + return iter(self._environ) + + def __len__(self): + return len(self._environ) + + def set(self, envvar, value): + self[envvar] = value + + def unset(self, envvar): + del self[envvar] + + def __enter__(self): + return self + + def __exit__(self, *ignore_exc): + for (k, v) in self._changed.items(): + if v is None: + if k in self._environ: + del self._environ[k] + else: + self._environ[k] = v + os.environ = self._environ + + +class DirsOnSysPath(object): + """Context manager to temporarily add directories to sys.path. + + This makes a copy of sys.path, appends any directories given + as positional arguments, then reverts sys.path to the copied + settings when the context ends. + + Note that *all* sys.path modifications in the body of the + context manager, including replacement of the object, + will be reverted at the end of the block. + """ + + def __init__(self, *paths): + self.original_value = sys.path[:] + self.original_object = sys.path + sys.path.extend(paths) + + def __enter__(self): + return self + + def __exit__(self, *ignore_exc): + sys.path = self.original_object + sys.path[:] = self.original_value + + +class TransientResource(object): + + """Raise ResourceDenied if an exception is raised while the context manager + is in effect that matches the specified exception and attributes.""" + + def __init__(self, exc, **kwargs): + self.exc = exc + self.attrs = kwargs + + def __enter__(self): + return self + + def __exit__(self, type_=None, value=None, traceback=None): + """If type_ is a subclass of self.exc and value has attributes matching + self.attrs, raise ResourceDenied. Otherwise let the exception + propagate (if any).""" + if type_ is not None and issubclass(self.exc, type_): + for attr, attr_value in self.attrs.items(): + if not hasattr(value, attr): + break + if getattr(value, attr) != attr_value: + break + else: + raise ResourceDenied("an optional resource is not available") + +# Context managers that raise ResourceDenied when various issues +# with the Internet connection manifest themselves as exceptions. +# XXX deprecate these and use transient_internet() instead +time_out = TransientResource(IOError, errno=errno.ETIMEDOUT) +socket_peer_reset = TransientResource(socket.error, errno=errno.ECONNRESET) +ioerror_peer_reset = TransientResource(IOError, errno=errno.ECONNRESET) + + +@contextlib.contextmanager +def transient_internet(resource_name, timeout=30.0, errnos=()): + """Return a context manager that raises ResourceDenied when various issues + with the Internet connection manifest themselves as exceptions.""" + default_errnos = [ + ('ECONNREFUSED', 111), + ('ECONNRESET', 104), + ('EHOSTUNREACH', 113), + ('ENETUNREACH', 101), + ('ETIMEDOUT', 110), + ] + default_gai_errnos = [ + ('EAI_AGAIN', -3), + ('EAI_FAIL', -4), + ('EAI_NONAME', -2), + ('EAI_NODATA', -5), + # Encountered when trying to resolve IPv6-only hostnames + ('WSANO_DATA', 11004), + ] + + denied = ResourceDenied("Resource %r is not available" % resource_name) + captured_errnos = errnos + gai_errnos = [] + if not captured_errnos: + captured_errnos = [getattr(errno, name, num) + for (name, num) in default_errnos] + gai_errnos = [getattr(socket, name, num) + for (name, num) in default_gai_errnos] + + def filter_error(err): + n = getattr(err, 'errno', None) + if (isinstance(err, socket.timeout) or + (isinstance(err, socket.gaierror) and n in gai_errnos) or + n in captured_errnos): + if not verbose: + sys.stderr.write(denied.args[0] + "\n") + # Was: raise denied from err + # For Python-Future: + exc = denied + exc.__cause__ = err + raise exc + + old_timeout = socket.getdefaulttimeout() + try: + if timeout is not None: + socket.setdefaulttimeout(timeout) + yield + except IOError as err: + # urllib can wrap original socket errors multiple times (!), we must + # unwrap to get at the original error. + while True: + a = err.args + if len(a) >= 1 and isinstance(a[0], IOError): + err = a[0] + # The error can also be wrapped as args[1]: + # except socket.error as msg: + # raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) + elif len(a) >= 2 and isinstance(a[1], IOError): + err = a[1] + else: + break + filter_error(err) + raise + # XXX should we catch generic exceptions and look for their + # __cause__ or __context__? + finally: + socket.setdefaulttimeout(old_timeout) + + +@contextlib.contextmanager +def captured_output(stream_name): + """Return a context manager used by captured_stdout/stdin/stderr + that temporarily replaces the sys stream *stream_name* with a StringIO.""" + import io + orig_stdout = getattr(sys, stream_name) + setattr(sys, stream_name, io.StringIO()) + try: + yield getattr(sys, stream_name) + finally: + setattr(sys, stream_name, orig_stdout) + +def captured_stdout(): + """Capture the output of sys.stdout: + + with captured_stdout() as s: + print("hello") + self.assertEqual(s.getvalue(), "hello") + """ + return captured_output("stdout") + +def captured_stderr(): + return captured_output("stderr") + +def captured_stdin(): + return captured_output("stdin") + + +def gc_collect(): + """Force as many objects as possible to be collected. + + In non-CPython implementations of Python, this is needed because timely + deallocation is not guaranteed by the garbage collector. (Even in CPython + this can be the case in case of reference cycles.) This means that __del__ + methods may be called later than expected and weakrefs may remain alive for + longer than expected. This function tries its best to force all garbage + objects to disappear. + """ + gc.collect() + if is_jython: + time.sleep(0.1) + gc.collect() + gc.collect() + +@contextlib.contextmanager +def disable_gc(): + have_gc = gc.isenabled() + gc.disable() + try: + yield + finally: + if have_gc: + gc.enable() + + +def python_is_optimized(): + """Find if Python was built with optimizations.""" + # We don't have sysconfig on Py2.6: + import sysconfig + cflags = sysconfig.get_config_var('PY_CFLAGS') or '' + final_opt = "" + for opt in cflags.split(): + if opt.startswith('-O'): + final_opt = opt + return final_opt != '' and final_opt != '-O0' + + +_header = 'nP' +_align = '0n' +if hasattr(sys, "gettotalrefcount"): + _header = '2P' + _header + _align = '0P' +_vheader = _header + 'n' + +def calcobjsize(fmt): + return struct.calcsize(_header + fmt + _align) + +def calcvobjsize(fmt): + return struct.calcsize(_vheader + fmt + _align) + + +_TPFLAGS_HAVE_GC = 1<<14 +_TPFLAGS_HEAPTYPE = 1<<9 + +def check_sizeof(test, o, size): + result = sys.getsizeof(o) + # add GC header size + if ((type(o) == type) and (o.__flags__ & _TPFLAGS_HEAPTYPE) or\ + ((type(o) != type) and (type(o).__flags__ & _TPFLAGS_HAVE_GC))): + size += _testcapi.SIZEOF_PYGC_HEAD + msg = 'wrong size for %s: got %d, expected %d' \ + % (type(o), result, size) + test.assertEqual(result, size, msg) + +#======================================================================= +# Decorator for running a function in a different locale, correctly resetting +# it afterwards. + +def run_with_locale(catstr, *locales): + def decorator(func): + def inner(*args, **kwds): + try: + import locale + category = getattr(locale, catstr) + orig_locale = locale.setlocale(category) + except AttributeError: + # if the test author gives us an invalid category string + raise + except: + # cannot retrieve original locale, so do nothing + locale = orig_locale = None + else: + for loc in locales: + try: + locale.setlocale(category, loc) + break + except: + pass + + # now run the function, resetting the locale on exceptions + try: + return func(*args, **kwds) + finally: + if locale and orig_locale: + locale.setlocale(category, orig_locale) + inner.__name__ = func.__name__ + inner.__doc__ = func.__doc__ + return inner + return decorator + +#======================================================================= +# Decorator for running a function in a specific timezone, correctly +# resetting it afterwards. + +def run_with_tz(tz): + def decorator(func): + def inner(*args, **kwds): + try: + tzset = time.tzset + except AttributeError: + raise unittest.SkipTest("tzset required") + if 'TZ' in os.environ: + orig_tz = os.environ['TZ'] + else: + orig_tz = None + os.environ['TZ'] = tz + tzset() + + # now run the function, resetting the tz on exceptions + try: + return func(*args, **kwds) + finally: + if orig_tz is None: + del os.environ['TZ'] + else: + os.environ['TZ'] = orig_tz + time.tzset() + + inner.__name__ = func.__name__ + inner.__doc__ = func.__doc__ + return inner + return decorator + +#======================================================================= +# Big-memory-test support. Separate from 'resources' because memory use +# should be configurable. + +# Some handy shorthands. Note that these are used for byte-limits as well +# as size-limits, in the various bigmem tests +_1M = 1024*1024 +_1G = 1024 * _1M +_2G = 2 * _1G +_4G = 4 * _1G + +MAX_Py_ssize_t = sys.maxsize + +def set_memlimit(limit): + global max_memuse + global real_max_memuse + sizes = { + 'k': 1024, + 'm': _1M, + 'g': _1G, + 't': 1024*_1G, + } + m = re.match(r'(\d+(\.\d+)?) (K|M|G|T)b?$', limit, + re.IGNORECASE | re.VERBOSE) + if m is None: + raise ValueError('Invalid memory limit %r' % (limit,)) + memlimit = int(float(m.group(1)) * sizes[m.group(3).lower()]) + real_max_memuse = memlimit + if memlimit > MAX_Py_ssize_t: + memlimit = MAX_Py_ssize_t + if memlimit < _2G - 1: + raise ValueError('Memory limit %r too low to be useful' % (limit,)) + max_memuse = memlimit + +class _MemoryWatchdog(object): + """An object which periodically watches the process' memory consumption + and prints it out. + """ + + def __init__(self): + self.procfile = '/proc/{pid}/statm'.format(pid=os.getpid()) + self.started = False + + def start(self): + try: + f = open(self.procfile, 'r') + except OSError as e: + warnings.warn('/proc not available for stats: {0}'.format(e), + RuntimeWarning) + sys.stderr.flush() + return + + watchdog_script = findfile("memory_watchdog.py") + self.mem_watchdog = subprocess.Popen([sys.executable, watchdog_script], + stdin=f, stderr=subprocess.DEVNULL) + f.close() + self.started = True + + def stop(self): + if self.started: + self.mem_watchdog.terminate() + self.mem_watchdog.wait() + + +def bigmemtest(size, memuse, dry_run=True): + """Decorator for bigmem tests. + + 'minsize' is the minimum useful size for the test (in arbitrary, + test-interpreted units.) 'memuse' is the number of 'bytes per size' for + the test, or a good estimate of it. + + if 'dry_run' is False, it means the test doesn't support dummy runs + when -M is not specified. + """ + def decorator(f): + def wrapper(self): + size = wrapper.size + memuse = wrapper.memuse + if not real_max_memuse: + maxsize = 5147 + else: + maxsize = size + + if ((real_max_memuse or not dry_run) + and real_max_memuse < maxsize * memuse): + raise unittest.SkipTest( + "not enough memory: %.1fG minimum needed" + % (size * memuse / (1024 ** 3))) + + if real_max_memuse and verbose: + print() + print(" ... expected peak memory use: {peak:.1f}G" + .format(peak=size * memuse / (1024 ** 3))) + watchdog = _MemoryWatchdog() + watchdog.start() + else: + watchdog = None + + try: + return f(self, maxsize) + finally: + if watchdog: + watchdog.stop() + + wrapper.size = size + wrapper.memuse = memuse + return wrapper + return decorator + +def bigaddrspacetest(f): + """Decorator for tests that fill the address space.""" + def wrapper(self): + if max_memuse < MAX_Py_ssize_t: + if MAX_Py_ssize_t >= 2**63 - 1 and max_memuse >= 2**31: + raise unittest.SkipTest( + "not enough memory: try a 32-bit build instead") + else: + raise unittest.SkipTest( + "not enough memory: %.1fG minimum needed" + % (MAX_Py_ssize_t / (1024 ** 3))) + else: + return f(self) + return wrapper + +#======================================================================= +# unittest integration. + +class BasicTestRunner(object): + def run(self, test): + result = unittest.TestResult() + test(result) + return result + +def _id(obj): + return obj + +def requires_resource(resource): + if resource == 'gui' and not _is_gui_available(): + return unittest.skip("resource 'gui' is not available") + if is_resource_enabled(resource): + return _id + else: + return unittest.skip("resource {0!r} is not enabled".format(resource)) + +def cpython_only(test): + """ + Decorator for tests only applicable on CPython. + """ + return impl_detail(cpython=True)(test) + +def impl_detail(msg=None, **guards): + if check_impl_detail(**guards): + return _id + if msg is None: + guardnames, default = _parse_guards(guards) + if default: + msg = "implementation detail not available on {0}" + else: + msg = "implementation detail specific to {0}" + guardnames = sorted(guardnames.keys()) + msg = msg.format(' or '.join(guardnames)) + return unittest.skip(msg) + +def _parse_guards(guards): + # Returns a tuple ({platform_name: run_me}, default_value) + if not guards: + return ({'cpython': True}, False) + is_true = list(guards.values())[0] + assert list(guards.values()) == [is_true] * len(guards) # all True or all False + return (guards, not is_true) + +# Use the following check to guard CPython's implementation-specific tests -- +# or to run them only on the implementation(s) guarded by the arguments. +def check_impl_detail(**guards): + """This function returns True or False depending on the host platform. + Examples: + if check_impl_detail(): # only on CPython (default) + if check_impl_detail(jython=True): # only on Jython + if check_impl_detail(cpython=False): # everywhere except on CPython + """ + guards, default = _parse_guards(guards) + return guards.get(platform.python_implementation().lower(), default) + + +def no_tracing(func): + """Decorator to temporarily turn off tracing for the duration of a test.""" + if not hasattr(sys, 'gettrace'): + return func + else: + @functools.wraps(func) + def wrapper(*args, **kwargs): + original_trace = sys.gettrace() + try: + sys.settrace(None) + return func(*args, **kwargs) + finally: + sys.settrace(original_trace) + return wrapper + + +def refcount_test(test): + """Decorator for tests which involve reference counting. + + To start, the decorator does not run the test if is not run by CPython. + After that, any trace function is unset during the test to prevent + unexpected refcounts caused by the trace function. + + """ + return no_tracing(cpython_only(test)) + + +def _filter_suite(suite, pred): + """Recursively filter test cases in a suite based on a predicate.""" + newtests = [] + for test in suite._tests: + if isinstance(test, unittest.TestSuite): + _filter_suite(test, pred) + newtests.append(test) + else: + if pred(test): + newtests.append(test) + suite._tests = newtests + +def _run_suite(suite): + """Run tests from a unittest.TestSuite-derived class.""" + if verbose: + runner = unittest.TextTestRunner(sys.stdout, verbosity=2, + failfast=failfast) + else: + runner = BasicTestRunner() + + result = runner.run(suite) + if not result.wasSuccessful(): + if len(result.errors) == 1 and not result.failures: + err = result.errors[0][1] + elif len(result.failures) == 1 and not result.errors: + err = result.failures[0][1] + else: + err = "multiple errors occurred" + if not verbose: err += "; run in verbose mode for details" + raise TestFailed(err) + + +def run_unittest(*classes): + """Run tests from unittest.TestCase-derived classes.""" + valid_types = (unittest.TestSuite, unittest.TestCase) + suite = unittest.TestSuite() + for cls in classes: + if isinstance(cls, str): + if cls in sys.modules: + suite.addTest(unittest.findTestCases(sys.modules[cls])) + else: + raise ValueError("str arguments must be keys in sys.modules") + elif isinstance(cls, valid_types): + suite.addTest(cls) + else: + suite.addTest(unittest.makeSuite(cls)) + def case_pred(test): + if match_tests is None: + return True + for name in test.id().split("."): + if fnmatch.fnmatchcase(name, match_tests): + return True + return False + _filter_suite(suite, case_pred) + _run_suite(suite) + +# We don't have sysconfig on Py2.6: +# #======================================================================= +# # Check for the presence of docstrings. +# +# HAVE_DOCSTRINGS = (check_impl_detail(cpython=False) or +# sys.platform == 'win32' or +# sysconfig.get_config_var('WITH_DOC_STRINGS')) +# +# requires_docstrings = unittest.skipUnless(HAVE_DOCSTRINGS, +# "test requires docstrings") +# +# +# #======================================================================= +# doctest driver. + +def run_doctest(module, verbosity=None, optionflags=0): + """Run doctest on the given module. Return (#failures, #tests). + + If optional argument verbosity is not specified (or is None), pass + support's belief about verbosity on to doctest. Else doctest's + usual behavior is used (it searches sys.argv for -v). + """ + + import doctest + + if verbosity is None: + verbosity = verbose + else: + verbosity = None + + f, t = doctest.testmod(module, verbose=verbosity, optionflags=optionflags) + if f: + raise TestFailed("%d of %d doctests failed" % (f, t)) + if verbose: + print('doctest (%s) ... %d tests with zero failures' % + (module.__name__, t)) + return f, t + + +#======================================================================= +# Support for saving and restoring the imported modules. + +def modules_setup(): + return sys.modules.copy(), + +def modules_cleanup(oldmodules): + # Encoders/decoders are registered permanently within the internal + # codec cache. If we destroy the corresponding modules their + # globals will be set to None which will trip up the cached functions. + encodings = [(k, v) for k, v in sys.modules.items() + if k.startswith('encodings.')] + # Was: + # sys.modules.clear() + # Py2-compatible: + for i in range(len(sys.modules)): + sys.modules.pop() + + sys.modules.update(encodings) + # XXX: This kind of problem can affect more than just encodings. In particular + # extension modules (such as _ssl) don't cope with reloading properly. + # Really, test modules should be cleaning out the test specific modules they + # know they added (ala test_runpy) rather than relying on this function (as + # test_importhooks and test_pkg do currently). + # Implicitly imported *real* modules should be left alone (see issue 10556). + sys.modules.update(oldmodules) + +#======================================================================= +# Backported versions of threading_setup() and threading_cleanup() which don't refer +# to threading._dangling (not available on Py2.7). + +# Threading support to prevent reporting refleaks when running regrtest.py -R + +# NOTE: we use thread._count() rather than threading.enumerate() (or the +# moral equivalent thereof) because a threading.Thread object is still alive +# until its __bootstrap() method has returned, even after it has been +# unregistered from the threading module. +# thread._count(), on the other hand, only gets decremented *after* the +# __bootstrap() method has returned, which gives us reliable reference counts +# at the end of a test run. + +def threading_setup(): + if _thread: + return _thread._count(), + else: + return 1, + +def threading_cleanup(nb_threads): + if not _thread: + return + + _MAX_COUNT = 10 + for count in range(_MAX_COUNT): + n = _thread._count() + if n == nb_threads: + break + time.sleep(0.1) + # XXX print a warning in case of failure? + +def reap_threads(func): + """Use this function when threads are being used. This will + ensure that the threads are cleaned up even when the test fails. + If threading is unavailable this function does nothing. + """ + if not _thread: + return func + + @functools.wraps(func) + def decorator(*args): + key = threading_setup() + try: + return func(*args) + finally: + threading_cleanup(*key) + return decorator + +def reap_children(): + """Use this function at the end of test_main() whenever sub-processes + are started. This will help ensure that no extra children (zombies) + stick around to hog resources and create problems when looking + for refleaks. + """ + + # Reap all our dead child processes so we don't leave zombies around. + # These hog resources and might be causing some of the buildbots to die. + if hasattr(os, 'waitpid'): + any_process = -1 + while True: + try: + # This will raise an exception on Windows. That's ok. + pid, status = os.waitpid(any_process, os.WNOHANG) + if pid == 0: + break + except: + break + +@contextlib.contextmanager +def swap_attr(obj, attr, new_val): + """Temporary swap out an attribute with a new object. + + Usage: + with swap_attr(obj, "attr", 5): + ... + + This will set obj.attr to 5 for the duration of the with: block, + restoring the old value at the end of the block. If `attr` doesn't + exist on `obj`, it will be created and then deleted at the end of the + block. + """ + if hasattr(obj, attr): + real_val = getattr(obj, attr) + setattr(obj, attr, new_val) + try: + yield + finally: + setattr(obj, attr, real_val) + else: + setattr(obj, attr, new_val) + try: + yield + finally: + delattr(obj, attr) + +@contextlib.contextmanager +def swap_item(obj, item, new_val): + """Temporary swap out an item with a new object. + + Usage: + with swap_item(obj, "item", 5): + ... + + This will set obj["item"] to 5 for the duration of the with: block, + restoring the old value at the end of the block. If `item` doesn't + exist on `obj`, it will be created and then deleted at the end of the + block. + """ + if item in obj: + real_val = obj[item] + obj[item] = new_val + try: + yield + finally: + obj[item] = real_val + else: + obj[item] = new_val + try: + yield + finally: + del obj[item] + +def strip_python_stderr(stderr): + """Strip the stderr of a Python process from potential debug output + emitted by the interpreter. + + This will typically be run on the result of the communicate() method + of a subprocess.Popen object. + """ + stderr = re.sub(br"\[\d+ refs\]\r?\n?", b"", stderr).strip() + return stderr + +def args_from_interpreter_flags(): + """Return a list of command-line arguments reproducing the current + settings in sys.flags and sys.warnoptions.""" + return subprocess._args_from_interpreter_flags() + +#============================================================ +# Support for assertions about logging. +#============================================================ + +class TestHandler(logging.handlers.BufferingHandler): + def __init__(self, matcher): + # BufferingHandler takes a "capacity" argument + # so as to know when to flush. As we're overriding + # shouldFlush anyway, we can set a capacity of zero. + # You can call flush() manually to clear out the + # buffer. + logging.handlers.BufferingHandler.__init__(self, 0) + self.matcher = matcher + + def shouldFlush(self): + return False + + def emit(self, record): + self.format(record) + self.buffer.append(record.__dict__) + + def matches(self, **kwargs): + """ + Look for a saved dict whose keys/values match the supplied arguments. + """ + result = False + for d in self.buffer: + if self.matcher.matches(d, **kwargs): + result = True + break + return result + +class Matcher(object): + + _partial_matches = ('msg', 'message') + + def matches(self, d, **kwargs): + """ + Try to match a single dict with the supplied arguments. + + Keys whose values are strings and which are in self._partial_matches + will be checked for partial (i.e. substring) matches. You can extend + this scheme to (for example) do regular expression matching, etc. + """ + result = True + for k in kwargs: + v = kwargs[k] + dv = d.get(k) + if not self.match_value(k, dv, v): + result = False + break + return result + + def match_value(self, k, dv, v): + """ + Try to match a single stored value (dv) with a supplied value (v). + """ + if type(v) != type(dv): + result = False + elif type(dv) is not str or k not in self._partial_matches: + result = (v == dv) + else: + result = dv.find(v) >= 0 + return result + + +_can_symlink = None +def can_symlink(): + global _can_symlink + if _can_symlink is not None: + return _can_symlink + symlink_path = TESTFN + "can_symlink" + try: + os.symlink(TESTFN, symlink_path) + can = True + except (OSError, NotImplementedError, AttributeError): + can = False + else: + os.remove(symlink_path) + _can_symlink = can + return can + +def skip_unless_symlink(test): + """Skip decorator for tests that require functional symlink""" + ok = can_symlink() + msg = "Requires functional symlink implementation" + return test if ok else unittest.skip(msg)(test) + +_can_xattr = None +def can_xattr(): + global _can_xattr + if _can_xattr is not None: + return _can_xattr + if not hasattr(os, "setxattr"): + can = False + else: + tmp_fp, tmp_name = tempfile.mkstemp() + try: + with open(TESTFN, "wb") as fp: + try: + # TESTFN & tempfile may use different file systems with + # different capabilities + os.setxattr(tmp_fp, b"user.test", b"") + os.setxattr(fp.fileno(), b"user.test", b"") + # Kernels < 2.6.39 don't respect setxattr flags. + kernel_version = platform.release() + m = re.match("2.6.(\d{1,2})", kernel_version) + can = m is None or int(m.group(1)) >= 39 + except OSError: + can = False + finally: + unlink(TESTFN) + unlink(tmp_name) + _can_xattr = can + return can + +def skip_unless_xattr(test): + """Skip decorator for tests that require functional extended attributes""" + ok = can_xattr() + msg = "no non-broken extended attribute support" + return test if ok else unittest.skip(msg)(test) + + +if sys.platform.startswith('win'): + @contextlib.contextmanager + def suppress_crash_popup(): + """Disable Windows Error Reporting dialogs using SetErrorMode.""" + # see http://msdn.microsoft.com/en-us/library/windows/desktop/ms680621%28v=vs.85%29.aspx + # GetErrorMode is not available on Windows XP and Windows Server 2003, + # but SetErrorMode returns the previous value, so we can use that + import ctypes + k32 = ctypes.windll.kernel32 + SEM_NOGPFAULTERRORBOX = 0x02 + old_error_mode = k32.SetErrorMode(SEM_NOGPFAULTERRORBOX) + k32.SetErrorMode(old_error_mode | SEM_NOGPFAULTERRORBOX) + try: + yield + finally: + k32.SetErrorMode(old_error_mode) +else: + # this is a no-op for other platforms + @contextlib.contextmanager + def suppress_crash_popup(): + yield + + +def patch(test_instance, object_to_patch, attr_name, new_value): + """Override 'object_to_patch'.'attr_name' with 'new_value'. + + Also, add a cleanup procedure to 'test_instance' to restore + 'object_to_patch' value for 'attr_name'. + The 'attr_name' should be a valid attribute for 'object_to_patch'. + + """ + # check that 'attr_name' is a real attribute for 'object_to_patch' + # will raise AttributeError if it does not exist + getattr(object_to_patch, attr_name) + + # keep a copy of the old value + attr_is_local = False + try: + old_value = object_to_patch.__dict__[attr_name] + except (AttributeError, KeyError): + old_value = getattr(object_to_patch, attr_name, None) + else: + attr_is_local = True + + # restore the value when the test is done + def cleanup(): + if attr_is_local: + setattr(object_to_patch, attr_name, old_value) + else: + delattr(object_to_patch, attr_name) + + test_instance.addCleanup(cleanup) + + # actually override the attribute + setattr(object_to_patch, attr_name, new_value) diff --git a/lib/future/backports/total_ordering.py b/lib/future/backports/total_ordering.py new file mode 100644 index 00000000..760f06d6 --- /dev/null +++ b/lib/future/backports/total_ordering.py @@ -0,0 +1,38 @@ +""" +For Python < 2.7.2. total_ordering in versions prior to 2.7.2 is buggy. +See http://bugs.python.org/issue10042 for details. For these versions use +code borrowed from Python 2.7.3. + +From django.utils. +""" + +import sys +if sys.version_info >= (2, 7, 2): + from functools import total_ordering +else: + def total_ordering(cls): + """Class decorator that fills in missing ordering methods""" + convert = { + '__lt__': [('__gt__', lambda self, other: not (self < other or self == other)), + ('__le__', lambda self, other: self < other or self == other), + ('__ge__', lambda self, other: not self < other)], + '__le__': [('__ge__', lambda self, other: not self <= other or self == other), + ('__lt__', lambda self, other: self <= other and not self == other), + ('__gt__', lambda self, other: not self <= other)], + '__gt__': [('__lt__', lambda self, other: not (self > other or self == other)), + ('__ge__', lambda self, other: self > other or self == other), + ('__le__', lambda self, other: not self > other)], + '__ge__': [('__le__', lambda self, other: (not self >= other) or self == other), + ('__gt__', lambda self, other: self >= other and not self == other), + ('__lt__', lambda self, other: not self >= other)] + } + roots = set(dir(cls)) & set(convert) + if not roots: + raise ValueError('must define at least one ordering operation: < > <= >=') + root = max(roots) # prefer __lt__ to __le__ to __gt__ to __ge__ + for opname, opfunc in convert[root]: + if opname not in roots: + opfunc.__name__ = opname + opfunc.__doc__ = getattr(int, opname).__doc__ + setattr(cls, opname, opfunc) + return cls diff --git a/lib/future/backports/urllib/__init__.py b/lib/future/backports/urllib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/backports/urllib/error.py b/lib/future/backports/urllib/error.py new file mode 100644 index 00000000..a473e445 --- /dev/null +++ b/lib/future/backports/urllib/error.py @@ -0,0 +1,75 @@ +"""Exception classes raised by urllib. + +The base exception class is URLError, which inherits from IOError. It +doesn't define any behavior of its own, but is the base class for all +exceptions defined in this package. + +HTTPError is an exception class that is also a valid HTTP response +instance. It behaves this way because HTTP protocol errors are valid +responses, with a status code, headers, and a body. In some contexts, +an application may want to handle an exception like a regular +response. +""" +from __future__ import absolute_import, division, unicode_literals +from future import standard_library + +from future.backports.urllib import response as urllib_response + + +__all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] + + +# do these error classes make sense? +# make sure all of the IOError stuff is overridden. we just want to be +# subtypes. + +class URLError(IOError): + # URLError is a sub-type of IOError, but it doesn't share any of + # the implementation. need to override __init__ and __str__. + # It sets self.args for compatibility with other EnvironmentError + # subclasses, but args doesn't have the typical format with errno in + # slot 0 and strerror in slot 1. This may be better than nothing. + def __init__(self, reason, filename=None): + self.args = reason, + self.reason = reason + if filename is not None: + self.filename = filename + + def __str__(self): + return '' % self.reason + +class HTTPError(URLError, urllib_response.addinfourl): + """Raised when HTTP error occurs, but also acts like non-error return""" + __super_init = urllib_response.addinfourl.__init__ + + def __init__(self, url, code, msg, hdrs, fp): + self.code = code + self.msg = msg + self.hdrs = hdrs + self.fp = fp + self.filename = url + # The addinfourl classes depend on fp being a valid file + # object. In some cases, the HTTPError may not have a valid + # file object. If this happens, the simplest workaround is to + # not initialize the base classes. + if fp is not None: + self.__super_init(fp, hdrs, url, code) + + def __str__(self): + return 'HTTP Error %s: %s' % (self.code, self.msg) + + # since URLError specifies a .reason attribute, HTTPError should also + # provide this attribute. See issue13211 for discussion. + @property + def reason(self): + return self.msg + + def info(self): + return self.hdrs + + +# exception raised when downloaded size does not match content-length +class ContentTooShortError(URLError): + def __init__(self, message, content): + URLError.__init__(self, message) + self.content = content diff --git a/lib/future/backports/urllib/parse.py b/lib/future/backports/urllib/parse.py new file mode 100644 index 00000000..04e52d49 --- /dev/null +++ b/lib/future/backports/urllib/parse.py @@ -0,0 +1,991 @@ +""" +Ported using Python-Future from the Python 3.3 standard library. + +Parse (absolute and relative) URLs. + +urlparse module is based upon the following RFC specifications. + +RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding +and L. Masinter, January 2005. + +RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter +and L.Masinter, December 1999. + +RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. +Berners-Lee, R. Fielding, and L. Masinter, August 1998. + +RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. + +RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June +1995. + +RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. +McCahill, December 1994 + +RFC 3986 is considered the current standard and any future changes to +urlparse module should conform with it. The urlparse module is +currently not entirely compliant with this RFC due to defacto +scenarios for parsing, and for backward compatibility purposes, some +parsing quirks from older RFCs are retained. The testcases in +test_urlparse.py provides a good indicator of parsing behavior. +""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import bytes, chr, dict, int, range, str +from future.utils import raise_with_traceback + +import re +import sys +import collections + +__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", + "urlsplit", "urlunsplit", "urlencode", "parse_qs", + "parse_qsl", "quote", "quote_plus", "quote_from_bytes", + "unquote", "unquote_plus", "unquote_to_bytes"] + +# A classification of schemes ('' means apply by default) +uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap', + 'wais', 'file', 'https', 'shttp', 'mms', + 'prospero', 'rtsp', 'rtspu', '', 'sftp', + 'svn', 'svn+ssh'] +uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet', + 'imap', 'wais', 'file', 'mms', 'https', 'shttp', + 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '', + 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh'] +uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap', + 'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips', + 'mms', '', 'sftp', 'tel'] + +# These are not actually used anymore, but should stay for backwards +# compatibility. (They are undocumented, but have a public-looking name.) +non_hierarchical = ['gopher', 'hdl', 'mailto', 'news', + 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips'] +uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms', + 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', ''] +uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', + 'nntp', 'wais', 'https', 'shttp', 'snews', + 'file', 'prospero', ''] + +# Characters valid in scheme names +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') + +# XXX: Consider replacing with functools.lru_cache +MAX_CACHE_SIZE = 20 +_parse_cache = {} + +def clear_cache(): + """Clear the parse cache and the quoters cache.""" + _parse_cache.clear() + _safe_quoters.clear() + + +# Helpers for bytes handling +# For 3.2, we deliberately require applications that +# handle improperly quoted URLs to do their own +# decoding and encoding. If valid use cases are +# presented, we may relax this by using latin-1 +# decoding internally for 3.3 +_implicit_encoding = 'ascii' +_implicit_errors = 'strict' + +def _noop(obj): + return obj + +def _encode_result(obj, encoding=_implicit_encoding, + errors=_implicit_errors): + return obj.encode(encoding, errors) + +def _decode_args(args, encoding=_implicit_encoding, + errors=_implicit_errors): + return tuple(x.decode(encoding, errors) if x else '' for x in args) + +def _coerce_args(*args): + # Invokes decode if necessary to create str args + # and returns the coerced inputs along with + # an appropriate result coercion function + # - noop for str inputs + # - encoding function otherwise + str_input = isinstance(args[0], str) + for arg in args[1:]: + # We special-case the empty string to support the + # "scheme=''" default argument to some functions + if arg and isinstance(arg, str) != str_input: + raise TypeError("Cannot mix str and non-str arguments") + if str_input: + return args + (_noop,) + return _decode_args(args) + (_encode_result,) + +# Result objects are more helpful than simple tuples +class _ResultMixinStr(object): + """Standard approach to encoding parsed results from str to bytes""" + __slots__ = () + + def encode(self, encoding='ascii', errors='strict'): + return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self)) + + +class _ResultMixinBytes(object): + """Standard approach to decoding parsed results from bytes to str""" + __slots__ = () + + def decode(self, encoding='ascii', errors='strict'): + return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self)) + + +class _NetlocResultMixinBase(object): + """Shared methods for the parsed result objects containing a netloc element""" + __slots__ = () + + @property + def username(self): + return self._userinfo[0] + + @property + def password(self): + return self._userinfo[1] + + @property + def hostname(self): + hostname = self._hostinfo[0] + if not hostname: + hostname = None + elif hostname is not None: + hostname = hostname.lower() + return hostname + + @property + def port(self): + port = self._hostinfo[1] + if port is not None: + port = int(port, 10) + # Return None on an illegal port + if not ( 0 <= port <= 65535): + return None + return port + + +class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition('@') + if have_info: + username, have_password, password = userinfo.partition(':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition('@') + _, have_open_br, bracketed = hostinfo.partition('[') + if have_open_br: + hostname, _, port = bracketed.partition(']') + _, have_port, port = port.partition(':') + else: + hostname, have_port, port = hostinfo.partition(':') + if not have_port: + port = None + return hostname, port + + +class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes): + __slots__ = () + + @property + def _userinfo(self): + netloc = self.netloc + userinfo, have_info, hostinfo = netloc.rpartition(b'@') + if have_info: + username, have_password, password = userinfo.partition(b':') + if not have_password: + password = None + else: + username = password = None + return username, password + + @property + def _hostinfo(self): + netloc = self.netloc + _, _, hostinfo = netloc.rpartition(b'@') + _, have_open_br, bracketed = hostinfo.partition(b'[') + if have_open_br: + hostname, _, port = bracketed.partition(b']') + _, have_port, port = port.partition(b':') + else: + hostname, have_port, port = hostinfo.partition(b':') + if not have_port: + port = None + return hostname, port + + +from collections import namedtuple + +_DefragResultBase = namedtuple('DefragResult', 'url fragment') +_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') +_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') + +# For backwards compatibility, alias _NetlocResultMixinStr +# ResultBase is no longer part of the documented API, but it is +# retained since deprecating it isn't worth the hassle +ResultBase = _NetlocResultMixinStr + +# Structured result objects for string data +class DefragResult(_DefragResultBase, _ResultMixinStr): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + '#' + self.fragment + else: + return self.url + +class SplitResult(_SplitResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResult(_ParseResultBase, _NetlocResultMixinStr): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Structured result objects for bytes data +class DefragResultBytes(_DefragResultBase, _ResultMixinBytes): + __slots__ = () + def geturl(self): + if self.fragment: + return self.url + b'#' + self.fragment + else: + return self.url + +class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunsplit(self) + +class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes): + __slots__ = () + def geturl(self): + return urlunparse(self) + +# Set up the encode/decode result pairs +def _fix_result_transcoding(): + _result_pairs = ( + (DefragResult, DefragResultBytes), + (SplitResult, SplitResultBytes), + (ParseResult, ParseResultBytes), + ) + for _decoded, _encoded in _result_pairs: + _decoded._encoded_counterpart = _encoded + _encoded._decoded_counterpart = _decoded + +_fix_result_transcoding() +del _fix_result_transcoding + +def urlparse(url, scheme='', allow_fragments=True): + """Parse a URL into 6 components: + :///;?# + Return a 6-tuple: (scheme, netloc, path, params, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + splitresult = urlsplit(url, scheme, allow_fragments) + scheme, netloc, url, query, fragment = splitresult + if scheme in uses_params and ';' in url: + url, params = _splitparams(url) + else: + params = '' + result = ParseResult(scheme, netloc, url, params, query, fragment) + return _coerce_result(result) + +def _splitparams(url): + if '/' in url: + i = url.find(';', url.rfind('/')) + if i < 0: + return url, '' + else: + i = url.find(';') + return url[:i], url[i+1:] + +def _splitnetloc(url, start=0): + delim = len(url) # position of end of domain part of url, default is end + for c in '/?#': # look for delimiters; the order is NOT important + wdelim = url.find(c, start) # find first of this delim + if wdelim >= 0: # if found + delim = min(delim, wdelim) # use earliest delim position + return url[start:delim], url[delim:] # return (domain, rest) + +def urlsplit(url, scheme='', allow_fragments=True): + """Parse a URL into 5 components: + :///?# + Return a 5-tuple: (scheme, netloc, path, query, fragment). + Note that we don't break the components up in smaller bits + (e.g. netloc is a single string) and we don't expand % escapes.""" + url, scheme, _coerce_result = _coerce_args(url, scheme) + allow_fragments = bool(allow_fragments) + key = url, scheme, allow_fragments, type(url), type(scheme) + cached = _parse_cache.get(key, None) + if cached: + return _coerce_result(cached) + if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth + clear_cache() + netloc = query = fragment = '' + i = url.find(':') + if i > 0: + if url[:i] == 'http': # optimize the common case + scheme = url[:i].lower() + url = url[i+1:] + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + for c in url[:i]: + if c not in scheme_chars: + break + else: + # make sure "url" is not actually a port number (in which case + # "scheme" is really part of the path) + rest = url[i+1:] + if not rest or any(c not in '0123456789' for c in rest): + # not a port number + scheme, url = url[:i].lower(), rest + + if url[:2] == '//': + netloc, url = _splitnetloc(url, 2) + if (('[' in netloc and ']' not in netloc) or + (']' in netloc and '[' not in netloc)): + raise ValueError("Invalid IPv6 URL") + if allow_fragments and '#' in url: + url, fragment = url.split('#', 1) + if '?' in url: + url, query = url.split('?', 1) + v = SplitResult(scheme, netloc, url, query, fragment) + _parse_cache[key] = v + return _coerce_result(v) + +def urlunparse(components): + """Put a parsed URL back together again. This may result in a + slightly different, but equivalent URL, if the URL that was parsed + originally had redundant delimiters, e.g. a ? with an empty query + (the draft states that these are equivalent).""" + scheme, netloc, url, params, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if params: + url = "%s;%s" % (url, params) + return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment))) + +def urlunsplit(components): + """Combine the elements of a tuple as returned by urlsplit() into a + complete URL as a string. The data argument can be any five-item iterable. + This may result in a slightly different, but equivalent URL, if the URL that + was parsed originally had unnecessary delimiters (for example, a ? with an + empty query; the RFC states that these are equivalent).""" + scheme, netloc, url, query, fragment, _coerce_result = ( + _coerce_args(*components)) + if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'): + if url and url[:1] != '/': url = '/' + url + url = '//' + (netloc or '') + url + if scheme: + url = scheme + ':' + url + if query: + url = url + '?' + query + if fragment: + url = url + '#' + fragment + return _coerce_result(url) + +def urljoin(base, url, allow_fragments=True): + """Join a base URL and a possibly relative URL to form an absolute + interpretation of the latter.""" + if not base: + return url + if not url: + return base + base, url, _coerce_result = _coerce_args(base, url) + bscheme, bnetloc, bpath, bparams, bquery, bfragment = \ + urlparse(base, '', allow_fragments) + scheme, netloc, path, params, query, fragment = \ + urlparse(url, bscheme, allow_fragments) + if scheme != bscheme or scheme not in uses_relative: + return _coerce_result(url) + if scheme in uses_netloc: + if netloc: + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + netloc = bnetloc + if path[:1] == '/': + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + if not path and not params: + path = bpath + params = bparams + if not query: + query = bquery + return _coerce_result(urlunparse((scheme, netloc, path, + params, query, fragment))) + segments = bpath.split('/')[:-1] + path.split('/') + # XXX The stuff below is bogus in various ways... + if segments[-1] == '.': + segments[-1] = '' + while '.' in segments: + segments.remove('.') + while 1: + i = 1 + n = len(segments) - 1 + while i < n: + if (segments[i] == '..' + and segments[i-1] not in ('', '..')): + del segments[i-1:i+1] + break + i = i+1 + else: + break + if segments == ['', '..']: + segments[-1] = '' + elif len(segments) >= 2 and segments[-1] == '..': + segments[-2:] = [''] + return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments), + params, query, fragment))) + +def urldefrag(url): + """Removes any existing fragment from URL. + + Returns a tuple of the defragmented URL and the fragment. If + the URL contained no fragments, the second element is the + empty string. + """ + url, _coerce_result = _coerce_args(url) + if '#' in url: + s, n, p, a, q, frag = urlparse(url) + defrag = urlunparse((s, n, p, a, q, '')) + else: + frag = '' + defrag = url + return _coerce_result(DefragResult(defrag, frag)) + +_hexdig = '0123456789ABCDEFabcdef' +_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)])) + for a in _hexdig for b in _hexdig) + +def unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? + string.split + return bytes(b'') + if isinstance(string, str): + string = string.encode('utf-8') + ### For Python-Future: + # It is already a byte-string object, but force it to be newbytes here on + # Py2: + string = bytes(string) + ### + bits = string.split(b'%') + if len(bits) == 1: + return string + res = [bits[0]] + append = res.append + for item in bits[1:]: + try: + append(_hextobyte[item[:2]]) + append(item[2:]) + except KeyError: + append(b'%') + append(item) + return bytes(b'').join(res) + +_asciire = re.compile('([\x00-\x7f]+)') + +def unquote(string, encoding='utf-8', errors='replace'): + """Replace %xx escapes by their single-character equivalent. The optional + encoding and errors parameters specify how to decode percent-encoded + sequences into Unicode characters, as accepted by the bytes.decode() + method. + By default, percent-encoded sequences are decoded with UTF-8, and invalid + sequences are replaced by a placeholder character. + + unquote('abc%20def') -> 'abc def'. + """ + if '%' not in string: + string.split + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'replace' + bits = _asciire.split(string) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(unquote_to_bytes(bits[i]).decode(encoding, errors)) + append(bits[i + 1]) + return ''.join(res) + +def parse_qs(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. + A true value indicates that blanks should be retained as + blank strings. The default false value indicates that + blank values are to be ignored and treated as if they were + not included. + + strict_parsing: flag indicating what to do with parsing errors. + If false (the default), errors are silently ignored. + If true, errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + """ + parsed_result = {} + pairs = parse_qsl(qs, keep_blank_values, strict_parsing, + encoding=encoding, errors=errors) + for name, value in pairs: + if name in parsed_result: + parsed_result[name].append(value) + else: + parsed_result[name] = [value] + return parsed_result + +def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, + encoding='utf-8', errors='replace'): + """Parse a query given as a string argument. + + Arguments: + + qs: percent-encoded query string to be parsed + + keep_blank_values: flag indicating whether blank values in + percent-encoded queries should be treated as blank strings. A + true value indicates that blanks should be retained as blank + strings. The default false value indicates that blank values + are to be ignored and treated as if they were not included. + + strict_parsing: flag indicating what to do with parsing errors. If + false (the default), errors are silently ignored. If true, + errors raise a ValueError exception. + + encoding and errors: specify how to decode percent-encoded sequences + into Unicode characters, as accepted by the bytes.decode() method. + + Returns a list, as G-d intended. + """ + qs, _coerce_result = _coerce_args(qs) + pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] + r = [] + for name_value in pairs: + if not name_value and not strict_parsing: + continue + nv = name_value.split('=', 1) + if len(nv) != 2: + if strict_parsing: + raise ValueError("bad query field: %r" % (name_value,)) + # Handle case of a control-name with no equal sign + if keep_blank_values: + nv.append('') + else: + continue + if len(nv[1]) or keep_blank_values: + name = nv[0].replace('+', ' ') + name = unquote(name, encoding=encoding, errors=errors) + name = _coerce_result(name) + value = nv[1].replace('+', ' ') + value = unquote(value, encoding=encoding, errors=errors) + value = _coerce_result(value) + r.append((name, value)) + return r + +def unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return unquote(string, encoding, errors) + +_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + b'abcdefghijklmnopqrstuvwxyz' + b'0123456789' + b'_.-')) +_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) +_safe_quoters = {} + +class Quoter(collections.defaultdict): + """A mapping from bytes (in range(0,256)) to strings. + + String values are percent-encoded byte values, unless the key < 128, and + in the "safe" set (either the specified safe set, or default set). + """ + # Keeps a cache internally, using defaultdict, for efficiency (lookups + # of cached keys don't call Python code at all). + def __init__(self, safe): + """safe: bytes object.""" + self.safe = _ALWAYS_SAFE.union(bytes(safe)) + + def __repr__(self): + # Without this, will just display as a defaultdict + return "" % dict(self) + + def __missing__(self, b): + # Handle a cache miss. Store quoted string in cache and return. + res = chr(b) if b in self.safe else '%{0:02X}'.format(b) + self[b] = res + return res + +def quote(string, safe='/', encoding=None, errors=None): + """quote('abc def') -> 'abc%20def' + + Each part of a URL, e.g. the path info, the query, etc., has a + different set of reserved characters that must be quoted. + + RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists + the following reserved characters. + + reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | + "$" | "," + + Each of these characters is reserved in some component of a URL, + but not necessarily in all of them. + + By default, the quote function is intended for quoting the path + section of a URL. Thus, it will not encode '/'. This character + is reserved, but in typical usage the quote function is being + called on a path where the existing slash characters are used as + reserved characters. + + string and safe may be either str or bytes objects. encoding must + not be specified if string is a str. + + The optional encoding and errors parameters specify how to deal with + non-ASCII characters, as accepted by the str.encode method. + By default, encoding='utf-8' (characters are encoded with UTF-8), and + errors='strict' (unsupported characters raise a UnicodeEncodeError). + """ + if isinstance(string, str): + if not string: + return string + if encoding is None: + encoding = 'utf-8' + if errors is None: + errors = 'strict' + string = string.encode(encoding, errors) + else: + if encoding is not None: + raise TypeError("quote() doesn't support 'encoding' for bytes") + if errors is not None: + raise TypeError("quote() doesn't support 'errors' for bytes") + return quote_from_bytes(string, safe) + +def quote_plus(string, safe='', encoding=None, errors=None): + """Like quote(), but also replace ' ' with '+', as required for quoting + HTML form values. Plus signs in the original string are escaped unless + they are included in safe. It also does not have safe default to '/'. + """ + # Check if ' ' in string, where string may either be a str or bytes. If + # there are no spaces, the regular quote will produce the right answer. + if ((isinstance(string, str) and ' ' not in string) or + (isinstance(string, bytes) and b' ' not in string)): + return quote(string, safe, encoding, errors) + if isinstance(safe, str): + space = str(' ') + else: + space = bytes(b' ') + string = quote(string, safe + space, encoding, errors) + return string.replace(' ', '+') + +def quote_from_bytes(bs, safe='/'): + """Like quote(), but accepts a bytes object rather than a str, and does + not perform string-to-bytes encoding. It always returns an ASCII string. + quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f' + """ + if not isinstance(bs, (bytes, bytearray)): + raise TypeError("quote_from_bytes() expected bytes") + if not bs: + return str('') + ### For Python-Future: + bs = bytes(bs) + ### + if isinstance(safe, str): + # Normalize 'safe' by converting to bytes and removing non-ASCII chars + safe = str(safe).encode('ascii', 'ignore') + else: + ### For Python-Future: + safe = bytes(safe) + ### + safe = bytes([c for c in safe if c < 128]) + if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): + return bs.decode() + try: + quoter = _safe_quoters[safe] + except KeyError: + _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + return str('').join([quoter(char) for char in bs]) + +def urlencode(query, doseq=False, safe='', encoding=None, errors=None): + """Encode a sequence of two-element tuples or dictionary into a URL query string. + + If any values in the query arg are sequences and doseq is true, each + sequence element is converted to a separate parameter. + + If the query arg is a sequence of two-element tuples, the order of the + parameters in the output will match the order of parameters in the + input. + + The query arg may be either a string or a bytes type. When query arg is a + string, the safe, encoding and error parameters are sent the quote_plus for + encoding. + """ + + if hasattr(query, "items"): + query = query.items() + else: + # It's a bother at times that strings and string-like objects are + # sequences. + try: + # non-sequence items should not work with len() + # non-empty strings will fail this + if len(query) and not isinstance(query[0], tuple): + raise TypeError + # Zero-length sequences of all types will get here and succeed, + # but that's a minor nit. Since the original implementation + # allowed empty dicts that type of behavior probably should be + # preserved for consistency + except TypeError: + ty, va, tb = sys.exc_info() + raise_with_traceback(TypeError("not a valid non-string sequence " + "or mapping object"), tb) + + l = [] + if not doseq: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + else: + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + for k, v in query: + if isinstance(k, bytes): + k = quote_plus(k, safe) + else: + k = quote_plus(str(k), safe, encoding, errors) + + if isinstance(v, bytes): + v = quote_plus(v, safe) + l.append(k + '=' + v) + elif isinstance(v, str): + v = quote_plus(v, safe, encoding, errors) + l.append(k + '=' + v) + else: + try: + # Is this a sufficient test for sequence-ness? + x = len(v) + except TypeError: + # not a sequence + v = quote_plus(str(v), safe, encoding, errors) + l.append(k + '=' + v) + else: + # loop over the sequence + for elt in v: + if isinstance(elt, bytes): + elt = quote_plus(elt, safe) + else: + elt = quote_plus(str(elt), safe, encoding, errors) + l.append(k + '=' + elt) + return str('&').join(l) + +# Utilities to parse URLs (most of these return None for missing parts): +# unwrap('') --> 'type://host/path' +# splittype('type:opaquestring') --> 'type', 'opaquestring' +# splithost('//host[:port]/path') --> 'host[:port]', '/path' +# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' +# splitpasswd('user:passwd') -> 'user', 'passwd' +# splitport('host:port') --> 'host', 'port' +# splitquery('/path?query') --> '/path', 'query' +# splittag('/path#tag') --> '/path', 'tag' +# splitattr('/path;attr1=value1;attr2=value2;...') -> +# '/path', ['attr1=value1', 'attr2=value2', ...] +# splitvalue('attr=value') --> 'attr', 'value' +# urllib.parse.unquote('abc%20def') -> 'abc def' +# quote('abc def') -> 'abc%20def') + +def to_bytes(url): + """to_bytes(u"URL") --> 'URL'.""" + # Most URL schemes require ASCII. If that changes, the conversion + # can be relaxed. + # XXX get rid of to_bytes() + if isinstance(url, str): + try: + url = url.encode("ASCII").decode() + except UnicodeError: + raise UnicodeError("URL " + repr(url) + + " contains non-ASCII characters") + return url + +def unwrap(url): + """unwrap('') --> 'type://host/path'.""" + url = str(url).strip() + if url[:1] == '<' and url[-1:] == '>': + url = url[1:-1].strip() + if url[:4] == 'URL:': url = url[4:].strip() + return url + +_typeprog = None +def splittype(url): + """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" + global _typeprog + if _typeprog is None: + import re + _typeprog = re.compile('^([^/:]+):') + + match = _typeprog.match(url) + if match: + scheme = match.group(1) + return scheme.lower(), url[len(scheme) + 1:] + return None, url + +_hostprog = None +def splithost(url): + """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" + global _hostprog + if _hostprog is None: + import re + _hostprog = re.compile('^//([^/?]*)(.*)$') + + match = _hostprog.match(url) + if match: + host_port = match.group(1) + path = match.group(2) + if path and not path.startswith('/'): + path = '/' + path + return host_port, path + return None, url + +_userprog = None +def splituser(host): + """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" + global _userprog + if _userprog is None: + import re + _userprog = re.compile('^(.*)@(.*)$') + + match = _userprog.match(host) + if match: return match.group(1, 2) + return None, host + +_passwdprog = None +def splitpasswd(user): + """splitpasswd('user:passwd') -> 'user', 'passwd'.""" + global _passwdprog + if _passwdprog is None: + import re + _passwdprog = re.compile('^([^:]*):(.*)$',re.S) + + match = _passwdprog.match(user) + if match: return match.group(1, 2) + return user, None + +# splittag('/path#tag') --> '/path', 'tag' +_portprog = None +def splitport(host): + """splitport('host:port') --> 'host', 'port'.""" + global _portprog + if _portprog is None: + import re + _portprog = re.compile('^(.*):([0-9]+)$') + + match = _portprog.match(host) + if match: return match.group(1, 2) + return host, None + +_nportprog = None +def splitnport(host, defport=-1): + """Split host and port, returning numeric port. + Return given default port if no ':' found; defaults to -1. + Return numerical port if a valid number are found after ':'. + Return None if ':' but not a valid number.""" + global _nportprog + if _nportprog is None: + import re + _nportprog = re.compile('^(.*):(.*)$') + + match = _nportprog.match(host) + if match: + host, port = match.group(1, 2) + try: + if not port: raise ValueError("no digits") + nport = int(port) + except ValueError: + nport = None + return host, nport + return host, defport + +_queryprog = None +def splitquery(url): + """splitquery('/path?query') --> '/path', 'query'.""" + global _queryprog + if _queryprog is None: + import re + _queryprog = re.compile('^(.*)\?([^?]*)$') + + match = _queryprog.match(url) + if match: return match.group(1, 2) + return url, None + +_tagprog = None +def splittag(url): + """splittag('/path#tag') --> '/path', 'tag'.""" + global _tagprog + if _tagprog is None: + import re + _tagprog = re.compile('^(.*)#([^#]*)$') + + match = _tagprog.match(url) + if match: return match.group(1, 2) + return url, None + +def splitattr(url): + """splitattr('/path;attr1=value1;attr2=value2;...') -> + '/path', ['attr1=value1', 'attr2=value2', ...].""" + words = url.split(';') + return words[0], words[1:] + +_valueprog = None +def splitvalue(attr): + """splitvalue('attr=value') --> 'attr', 'value'.""" + global _valueprog + if _valueprog is None: + import re + _valueprog = re.compile('^([^=]*)=(.*)$') + + match = _valueprog.match(attr) + if match: return match.group(1, 2) + return attr, None diff --git a/lib/future/backports/urllib/request.py b/lib/future/backports/urllib/request.py new file mode 100644 index 00000000..baee5401 --- /dev/null +++ b/lib/future/backports/urllib/request.py @@ -0,0 +1,2647 @@ +""" +Ported using Python-Future from the Python 3.3 standard library. + +An extensible library for opening URLs using a variety of protocols + +The simplest way to use this module is to call the urlopen function, +which accepts a string containing a URL or a Request object (described +below). It opens the URL and returns the results as file-like +object; the returned object has some extra methods described below. + +The OpenerDirector manages a collection of Handler objects that do +all the actual work. Each Handler implements a particular protocol or +option. The OpenerDirector is a composite object that invokes the +Handlers needed to open the requested URL. For example, the +HTTPHandler performs HTTP GET and POST requests and deals with +non-error returns. The HTTPRedirectHandler automatically deals with +HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler +deals with digest authentication. + +urlopen(url, data=None) -- Basic usage is the same as original +urllib. pass the url and optionally data to post to an HTTP URL, and +get a file-like object back. One difference is that you can also pass +a Request instance instead of URL. Raises a URLError (subclass of +IOError); for HTTP errors, raises an HTTPError, which can also be +treated as a valid response. + +build_opener -- Function that creates a new OpenerDirector instance. +Will install the default handlers. Accepts one or more Handlers as +arguments, either instances or Handler classes that it will +instantiate. If one of the argument is a subclass of the default +handler, the argument will be installed instead of the default. + +install_opener -- Installs a new opener as the default opener. + +objects of interest: + +OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages +the Handler classes, while dealing with requests and responses. + +Request -- An object that encapsulates the state of a request. The +state can be as simple as the URL. It can also include extra HTTP +headers, e.g. a User-Agent. + +BaseHandler -- + +internals: +BaseHandler and parent +_call_chain conventions + +Example usage: + +import urllib.request + +# set up authentication info +authinfo = urllib.request.HTTPBasicAuthHandler() +authinfo.add_password(realm='PDQ Application', + uri='https://mahler:8092/site-updates.py', + user='klem', + passwd='geheim$parole') + +proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) + +# build a new opener that adds authentication and caching FTP handlers +opener = urllib.request.build_opener(proxy_support, authinfo, + urllib.request.CacheFTPHandler) + +# install it +urllib.request.install_opener(opener) + +f = urllib.request.urlopen('http://www.python.org/') +""" + +# XXX issues: +# If an authentication error handler that tries to perform +# authentication for some reason but fails, how should the error be +# signalled? The client needs to know the HTTP error code. But if +# the handler knows that the problem was, e.g., that it didn't know +# that hash algo that requested in the challenge, it would be good to +# pass that information along to the client, too. +# ftp errors aren't handled cleanly +# check digest against correct (i.e. non-apache) implementation + +# Possible extensions: +# complex proxies XXX not sure what exactly was meant by this +# abstract factory for opener + +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import bytes, dict, filter, input, int, map, open, str +from future.utils import PY2, PY3, raise_with_traceback + +import base64 +import bisect +import hashlib +import array + +from future.backports import email +from future.backports.http import client as http_client +from .error import URLError, HTTPError, ContentTooShortError +from .parse import ( + urlparse, urlsplit, urljoin, unwrap, quote, unquote, + splittype, splithost, splitport, splituser, splitpasswd, + splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) +from .response import addinfourl, addclosehook + +import io +import os +import posixpath +import re +import socket +import sys +import time +import tempfile +import contextlib +import warnings + +from future.utils import PY2 + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +# check for SSL +try: + import ssl + # Not available in the SSL module in Py2: + from ssl import SSLContext +except ImportError: + _have_ssl = False +else: + _have_ssl = True + +__all__ = [ + # Classes + 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', + 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', + 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', + 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', + 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'UnknownHandler', 'HTTPErrorProcessor', + # Functions + 'urlopen', 'install_opener', 'build_opener', + 'pathname2url', 'url2pathname', 'getproxies', + # Legacy interface + 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', +] + +# used in User-Agent header sent +__version__ = sys.version[:3] + +_opener = None +def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs): + if 'cadefault' in _3to2kwargs: cadefault = _3to2kwargs['cadefault']; del _3to2kwargs['cadefault'] + else: cadefault = False + if 'capath' in _3to2kwargs: capath = _3to2kwargs['capath']; del _3to2kwargs['capath'] + else: capath = None + if 'cafile' in _3to2kwargs: cafile = _3to2kwargs['cafile']; del _3to2kwargs['cafile'] + else: cafile = None + global _opener + if cafile or capath or cadefault: + if not _have_ssl: + raise ValueError('SSL support not available') + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + context.verify_mode = ssl.CERT_REQUIRED + if cafile or capath: + context.load_verify_locations(cafile, capath) + else: + context.set_default_verify_paths() + https_handler = HTTPSHandler(context=context, check_hostname=True) + opener = build_opener(https_handler) + elif _opener is None: + _opener = opener = build_opener() + else: + opener = _opener + return opener.open(url, data, timeout) + +def install_opener(opener): + global _opener + _opener = opener + +_url_tempfiles = [] +def urlretrieve(url, filename=None, reporthook=None, data=None): + """ + Retrieve a URL into a temporary location on disk. + + Requires a URL argument. If a filename is passed, it is used as + the temporary file location. The reporthook argument should be + a callable that accepts a block number, a read size, and the + total file size of the URL target. The data argument should be + valid URL encoded data. + + If a filename is passed and the URL points to a local resource, + the result is a copy from local file to new file. + + Returns a tuple containing the path to the newly created + data file as well as the resulting HTTPMessage object. + """ + url_type, path = splittype(url) + + with contextlib.closing(urlopen(url, data)) as fp: + headers = fp.info() + + # Just return the local path and the "headers" for file:// + # URLs. No sense in performing a copy unless requested. + if url_type == "file" and not filename: + return os.path.normpath(path), headers + + # Handle temporary file setup. + if filename: + tfp = open(filename, 'wb') + else: + tfp = tempfile.NamedTemporaryFile(delete=False) + filename = tfp.name + _url_tempfiles.append(filename) + + with tfp: + result = filename, headers + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + + if reporthook: + reporthook(blocknum, bs, size) + + while True: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result + +def urlcleanup(): + for temp_file in _url_tempfiles: + try: + os.unlink(temp_file) + except EnvironmentError: + pass + + del _url_tempfiles[:] + global _opener + if _opener: + _opener = None + +if PY3: + _cut_port_re = re.compile(r":\d+$", re.ASCII) +else: + _cut_port_re = re.compile(r":\d+$") + +def request_host(request): + + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.full_url + host = urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = _cut_port_re.sub("", host, 1) + return host.lower() + +class Request(object): + + def __init__(self, url, data=None, headers={}, + origin_req_host=None, unverifiable=False, + method=None): + # unwrap('') --> 'type://host/path' + self.full_url = unwrap(url) + self.full_url, self.fragment = splittag(self.full_url) + self.data = data + self.headers = {} + self._tunnel_host = None + for key, value in headers.items(): + self.add_header(key, value) + self.unredirected_hdrs = {} + if origin_req_host is None: + origin_req_host = request_host(self) + self.origin_req_host = origin_req_host + self.unverifiable = unverifiable + self.method = method + self._parse() + + def _parse(self): + self.type, rest = splittype(self.full_url) + if self.type is None: + raise ValueError("unknown url type: %r" % self.full_url) + self.host, self.selector = splithost(rest) + if self.host: + self.host = unquote(self.host) + + def get_method(self): + """Return a string indicating the HTTP request method.""" + if self.method is not None: + return self.method + elif self.data is not None: + return "POST" + else: + return "GET" + + def get_full_url(self): + if self.fragment: + return '%s#%s' % (self.full_url, self.fragment) + else: + return self.full_url + + # Begin deprecated methods + + def add_data(self, data): + msg = "Request.add_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + self.data = data + + def has_data(self): + msg = "Request.has_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.data is not None + + def get_data(self): + msg = "Request.get_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.data + + def get_type(self): + msg = "Request.get_type method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.type + + def get_host(self): + msg = "Request.get_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.host + + def get_selector(self): + msg = "Request.get_selector method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.selector + + def is_unverifiable(self): + msg = "Request.is_unverifiable method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.unverifiable + + def get_origin_req_host(self): + msg = "Request.get_origin_req_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) + return self.origin_req_host + + # End deprecated methods + + def set_proxy(self, host, type): + if self.type == 'https' and not self._tunnel_host: + self._tunnel_host = self.host + else: + self.type= type + self.selector = self.full_url + self.host = host + + def has_proxy(self): + return self.selector == self.full_url + + def add_header(self, key, val): + # useful for something like authentication + self.headers[key.capitalize()] = val + + def add_unredirected_header(self, key, val): + # will not be added to a redirected request + self.unredirected_hdrs[key.capitalize()] = val + + def has_header(self, header_name): + return (header_name in self.headers or + header_name in self.unredirected_hdrs) + + def get_header(self, header_name, default=None): + return self.headers.get( + header_name, + self.unredirected_hdrs.get(header_name, default)) + + def header_items(self): + hdrs = self.unredirected_hdrs.copy() + hdrs.update(self.headers) + return list(hdrs.items()) + +class OpenerDirector(object): + def __init__(self): + client_version = "Python-urllib/%s" % __version__ + self.addheaders = [('User-agent', client_version)] + # self.handlers is retained only for backward compatibility + self.handlers = [] + # manage the individual handlers + self.handle_open = {} + self.handle_error = {} + self.process_response = {} + self.process_request = {} + + def add_handler(self, handler): + if not hasattr(handler, "add_parent"): + raise TypeError("expected BaseHandler instance, got %r" % + type(handler)) + + added = False + for meth in dir(handler): + if meth in ["redirect_request", "do_open", "proxy_open"]: + # oops, coincidental match + continue + + i = meth.find("_") + protocol = meth[:i] + condition = meth[i+1:] + + if condition.startswith("error"): + j = condition.find("_") + i + 1 + kind = meth[j+1:] + try: + kind = int(kind) + except ValueError: + pass + lookup = self.handle_error.get(protocol, {}) + self.handle_error[protocol] = lookup + elif condition == "open": + kind = protocol + lookup = self.handle_open + elif condition == "response": + kind = protocol + lookup = self.process_response + elif condition == "request": + kind = protocol + lookup = self.process_request + else: + continue + + handlers = lookup.setdefault(kind, []) + if handlers: + bisect.insort(handlers, handler) + else: + handlers.append(handler) + added = True + + if added: + bisect.insort(self.handlers, handler) + handler.add_parent(self) + + def close(self): + # Only exists for backwards compatibility. + pass + + def _call_chain(self, chain, kind, meth_name, *args): + # Handlers raise an exception if no one else should try to handle + # the request, or return None if they can't but another handler + # could. Otherwise, they return the response. + handlers = chain.get(kind, ()) + for handler in handlers: + func = getattr(handler, meth_name) + result = func(*args) + if result is not None: + return result + + def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + """ + Accept a URL or a Request object + + Python-Future: if the URL is passed as a byte-string, decode it first. + """ + if isinstance(fullurl, bytes): + fullurl = fullurl.decode() + if isinstance(fullurl, str): + req = Request(fullurl, data) + else: + req = fullurl + if data is not None: + req.data = data + + req.timeout = timeout + protocol = req.type + + # pre-process request + meth_name = protocol+"_request" + for processor in self.process_request.get(protocol, []): + meth = getattr(processor, meth_name) + req = meth(req) + + response = self._open(req, data) + + # post-process response + meth_name = protocol+"_response" + for processor in self.process_response.get(protocol, []): + meth = getattr(processor, meth_name) + response = meth(req, response) + + return response + + def _open(self, req, data=None): + result = self._call_chain(self.handle_open, 'default', + 'default_open', req) + if result: + return result + + protocol = req.type + result = self._call_chain(self.handle_open, protocol, protocol + + '_open', req) + if result: + return result + + return self._call_chain(self.handle_open, 'unknown', + 'unknown_open', req) + + def error(self, proto, *args): + if proto in ('http', 'https'): + # XXX http[s] protocols are special-cased + dict = self.handle_error['http'] # https is not different than http + proto = args[2] # YUCK! + meth_name = 'http_error_%s' % proto + http_err = 1 + orig_args = args + else: + dict = self.handle_error + meth_name = proto + '_error' + http_err = 0 + args = (dict, proto, meth_name) + args + result = self._call_chain(*args) + if result: + return result + + if http_err: + args = (dict, 'default', 'http_error_default') + orig_args + return self._call_chain(*args) + +# XXX probably also want an abstract factory that knows when it makes +# sense to skip a superclass in favor of a subclass and when it might +# make sense to include both + +def build_opener(*handlers): + """Create an opener object from a list of handlers. + + The opener will use several default handlers, including support + for HTTP, FTP and when applicable HTTPS. + + If any of the handlers passed as arguments are subclasses of the + default handlers, the default handlers will not be used. + """ + def isclass(obj): + return isinstance(obj, type) or hasattr(obj, "__bases__") + + opener = OpenerDirector() + default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, + HTTPDefaultErrorHandler, HTTPRedirectHandler, + FTPHandler, FileHandler, HTTPErrorProcessor] + if hasattr(http_client, "HTTPSConnection"): + default_classes.append(HTTPSHandler) + skip = set() + for klass in default_classes: + for check in handlers: + if isclass(check): + if issubclass(check, klass): + skip.add(klass) + elif isinstance(check, klass): + skip.add(klass) + for klass in skip: + default_classes.remove(klass) + + for klass in default_classes: + opener.add_handler(klass()) + + for h in handlers: + if isclass(h): + h = h() + opener.add_handler(h) + return opener + +class BaseHandler(object): + handler_order = 500 + + def add_parent(self, parent): + self.parent = parent + + def close(self): + # Only exists for backwards compatibility + pass + + def __lt__(self, other): + if not hasattr(other, "handler_order"): + # Try to preserve the old behavior of having custom classes + # inserted after default ones (works only for custom user + # classes which are not aware of handler_order). + return True + return self.handler_order < other.handler_order + + +class HTTPErrorProcessor(BaseHandler): + """Process HTTP error responses.""" + handler_order = 1000 # after all other processing + + def http_response(self, request, response): + code, msg, hdrs = response.code, response.msg, response.info() + + # According to RFC 2616, "2xx" code indicates that the client's + # request was successfully received, understood, and accepted. + if not (200 <= code < 300): + response = self.parent.error( + 'http', request, response, code, msg, hdrs) + + return response + + https_response = http_response + +class HTTPDefaultErrorHandler(BaseHandler): + def http_error_default(self, req, fp, code, msg, hdrs): + raise HTTPError(req.full_url, code, msg, hdrs, fp) + +class HTTPRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. + """ + m = req.get_method() + if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (301, 302, 303) and m == "POST")): + raise HTTPError(req.full_url, code, msg, headers, fp) + + # Strictly (according to RFC 2616), 301 or 302 in response to + # a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib.request, in this case). In practice, + # essentially all clients do redirect in this case, so we do + # the same. + # be conciliant with URIs containing a space + newurl = newurl.replace(' ', '%20') + CONTENT_HEADERS = ("content-length", "content-type") + newheaders = dict((k, v) for k, v in req.headers.items() + if k.lower() not in CONTENT_HEADERS) + return Request(newurl, + headers=newheaders, + origin_req_host=req.origin_req_host, + unverifiable=True) + + # Implementation note: To avoid the server sending us into an + # infinite loop, the request object needs to track what URLs we + # have already seen. Do this by adding a handler-specific + # attribute to the Request object. + def http_error_302(self, req, fp, code, msg, headers): + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if "location" in headers: + newurl = headers["location"] + elif "uri" in headers: + newurl = headers["uri"] + else: + return + + # fix a possible malformed URL + urlparts = urlparse(newurl) + + # For security reasons we don't allow redirection to anything other + # than http, https or ftp. + + if urlparts.scheme not in ('http', 'https', 'ftp', ''): + raise HTTPError( + newurl, code, + "%s - Redirection to url '%s' is not allowed" % (msg, newurl), + headers, fp) + + if not urlparts.path: + urlparts = list(urlparts) + urlparts[2] = "/" + newurl = urlunparse(urlparts) + + newurl = urljoin(req.full_url, newurl) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(req, fp, code, msg, headers, newurl) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. + if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.full_url, code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. + fp.read() + fp.close() + + return self.parent.open(new, timeout=req.timeout) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + +def _parse_proxy(proxy): + """Return (scheme, user, password, host/port) given a URL or an authority. + + If a URL is supplied, it must have an authority (host:port) component. + According to RFC 3986, having an authority component means the URL must + have two slashes after the scheme: + + >>> _parse_proxy('file:/ftp.example.com/') + Traceback (most recent call last): + ValueError: proxy URL with no authority: 'file:/ftp.example.com/' + + The first three items of the returned tuple may be None. + + Examples of authority parsing: + + >>> _parse_proxy('proxy.example.com') + (None, None, None, 'proxy.example.com') + >>> _parse_proxy('proxy.example.com:3128') + (None, None, None, 'proxy.example.com:3128') + + The authority component may optionally include userinfo (assumed to be + username:password): + + >>> _parse_proxy('joe:password@proxy.example.com') + (None, 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('joe:password@proxy.example.com:3128') + (None, 'joe', 'password', 'proxy.example.com:3128') + + Same examples, but with URLs instead: + + >>> _parse_proxy('http://proxy.example.com/') + ('http', None, None, 'proxy.example.com') + >>> _parse_proxy('http://proxy.example.com:3128/') + ('http', None, None, 'proxy.example.com:3128') + >>> _parse_proxy('http://joe:password@proxy.example.com/') + ('http', 'joe', 'password', 'proxy.example.com') + >>> _parse_proxy('http://joe:password@proxy.example.com:3128') + ('http', 'joe', 'password', 'proxy.example.com:3128') + + Everything after the authority is ignored: + + >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128') + ('ftp', 'joe', 'password', 'proxy.example.com') + + Test for no trailing '/' case: + + >>> _parse_proxy('http://joe:password@proxy.example.com') + ('http', 'joe', 'password', 'proxy.example.com') + + """ + scheme, r_scheme = splittype(proxy) + if not r_scheme.startswith("/"): + # authority + scheme = None + authority = proxy + else: + # URL + if not r_scheme.startswith("//"): + raise ValueError("proxy URL with no authority: %r" % proxy) + # We have an authority, so for RFC 3986-compliant URLs (by ss 3. + # and 3.3.), path is empty or starts with '/' + end = r_scheme.find("/", 2) + if end == -1: + end = None + authority = r_scheme[2:end] + userinfo, hostport = splituser(authority) + if userinfo is not None: + user, password = splitpasswd(userinfo) + else: + user = password = None + return scheme, user, password, hostport + +class ProxyHandler(BaseHandler): + # Proxies must be in front + handler_order = 100 + + def __init__(self, proxies=None): + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'keys'), "proxies must be a mapping" + self.proxies = proxies + for type, url in proxies.items(): + setattr(self, '%s_open' % type, + lambda r, proxy=url, type=type, meth=self.proxy_open: + meth(r, proxy, type)) + + def proxy_open(self, req, proxy, type): + orig_type = req.type + proxy_type, user, password, hostport = _parse_proxy(proxy) + if proxy_type is None: + proxy_type = orig_type + + if req.host and proxy_bypass(req.host): + return None + + if user and password: + user_pass = '%s:%s' % (unquote(user), + unquote(password)) + creds = base64.b64encode(user_pass.encode()).decode("ascii") + req.add_header('Proxy-authorization', 'Basic ' + creds) + hostport = unquote(hostport) + req.set_proxy(hostport, proxy_type) + if orig_type == proxy_type or orig_type == 'https': + # let other handlers take care of it + return None + else: + # need to start over, because the other handlers don't + # grok the proxy's URL type + # e.g. if we have a constructor arg proxies like so: + # {'http': 'ftp://proxy.example.com'}, we may end up turning + # a request for http://acme.example.com/a into one for + # ftp://proxy.example.com/a + return self.parent.open(req, timeout=req.timeout) + +class HTTPPasswordMgr(object): + + def __init__(self): + self.passwd = {} + + def add_password(self, realm, uri, user, passwd): + # uri could be a single URI or a sequence + if isinstance(uri, str): + uri = [uri] + if realm not in self.passwd: + self.passwd[realm] = {} + for default_port in True, False: + reduced_uri = tuple( + [self.reduce_uri(u, default_port) for u in uri]) + self.passwd[realm][reduced_uri] = (user, passwd) + + def find_user_password(self, realm, authuri): + domains = self.passwd.get(realm, {}) + for default_port in True, False: + reduced_authuri = self.reduce_uri(authuri, default_port) + for uris, authinfo in domains.items(): + for uri in uris: + if self.is_suburi(uri, reduced_authuri): + return authinfo + return None, None + + def reduce_uri(self, uri, default_port=True): + """Accept authority or URI and extract only the authority and path.""" + # note HTTP URLs do not have a userinfo component + parts = urlsplit(uri) + if parts[1]: + # URI + scheme = parts[0] + authority = parts[1] + path = parts[2] or '/' + else: + # host or host:port + scheme = None + authority = uri + path = '/' + host, port = splitport(authority) + if default_port and port is None and scheme is not None: + dport = {"http": 80, + "https": 443, + }.get(scheme) + if dport is not None: + authority = "%s:%d" % (host, dport) + return authority, path + + def is_suburi(self, base, test): + """Check if test is below base in a URI tree + + Both args must be URIs in reduced form. + """ + if base == test: + return True + if base[0] != test[0]: + return False + common = posixpath.commonprefix((base[1], test[1])) + if len(common) == len(base[1]): + return True + return False + + +class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): + + def find_user_password(self, realm, authuri): + user, password = HTTPPasswordMgr.find_user_password(self, realm, + authuri) + if user is not None: + return user, password + return HTTPPasswordMgr.find_user_password(self, None, authuri) + + +class AbstractBasicAuthHandler(object): + + # XXX this allows for multiple auth-schemes, but will stupidly pick + # the last one with a realm specified. + + # allow for double- and single-quoted realm values + # (single quotes are a violation of the RFC, but appear in the wild) + rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+' + 'realm=(["\']?)([^"\']*)\\2', re.I) + + # XXX could pre-emptively send auth info already accepted (RFC 2617, + # end of section 2, and section 1.2 immediately after "credentials" + # production). + + def __init__(self, password_mgr=None): + if password_mgr is None: + password_mgr = HTTPPasswordMgr() + self.passwd = password_mgr + self.add_password = self.passwd.add_password + self.retried = 0 + + def reset_retry_count(self): + self.retried = 0 + + def http_error_auth_reqed(self, authreq, host, req, headers): + # host may be an authority (without userinfo) or a URL with an + # authority + # XXX could be multiple headers + authreq = headers.get(authreq, None) + + if self.retried > 5: + # retry sending the username:password 5 times before failing. + raise HTTPError(req.get_full_url(), 401, "basic auth failed", + headers, None) + else: + self.retried += 1 + + if authreq: + scheme = authreq.split()[0] + if scheme.lower() != 'basic': + raise ValueError("AbstractBasicAuthHandler does not" + " support the following scheme: '%s'" % + scheme) + else: + mo = AbstractBasicAuthHandler.rx.search(authreq) + if mo: + scheme, quote, realm = mo.groups() + if quote not in ['"',"'"]: + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 2) + if scheme.lower() == 'basic': + response = self.retry_http_basic_auth(host, req, realm) + if response and response.code != 401: + self.retried = 0 + return response + + def retry_http_basic_auth(self, host, req, realm): + user, pw = self.passwd.find_user_password(realm, host) + if pw is not None: + raw = "%s:%s" % (user, pw) + auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii") + if req.headers.get(self.auth_header, None) == auth: + return None + req.add_unredirected_header(self.auth_header, auth) + return self.parent.open(req, timeout=req.timeout) + else: + return None + + +class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Authorization' + + def http_error_401(self, req, fp, code, msg, headers): + url = req.full_url + response = self.http_error_auth_reqed('www-authenticate', + url, req, headers) + self.reset_retry_count() + return response + + +class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + auth_header = 'Proxy-authorization' + + def http_error_407(self, req, fp, code, msg, headers): + # http_error_auth_reqed requires that there is no userinfo component in + # authority. Assume there isn't one, since urllib.request does not (and + # should not, RFC 3986 s. 3.2.1) support requests for URLs containing + # userinfo. + authority = req.host + response = self.http_error_auth_reqed('proxy-authenticate', + authority, req, headers) + self.reset_retry_count() + return response + + +# Return n random bytes. +_randombytes = os.urandom + + +class AbstractDigestAuthHandler(object): + # Digest authentication is specified in RFC 2617. + + # XXX The client does not inspect the Authentication-Info header + # in a successful response. + + # XXX It should be possible to test this implementation against + # a mock server that just generates a static set of challenges. + + # XXX qop="auth-int" supports is shaky + + def __init__(self, passwd=None): + if passwd is None: + passwd = HTTPPasswordMgr() + self.passwd = passwd + self.add_password = self.passwd.add_password + self.retried = 0 + self.nonce_count = 0 + self.last_nonce = None + + def reset_retry_count(self): + self.retried = 0 + + def http_error_auth_reqed(self, auth_header, host, req, headers): + authreq = headers.get(auth_header, None) + if self.retried > 5: + # Don't fail endlessly - if we failed once, we'll probably + # fail a second time. Hm. Unless the Password Manager is + # prompting for the information. Crap. This isn't great + # but it's better than the current 'repeat until recursion + # depth exceeded' approach + raise HTTPError(req.full_url, 401, "digest auth failed", + headers, None) + else: + self.retried += 1 + if authreq: + scheme = authreq.split()[0] + if scheme.lower() == 'digest': + return self.retry_http_digest_auth(req, authreq) + elif scheme.lower() != 'basic': + raise ValueError("AbstractDigestAuthHandler does not support" + " the following scheme: '%s'" % scheme) + + def retry_http_digest_auth(self, req, auth): + token, challenge = auth.split(' ', 1) + chal = parse_keqv_list(filter(None, parse_http_list(challenge))) + auth = self.get_authorization(req, chal) + if auth: + auth_val = 'Digest %s' % auth + if req.headers.get(self.auth_header, None) == auth_val: + return None + req.add_unredirected_header(self.auth_header, auth_val) + resp = self.parent.open(req, timeout=req.timeout) + return resp + + def get_cnonce(self, nonce): + # The cnonce-value is an opaque + # quoted string value provided by the client and used by both client + # and server to avoid chosen plaintext attacks, to provide mutual + # authentication, and to provide some message integrity protection. + # This isn't a fabulous effort, but it's probably Good Enough. + s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime()) + b = s.encode("ascii") + _randombytes(8) + dig = hashlib.sha1(b).hexdigest() + return dig[:16] + + def get_authorization(self, req, chal): + try: + realm = chal['realm'] + nonce = chal['nonce'] + qop = chal.get('qop') + algorithm = chal.get('algorithm', 'MD5') + # mod_digest doesn't send an opaque, even though it isn't + # supposed to be optional + opaque = chal.get('opaque', None) + except KeyError: + return None + + H, KD = self.get_algorithm_impls(algorithm) + if H is None: + return None + + user, pw = self.passwd.find_user_password(realm, req.full_url) + if user is None: + return None + + # XXX not implemented yet + if req.data is not None: + entdig = self.get_entity_digest(req.data, chal) + else: + entdig = None + + A1 = "%s:%s:%s" % (user, realm, pw) + A2 = "%s:%s" % (req.get_method(), + # XXX selector: what about proxies and full urls + req.selector) + if qop == 'auth': + if nonce == self.last_nonce: + self.nonce_count += 1 + else: + self.nonce_count = 1 + self.last_nonce = nonce + ncvalue = '%08x' % self.nonce_count + cnonce = self.get_cnonce(nonce) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2)) + respdig = KD(H(A1), noncebit) + elif qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + else: + # XXX handle auth-int. + raise URLError("qop '%s' is not supported." % qop) + + # XXX should the partial digests be encoded too? + + base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (user, realm, nonce, req.selector, + respdig) + if opaque: + base += ', opaque="%s"' % opaque + if entdig: + base += ', digest="%s"' % entdig + base += ', algorithm="%s"' % algorithm + if qop: + base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce) + return base + + def get_algorithm_impls(self, algorithm): + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest() + elif algorithm == 'SHA': + H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest() + # XXX MD5-sess + KD = lambda s, d: H("%s:%s" % (s, d)) + return H, KD + + def get_entity_digest(self, data, chal): + # XXX not implemented yet + return None + + +class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + """An authentication protocol defined by RFC 2069 + + Digest authentication improves on basic authentication because it + does not transmit passwords in the clear. + """ + + auth_header = 'Authorization' + handler_order = 490 # before Basic auth + + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse(req.full_url)[1] + retry = self.http_error_auth_reqed('www-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + + +class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + + auth_header = 'Proxy-Authorization' + handler_order = 490 # before Basic auth + + def http_error_407(self, req, fp, code, msg, headers): + host = req.host + retry = self.http_error_auth_reqed('proxy-authenticate', + host, req, headers) + self.reset_retry_count() + return retry + +class AbstractHTTPHandler(BaseHandler): + + def __init__(self, debuglevel=0): + self._debuglevel = debuglevel + + def set_http_debuglevel(self, level): + self._debuglevel = level + + def do_request_(self, request): + host = request.host + if not host: + raise URLError('no host given') + + if request.data is not None: # POST + data = request.data + if isinstance(data, str): + msg = "POST data should be bytes or an iterable of bytes. " \ + "It cannot be of type str." + raise TypeError(msg) + if not request.has_header('Content-type'): + request.add_unredirected_header( + 'Content-type', + 'application/x-www-form-urlencoded') + if not request.has_header('Content-length'): + size = None + try: + ### For Python-Future: + if PY2 and isinstance(data, array.array): + # memoryviews of arrays aren't supported + # in Py2.7. (e.g. memoryview(array.array('I', + # [1, 2, 3, 4])) raises a TypeError.) + # So we calculate the size manually instead: + size = len(data) * data.itemsize + ### + else: + mv = memoryview(data) + size = len(mv) * mv.itemsize + except TypeError: + if isinstance(data, Iterable): + raise ValueError("Content-Length should be specified " + "for iterable data of type %r %r" % (type(data), + data)) + else: + request.add_unredirected_header( + 'Content-length', '%d' % size) + + sel_host = host + if request.has_proxy(): + scheme, sel = splittype(request.selector) + sel_host, sel_path = splithost(sel) + if not request.has_header('Host'): + request.add_unredirected_header('Host', sel_host) + for name, value in self.parent.addheaders: + name = name.capitalize() + if not request.has_header(name): + request.add_unredirected_header(name, value) + + return request + + def do_open(self, http_class, req, **http_conn_args): + """Return an HTTPResponse object for the request, using http_class. + + http_class must implement the HTTPConnection API from http.client. + """ + host = req.host + if not host: + raise URLError('no host given') + + # will parse host:port + h = http_class(host, timeout=req.timeout, **http_conn_args) + + headers = dict(req.unredirected_hdrs) + headers.update(dict((k, v) for k, v in req.headers.items() + if k not in headers)) + + # TODO(jhylton): Should this be redesigned to handle + # persistent connections? + + # We want to make an HTTP/1.1 request, but the addinfourl + # class isn't prepared to deal with a persistent connection. + # It will try to read all remaining data from the socket, + # which will block while the server waits for the next request. + # So make sure the connection gets closed after the (only) + # request. + headers["Connection"] = "close" + headers = dict((name.title(), val) for name, val in headers.items()) + + if req._tunnel_host: + tunnel_headers = {} + proxy_auth_hdr = "Proxy-Authorization" + if proxy_auth_hdr in headers: + tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr] + # Proxy-Authorization should not be sent to origin + # server. + del headers[proxy_auth_hdr] + h.set_tunnel(req._tunnel_host, headers=tunnel_headers) + + try: + h.request(req.get_method(), req.selector, req.data, headers) + except socket.error as err: # timeout error + h.close() + raise URLError(err) + else: + r = h.getresponse() + # If the server does not send us a 'Connection: close' header, + # HTTPConnection assumes the socket should be left open. Manually + # mark the socket to be closed when this response object goes away. + if h.sock: + h.sock.close() + h.sock = None + + + r.url = req.get_full_url() + # This line replaces the .msg attribute of the HTTPResponse + # with .headers, because urllib clients expect the response to + # have the reason in .msg. It would be good to mark this + # attribute is deprecated and get then to use info() or + # .headers. + r.msg = r.reason + return r + + +class HTTPHandler(AbstractHTTPHandler): + + def http_open(self, req): + return self.do_open(http_client.HTTPConnection, req) + + http_request = AbstractHTTPHandler.do_request_ + +if hasattr(http_client, 'HTTPSConnection'): + + class HTTPSHandler(AbstractHTTPHandler): + + def __init__(self, debuglevel=0, context=None, check_hostname=None): + AbstractHTTPHandler.__init__(self, debuglevel) + self._context = context + self._check_hostname = check_hostname + + def https_open(self, req): + return self.do_open(http_client.HTTPSConnection, req, + context=self._context, check_hostname=self._check_hostname) + + https_request = AbstractHTTPHandler.do_request_ + + __all__.append('HTTPSHandler') + +class HTTPCookieProcessor(BaseHandler): + def __init__(self, cookiejar=None): + import future.backports.http.cookiejar as http_cookiejar + if cookiejar is None: + cookiejar = http_cookiejar.CookieJar() + self.cookiejar = cookiejar + + def http_request(self, request): + self.cookiejar.add_cookie_header(request) + return request + + def http_response(self, request, response): + self.cookiejar.extract_cookies(response, request) + return response + + https_request = http_request + https_response = http_response + +class UnknownHandler(BaseHandler): + def unknown_open(self, req): + type = req.type + raise URLError('unknown url type: %s' % type) + +def parse_keqv_list(l): + """Parse list of key=value strings where keys are not duplicated.""" + parsed = {} + for elt in l: + k, v = elt.split('=', 1) + if v[0] == '"' and v[-1] == '"': + v = v[1:-1] + parsed[k] = v + return parsed + +def parse_http_list(s): + """Parse lists as described by RFC 2068 Section 2. + + In particular, parse comma-separated lists where the elements of + the list may include quoted-strings. A quoted-string could + contain a comma. A non-quoted string could have quotes in the + middle. Neither commas nor quotes count if they are escaped. + Only double-quotes count, not single-quotes. + """ + res = [] + part = '' + + escape = quote = False + for cur in s: + if escape: + part += cur + escape = False + continue + if quote: + if cur == '\\': + escape = True + continue + elif cur == '"': + quote = False + part += cur + continue + + if cur == ',': + res.append(part) + part = '' + continue + + if cur == '"': + quote = True + + part += cur + + # append last part + if part: + res.append(part) + + return [part.strip() for part in res] + +class FileHandler(BaseHandler): + # Use local file or FTP depending on form of URL + def file_open(self, req): + url = req.selector + if url[:2] == '//' and url[2:3] != '/' and (req.host and + req.host != 'localhost'): + if not req.host is self.get_names(): + raise URLError("file:// scheme is supported only on localhost") + else: + return self.open_local_file(req) + + # names for the localhost + names = None + def get_names(self): + if FileHandler.names is None: + try: + FileHandler.names = tuple( + socket.gethostbyname_ex('localhost')[2] + + socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + FileHandler.names = (socket.gethostbyname('localhost'),) + return FileHandler.names + + # not entirely sure what the rules are here + def open_local_file(self, req): + import future.backports.email.utils as email_utils + import mimetypes + host = req.host + filename = req.selector + localfile = url2pathname(filename) + try: + stats = os.stat(localfile) + size = stats.st_size + modified = email_utils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(filename)[0] + headers = email.message_from_string( + 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified)) + if host: + host, port = splitport(host) + if not host or \ + (not port and _safe_gethostbyname(host) in self.get_names()): + if host: + origurl = 'file://' + host + filename + else: + origurl = 'file://' + filename + return addinfourl(open(localfile, 'rb'), headers, origurl) + except OSError as exp: + # users shouldn't expect OSErrors coming from urlopen() + raise URLError(exp) + raise URLError('file not on local host') + +def _safe_gethostbyname(host): + try: + return socket.gethostbyname(host) + except socket.gaierror: + return None + +class FTPHandler(BaseHandler): + def ftp_open(self, req): + import ftplib + import mimetypes + host = req.host + if not host: + raise URLError('ftp error: no host given') + host, port = splitport(host) + if port is None: + port = ftplib.FTP_PORT + else: + port = int(port) + + # username/password handling + user, host = splituser(host) + if user: + user, passwd = splitpasswd(user) + else: + passwd = None + host = unquote(host) + user = user or '' + passwd = passwd or '' + + try: + host = socket.gethostbyname(host) + except socket.error as msg: + raise URLError(msg) + path, attrs = splitattr(req.selector) + dirs = path.split('/') + dirs = list(map(unquote, dirs)) + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: + dirs = dirs[1:] + try: + fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) + type = file and 'I' or 'D' + for attr in attrs: + attr, value = splitvalue(attr) + if attr.lower() == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = value.upper() + fp, retrlen = fw.retrfile(file, type) + headers = "" + mtype = mimetypes.guess_type(req.full_url)[0] + if mtype: + headers += "Content-type: %s\n" % mtype + if retrlen is not None and retrlen >= 0: + headers += "Content-length: %d\n" % retrlen + headers = email.message_from_string(headers) + return addinfourl(fp, headers, req.full_url) + except ftplib.all_errors as exp: + exc = URLError('ftp error: %r' % exp) + raise_with_traceback(exc) + + def connect_ftp(self, user, passwd, host, port, dirs, timeout): + return ftpwrapper(user, passwd, host, port, dirs, timeout, + persistent=False) + +class CacheFTPHandler(FTPHandler): + # XXX would be nice to have pluggable cache strategies + # XXX this stuff is definitely not thread safe + def __init__(self): + self.cache = {} + self.timeout = {} + self.soonest = 0 + self.delay = 60 + self.max_conns = 16 + + def setTimeout(self, t): + self.delay = t + + def setMaxConns(self, m): + self.max_conns = m + + def connect_ftp(self, user, passwd, host, port, dirs, timeout): + key = user, host, port, '/'.join(dirs), timeout + if key in self.cache: + self.timeout[key] = time.time() + self.delay + else: + self.cache[key] = ftpwrapper(user, passwd, host, port, + dirs, timeout) + self.timeout[key] = time.time() + self.delay + self.check_cache() + return self.cache[key] + + def check_cache(self): + # first check for old ones + t = time.time() + if self.soonest <= t: + for k, v in list(self.timeout.items()): + if v < t: + self.cache[k].close() + del self.cache[k] + del self.timeout[k] + self.soonest = min(list(self.timeout.values())) + + # then check the size + if len(self.cache) == self.max_conns: + for k, v in list(self.timeout.items()): + if v == self.soonest: + del self.cache[k] + del self.timeout[k] + break + self.soonest = min(list(self.timeout.values())) + + def clear_cache(self): + for conn in self.cache.values(): + conn.close() + self.cache.clear() + self.timeout.clear() + + +# Code move from the old urllib module + +MAXFTPCACHE = 10 # Trim the ftp cache beyond this size + +# Helper for non-unix systems +if os.name == 'nt': + from nturl2path import url2pathname, pathname2url +else: + def url2pathname(pathname): + """OS-specific conversion from a relative URL of the 'file' scheme + to a file system path; not recommended for general use.""" + return unquote(pathname) + + def pathname2url(pathname): + """OS-specific conversion from a file system path to a relative URL + of the 'file' scheme; not recommended for general use.""" + return quote(pathname) + +# This really consists of two pieces: +# (1) a class which handles opening of all sorts of URLs +# (plus assorted utilities etc.) +# (2) a set of functions for parsing URLs +# XXX Should these be separated out into different modules? + + +ftpcache = {} +class URLopener(object): + """Class to open URLs. + This is a class rather than just a subroutine because we may need + more than one set of global protocol-specific options. + Note -- this is a base class for those who don't want the + automatic handling of errors type 302 (relocated) and 401 + (authorization needed).""" + + __tempfiles = None + + version = "Python-urllib/%s" % __version__ + + # Constructor + def __init__(self, proxies=None, **x509): + msg = "%(class)s style of invoking requests is deprecated. " \ + "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} + warnings.warn(msg, DeprecationWarning, stacklevel=3) + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'keys'), "proxies must be a mapping" + self.proxies = proxies + self.key_file = x509.get('key_file') + self.cert_file = x509.get('cert_file') + self.addheaders = [('User-Agent', self.version)] + self.__tempfiles = [] + self.__unlink = os.unlink # See cleanup() + self.tempcache = None + # Undocumented feature: if you assign {} to tempcache, + # it is used to cache files retrieved with + # self.retrieve(). This is not enabled by default + # since it does not work for changing documents (and I + # haven't got the logic to check expiration headers + # yet). + self.ftpcache = ftpcache + # Undocumented feature: you can use a different + # ftp cache by assigning to the .ftpcache member; + # in case you want logically independent URL openers + # XXX This is not threadsafe. Bah. + + def __del__(self): + self.close() + + def close(self): + self.cleanup() + + def cleanup(self): + # This code sometimes runs when the rest of this module + # has already been deleted, so it can't use any globals + # or import anything. + if self.__tempfiles: + for file in self.__tempfiles: + try: + self.__unlink(file) + except OSError: + pass + del self.__tempfiles[:] + if self.tempcache: + self.tempcache.clear() + + def addheader(self, *args): + """Add a header to be used by the HTTP interface only + e.g. u.addheader('Accept', 'sound/basic')""" + self.addheaders.append(args) + + # External interface + def open(self, fullurl, data=None): + """Use URLopener().open(file) instead of open(file, 'r').""" + fullurl = unwrap(to_bytes(fullurl)) + fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") + if self.tempcache and fullurl in self.tempcache: + filename, headers = self.tempcache[fullurl] + fp = open(filename, 'rb') + return addinfourl(fp, headers, fullurl) + urltype, url = splittype(fullurl) + if not urltype: + urltype = 'file' + if urltype in self.proxies: + proxy = self.proxies[urltype] + urltype, proxyhost = splittype(proxy) + host, selector = splithost(proxyhost) + url = (host, fullurl) # Signal special case to open_*() + else: + proxy = None + name = 'open_' + urltype + self.type = urltype + name = name.replace('-', '_') + if not hasattr(self, name): + if proxy: + return self.open_unknown_proxy(proxy, fullurl, data) + else: + return self.open_unknown(fullurl, data) + try: + if data is None: + return getattr(self, name)(url) + else: + return getattr(self, name)(url, data) + except HTTPError: + raise + except socket.error as msg: + raise_with_traceback(IOError('socket error', msg)) + + def open_unknown(self, fullurl, data=None): + """Overridable interface to open unknown URL type.""" + type, url = splittype(fullurl) + raise IOError('url error', 'unknown url type', type) + + def open_unknown_proxy(self, proxy, fullurl, data=None): + """Overridable interface to open unknown URL type.""" + type, url = splittype(fullurl) + raise IOError('url error', 'invalid proxy for %s' % type, proxy) + + # External interface + def retrieve(self, url, filename=None, reporthook=None, data=None): + """retrieve(url) returns (filename, headers) for a local object + or (tempfilename, headers) for a remote object.""" + url = unwrap(to_bytes(url)) + if self.tempcache and url in self.tempcache: + return self.tempcache[url] + type, url1 = splittype(url) + if filename is None and (not type or type == 'file'): + try: + fp = self.open_local_file(url1) + hdrs = fp.info() + fp.close() + return url2pathname(splithost(url1)[1]), hdrs + except IOError as msg: + pass + fp = self.open(url, data) + try: + headers = fp.info() + if filename: + tfp = open(filename, 'wb') + else: + import tempfile + garbage, path = splittype(url) + garbage, path = splithost(path or "") + path, garbage = splitquery(path or "") + path, garbage = splitattr(path or "") + suffix = os.path.splitext(path)[1] + (fd, filename) = tempfile.mkstemp(suffix) + self.__tempfiles.append(filename) + tfp = os.fdopen(fd, 'wb') + try: + result = filename, headers + if self.tempcache is not None: + self.tempcache[url] = result + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + if reporthook: + reporthook(blocknum, bs, size) + while 1: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + finally: + tfp.close() + finally: + fp.close() + + # raise exception if actual size does not match content-length header + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result + + # Each method named open_ knows how to open that type of URL + + def _open_generic_http(self, connection_factory, url, data): + """Make an HTTP connection using connection_class. + + This is an internal method that should be called from + open_http() or open_https(). + + Arguments: + - connection_factory should take a host name and return an + HTTPConnection instance. + - url is the url to retrieval or a host, relative-path pair. + - data is payload for a POST request or None. + """ + + user_passwd = None + proxy_passwd= None + if isinstance(url, str): + host, selector = splithost(url) + if host: + user_passwd, host = splituser(host) + host = unquote(host) + realhost = host + else: + host, selector = url + # check whether the proxy contains authorization information + proxy_passwd, host = splituser(host) + # now we proceed with the url we want to obtain + urltype, rest = splittype(selector) + url = rest + user_passwd = None + if urltype.lower() != 'http': + realhost = None + else: + realhost, rest = splithost(rest) + if realhost: + user_passwd, realhost = splituser(realhost) + if user_passwd: + selector = "%s://%s%s" % (urltype, realhost, rest) + if proxy_bypass(realhost): + host = realhost + + if not host: raise IOError('http error', 'no host given') + + if proxy_passwd: + proxy_passwd = unquote(proxy_passwd) + proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') + else: + proxy_auth = None + + if user_passwd: + user_passwd = unquote(user_passwd) + auth = base64.b64encode(user_passwd.encode()).decode('ascii') + else: + auth = None + http_conn = connection_factory(host) + headers = {} + if proxy_auth: + headers["Proxy-Authorization"] = "Basic %s" % proxy_auth + if auth: + headers["Authorization"] = "Basic %s" % auth + if realhost: + headers["Host"] = realhost + + # Add Connection:close as we don't support persistent connections yet. + # This helps in closing the socket and avoiding ResourceWarning + + headers["Connection"] = "close" + + for header, value in self.addheaders: + headers[header] = value + + if data is not None: + headers["Content-Type"] = "application/x-www-form-urlencoded" + http_conn.request("POST", selector, data, headers) + else: + http_conn.request("GET", selector, headers=headers) + + try: + response = http_conn.getresponse() + except http_client.BadStatusLine: + # something went wrong with the HTTP status line + raise URLError("http protocol error: bad status line") + + # According to RFC 2616, "2xx" code indicates that the client's + # request was successfully received, understood, and accepted. + if 200 <= response.status < 300: + return addinfourl(response, response.msg, "http:" + url, + response.status) + else: + return self.http_error( + url, response.fp, + response.status, response.reason, response.msg, data) + + def open_http(self, url, data=None): + """Use HTTP protocol.""" + return self._open_generic_http(http_client.HTTPConnection, url, data) + + def http_error(self, url, fp, errcode, errmsg, headers, data=None): + """Handle http errors. + + Derived class can override this, or provide specific handlers + named http_error_DDD where DDD is the 3-digit error code.""" + # First check if there's a specific handler for this error + name = 'http_error_%d' % errcode + if hasattr(self, name): + method = getattr(self, name) + if data is None: + result = method(url, fp, errcode, errmsg, headers) + else: + result = method(url, fp, errcode, errmsg, headers, data) + if result: return result + return self.http_error_default(url, fp, errcode, errmsg, headers) + + def http_error_default(self, url, fp, errcode, errmsg, headers): + """Default error handler: close the connection and raise IOError.""" + fp.close() + raise HTTPError(url, errcode, errmsg, headers, None) + + if _have_ssl: + def _https_connection(self, host): + return http_client.HTTPSConnection(host, + key_file=self.key_file, + cert_file=self.cert_file) + + def open_https(self, url, data=None): + """Use HTTPS protocol.""" + return self._open_generic_http(self._https_connection, url, data) + + def open_file(self, url): + """Use local file or FTP depending on form of URL.""" + if not isinstance(url, str): + raise URLError('file error: proxy support for file protocol currently not implemented') + if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': + raise ValueError("file:// scheme is supported only on localhost") + else: + return self.open_local_file(url) + + def open_local_file(self, url): + """Use local file.""" + import future.backports.email.utils as email_utils + import mimetypes + host, file = splithost(url) + localname = url2pathname(file) + try: + stats = os.stat(localname) + except OSError as e: + raise URLError(e.strerror, e.filename) + size = stats.st_size + modified = email_utils.formatdate(stats.st_mtime, usegmt=True) + mtype = mimetypes.guess_type(url)[0] + headers = email.message_from_string( + 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % + (mtype or 'text/plain', size, modified)) + if not host: + urlfile = file + if file[:1] == '/': + urlfile = 'file://' + file + return addinfourl(open(localname, 'rb'), headers, urlfile) + host, port = splitport(host) + if (not port + and socket.gethostbyname(host) in ((localhost(),) + thishost())): + urlfile = file + if file[:1] == '/': + urlfile = 'file://' + file + elif file[:2] == './': + raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) + return addinfourl(open(localname, 'rb'), headers, urlfile) + raise URLError('local file error: not on local host') + + def open_ftp(self, url): + """Use FTP protocol.""" + if not isinstance(url, str): + raise URLError('ftp error: proxy support for ftp protocol currently not implemented') + import mimetypes + host, path = splithost(url) + if not host: raise URLError('ftp error: no host given') + host, port = splitport(host) + user, host = splituser(host) + if user: user, passwd = splitpasswd(user) + else: passwd = None + host = unquote(host) + user = unquote(user or '') + passwd = unquote(passwd or '') + host = socket.gethostbyname(host) + if not port: + import ftplib + port = ftplib.FTP_PORT + else: + port = int(port) + path, attrs = splitattr(path) + path = unquote(path) + dirs = path.split('/') + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: dirs = dirs[1:] + if dirs and not dirs[0]: dirs[0] = '/' + key = user, host, port, '/'.join(dirs) + # XXX thread unsafe! + if len(self.ftpcache) > MAXFTPCACHE: + # Prune the cache, rather arbitrarily + for k in self.ftpcache.keys(): + if k != key: + v = self.ftpcache[k] + del self.ftpcache[k] + v.close() + try: + if key not in self.ftpcache: + self.ftpcache[key] = \ + ftpwrapper(user, passwd, host, port, dirs) + if not file: type = 'D' + else: type = 'I' + for attr in attrs: + attr, value = splitvalue(attr) + if attr.lower() == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = value.upper() + (fp, retrlen) = self.ftpcache[key].retrfile(file, type) + mtype = mimetypes.guess_type("ftp:" + url)[0] + headers = "" + if mtype: + headers += "Content-Type: %s\n" % mtype + if retrlen is not None and retrlen >= 0: + headers += "Content-Length: %d\n" % retrlen + headers = email.message_from_string(headers) + return addinfourl(fp, headers, "ftp:" + url) + except ftperrors() as exp: + raise_with_traceback(URLError('ftp error %r' % exp)) + + def open_data(self, url, data=None): + """Use "data" URL.""" + if not isinstance(url, str): + raise URLError('data error: proxy support for data protocol currently not implemented') + # ignore POSTed data + # + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + try: + [type, data] = url.split(',', 1) + except ValueError: + raise IOError('data error', 'bad data URL') + if not type: + type = 'text/plain;charset=US-ASCII' + semi = type.rfind(';') + if semi >= 0 and '=' not in type[semi:]: + encoding = type[semi+1:] + type = type[:semi] + else: + encoding = '' + msg = [] + msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', + time.gmtime(time.time()))) + msg.append('Content-type: %s' % type) + if encoding == 'base64': + # XXX is this encoding/decoding ok? + data = base64.decodebytes(data.encode('ascii')).decode('latin-1') + else: + data = unquote(data) + msg.append('Content-Length: %d' % len(data)) + msg.append('') + msg.append(data) + msg = '\n'.join(msg) + headers = email.message_from_string(msg) + f = io.StringIO(msg) + #f.fileno = None # needed for addinfourl + return addinfourl(f, headers, url) + + +class FancyURLopener(URLopener): + """Derived class with handlers for errors we can handle (perhaps).""" + + def __init__(self, *args, **kwargs): + URLopener.__init__(self, *args, **kwargs) + self.auth_cache = {} + self.tries = 0 + self.maxtries = 10 + + def http_error_default(self, url, fp, errcode, errmsg, headers): + """Default error handling -- don't raise an exception.""" + return addinfourl(fp, headers, "http:" + url, errcode) + + def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): + """Error 302 -- relocated (temporarily).""" + self.tries += 1 + if self.maxtries and self.tries >= self.maxtries: + if hasattr(self, "http_error_500"): + meth = self.http_error_500 + else: + meth = self.http_error_default + self.tries = 0 + return meth(url, fp, 500, + "Internal Server Error: Redirect Recursion", headers) + result = self.redirect_internal(url, fp, errcode, errmsg, headers, + data) + self.tries = 0 + return result + + def redirect_internal(self, url, fp, errcode, errmsg, headers, data): + if 'location' in headers: + newurl = headers['location'] + elif 'uri' in headers: + newurl = headers['uri'] + else: + return + fp.close() + + # In case the server sent a relative URL, join with original: + newurl = urljoin(self.type + ":" + url, newurl) + + urlparts = urlparse(newurl) + + # For security reasons, we don't allow redirection to anything other + # than http, https and ftp. + + # We are using newer HTTPError with older redirect_internal method + # This older method will get deprecated in 3.3 + + if urlparts.scheme not in ('http', 'https', 'ftp', ''): + raise HTTPError(newurl, errcode, + errmsg + + " Redirection to url '%s' is not allowed." % newurl, + headers, fp) + + return self.open(newurl) + + def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): + """Error 301 -- also relocated (permanently).""" + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + + def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): + """Error 303 -- also relocated (essentially identical to 302).""" + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + + def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): + """Error 307 -- relocated, but turn POST into error.""" + if data is None: + return self.http_error_302(url, fp, errcode, errmsg, headers, data) + else: + return self.http_error_default(url, fp, errcode, errmsg, headers) + + def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, + retry=False): + """Error 401 -- authentication required. + This function supports Basic authentication only.""" + if 'www-authenticate' not in headers: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + stuff = headers['www-authenticate'] + match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) + if not match: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + scheme, realm = match.groups() + if scheme.lower() != 'basic': + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + if not retry: + URLopener.http_error_default(self, url, fp, errcode, errmsg, + headers) + name = 'retry_' + self.type + '_basic_auth' + if data is None: + return getattr(self,name)(url, realm) + else: + return getattr(self,name)(url, realm, data) + + def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, + retry=False): + """Error 407 -- proxy authentication required. + This function supports Basic authentication only.""" + if 'proxy-authenticate' not in headers: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + stuff = headers['proxy-authenticate'] + match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) + if not match: + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + scheme, realm = match.groups() + if scheme.lower() != 'basic': + URLopener.http_error_default(self, url, fp, + errcode, errmsg, headers) + if not retry: + URLopener.http_error_default(self, url, fp, errcode, errmsg, + headers) + name = 'retry_proxy_' + self.type + '_basic_auth' + if data is None: + return getattr(self,name)(url, realm) + else: + return getattr(self,name)(url, realm, data) + + def retry_proxy_http_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'http://' + host + selector + proxy = self.proxies['http'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), proxyhost) + self.proxies['http'] = 'http://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_proxy_https_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + newurl = 'https://' + host + selector + proxy = self.proxies['https'] + urltype, proxyhost = splittype(proxy) + proxyhost, proxyselector = splithost(proxyhost) + i = proxyhost.find('@') + 1 + proxyhost = proxyhost[i:] + user, passwd = self.get_user_passwd(proxyhost, realm, i) + if not (user or passwd): return None + proxyhost = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), proxyhost) + self.proxies['https'] = 'https://' + proxyhost + proxyselector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_http_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + i = host.find('@') + 1 + host = host[i:] + user, passwd = self.get_user_passwd(host, realm, i) + if not (user or passwd): return None + host = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), host) + newurl = 'http://' + host + selector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def retry_https_basic_auth(self, url, realm, data=None): + host, selector = splithost(url) + i = host.find('@') + 1 + host = host[i:] + user, passwd = self.get_user_passwd(host, realm, i) + if not (user or passwd): return None + host = "%s:%s@%s" % (quote(user, safe=''), + quote(passwd, safe=''), host) + newurl = 'https://' + host + selector + if data is None: + return self.open(newurl) + else: + return self.open(newurl, data) + + def get_user_passwd(self, host, realm, clear_cache=0): + key = realm + '@' + host.lower() + if key in self.auth_cache: + if clear_cache: + del self.auth_cache[key] + else: + return self.auth_cache[key] + user, passwd = self.prompt_user_passwd(host, realm) + if user or passwd: self.auth_cache[key] = (user, passwd) + return user, passwd + + def prompt_user_passwd(self, host, realm): + """Override this in a GUI environment!""" + import getpass + try: + user = input("Enter username for %s at %s: " % (realm, host)) + passwd = getpass.getpass("Enter password for %s in %s at %s: " % + (user, realm, host)) + return user, passwd + except KeyboardInterrupt: + print() + return None, None + + +# Utility functions + +_localhost = None +def localhost(): + """Return the IP address of the magic hostname 'localhost'.""" + global _localhost + if _localhost is None: + _localhost = socket.gethostbyname('localhost') + return _localhost + +_thishost = None +def thishost(): + """Return the IP addresses of the current host.""" + global _thishost + if _thishost is None: + try: + _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2]) + except socket.gaierror: + _thishost = tuple(socket.gethostbyname_ex('localhost')[2]) + return _thishost + +_ftperrors = None +def ftperrors(): + """Return the set of errors raised by the FTP class.""" + global _ftperrors + if _ftperrors is None: + import ftplib + _ftperrors = ftplib.all_errors + return _ftperrors + +_noheaders = None +def noheaders(): + """Return an empty email Message object.""" + global _noheaders + if _noheaders is None: + _noheaders = email.message_from_string("") + return _noheaders + + +# Utility classes + +class ftpwrapper(object): + """Class used by open_ftp() for cache of open FTP connections.""" + + def __init__(self, user, passwd, host, port, dirs, timeout=None, + persistent=True): + self.user = user + self.passwd = passwd + self.host = host + self.port = port + self.dirs = dirs + self.timeout = timeout + self.refcount = 0 + self.keepalive = persistent + self.init() + + def init(self): + import ftplib + self.busy = 0 + self.ftp = ftplib.FTP() + self.ftp.connect(self.host, self.port, self.timeout) + self.ftp.login(self.user, self.passwd) + _target = '/'.join(self.dirs) + self.ftp.cwd(_target) + + def retrfile(self, file, type): + import ftplib + self.endtransfer() + if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1 + else: cmd = 'TYPE ' + type; isdir = 0 + try: + self.ftp.voidcmd(cmd) + except ftplib.all_errors: + self.init() + self.ftp.voidcmd(cmd) + conn = None + if file and not isdir: + # Try to retrieve as a file + try: + cmd = 'RETR ' + file + conn, retrlen = self.ftp.ntransfercmd(cmd) + except ftplib.error_perm as reason: + if str(reason)[:3] != '550': + raise_with_traceback(URLError('ftp error: %r' % reason)) + if not conn: + # Set transfer mode to ASCII! + self.ftp.voidcmd('TYPE A') + # Try a directory listing. Verify that directory exists. + if file: + pwd = self.ftp.pwd() + try: + try: + self.ftp.cwd(file) + except ftplib.error_perm as reason: + ### Was: + # raise URLError('ftp error: %r' % reason) from reason + exc = URLError('ftp error: %r' % reason) + exc.__cause__ = reason + raise exc + finally: + self.ftp.cwd(pwd) + cmd = 'LIST ' + file + else: + cmd = 'LIST' + conn, retrlen = self.ftp.ntransfercmd(cmd) + self.busy = 1 + + ftpobj = addclosehook(conn.makefile('rb'), self.file_close) + self.refcount += 1 + conn.close() + # Pass back both a suitably decorated object and a retrieval length + return (ftpobj, retrlen) + + def endtransfer(self): + self.busy = 0 + + def close(self): + self.keepalive = False + if self.refcount <= 0: + self.real_close() + + def file_close(self): + self.endtransfer() + self.refcount -= 1 + if self.refcount <= 0 and not self.keepalive: + self.real_close() + + def real_close(self): + self.endtransfer() + try: + self.ftp.close() + except ftperrors(): + pass + +# Proxy handling +def getproxies_environment(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Scan the environment for variables named _proxy; + this seems to be the standard convention. If you need a + different way, you can pass a proxies dictionary to the + [Fancy]URLopener constructor. + + """ + proxies = {} + for name, value in os.environ.items(): + name = name.lower() + if value and name[-6:] == '_proxy': + proxies[name[:-6]] = value + return proxies + +def proxy_bypass_environment(host): + """Test if proxies should not be used for a particular host. + + Checks the environment for a variable named no_proxy, which should + be a list of DNS suffixes separated by commas, or '*' for all hosts. + """ + no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') + # '*' is special case for always bypass + if no_proxy == '*': + return 1 + # strip port off host + hostonly, port = splitport(host) + # check if the host ends with any of the DNS suffixes + no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] + for name in no_proxy_list: + if name and (hostonly.endswith(name) or host.endswith(name)): + return 1 + # otherwise, don't bypass + return 0 + + +# This code tests an OSX specific data structure but is testable on all +# platforms +def _proxy_bypass_macosx_sysconf(host, proxy_settings): + """ + Return True iff this host shouldn't be accessed using a proxy + + This function uses the MacOSX framework SystemConfiguration + to fetch the proxy information. + + proxy_settings come from _scproxy._get_proxy_settings or get mocked ie: + { 'exclude_simple': bool, + 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] + } + """ + from fnmatch import fnmatch + + hostonly, port = splitport(host) + + def ip2num(ipAddr): + parts = ipAddr.split('.') + parts = list(map(int, parts)) + if len(parts) != 4: + parts = (parts + [0, 0, 0, 0])[:4] + return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] + + # Check for simple host names: + if '.' not in host: + if proxy_settings['exclude_simple']: + return True + + hostIP = None + + for value in proxy_settings.get('exceptions', ()): + # Items in the list are strings like these: *.local, 169.254/16 + if not value: continue + + m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) + if m is not None: + if hostIP is None: + try: + hostIP = socket.gethostbyname(hostonly) + hostIP = ip2num(hostIP) + except socket.error: + continue + + base = ip2num(m.group(1)) + mask = m.group(2) + if mask is None: + mask = 8 * (m.group(1).count('.') + 1) + else: + mask = int(mask[1:]) + mask = 32 - mask + + if (hostIP >> mask) == (base >> mask): + return True + + elif fnmatch(host, value): + return True + + return False + + +if sys.platform == 'darwin': + from _scproxy import _get_proxy_settings, _get_proxies + + def proxy_bypass_macosx_sysconf(host): + proxy_settings = _get_proxy_settings() + return _proxy_bypass_macosx_sysconf(host, proxy_settings) + + def getproxies_macosx_sysconf(): + """Return a dictionary of scheme -> proxy server URL mappings. + + This function uses the MacOSX framework SystemConfiguration + to fetch the proxy information. + """ + return _get_proxies() + + + + def proxy_bypass(host): + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_macosx_sysconf(host) + + def getproxies(): + return getproxies_environment() or getproxies_macosx_sysconf() + + +elif os.name == 'nt': + def getproxies_registry(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Win32 uses the registry to store proxies. + + """ + proxies = {} + try: + import winreg + except ImportError: + # Std module, so should be around - but you never know! + return proxies + try: + internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + proxyEnable = winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0] + if proxyEnable: + # Returned as Unicode but problems if not converted to ASCII + proxyServer = str(winreg.QueryValueEx(internetSettings, + 'ProxyServer')[0]) + if '=' in proxyServer: + # Per-protocol settings + for p in proxyServer.split(';'): + protocol, address = p.split('=', 1) + # See if address has a type:// prefix + if not re.match('^([^/:]+)://', address): + address = '%s://%s' % (protocol, address) + proxies[protocol] = address + else: + # Use one setting for all protocols + if proxyServer[:5] == 'http:': + proxies['http'] = proxyServer + else: + proxies['http'] = 'http://%s' % proxyServer + proxies['https'] = 'https://%s' % proxyServer + proxies['ftp'] = 'ftp://%s' % proxyServer + internetSettings.Close() + except (WindowsError, ValueError, TypeError): + # Either registry key not found etc, or the value in an + # unexpected format. + # proxies already set up to be empty so nothing to do + pass + return proxies + + def getproxies(): + """Return a dictionary of scheme -> proxy server URL mappings. + + Returns settings gathered from the environment, if specified, + or the registry. + + """ + return getproxies_environment() or getproxies_registry() + + def proxy_bypass_registry(host): + try: + import winreg + except ImportError: + # Std modules, so should be around - but you never know! + return 0 + try: + internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, + r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') + proxyEnable = winreg.QueryValueEx(internetSettings, + 'ProxyEnable')[0] + proxyOverride = str(winreg.QueryValueEx(internetSettings, + 'ProxyOverride')[0]) + # ^^^^ Returned as Unicode but problems if not converted to ASCII + except WindowsError: + return 0 + if not proxyEnable or not proxyOverride: + return 0 + # try to make a host list from name and IP address. + rawHost, port = splitport(host) + host = [rawHost] + try: + addr = socket.gethostbyname(rawHost) + if addr != rawHost: + host.append(addr) + except socket.error: + pass + try: + fqdn = socket.getfqdn(rawHost) + if fqdn != rawHost: + host.append(fqdn) + except socket.error: + pass + # make a check value list from the registry entry: replace the + # '' string by the localhost entry and the corresponding + # canonical entry. + proxyOverride = proxyOverride.split(';') + # now check if we match one of the registry values. + for test in proxyOverride: + if test == '': + if '.' not in rawHost: + return 1 + test = test.replace(".", r"\.") # mask dots + test = test.replace("*", r".*") # change glob sequence + test = test.replace("?", r".") # change glob char + for val in host: + if re.match(test, val, re.I): + return 1 + return 0 + + def proxy_bypass(host): + """Return a dictionary of scheme -> proxy server URL mappings. + + Returns settings gathered from the environment, if specified, + or the registry. + + """ + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_registry(host) + +else: + # By default use environment variables + getproxies = getproxies_environment + proxy_bypass = proxy_bypass_environment diff --git a/lib/future/backports/urllib/response.py b/lib/future/backports/urllib/response.py new file mode 100644 index 00000000..adbf6e5a --- /dev/null +++ b/lib/future/backports/urllib/response.py @@ -0,0 +1,103 @@ +"""Response classes used by urllib. + +The base class, addbase, defines a minimal file-like interface, +including read() and readline(). The typical response object is an +addinfourl instance, which defines an info() method that returns +headers and a geturl() method that returns the url. +""" +from __future__ import absolute_import, division, unicode_literals +from future.builtins import object + +class addbase(object): + """Base class for addinfo and addclosehook.""" + + # XXX Add a method to expose the timeout on the underlying socket? + + def __init__(self, fp): + # TODO(jhylton): Is there a better way to delegate using io? + self.fp = fp + self.read = self.fp.read + self.readline = self.fp.readline + # TODO(jhylton): Make sure an object with readlines() is also iterable + if hasattr(self.fp, "readlines"): + self.readlines = self.fp.readlines + if hasattr(self.fp, "fileno"): + self.fileno = self.fp.fileno + else: + self.fileno = lambda: None + + def __iter__(self): + # Assigning `__iter__` to the instance doesn't work as intended + # because the iter builtin does something like `cls.__iter__(obj)` + # and thus fails to find the _bound_ method `obj.__iter__`. + # Returning just `self.fp` works for built-in file objects but + # might not work for general file-like objects. + return iter(self.fp) + + def __repr__(self): + return '<%s at %r whose fp = %r>' % (self.__class__.__name__, + id(self), self.fp) + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + self.read = None + self.readline = None + self.readlines = None + self.fileno = None + self.__iter__ = None + self.__next__ = None + + def __enter__(self): + if self.fp is None: + raise ValueError("I/O operation on closed file") + return self + + def __exit__(self, type, value, traceback): + self.close() + +class addclosehook(addbase): + """Class to add a close hook to an open file.""" + + def __init__(self, fp, closehook, *hookargs): + addbase.__init__(self, fp) + self.closehook = closehook + self.hookargs = hookargs + + def close(self): + if self.closehook: + self.closehook(*self.hookargs) + self.closehook = None + self.hookargs = None + addbase.close(self) + +class addinfo(addbase): + """class to add an info() method to an open file.""" + + def __init__(self, fp, headers): + addbase.__init__(self, fp) + self.headers = headers + + def info(self): + return self.headers + +class addinfourl(addbase): + """class to add info() and geturl() methods to an open file.""" + + def __init__(self, fp, headers, url, code=None): + addbase.__init__(self, fp) + self.headers = headers + self.url = url + self.code = code + + def info(self): + return self.headers + + def getcode(self): + return self.code + + def geturl(self): + return self.url + +del absolute_import, division, unicode_literals, object diff --git a/lib/future/backports/urllib/robotparser.py b/lib/future/backports/urllib/robotparser.py new file mode 100644 index 00000000..a0f36511 --- /dev/null +++ b/lib/future/backports/urllib/robotparser.py @@ -0,0 +1,211 @@ +from __future__ import absolute_import, division, unicode_literals +from future.builtins import str +""" robotparser.py + + Copyright (C) 2000 Bastian Kleineidam + + You can choose between two licenses when using this package: + 1) GNU GPLv2 + 2) PSF license for Python 2.2 + + The robots.txt Exclusion Protocol is implemented as specified in + http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html +""" + +# Was: import urllib.parse, urllib.request +from future.backports import urllib +from future.backports.urllib import parse as _parse, request as _request +urllib.parse = _parse +urllib.request = _request + + +__all__ = ["RobotFileParser"] + +class RobotFileParser(object): + """ This class provides a set of methods to read, parse and answer + questions about a single robots.txt file. + + """ + + def __init__(self, url=''): + self.entries = [] + self.default_entry = None + self.disallow_all = False + self.allow_all = False + self.set_url(url) + self.last_checked = 0 + + def mtime(self): + """Returns the time the robots.txt file was last fetched. + + This is useful for long-running web spiders that need to + check for new robots.txt files periodically. + + """ + return self.last_checked + + def modified(self): + """Sets the time the robots.txt file was last fetched to the + current time. + + """ + import time + self.last_checked = time.time() + + def set_url(self, url): + """Sets the URL referring to a robots.txt file.""" + self.url = url + self.host, self.path = urllib.parse.urlparse(url)[1:3] + + def read(self): + """Reads the robots.txt URL and feeds it to the parser.""" + try: + f = urllib.request.urlopen(self.url) + except urllib.error.HTTPError as err: + if err.code in (401, 403): + self.disallow_all = True + elif err.code >= 400: + self.allow_all = True + else: + raw = f.read() + self.parse(raw.decode("utf-8").splitlines()) + + def _add_entry(self, entry): + if "*" in entry.useragents: + # the default entry is considered last + if self.default_entry is None: + # the first default entry wins + self.default_entry = entry + else: + self.entries.append(entry) + + def parse(self, lines): + """Parse the input lines from a robots.txt file. + + We allow that a user-agent: line is not preceded by + one or more blank lines. + """ + # states: + # 0: start state + # 1: saw user-agent line + # 2: saw an allow or disallow line + state = 0 + entry = Entry() + + for line in lines: + if not line: + if state == 1: + entry = Entry() + state = 0 + elif state == 2: + self._add_entry(entry) + entry = Entry() + state = 0 + # remove optional comment and strip line + i = line.find('#') + if i >= 0: + line = line[:i] + line = line.strip() + if not line: + continue + line = line.split(':', 1) + if len(line) == 2: + line[0] = line[0].strip().lower() + line[1] = urllib.parse.unquote(line[1].strip()) + if line[0] == "user-agent": + if state == 2: + self._add_entry(entry) + entry = Entry() + entry.useragents.append(line[1]) + state = 1 + elif line[0] == "disallow": + if state != 0: + entry.rulelines.append(RuleLine(line[1], False)) + state = 2 + elif line[0] == "allow": + if state != 0: + entry.rulelines.append(RuleLine(line[1], True)) + state = 2 + if state == 2: + self._add_entry(entry) + + + def can_fetch(self, useragent, url): + """using the parsed robots.txt decide if useragent can fetch url""" + if self.disallow_all: + return False + if self.allow_all: + return True + # search for given user agent matches + # the first match counts + parsed_url = urllib.parse.urlparse(urllib.parse.unquote(url)) + url = urllib.parse.urlunparse(('','',parsed_url.path, + parsed_url.params,parsed_url.query, parsed_url.fragment)) + url = urllib.parse.quote(url) + if not url: + url = "/" + for entry in self.entries: + if entry.applies_to(useragent): + return entry.allowance(url) + # try the default entry last + if self.default_entry: + return self.default_entry.allowance(url) + # agent not found ==> access granted + return True + + def __str__(self): + return ''.join([str(entry) + "\n" for entry in self.entries]) + + +class RuleLine(object): + """A rule line is a single "Allow:" (allowance==True) or "Disallow:" + (allowance==False) followed by a path.""" + def __init__(self, path, allowance): + if path == '' and not allowance: + # an empty value means allow all + allowance = True + self.path = urllib.parse.quote(path) + self.allowance = allowance + + def applies_to(self, filename): + return self.path == "*" or filename.startswith(self.path) + + def __str__(self): + return (self.allowance and "Allow" or "Disallow") + ": " + self.path + + +class Entry(object): + """An entry has one or more user-agents and zero or more rulelines""" + def __init__(self): + self.useragents = [] + self.rulelines = [] + + def __str__(self): + ret = [] + for agent in self.useragents: + ret.extend(["User-agent: ", agent, "\n"]) + for line in self.rulelines: + ret.extend([str(line), "\n"]) + return ''.join(ret) + + def applies_to(self, useragent): + """check if this entry applies to the specified agent""" + # split the name token and make it lower case + useragent = useragent.split("/")[0].lower() + for agent in self.useragents: + if agent == '*': + # we have the catch-all agent + return True + agent = agent.lower() + if agent in useragent: + return True + return False + + def allowance(self, filename): + """Preconditions: + - our agent applies to this entry + - filename is URL decoded""" + for line in self.rulelines: + if line.applies_to(filename): + return line.allowance + return True diff --git a/lib/future/backports/xmlrpc/__init__.py b/lib/future/backports/xmlrpc/__init__.py new file mode 100644 index 00000000..196d3788 --- /dev/null +++ b/lib/future/backports/xmlrpc/__init__.py @@ -0,0 +1 @@ +# This directory is a Python package. diff --git a/lib/future/backports/xmlrpc/client.py b/lib/future/backports/xmlrpc/client.py new file mode 100644 index 00000000..b78e5bad --- /dev/null +++ b/lib/future/backports/xmlrpc/client.py @@ -0,0 +1,1496 @@ +# +# XML-RPC CLIENT LIBRARY +# $Id$ +# +# an XML-RPC client interface for Python. +# +# the marshalling and response parser code can also be used to +# implement XML-RPC servers. +# +# Notes: +# this version is designed to work with Python 2.1 or newer. +# +# History: +# 1999-01-14 fl Created +# 1999-01-15 fl Changed dateTime to use localtime +# 1999-01-16 fl Added Binary/base64 element, default to RPC2 service +# 1999-01-19 fl Fixed array data element (from Skip Montanaro) +# 1999-01-21 fl Fixed dateTime constructor, etc. +# 1999-02-02 fl Added fault handling, handle empty sequences, etc. +# 1999-02-10 fl Fixed problem with empty responses (from Skip Montanaro) +# 1999-06-20 fl Speed improvements, pluggable parsers/transports (0.9.8) +# 2000-11-28 fl Changed boolean to check the truth value of its argument +# 2001-02-24 fl Added encoding/Unicode/SafeTransport patches +# 2001-02-26 fl Added compare support to wrappers (0.9.9/1.0b1) +# 2001-03-28 fl Make sure response tuple is a singleton +# 2001-03-29 fl Don't require empty params element (from Nicholas Riley) +# 2001-06-10 fl Folded in _xmlrpclib accelerator support (1.0b2) +# 2001-08-20 fl Base xmlrpclib.Error on built-in Exception (from Paul Prescod) +# 2001-09-03 fl Allow Transport subclass to override getparser +# 2001-09-10 fl Lazy import of urllib, cgi, xmllib (20x import speedup) +# 2001-10-01 fl Remove containers from memo cache when done with them +# 2001-10-01 fl Use faster escape method (80% dumps speedup) +# 2001-10-02 fl More dumps microtuning +# 2001-10-04 fl Make sure import expat gets a parser (from Guido van Rossum) +# 2001-10-10 sm Allow long ints to be passed as ints if they don't overflow +# 2001-10-17 sm Test for int and long overflow (allows use on 64-bit systems) +# 2001-11-12 fl Use repr() to marshal doubles (from Paul Felix) +# 2002-03-17 fl Avoid buffered read when possible (from James Rucker) +# 2002-04-07 fl Added pythondoc comments +# 2002-04-16 fl Added __str__ methods to datetime/binary wrappers +# 2002-05-15 fl Added error constants (from Andrew Kuchling) +# 2002-06-27 fl Merged with Python CVS version +# 2002-10-22 fl Added basic authentication (based on code from Phillip Eby) +# 2003-01-22 sm Add support for the bool type +# 2003-02-27 gvr Remove apply calls +# 2003-04-24 sm Use cStringIO if available +# 2003-04-25 ak Add support for nil +# 2003-06-15 gn Add support for time.struct_time +# 2003-07-12 gp Correct marshalling of Faults +# 2003-10-31 mvl Add multicall support +# 2004-08-20 mvl Bump minimum supported Python version to 2.1 +# +# Copyright (c) 1999-2002 by Secret Labs AB. +# Copyright (c) 1999-2002 by Fredrik Lundh. +# +# info@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The XML-RPC client interface is +# +# Copyright (c) 1999-2002 by Secret Labs AB +# Copyright (c) 1999-2002 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +""" +Ported using Python-Future from the Python 3.3 standard library. + +An XML-RPC client interface for Python. + +The marshalling and response parser code can also be used to +implement XML-RPC servers. + +Exported exceptions: + + Error Base class for client errors + ProtocolError Indicates an HTTP protocol error + ResponseError Indicates a broken response package + Fault Indicates an XML-RPC fault package + +Exported classes: + + ServerProxy Represents a logical connection to an XML-RPC server + + MultiCall Executor of boxcared xmlrpc requests + DateTime dateTime wrapper for an ISO 8601 string or time tuple or + localtime integer value to generate a "dateTime.iso8601" + XML-RPC value + Binary binary data wrapper + + Marshaller Generate an XML-RPC params chunk from a Python data structure + Unmarshaller Unmarshal an XML-RPC response from incoming XML event message + Transport Handles an HTTP transaction to an XML-RPC server + SafeTransport Handles an HTTPS transaction to an XML-RPC server + +Exported constants: + + (none) + +Exported functions: + + getparser Create instance of the fastest available parser & attach + to an unmarshalling object + dumps Convert an argument tuple or a Fault instance to an XML-RPC + request (or response, if the methodresponse option is used). + loads Convert an XML-RPC packet to unmarshalled data plus a method + name (None if not present). +""" + +from __future__ import (absolute_import, division, print_function, + unicode_literals) +from future.builtins import bytes, dict, int, range, str + +import base64 +# Py2.7 compatibility hack +base64.encodebytes = base64.encodestring +base64.decodebytes = base64.decodestring +import sys +import time +from datetime import datetime +from future.backports.http import client as http_client +from future.backports.urllib import parse as urllib_parse +from future.utils import ensure_new_type +from xml.parsers import expat +import socket +import errno +from io import BytesIO +try: + import gzip +except ImportError: + gzip = None #python can be built without zlib/gzip support + +# -------------------------------------------------------------------- +# Internal stuff + +def escape(s): + s = s.replace("&", "&") + s = s.replace("<", "<") + return s.replace(">", ">",) + +# used in User-Agent header sent +__version__ = sys.version[:3] + +# xmlrpc integer limits +MAXINT = 2**31-1 +MININT = -2**31 + +# -------------------------------------------------------------------- +# Error constants (from Dan Libby's specification at +# http://xmlrpc-epi.sourceforge.net/specs/rfc.fault_codes.php) + +# Ranges of errors +PARSE_ERROR = -32700 +SERVER_ERROR = -32600 +APPLICATION_ERROR = -32500 +SYSTEM_ERROR = -32400 +TRANSPORT_ERROR = -32300 + +# Specific errors +NOT_WELLFORMED_ERROR = -32700 +UNSUPPORTED_ENCODING = -32701 +INVALID_ENCODING_CHAR = -32702 +INVALID_XMLRPC = -32600 +METHOD_NOT_FOUND = -32601 +INVALID_METHOD_PARAMS = -32602 +INTERNAL_ERROR = -32603 + +# -------------------------------------------------------------------- +# Exceptions + +## +# Base class for all kinds of client-side errors. + +class Error(Exception): + """Base class for client errors.""" + def __str__(self): + return repr(self) + +## +# Indicates an HTTP-level protocol error. This is raised by the HTTP +# transport layer, if the server returns an error code other than 200 +# (OK). +# +# @param url The target URL. +# @param errcode The HTTP error code. +# @param errmsg The HTTP error message. +# @param headers The HTTP header dictionary. + +class ProtocolError(Error): + """Indicates an HTTP protocol error.""" + def __init__(self, url, errcode, errmsg, headers): + Error.__init__(self) + self.url = url + self.errcode = errcode + self.errmsg = errmsg + self.headers = headers + def __repr__(self): + return ( + "" % + (self.url, self.errcode, self.errmsg) + ) + +## +# Indicates a broken XML-RPC response package. This exception is +# raised by the unmarshalling layer, if the XML-RPC response is +# malformed. + +class ResponseError(Error): + """Indicates a broken response package.""" + pass + +## +# Indicates an XML-RPC fault response package. This exception is +# raised by the unmarshalling layer, if the XML-RPC response contains +# a fault string. This exception can also be used as a class, to +# generate a fault XML-RPC message. +# +# @param faultCode The XML-RPC fault code. +# @param faultString The XML-RPC fault string. + +class Fault(Error): + """Indicates an XML-RPC fault package.""" + def __init__(self, faultCode, faultString, **extra): + Error.__init__(self) + self.faultCode = faultCode + self.faultString = faultString + def __repr__(self): + return "" % (ensure_new_type(self.faultCode), + ensure_new_type(self.faultString)) + +# -------------------------------------------------------------------- +# Special values + +## +# Backwards compatibility + +boolean = Boolean = bool + +## +# Wrapper for XML-RPC DateTime values. This converts a time value to +# the format used by XML-RPC. +#

+# The value can be given as a datetime object, as a string in the +# format "yyyymmddThh:mm:ss", as a 9-item time tuple (as returned by +# time.localtime()), or an integer value (as returned by time.time()). +# The wrapper uses time.localtime() to convert an integer to a time +# tuple. +# +# @param value The time, given as a datetime object, an ISO 8601 string, +# a time tuple, or an integer time value. + + +### For Python-Future: +def _iso8601_format(value): + return "%04d%02d%02dT%02d:%02d:%02d" % ( + value.year, value.month, value.day, + value.hour, value.minute, value.second) +### +# Issue #13305: different format codes across platforms +# _day0 = datetime(1, 1, 1) +# if _day0.strftime('%Y') == '0001': # Mac OS X +# def _iso8601_format(value): +# return value.strftime("%Y%m%dT%H:%M:%S") +# elif _day0.strftime('%4Y') == '0001': # Linux +# def _iso8601_format(value): +# return value.strftime("%4Y%m%dT%H:%M:%S") +# else: +# def _iso8601_format(value): +# return value.strftime("%Y%m%dT%H:%M:%S").zfill(17) +# del _day0 + + +def _strftime(value): + if isinstance(value, datetime): + return _iso8601_format(value) + + if not isinstance(value, (tuple, time.struct_time)): + if value == 0: + value = time.time() + value = time.localtime(value) + + return "%04d%02d%02dT%02d:%02d:%02d" % value[:6] + +class DateTime(object): + """DateTime wrapper for an ISO 8601 string or time tuple or + localtime integer value to generate 'dateTime.iso8601' XML-RPC + value. + """ + + def __init__(self, value=0): + if isinstance(value, str): + self.value = value + else: + self.value = _strftime(value) + + def make_comparable(self, other): + if isinstance(other, DateTime): + s = self.value + o = other.value + elif isinstance(other, datetime): + s = self.value + o = _iso8601_format(other) + elif isinstance(other, str): + s = self.value + o = other + elif hasattr(other, "timetuple"): + s = self.timetuple() + o = other.timetuple() + else: + otype = (hasattr(other, "__class__") + and other.__class__.__name__ + or type(other)) + raise TypeError("Can't compare %s and %s" % + (self.__class__.__name__, otype)) + return s, o + + def __lt__(self, other): + s, o = self.make_comparable(other) + return s < o + + def __le__(self, other): + s, o = self.make_comparable(other) + return s <= o + + def __gt__(self, other): + s, o = self.make_comparable(other) + return s > o + + def __ge__(self, other): + s, o = self.make_comparable(other) + return s >= o + + def __eq__(self, other): + s, o = self.make_comparable(other) + return s == o + + def __ne__(self, other): + s, o = self.make_comparable(other) + return s != o + + def timetuple(self): + return time.strptime(self.value, "%Y%m%dT%H:%M:%S") + + ## + # Get date/time value. + # + # @return Date/time value, as an ISO 8601 string. + + def __str__(self): + return self.value + + def __repr__(self): + return "" % (ensure_new_type(self.value), id(self)) + + def decode(self, data): + self.value = str(data).strip() + + def encode(self, out): + out.write("") + out.write(self.value) + out.write("\n") + +def _datetime(data): + # decode xml element contents into a DateTime structure. + value = DateTime() + value.decode(data) + return value + +def _datetime_type(data): + return datetime.strptime(data, "%Y%m%dT%H:%M:%S") + +## +# Wrapper for binary data. This can be used to transport any kind +# of binary data over XML-RPC, using BASE64 encoding. +# +# @param data An 8-bit string containing arbitrary data. + +class Binary(object): + """Wrapper for binary data.""" + + def __init__(self, data=None): + if data is None: + data = b"" + else: + if not isinstance(data, (bytes, bytearray)): + raise TypeError("expected bytes or bytearray, not %s" % + data.__class__.__name__) + data = bytes(data) # Make a copy of the bytes! + self.data = data + + ## + # Get buffer contents. + # + # @return Buffer contents, as an 8-bit string. + + def __str__(self): + return str(self.data, "latin-1") # XXX encoding?! + + def __eq__(self, other): + if isinstance(other, Binary): + other = other.data + return self.data == other + + def __ne__(self, other): + if isinstance(other, Binary): + other = other.data + return self.data != other + + def decode(self, data): + self.data = base64.decodebytes(data) + + def encode(self, out): + out.write("\n") + encoded = base64.encodebytes(self.data) + out.write(encoded.decode('ascii')) + out.write("\n") + +def _binary(data): + # decode xml element contents into a Binary structure + value = Binary() + value.decode(data) + return value + +WRAPPERS = (DateTime, Binary) + +# -------------------------------------------------------------------- +# XML parsers + +class ExpatParser(object): + # fast expat parser for Python 2.0 and later. + def __init__(self, target): + self._parser = parser = expat.ParserCreate(None, None) + self._target = target + parser.StartElementHandler = target.start + parser.EndElementHandler = target.end + parser.CharacterDataHandler = target.data + encoding = None + target.xml(encoding, None) + + def feed(self, data): + self._parser.Parse(data, 0) + + def close(self): + self._parser.Parse("", 1) # end of data + del self._target, self._parser # get rid of circular references + +# -------------------------------------------------------------------- +# XML-RPC marshalling and unmarshalling code + +## +# XML-RPC marshaller. +# +# @param encoding Default encoding for 8-bit strings. The default +# value is None (interpreted as UTF-8). +# @see dumps + +class Marshaller(object): + """Generate an XML-RPC params chunk from a Python data structure. + + Create a Marshaller instance for each set of parameters, and use + the "dumps" method to convert your data (represented as a tuple) + to an XML-RPC params chunk. To write a fault response, pass a + Fault instance instead. You may prefer to use the "dumps" module + function for this purpose. + """ + + # by the way, if you don't understand what's going on in here, + # that's perfectly ok. + + def __init__(self, encoding=None, allow_none=False): + self.memo = {} + self.data = None + self.encoding = encoding + self.allow_none = allow_none + + dispatch = {} + + def dumps(self, values): + out = [] + write = out.append + dump = self.__dump + if isinstance(values, Fault): + # fault instance + write("\n") + dump({'faultCode': values.faultCode, + 'faultString': values.faultString}, + write) + write("\n") + else: + # parameter block + # FIXME: the xml-rpc specification allows us to leave out + # the entire block if there are no parameters. + # however, changing this may break older code (including + # old versions of xmlrpclib.py), so this is better left as + # is for now. See @XMLRPC3 for more information. /F + write("\n") + for v in values: + write("\n") + dump(v, write) + write("\n") + write("\n") + result = "".join(out) + return str(result) + + def __dump(self, value, write): + try: + f = self.dispatch[type(ensure_new_type(value))] + except KeyError: + # check if this object can be marshalled as a structure + if not hasattr(value, '__dict__'): + raise TypeError("cannot marshal %s objects" % type(value)) + # check if this class is a sub-class of a basic type, + # because we don't know how to marshal these types + # (e.g. a string sub-class) + for type_ in type(value).__mro__: + if type_ in self.dispatch.keys(): + raise TypeError("cannot marshal %s objects" % type(value)) + # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix + # for the p3yk merge, this should probably be fixed more neatly. + f = self.dispatch["_arbitrary_instance"] + f(self, value, write) + + def dump_nil (self, value, write): + if not self.allow_none: + raise TypeError("cannot marshal None unless allow_none is enabled") + write("") + dispatch[type(None)] = dump_nil + + def dump_bool(self, value, write): + write("") + write(value and "1" or "0") + write("\n") + dispatch[bool] = dump_bool + + def dump_long(self, value, write): + if value > MAXINT or value < MININT: + raise OverflowError("long int exceeds XML-RPC limits") + write("") + write(str(int(value))) + write("\n") + dispatch[int] = dump_long + + # backward compatible + dump_int = dump_long + + def dump_double(self, value, write): + write("") + write(repr(ensure_new_type(value))) + write("\n") + dispatch[float] = dump_double + + def dump_unicode(self, value, write, escape=escape): + write("") + write(escape(value)) + write("\n") + dispatch[str] = dump_unicode + + def dump_bytes(self, value, write): + write("\n") + encoded = base64.encodebytes(value) + write(encoded.decode('ascii')) + write("\n") + dispatch[bytes] = dump_bytes + dispatch[bytearray] = dump_bytes + + def dump_array(self, value, write): + i = id(value) + if i in self.memo: + raise TypeError("cannot marshal recursive sequences") + self.memo[i] = None + dump = self.__dump + write("\n") + for v in value: + dump(v, write) + write("\n") + del self.memo[i] + dispatch[tuple] = dump_array + dispatch[list] = dump_array + + def dump_struct(self, value, write, escape=escape): + i = id(value) + if i in self.memo: + raise TypeError("cannot marshal recursive dictionaries") + self.memo[i] = None + dump = self.__dump + write("\n") + for k, v in value.items(): + write("\n") + if not isinstance(k, str): + raise TypeError("dictionary key must be string") + write("%s\n" % escape(k)) + dump(v, write) + write("\n") + write("\n") + del self.memo[i] + dispatch[dict] = dump_struct + + def dump_datetime(self, value, write): + write("") + write(_strftime(value)) + write("\n") + dispatch[datetime] = dump_datetime + + def dump_instance(self, value, write): + # check for special wrappers + if value.__class__ in WRAPPERS: + self.write = write + value.encode(self) + del self.write + else: + # store instance attributes as a struct (really?) + self.dump_struct(value.__dict__, write) + dispatch[DateTime] = dump_instance + dispatch[Binary] = dump_instance + # XXX(twouters): using "_arbitrary_instance" as key as a quick-fix + # for the p3yk merge, this should probably be fixed more neatly. + dispatch["_arbitrary_instance"] = dump_instance + +## +# XML-RPC unmarshaller. +# +# @see loads + +class Unmarshaller(object): + """Unmarshal an XML-RPC response, based on incoming XML event + messages (start, data, end). Call close() to get the resulting + data structure. + + Note that this reader is fairly tolerant, and gladly accepts bogus + XML-RPC data without complaining (but not bogus XML). + """ + + # and again, if you don't understand what's going on in here, + # that's perfectly ok. + + def __init__(self, use_datetime=False, use_builtin_types=False): + self._type = None + self._stack = [] + self._marks = [] + self._data = [] + self._methodname = None + self._encoding = "utf-8" + self.append = self._stack.append + self._use_datetime = use_builtin_types or use_datetime + self._use_bytes = use_builtin_types + + def close(self): + # return response tuple and target method + if self._type is None or self._marks: + raise ResponseError() + if self._type == "fault": + raise Fault(**self._stack[0]) + return tuple(self._stack) + + def getmethodname(self): + return self._methodname + + # + # event handlers + + def xml(self, encoding, standalone): + self._encoding = encoding + # FIXME: assert standalone == 1 ??? + + def start(self, tag, attrs): + # prepare to handle this element + if tag == "array" or tag == "struct": + self._marks.append(len(self._stack)) + self._data = [] + self._value = (tag == "value") + + def data(self, text): + self._data.append(text) + + def end(self, tag): + # call the appropriate end tag handler + try: + f = self.dispatch[tag] + except KeyError: + pass # unknown tag ? + else: + return f(self, "".join(self._data)) + + # + # accelerator support + + def end_dispatch(self, tag, data): + # dispatch data + try: + f = self.dispatch[tag] + except KeyError: + pass # unknown tag ? + else: + return f(self, data) + + # + # element decoders + + dispatch = {} + + def end_nil (self, data): + self.append(None) + self._value = 0 + dispatch["nil"] = end_nil + + def end_boolean(self, data): + if data == "0": + self.append(False) + elif data == "1": + self.append(True) + else: + raise TypeError("bad boolean value") + self._value = 0 + dispatch["boolean"] = end_boolean + + def end_int(self, data): + self.append(int(data)) + self._value = 0 + dispatch["i4"] = end_int + dispatch["i8"] = end_int + dispatch["int"] = end_int + + def end_double(self, data): + self.append(float(data)) + self._value = 0 + dispatch["double"] = end_double + + def end_string(self, data): + if self._encoding: + data = data.decode(self._encoding) + self.append(data) + self._value = 0 + dispatch["string"] = end_string + dispatch["name"] = end_string # struct keys are always strings + + def end_array(self, data): + mark = self._marks.pop() + # map arrays to Python lists + self._stack[mark:] = [self._stack[mark:]] + self._value = 0 + dispatch["array"] = end_array + + def end_struct(self, data): + mark = self._marks.pop() + # map structs to Python dictionaries + dict = {} + items = self._stack[mark:] + for i in range(0, len(items), 2): + dict[items[i]] = items[i+1] + self._stack[mark:] = [dict] + self._value = 0 + dispatch["struct"] = end_struct + + def end_base64(self, data): + value = Binary() + value.decode(data.encode("ascii")) + if self._use_bytes: + value = value.data + self.append(value) + self._value = 0 + dispatch["base64"] = end_base64 + + def end_dateTime(self, data): + value = DateTime() + value.decode(data) + if self._use_datetime: + value = _datetime_type(data) + self.append(value) + dispatch["dateTime.iso8601"] = end_dateTime + + def end_value(self, data): + # if we stumble upon a value element with no internal + # elements, treat it as a string element + if self._value: + self.end_string(data) + dispatch["value"] = end_value + + def end_params(self, data): + self._type = "params" + dispatch["params"] = end_params + + def end_fault(self, data): + self._type = "fault" + dispatch["fault"] = end_fault + + def end_methodName(self, data): + if self._encoding: + data = data.decode(self._encoding) + self._methodname = data + self._type = "methodName" # no params + dispatch["methodName"] = end_methodName + +## Multicall support +# + +class _MultiCallMethod(object): + # some lesser magic to store calls made to a MultiCall object + # for batch execution + def __init__(self, call_list, name): + self.__call_list = call_list + self.__name = name + def __getattr__(self, name): + return _MultiCallMethod(self.__call_list, "%s.%s" % (self.__name, name)) + def __call__(self, *args): + self.__call_list.append((self.__name, args)) + +class MultiCallIterator(object): + """Iterates over the results of a multicall. Exceptions are + raised in response to xmlrpc faults.""" + + def __init__(self, results): + self.results = results + + def __getitem__(self, i): + item = self.results[i] + if isinstance(type(item), dict): + raise Fault(item['faultCode'], item['faultString']) + elif type(item) == type([]): + return item[0] + else: + raise ValueError("unexpected type in multicall result") + +class MultiCall(object): + """server -> a object used to boxcar method calls + + server should be a ServerProxy object. + + Methods can be added to the MultiCall using normal + method call syntax e.g.: + + multicall = MultiCall(server_proxy) + multicall.add(2,3) + multicall.get_address("Guido") + + To execute the multicall, call the MultiCall object e.g.: + + add_result, address = multicall() + """ + + def __init__(self, server): + self.__server = server + self.__call_list = [] + + def __repr__(self): + return "" % id(self) + + __str__ = __repr__ + + def __getattr__(self, name): + return _MultiCallMethod(self.__call_list, name) + + def __call__(self): + marshalled_list = [] + for name, args in self.__call_list: + marshalled_list.append({'methodName' : name, 'params' : args}) + + return MultiCallIterator(self.__server.system.multicall(marshalled_list)) + +# -------------------------------------------------------------------- +# convenience functions + +FastMarshaller = FastParser = FastUnmarshaller = None + +## +# Create a parser object, and connect it to an unmarshalling instance. +# This function picks the fastest available XML parser. +# +# return A (parser, unmarshaller) tuple. + +def getparser(use_datetime=False, use_builtin_types=False): + """getparser() -> parser, unmarshaller + + Create an instance of the fastest available parser, and attach it + to an unmarshalling object. Return both objects. + """ + if FastParser and FastUnmarshaller: + if use_builtin_types: + mkdatetime = _datetime_type + mkbytes = base64.decodebytes + elif use_datetime: + mkdatetime = _datetime_type + mkbytes = _binary + else: + mkdatetime = _datetime + mkbytes = _binary + target = FastUnmarshaller(True, False, mkbytes, mkdatetime, Fault) + parser = FastParser(target) + else: + target = Unmarshaller(use_datetime=use_datetime, use_builtin_types=use_builtin_types) + if FastParser: + parser = FastParser(target) + else: + parser = ExpatParser(target) + return parser, target + +## +# Convert a Python tuple or a Fault instance to an XML-RPC packet. +# +# @def dumps(params, **options) +# @param params A tuple or Fault instance. +# @keyparam methodname If given, create a methodCall request for +# this method name. +# @keyparam methodresponse If given, create a methodResponse packet. +# If used with a tuple, the tuple must be a singleton (that is, +# it must contain exactly one element). +# @keyparam encoding The packet encoding. +# @return A string containing marshalled data. + +def dumps(params, methodname=None, methodresponse=None, encoding=None, + allow_none=False): + """data [,options] -> marshalled data + + Convert an argument tuple or a Fault instance to an XML-RPC + request (or response, if the methodresponse option is used). + + In addition to the data object, the following options can be given + as keyword arguments: + + methodname: the method name for a methodCall packet + + methodresponse: true to create a methodResponse packet. + If this option is used with a tuple, the tuple must be + a singleton (i.e. it can contain only one element). + + encoding: the packet encoding (default is UTF-8) + + All byte strings in the data structure are assumed to use the + packet encoding. Unicode strings are automatically converted, + where necessary. + """ + + assert isinstance(params, (tuple, Fault)), "argument must be tuple or Fault instance" + if isinstance(params, Fault): + methodresponse = 1 + elif methodresponse and isinstance(params, tuple): + assert len(params) == 1, "response tuple must be a singleton" + + if not encoding: + encoding = "utf-8" + + if FastMarshaller: + m = FastMarshaller(encoding) + else: + m = Marshaller(encoding, allow_none) + + data = m.dumps(params) + + if encoding != "utf-8": + xmlheader = "\n" % str(encoding) + else: + xmlheader = "\n" # utf-8 is default + + # standard XML-RPC wrappings + if methodname: + # a method call + if not isinstance(methodname, str): + methodname = methodname.encode(encoding) + data = ( + xmlheader, + "\n" + "", methodname, "\n", + data, + "\n" + ) + elif methodresponse: + # a method response, or a fault structure + data = ( + xmlheader, + "\n", + data, + "\n" + ) + else: + return data # return as is + return str("").join(data) + +## +# Convert an XML-RPC packet to a Python object. If the XML-RPC packet +# represents a fault condition, this function raises a Fault exception. +# +# @param data An XML-RPC packet, given as an 8-bit string. +# @return A tuple containing the unpacked data, and the method name +# (None if not present). +# @see Fault + +def loads(data, use_datetime=False, use_builtin_types=False): + """data -> unmarshalled data, method name + + Convert an XML-RPC packet to unmarshalled data plus a method + name (None if not present). + + If the XML-RPC packet represents a fault condition, this function + raises a Fault exception. + """ + p, u = getparser(use_datetime=use_datetime, use_builtin_types=use_builtin_types) + p.feed(data) + p.close() + return u.close(), u.getmethodname() + +## +# Encode a string using the gzip content encoding such as specified by the +# Content-Encoding: gzip +# in the HTTP header, as described in RFC 1952 +# +# @param data the unencoded data +# @return the encoded data + +def gzip_encode(data): + """data -> gzip encoded data + + Encode data using the gzip content encoding as described in RFC 1952 + """ + if not gzip: + raise NotImplementedError + f = BytesIO() + gzf = gzip.GzipFile(mode="wb", fileobj=f, compresslevel=1) + gzf.write(data) + gzf.close() + encoded = f.getvalue() + f.close() + return encoded + +## +# Decode a string using the gzip content encoding such as specified by the +# Content-Encoding: gzip +# in the HTTP header, as described in RFC 1952 +# +# @param data The encoded data +# @return the unencoded data +# @raises ValueError if data is not correctly coded. + +def gzip_decode(data): + """gzip encoded data -> unencoded data + + Decode data using the gzip content encoding as described in RFC 1952 + """ + if not gzip: + raise NotImplementedError + f = BytesIO(data) + gzf = gzip.GzipFile(mode="rb", fileobj=f) + try: + decoded = gzf.read() + except IOError: + raise ValueError("invalid data") + f.close() + gzf.close() + return decoded + +## +# Return a decoded file-like object for the gzip encoding +# as described in RFC 1952. +# +# @param response A stream supporting a read() method +# @return a file-like object that the decoded data can be read() from + +class GzipDecodedResponse(gzip.GzipFile if gzip else object): + """a file-like object to decode a response encoded with the gzip + method, as described in RFC 1952. + """ + def __init__(self, response): + #response doesn't support tell() and read(), required by + #GzipFile + if not gzip: + raise NotImplementedError + self.io = BytesIO(response.read()) + gzip.GzipFile.__init__(self, mode="rb", fileobj=self.io) + + def close(self): + gzip.GzipFile.close(self) + self.io.close() + + +# -------------------------------------------------------------------- +# request dispatcher + +class _Method(object): + # some magic to bind an XML-RPC method to an RPC server. + # supports "nested" methods (e.g. examples.getStateName) + def __init__(self, send, name): + self.__send = send + self.__name = name + def __getattr__(self, name): + return _Method(self.__send, "%s.%s" % (self.__name, name)) + def __call__(self, *args): + return self.__send(self.__name, args) + +## +# Standard transport class for XML-RPC over HTTP. +#

+# You can create custom transports by subclassing this method, and +# overriding selected methods. + +class Transport(object): + """Handles an HTTP transaction to an XML-RPC server.""" + + # client identifier (may be overridden) + user_agent = "Python-xmlrpc/%s" % __version__ + + #if true, we'll request gzip encoding + accept_gzip_encoding = True + + # if positive, encode request using gzip if it exceeds this threshold + # note that many server will get confused, so only use it if you know + # that they can decode such a request + encode_threshold = None #None = don't encode + + def __init__(self, use_datetime=False, use_builtin_types=False): + self._use_datetime = use_datetime + self._use_builtin_types = use_builtin_types + self._connection = (None, None) + self._extra_headers = [] + + ## + # Send a complete request, and parse the response. + # Retry request if a cached connection has disconnected. + # + # @param host Target host. + # @param handler Target PRC handler. + # @param request_body XML-RPC request body. + # @param verbose Debugging flag. + # @return Parsed response. + + def request(self, host, handler, request_body, verbose=False): + #retry request once if cached connection has gone cold + for i in (0, 1): + try: + return self.single_request(host, handler, request_body, verbose) + except socket.error as e: + if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE): + raise + except http_client.BadStatusLine: #close after we sent request + if i: + raise + + def single_request(self, host, handler, request_body, verbose=False): + # issue XML-RPC request + try: + http_conn = self.send_request(host, handler, request_body, verbose) + resp = http_conn.getresponse() + if resp.status == 200: + self.verbose = verbose + return self.parse_response(resp) + + except Fault: + raise + except Exception: + #All unexpected errors leave connection in + # a strange state, so we clear it. + self.close() + raise + + #We got an error response. + #Discard any response data and raise exception + if resp.getheader("content-length", ""): + resp.read() + raise ProtocolError( + host + handler, + resp.status, resp.reason, + dict(resp.getheaders()) + ) + + + ## + # Create parser. + # + # @return A 2-tuple containing a parser and a unmarshaller. + + def getparser(self): + # get parser and unmarshaller + return getparser(use_datetime=self._use_datetime, + use_builtin_types=self._use_builtin_types) + + ## + # Get authorization info from host parameter + # Host may be a string, or a (host, x509-dict) tuple; if a string, + # it is checked for a "user:pw@host" format, and a "Basic + # Authentication" header is added if appropriate. + # + # @param host Host descriptor (URL or (URL, x509 info) tuple). + # @return A 3-tuple containing (actual host, extra headers, + # x509 info). The header and x509 fields may be None. + + def get_host_info(self, host): + + x509 = {} + if isinstance(host, tuple): + host, x509 = host + + auth, host = urllib_parse.splituser(host) + + if auth: + auth = urllib_parse.unquote_to_bytes(auth) + auth = base64.encodebytes(auth).decode("utf-8") + auth = "".join(auth.split()) # get rid of whitespace + extra_headers = [ + ("Authorization", "Basic " + auth) + ] + else: + extra_headers = [] + + return host, extra_headers, x509 + + ## + # Connect to server. + # + # @param host Target host. + # @return An HTTPConnection object + + def make_connection(self, host): + #return an existing connection if possible. This allows + #HTTP/1.1 keep-alive. + if self._connection and host == self._connection[0]: + return self._connection[1] + # create a HTTP connection object from a host descriptor + chost, self._extra_headers, x509 = self.get_host_info(host) + self._connection = host, http_client.HTTPConnection(chost) + return self._connection[1] + + ## + # Clear any cached connection object. + # Used in the event of socket errors. + # + def close(self): + if self._connection[1]: + self._connection[1].close() + self._connection = (None, None) + + ## + # Send HTTP request. + # + # @param host Host descriptor (URL or (URL, x509 info) tuple). + # @param handler Targer RPC handler (a path relative to host) + # @param request_body The XML-RPC request body + # @param debug Enable debugging if debug is true. + # @return An HTTPConnection. + + def send_request(self, host, handler, request_body, debug): + connection = self.make_connection(host) + headers = self._extra_headers[:] + if debug: + connection.set_debuglevel(1) + if self.accept_gzip_encoding and gzip: + connection.putrequest("POST", handler, skip_accept_encoding=True) + headers.append(("Accept-Encoding", "gzip")) + else: + connection.putrequest("POST", handler) + headers.append(("Content-Type", "text/xml")) + headers.append(("User-Agent", self.user_agent)) + self.send_headers(connection, headers) + self.send_content(connection, request_body) + return connection + + ## + # Send request headers. + # This function provides a useful hook for subclassing + # + # @param connection httpConnection. + # @param headers list of key,value pairs for HTTP headers + + def send_headers(self, connection, headers): + for key, val in headers: + connection.putheader(key, val) + + ## + # Send request body. + # This function provides a useful hook for subclassing + # + # @param connection httpConnection. + # @param request_body XML-RPC request body. + + def send_content(self, connection, request_body): + #optionally encode the request + if (self.encode_threshold is not None and + self.encode_threshold < len(request_body) and + gzip): + connection.putheader("Content-Encoding", "gzip") + request_body = gzip_encode(request_body) + + connection.putheader("Content-Length", str(len(request_body))) + connection.endheaders(request_body) + + ## + # Parse response. + # + # @param file Stream. + # @return Response tuple and target method. + + def parse_response(self, response): + # read response data from httpresponse, and parse it + # Check for new http response object, otherwise it is a file object. + if hasattr(response, 'getheader'): + if response.getheader("Content-Encoding", "") == "gzip": + stream = GzipDecodedResponse(response) + else: + stream = response + else: + stream = response + + p, u = self.getparser() + + while 1: + data = stream.read(1024) + if not data: + break + if self.verbose: + print("body:", repr(data)) + p.feed(data) + + if stream is not response: + stream.close() + p.close() + + return u.close() + +## +# Standard transport class for XML-RPC over HTTPS. + +class SafeTransport(Transport): + """Handles an HTTPS transaction to an XML-RPC server.""" + + # FIXME: mostly untested + + def make_connection(self, host): + if self._connection and host == self._connection[0]: + return self._connection[1] + + if not hasattr(http_client, "HTTPSConnection"): + raise NotImplementedError( + "your version of http.client doesn't support HTTPS") + # create a HTTPS connection object from a host descriptor + # host may be a string, or a (host, x509-dict) tuple + chost, self._extra_headers, x509 = self.get_host_info(host) + self._connection = host, http_client.HTTPSConnection(chost, + None, **(x509 or {})) + return self._connection[1] + +## +# Standard server proxy. This class establishes a virtual connection +# to an XML-RPC server. +#

+# This class is available as ServerProxy and Server. New code should +# use ServerProxy, to avoid confusion. +# +# @def ServerProxy(uri, **options) +# @param uri The connection point on the server. +# @keyparam transport A transport factory, compatible with the +# standard transport class. +# @keyparam encoding The default encoding used for 8-bit strings +# (default is UTF-8). +# @keyparam verbose Use a true value to enable debugging output. +# (printed to standard output). +# @see Transport + +class ServerProxy(object): + """uri [,options] -> a logical connection to an XML-RPC server + + uri is the connection point on the server, given as + scheme://host/target. + + The standard implementation always supports the "http" scheme. If + SSL socket support is available (Python 2.0), it also supports + "https". + + If the target part and the slash preceding it are both omitted, + "/RPC2" is assumed. + + The following options can be given as keyword arguments: + + transport: a transport factory + encoding: the request encoding (default is UTF-8) + + All 8-bit strings passed to the server proxy are assumed to use + the given encoding. + """ + + def __init__(self, uri, transport=None, encoding=None, verbose=False, + allow_none=False, use_datetime=False, use_builtin_types=False): + # establish a "logical" server connection + + # get the url + type, uri = urllib_parse.splittype(uri) + if type not in ("http", "https"): + raise IOError("unsupported XML-RPC protocol") + self.__host, self.__handler = urllib_parse.splithost(uri) + if not self.__handler: + self.__handler = "/RPC2" + + if transport is None: + if type == "https": + handler = SafeTransport + else: + handler = Transport + transport = handler(use_datetime=use_datetime, + use_builtin_types=use_builtin_types) + self.__transport = transport + + self.__encoding = encoding or 'utf-8' + self.__verbose = verbose + self.__allow_none = allow_none + + def __close(self): + self.__transport.close() + + def __request(self, methodname, params): + # call a method on the remote server + + request = dumps(params, methodname, encoding=self.__encoding, + allow_none=self.__allow_none).encode(self.__encoding) + + response = self.__transport.request( + self.__host, + self.__handler, + request, + verbose=self.__verbose + ) + + if len(response) == 1: + response = response[0] + + return response + + def __repr__(self): + return ( + "" % + (self.__host, self.__handler) + ) + + __str__ = __repr__ + + def __getattr__(self, name): + # magic method dispatcher + return _Method(self.__request, name) + + # note: to call a remote object with an non-standard name, use + # result getattr(server, "strange-python-name")(args) + + def __call__(self, attr): + """A workaround to get special attributes on the ServerProxy + without interfering with the magic __getattr__ + """ + if attr == "close": + return self.__close + elif attr == "transport": + return self.__transport + raise AttributeError("Attribute %r not found" % (attr,)) + +# compatibility + +Server = ServerProxy + +# -------------------------------------------------------------------- +# test code + +if __name__ == "__main__": + + # simple test program (from the XML-RPC specification) + + # local server, available from Lib/xmlrpc/server.py + server = ServerProxy("http://localhost:8000") + + try: + print(server.currentTime.getCurrentTime()) + except Error as v: + print("ERROR", v) + + multi = MultiCall(server) + multi.getData() + multi.pow(2,9) + multi.add(1,2) + try: + for response in multi(): + print(response) + except Error as v: + print("ERROR", v) diff --git a/lib/future/backports/xmlrpc/server.py b/lib/future/backports/xmlrpc/server.py new file mode 100644 index 00000000..28072bfe --- /dev/null +++ b/lib/future/backports/xmlrpc/server.py @@ -0,0 +1,999 @@ +r""" +Ported using Python-Future from the Python 3.3 standard library. + +XML-RPC Servers. + +This module can be used to create simple XML-RPC servers +by creating a server and either installing functions, a +class instance, or by extending the SimpleXMLRPCServer +class. + +It can also be used to handle XML-RPC requests in a CGI +environment using CGIXMLRPCRequestHandler. + +The Doc* classes can be used to create XML-RPC servers that +serve pydoc-style documentation in response to HTTP +GET requests. This documentation is dynamically generated +based on the functions and methods registered with the +server. + +A list of possible usage patterns follows: + +1. Install functions: + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_function(pow) +server.register_function(lambda x,y: x+y, 'add') +server.serve_forever() + +2. Install an instance: + +class MyFuncs: + def __init__(self): + # make all of the sys functions available through sys.func_name + import sys + self.sys = sys + def _listMethods(self): + # implement this method so that system.listMethods + # knows to advertise the sys methods + return list_public_methods(self) + \ + ['sys.' + method for method in list_public_methods(self.sys)] + def pow(self, x, y): return pow(x, y) + def add(self, x, y) : return x + y + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_introspection_functions() +server.register_instance(MyFuncs()) +server.serve_forever() + +3. Install an instance with custom dispatch method: + +class Math: + def _listMethods(self): + # this method must be present for system.listMethods + # to work + return ['add', 'pow'] + def _methodHelp(self, method): + # this method must be present for system.methodHelp + # to work + if method == 'add': + return "add(2,3) => 5" + elif method == 'pow': + return "pow(x, y[, z]) => number" + else: + # By convention, return empty + # string if no help is available + return "" + def _dispatch(self, method, params): + if method == 'pow': + return pow(*params) + elif method == 'add': + return params[0] + params[1] + else: + raise ValueError('bad method') + +server = SimpleXMLRPCServer(("localhost", 8000)) +server.register_introspection_functions() +server.register_instance(Math()) +server.serve_forever() + +4. Subclass SimpleXMLRPCServer: + +class MathServer(SimpleXMLRPCServer): + def _dispatch(self, method, params): + try: + # We are forcing the 'export_' prefix on methods that are + # callable through XML-RPC to prevent potential security + # problems + func = getattr(self, 'export_' + method) + except AttributeError: + raise Exception('method "%s" is not supported' % method) + else: + return func(*params) + + def export_add(self, x, y): + return x + y + +server = MathServer(("localhost", 8000)) +server.serve_forever() + +5. CGI script: + +server = CGIXMLRPCRequestHandler() +server.register_function(pow) +server.handle_request() +""" + +from __future__ import absolute_import, division, print_function, unicode_literals +from future.builtins import int, str + +# Written by Brian Quinlan (brian@sweetapp.com). +# Based on code written by Fredrik Lundh. + +from future.backports.xmlrpc.client import Fault, dumps, loads, gzip_encode, gzip_decode +from future.backports.http.server import BaseHTTPRequestHandler +import future.backports.http.server as http_server +from future.backports import socketserver +import sys +import os +import re +import pydoc +import inspect +import traceback +try: + import fcntl +except ImportError: + fcntl = None + +def resolve_dotted_attribute(obj, attr, allow_dotted_names=True): + """resolve_dotted_attribute(a, 'b.c.d') => a.b.c.d + + Resolves a dotted attribute name to an object. Raises + an AttributeError if any attribute in the chain starts with a '_'. + + If the optional allow_dotted_names argument is false, dots are not + supported and this function operates similar to getattr(obj, attr). + """ + + if allow_dotted_names: + attrs = attr.split('.') + else: + attrs = [attr] + + for i in attrs: + if i.startswith('_'): + raise AttributeError( + 'attempt to access private attribute "%s"' % i + ) + else: + obj = getattr(obj,i) + return obj + +def list_public_methods(obj): + """Returns a list of attribute strings, found in the specified + object, which represent callable attributes""" + + return [member for member in dir(obj) + if not member.startswith('_') and + callable(getattr(obj, member))] + +class SimpleXMLRPCDispatcher(object): + """Mix-in class that dispatches XML-RPC requests. + + This class is used to register XML-RPC method handlers + and then to dispatch them. This class doesn't need to be + instanced directly when used by SimpleXMLRPCServer but it + can be instanced when used by the MultiPathXMLRPCServer + """ + + def __init__(self, allow_none=False, encoding=None, + use_builtin_types=False): + self.funcs = {} + self.instance = None + self.allow_none = allow_none + self.encoding = encoding or 'utf-8' + self.use_builtin_types = use_builtin_types + + def register_instance(self, instance, allow_dotted_names=False): + """Registers an instance to respond to XML-RPC requests. + + Only one instance can be installed at a time. + + If the registered instance has a _dispatch method then that + method will be called with the name of the XML-RPC method and + its parameters as a tuple + e.g. instance._dispatch('add',(2,3)) + + If the registered instance does not have a _dispatch method + then the instance will be searched to find a matching method + and, if found, will be called. Methods beginning with an '_' + are considered private and will not be called by + SimpleXMLRPCServer. + + If a registered function matches a XML-RPC request, then it + will be called instead of the registered instance. + + If the optional allow_dotted_names argument is true and the + instance does not have a _dispatch method, method names + containing dots are supported and resolved, as long as none of + the name segments start with an '_'. + + *** SECURITY WARNING: *** + + Enabling the allow_dotted_names options allows intruders + to access your module's global variables and may allow + intruders to execute arbitrary code on your machine. Only + use this option on a secure, closed network. + + """ + + self.instance = instance + self.allow_dotted_names = allow_dotted_names + + def register_function(self, function, name=None): + """Registers a function to respond to XML-RPC requests. + + The optional name argument can be used to set a Unicode name + for the function. + """ + + if name is None: + name = function.__name__ + self.funcs[name] = function + + def register_introspection_functions(self): + """Registers the XML-RPC introspection methods in the system + namespace. + + see http://xmlrpc.usefulinc.com/doc/reserved.html + """ + + self.funcs.update({'system.listMethods' : self.system_listMethods, + 'system.methodSignature' : self.system_methodSignature, + 'system.methodHelp' : self.system_methodHelp}) + + def register_multicall_functions(self): + """Registers the XML-RPC multicall method in the system + namespace. + + see http://www.xmlrpc.com/discuss/msgReader$1208""" + + self.funcs.update({'system.multicall' : self.system_multicall}) + + def _marshaled_dispatch(self, data, dispatch_method = None, path = None): + """Dispatches an XML-RPC method from marshalled (XML) data. + + XML-RPC methods are dispatched from the marshalled (XML) data + using the _dispatch method and the result is returned as + marshalled data. For backwards compatibility, a dispatch + function can be provided as an argument (see comment in + SimpleXMLRPCRequestHandler.do_POST) but overriding the + existing method through subclassing is the preferred means + of changing method dispatch behavior. + """ + + try: + params, method = loads(data, use_builtin_types=self.use_builtin_types) + + # generate response + if dispatch_method is not None: + response = dispatch_method(method, params) + else: + response = self._dispatch(method, params) + # wrap response in a singleton tuple + response = (response,) + response = dumps(response, methodresponse=1, + allow_none=self.allow_none, encoding=self.encoding) + except Fault as fault: + response = dumps(fault, allow_none=self.allow_none, + encoding=self.encoding) + except: + # report exception back to server + exc_type, exc_value, exc_tb = sys.exc_info() + response = dumps( + Fault(1, "%s:%s" % (exc_type, exc_value)), + encoding=self.encoding, allow_none=self.allow_none, + ) + + return response.encode(self.encoding) + + def system_listMethods(self): + """system.listMethods() => ['add', 'subtract', 'multiple'] + + Returns a list of the methods supported by the server.""" + + methods = set(self.funcs.keys()) + if self.instance is not None: + # Instance can implement _listMethod to return a list of + # methods + if hasattr(self.instance, '_listMethods'): + methods |= set(self.instance._listMethods()) + # if the instance has a _dispatch method then we + # don't have enough information to provide a list + # of methods + elif not hasattr(self.instance, '_dispatch'): + methods |= set(list_public_methods(self.instance)) + return sorted(methods) + + def system_methodSignature(self, method_name): + """system.methodSignature('add') => [double, int, int] + + Returns a list describing the signature of the method. In the + above example, the add method takes two integers as arguments + and returns a double result. + + This server does NOT support system.methodSignature.""" + + # See http://xmlrpc.usefulinc.com/doc/sysmethodsig.html + + return 'signatures not supported' + + def system_methodHelp(self, method_name): + """system.methodHelp('add') => "Adds two integers together" + + Returns a string containing documentation for the specified method.""" + + method = None + if method_name in self.funcs: + method = self.funcs[method_name] + elif self.instance is not None: + # Instance can implement _methodHelp to return help for a method + if hasattr(self.instance, '_methodHelp'): + return self.instance._methodHelp(method_name) + # if the instance has a _dispatch method then we + # don't have enough information to provide help + elif not hasattr(self.instance, '_dispatch'): + try: + method = resolve_dotted_attribute( + self.instance, + method_name, + self.allow_dotted_names + ) + except AttributeError: + pass + + # Note that we aren't checking that the method actually + # be a callable object of some kind + if method is None: + return "" + else: + return pydoc.getdoc(method) + + def system_multicall(self, call_list): + """system.multicall([{'methodName': 'add', 'params': [2, 2]}, ...]) => \ +[[4], ...] + + Allows the caller to package multiple XML-RPC calls into a single + request. + + See http://www.xmlrpc.com/discuss/msgReader$1208 + """ + + results = [] + for call in call_list: + method_name = call['methodName'] + params = call['params'] + + try: + # XXX A marshalling error in any response will fail the entire + # multicall. If someone cares they should fix this. + results.append([self._dispatch(method_name, params)]) + except Fault as fault: + results.append( + {'faultCode' : fault.faultCode, + 'faultString' : fault.faultString} + ) + except: + exc_type, exc_value, exc_tb = sys.exc_info() + results.append( + {'faultCode' : 1, + 'faultString' : "%s:%s" % (exc_type, exc_value)} + ) + return results + + def _dispatch(self, method, params): + """Dispatches the XML-RPC method. + + XML-RPC calls are forwarded to a registered function that + matches the called XML-RPC method name. If no such function + exists then the call is forwarded to the registered instance, + if available. + + If the registered instance has a _dispatch method then that + method will be called with the name of the XML-RPC method and + its parameters as a tuple + e.g. instance._dispatch('add',(2,3)) + + If the registered instance does not have a _dispatch method + then the instance will be searched to find a matching method + and, if found, will be called. + + Methods beginning with an '_' are considered private and will + not be called. + """ + + func = None + try: + # check to see if a matching function has been registered + func = self.funcs[method] + except KeyError: + if self.instance is not None: + # check for a _dispatch method + if hasattr(self.instance, '_dispatch'): + return self.instance._dispatch(method, params) + else: + # call instance method directly + try: + func = resolve_dotted_attribute( + self.instance, + method, + self.allow_dotted_names + ) + except AttributeError: + pass + + if func is not None: + return func(*params) + else: + raise Exception('method "%s" is not supported' % method) + +class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler): + """Simple XML-RPC request handler class. + + Handles all HTTP POST requests and attempts to decode them as + XML-RPC requests. + """ + + # Class attribute listing the accessible path components; + # paths not on this list will result in a 404 error. + rpc_paths = ('/', '/RPC2') + + #if not None, encode responses larger than this, if possible + encode_threshold = 1400 #a common MTU + + #Override form StreamRequestHandler: full buffering of output + #and no Nagle. + wbufsize = -1 + disable_nagle_algorithm = True + + # a re to match a gzip Accept-Encoding + aepattern = re.compile(r""" + \s* ([^\s;]+) \s* #content-coding + (;\s* q \s*=\s* ([0-9\.]+))? #q + """, re.VERBOSE | re.IGNORECASE) + + def accept_encodings(self): + r = {} + ae = self.headers.get("Accept-Encoding", "") + for e in ae.split(","): + match = self.aepattern.match(e) + if match: + v = match.group(3) + v = float(v) if v else 1.0 + r[match.group(1)] = v + return r + + def is_rpc_path_valid(self): + if self.rpc_paths: + return self.path in self.rpc_paths + else: + # If .rpc_paths is empty, just assume all paths are legal + return True + + def do_POST(self): + """Handles the HTTP POST request. + + Attempts to interpret all HTTP POST requests as XML-RPC calls, + which are forwarded to the server's _dispatch method for handling. + """ + + # Check that the path is legal + if not self.is_rpc_path_valid(): + self.report_404() + return + + try: + # Get arguments by reading body of request. + # We read this in chunks to avoid straining + # socket.read(); around the 10 or 15Mb mark, some platforms + # begin to have problems (bug #792570). + max_chunk_size = 10*1024*1024 + size_remaining = int(self.headers["content-length"]) + L = [] + while size_remaining: + chunk_size = min(size_remaining, max_chunk_size) + chunk = self.rfile.read(chunk_size) + if not chunk: + break + L.append(chunk) + size_remaining -= len(L[-1]) + data = b''.join(L) + + data = self.decode_request_content(data) + if data is None: + return #response has been sent + + # In previous versions of SimpleXMLRPCServer, _dispatch + # could be overridden in this class, instead of in + # SimpleXMLRPCDispatcher. To maintain backwards compatibility, + # check to see if a subclass implements _dispatch and dispatch + # using that method if present. + response = self.server._marshaled_dispatch( + data, getattr(self, '_dispatch', None), self.path + ) + except Exception as e: # This should only happen if the module is buggy + # internal error, report as HTTP server error + self.send_response(500) + + # Send information about the exception if requested + if hasattr(self.server, '_send_traceback_header') and \ + self.server._send_traceback_header: + self.send_header("X-exception", str(e)) + trace = traceback.format_exc() + trace = str(trace.encode('ASCII', 'backslashreplace'), 'ASCII') + self.send_header("X-traceback", trace) + + self.send_header("Content-length", "0") + self.end_headers() + else: + self.send_response(200) + self.send_header("Content-type", "text/xml") + if self.encode_threshold is not None: + if len(response) > self.encode_threshold: + q = self.accept_encodings().get("gzip", 0) + if q: + try: + response = gzip_encode(response) + self.send_header("Content-Encoding", "gzip") + except NotImplementedError: + pass + self.send_header("Content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + + def decode_request_content(self, data): + #support gzip encoding of request + encoding = self.headers.get("content-encoding", "identity").lower() + if encoding == "identity": + return data + if encoding == "gzip": + try: + return gzip_decode(data) + except NotImplementedError: + self.send_response(501, "encoding %r not supported" % encoding) + except ValueError: + self.send_response(400, "error decoding gzip content") + else: + self.send_response(501, "encoding %r not supported" % encoding) + self.send_header("Content-length", "0") + self.end_headers() + + def report_404 (self): + # Report a 404 error + self.send_response(404) + response = b'No such page' + self.send_header("Content-type", "text/plain") + self.send_header("Content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + + def log_request(self, code='-', size='-'): + """Selectively log an accepted request.""" + + if self.server.logRequests: + BaseHTTPRequestHandler.log_request(self, code, size) + +class SimpleXMLRPCServer(socketserver.TCPServer, + SimpleXMLRPCDispatcher): + """Simple XML-RPC server. + + Simple XML-RPC server that allows functions and a single instance + to be installed to handle requests. The default implementation + attempts to dispatch XML-RPC calls to the functions or instance + installed in the server. Override the _dispatch method inherited + from SimpleXMLRPCDispatcher to change this behavior. + """ + + allow_reuse_address = True + + # Warning: this is for debugging purposes only! Never set this to True in + # production code, as will be sending out sensitive information (exception + # and stack trace details) when exceptions are raised inside + # SimpleXMLRPCRequestHandler.do_POST + _send_traceback_header = False + + def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + self.logRequests = logRequests + + SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) + socketserver.TCPServer.__init__(self, addr, requestHandler, bind_and_activate) + + # [Bug #1222790] If possible, set close-on-exec flag; if a + # method spawns a subprocess, the subprocess shouldn't have + # the listening socket open. + if fcntl is not None and hasattr(fcntl, 'FD_CLOEXEC'): + flags = fcntl.fcntl(self.fileno(), fcntl.F_GETFD) + flags |= fcntl.FD_CLOEXEC + fcntl.fcntl(self.fileno(), fcntl.F_SETFD, flags) + +class MultiPathXMLRPCServer(SimpleXMLRPCServer): + """Multipath XML-RPC Server + This specialization of SimpleXMLRPCServer allows the user to create + multiple Dispatcher instances and assign them to different + HTTP request paths. This makes it possible to run two or more + 'virtual XML-RPC servers' at the same port. + Make sure that the requestHandler accepts the paths in question. + """ + def __init__(self, addr, requestHandler=SimpleXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + + SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, allow_none, + encoding, bind_and_activate, use_builtin_types) + self.dispatchers = {} + self.allow_none = allow_none + self.encoding = encoding or 'utf-8' + + def add_dispatcher(self, path, dispatcher): + self.dispatchers[path] = dispatcher + return dispatcher + + def get_dispatcher(self, path): + return self.dispatchers[path] + + def _marshaled_dispatch(self, data, dispatch_method = None, path = None): + try: + response = self.dispatchers[path]._marshaled_dispatch( + data, dispatch_method, path) + except: + # report low level exception back to server + # (each dispatcher should have handled their own + # exceptions) + exc_type, exc_value = sys.exc_info()[:2] + response = dumps( + Fault(1, "%s:%s" % (exc_type, exc_value)), + encoding=self.encoding, allow_none=self.allow_none) + response = response.encode(self.encoding) + return response + +class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher): + """Simple handler for XML-RPC data passed through CGI.""" + + def __init__(self, allow_none=False, encoding=None, use_builtin_types=False): + SimpleXMLRPCDispatcher.__init__(self, allow_none, encoding, use_builtin_types) + + def handle_xmlrpc(self, request_text): + """Handle a single XML-RPC request""" + + response = self._marshaled_dispatch(request_text) + + print('Content-Type: text/xml') + print('Content-Length: %d' % len(response)) + print() + sys.stdout.flush() + sys.stdout.buffer.write(response) + sys.stdout.buffer.flush() + + def handle_get(self): + """Handle a single HTTP GET request. + + Default implementation indicates an error because + XML-RPC uses the POST method. + """ + + code = 400 + message, explain = BaseHTTPRequestHandler.responses[code] + + response = http_server.DEFAULT_ERROR_MESSAGE % \ + { + 'code' : code, + 'message' : message, + 'explain' : explain + } + response = response.encode('utf-8') + print('Status: %d %s' % (code, message)) + print('Content-Type: %s' % http_server.DEFAULT_ERROR_CONTENT_TYPE) + print('Content-Length: %d' % len(response)) + print() + sys.stdout.flush() + sys.stdout.buffer.write(response) + sys.stdout.buffer.flush() + + def handle_request(self, request_text=None): + """Handle a single XML-RPC request passed through a CGI post method. + + If no XML data is given then it is read from stdin. The resulting + XML-RPC response is printed to stdout along with the correct HTTP + headers. + """ + + if request_text is None and \ + os.environ.get('REQUEST_METHOD', None) == 'GET': + self.handle_get() + else: + # POST data is normally available through stdin + try: + length = int(os.environ.get('CONTENT_LENGTH', None)) + except (ValueError, TypeError): + length = -1 + if request_text is None: + request_text = sys.stdin.read(length) + + self.handle_xmlrpc(request_text) + + +# ----------------------------------------------------------------------------- +# Self documenting XML-RPC Server. + +class ServerHTMLDoc(pydoc.HTMLDoc): + """Class used to generate pydoc HTML document for a server""" + + def markup(self, text, escape=None, funcs={}, classes={}, methods={}): + """Mark up some plain text, given a context of symbols to look for. + Each context dictionary maps object names to anchor names.""" + escape = escape or self.escape + results = [] + here = 0 + + # XXX Note that this regular expression does not allow for the + # hyperlinking of arbitrary strings being used as method + # names. Only methods with names consisting of word characters + # and '.'s are hyperlinked. + pattern = re.compile(r'\b((http|ftp)://\S+[\w/]|' + r'RFC[- ]?(\d+)|' + r'PEP[- ]?(\d+)|' + r'(self\.)?((?:\w|\.)+))\b') + while 1: + match = pattern.search(text, here) + if not match: break + start, end = match.span() + results.append(escape(text[here:start])) + + all, scheme, rfc, pep, selfdot, name = match.groups() + if scheme: + url = escape(all).replace('"', '"') + results.append('%s' % (url, url)) + elif rfc: + url = 'http://www.rfc-editor.org/rfc/rfc%d.txt' % int(rfc) + results.append('%s' % (url, escape(all))) + elif pep: + url = 'http://www.python.org/dev/peps/pep-%04d/' % int(pep) + results.append('%s' % (url, escape(all))) + elif text[end:end+1] == '(': + results.append(self.namelink(name, methods, funcs, classes)) + elif selfdot: + results.append('self.%s' % name) + else: + results.append(self.namelink(name, classes)) + here = end + results.append(escape(text[here:])) + return ''.join(results) + + def docroutine(self, object, name, mod=None, + funcs={}, classes={}, methods={}, cl=None): + """Produce HTML documentation for a function or method object.""" + + anchor = (cl and cl.__name__ or '') + '-' + name + note = '' + + title = '%s' % ( + self.escape(anchor), self.escape(name)) + + if inspect.ismethod(object): + args = inspect.getfullargspec(object) + # exclude the argument bound to the instance, it will be + # confusing to the non-Python user + argspec = inspect.formatargspec ( + args.args[1:], + args.varargs, + args.varkw, + args.defaults, + annotations=args.annotations, + formatvalue=self.formatvalue + ) + elif inspect.isfunction(object): + args = inspect.getfullargspec(object) + argspec = inspect.formatargspec( + args.args, args.varargs, args.varkw, args.defaults, + annotations=args.annotations, + formatvalue=self.formatvalue) + else: + argspec = '(...)' + + if isinstance(object, tuple): + argspec = object[0] or argspec + docstring = object[1] or "" + else: + docstring = pydoc.getdoc(object) + + decl = title + argspec + (note and self.grey( + '%s' % note)) + + doc = self.markup( + docstring, self.preformat, funcs, classes, methods) + doc = doc and '

%s
' % doc + return '
%s
%s
\n' % (decl, doc) + + def docserver(self, server_name, package_documentation, methods): + """Produce HTML documentation for an XML-RPC server.""" + + fdict = {} + for key, value in methods.items(): + fdict[key] = '#-' + key + fdict[value] = fdict[key] + + server_name = self.escape(server_name) + head = '%s' % server_name + result = self.heading(head, '#ffffff', '#7799ee') + + doc = self.markup(package_documentation, self.preformat, fdict) + doc = doc and '%s' % doc + result = result + '

%s

\n' % doc + + contents = [] + method_items = sorted(methods.items()) + for key, value in method_items: + contents.append(self.docroutine(value, key, funcs=fdict)) + result = result + self.bigsection( + 'Methods', '#ffffff', '#eeaa77', ''.join(contents)) + + return result + +class XMLRPCDocGenerator(object): + """Generates documentation for an XML-RPC server. + + This class is designed as mix-in and should not + be constructed directly. + """ + + def __init__(self): + # setup variables used for HTML documentation + self.server_name = 'XML-RPC Server Documentation' + self.server_documentation = \ + "This server exports the following methods through the XML-RPC "\ + "protocol." + self.server_title = 'XML-RPC Server Documentation' + + def set_server_title(self, server_title): + """Set the HTML title of the generated server documentation""" + + self.server_title = server_title + + def set_server_name(self, server_name): + """Set the name of the generated HTML server documentation""" + + self.server_name = server_name + + def set_server_documentation(self, server_documentation): + """Set the documentation string for the entire server.""" + + self.server_documentation = server_documentation + + def generate_html_documentation(self): + """generate_html_documentation() => html documentation for the server + + Generates HTML documentation for the server using introspection for + installed functions and instances that do not implement the + _dispatch method. Alternatively, instances can choose to implement + the _get_method_argstring(method_name) method to provide the + argument string used in the documentation and the + _methodHelp(method_name) method to provide the help text used + in the documentation.""" + + methods = {} + + for method_name in self.system_listMethods(): + if method_name in self.funcs: + method = self.funcs[method_name] + elif self.instance is not None: + method_info = [None, None] # argspec, documentation + if hasattr(self.instance, '_get_method_argstring'): + method_info[0] = self.instance._get_method_argstring(method_name) + if hasattr(self.instance, '_methodHelp'): + method_info[1] = self.instance._methodHelp(method_name) + + method_info = tuple(method_info) + if method_info != (None, None): + method = method_info + elif not hasattr(self.instance, '_dispatch'): + try: + method = resolve_dotted_attribute( + self.instance, + method_name + ) + except AttributeError: + method = method_info + else: + method = method_info + else: + assert 0, "Could not find method in self.functions and no "\ + "instance installed" + + methods[method_name] = method + + documenter = ServerHTMLDoc() + documentation = documenter.docserver( + self.server_name, + self.server_documentation, + methods + ) + + return documenter.page(self.server_title, documentation) + +class DocXMLRPCRequestHandler(SimpleXMLRPCRequestHandler): + """XML-RPC and documentation request handler class. + + Handles all HTTP POST requests and attempts to decode them as + XML-RPC requests. + + Handles all HTTP GET requests and interprets them as requests + for documentation. + """ + + def do_GET(self): + """Handles the HTTP GET request. + + Interpret all HTTP GET requests as requests for server + documentation. + """ + # Check that the path is legal + if not self.is_rpc_path_valid(): + self.report_404() + return + + response = self.server.generate_html_documentation().encode('utf-8') + self.send_response(200) + self.send_header("Content-type", "text/html") + self.send_header("Content-length", str(len(response))) + self.end_headers() + self.wfile.write(response) + +class DocXMLRPCServer( SimpleXMLRPCServer, + XMLRPCDocGenerator): + """XML-RPC and HTML documentation server. + + Adds the ability to serve server documentation to the capabilities + of SimpleXMLRPCServer. + """ + + def __init__(self, addr, requestHandler=DocXMLRPCRequestHandler, + logRequests=True, allow_none=False, encoding=None, + bind_and_activate=True, use_builtin_types=False): + SimpleXMLRPCServer.__init__(self, addr, requestHandler, logRequests, + allow_none, encoding, bind_and_activate, + use_builtin_types) + XMLRPCDocGenerator.__init__(self) + +class DocCGIXMLRPCRequestHandler( CGIXMLRPCRequestHandler, + XMLRPCDocGenerator): + """Handler for XML-RPC data and documentation requests passed through + CGI""" + + def handle_get(self): + """Handles the HTTP GET request. + + Interpret all HTTP GET requests as requests for server + documentation. + """ + + response = self.generate_html_documentation().encode('utf-8') + + print('Content-Type: text/html') + print('Content-Length: %d' % len(response)) + print() + sys.stdout.flush() + sys.stdout.buffer.write(response) + sys.stdout.buffer.flush() + + def __init__(self): + CGIXMLRPCRequestHandler.__init__(self) + XMLRPCDocGenerator.__init__(self) + + +if __name__ == '__main__': + import datetime + + class ExampleService: + def getData(self): + return '42' + + class currentTime: + @staticmethod + def getCurrentTime(): + return datetime.datetime.now() + + server = SimpleXMLRPCServer(("localhost", 8000)) + server.register_function(pow) + server.register_function(lambda x,y: x+y, 'add') + server.register_instance(ExampleService(), allow_dotted_names=True) + server.register_multicall_functions() + print('Serving XML-RPC on localhost port 8000') + print('It is advisable to run this example server within a secure, closed network.') + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nKeyboard interrupt received, exiting.") + server.server_close() + sys.exit(0) diff --git a/lib/future/builtins/__init__.py b/lib/future/builtins/__init__.py new file mode 100644 index 00000000..8bc1649d --- /dev/null +++ b/lib/future/builtins/__init__.py @@ -0,0 +1,51 @@ +""" +A module that brings in equivalents of the new and modified Python 3 +builtins into Py2. Has no effect on Py3. + +See the docs `here `_ +(``docs/what-else.rst``) for more information. + +""" + +from future.builtins.iterators import (filter, map, zip) +# The isinstance import is no longer needed. We provide it only for +# backward-compatibility with future v0.8.2. It will be removed in future v1.0. +from future.builtins.misc import (ascii, chr, hex, input, isinstance, next, + oct, open, pow, round, super, max, min) +from future.utils import PY3 + +if PY3: + import builtins + bytes = builtins.bytes + dict = builtins.dict + int = builtins.int + list = builtins.list + object = builtins.object + range = builtins.range + str = builtins.str + __all__ = [] +else: + from future.types import (newbytes as bytes, + newdict as dict, + newint as int, + newlist as list, + newobject as object, + newrange as range, + newstr as str) +from future import utils + + +if not utils.PY3: + # We only import names that shadow the builtins on Py2. No other namespace + # pollution on Py2. + + # Only shadow builtins on Py2; no new names + __all__ = ['filter', 'map', 'zip', + 'ascii', 'chr', 'hex', 'input', 'next', 'oct', 'open', 'pow', + 'round', 'super', + 'bytes', 'dict', 'int', 'list', 'object', 'range', 'str', 'max', 'min' + ] + +else: + # No namespace pollution on Py3 + __all__ = [] diff --git a/lib/future/builtins/disabled.py b/lib/future/builtins/disabled.py new file mode 100644 index 00000000..f6d6ea9b --- /dev/null +++ b/lib/future/builtins/disabled.py @@ -0,0 +1,66 @@ +""" +This disables builtin functions (and one exception class) which are +removed from Python 3.3. + +This module is designed to be used like this:: + + from future.builtins.disabled import * + +This disables the following obsolete Py2 builtin functions:: + + apply, cmp, coerce, execfile, file, input, long, + raw_input, reduce, reload, unicode, xrange + +We don't hack __builtin__, which is very fragile because it contaminates +imported modules too. Instead, we just create new functions with +the same names as the obsolete builtins from Python 2 which raise +NameError exceptions when called. + +Note that both ``input()`` and ``raw_input()`` are among the disabled +functions (in this module). Although ``input()`` exists as a builtin in +Python 3, the Python 2 ``input()`` builtin is unsafe to use because it +can lead to shell injection. Therefore we shadow it by default upon ``from +future.builtins.disabled import *``, in case someone forgets to import our +replacement ``input()`` somehow and expects Python 3 semantics. + +See the ``future.builtins.misc`` module for a working version of +``input`` with Python 3 semantics. + +(Note that callable() is not among the functions disabled; this was +reintroduced into Python 3.2.) + +This exception class is also disabled: + + StandardError + +""" + +from __future__ import division, absolute_import, print_function + +from future import utils + + +OBSOLETE_BUILTINS = ['apply', 'chr', 'cmp', 'coerce', 'execfile', 'file', + 'input', 'long', 'raw_input', 'reduce', 'reload', + 'unicode', 'xrange', 'StandardError'] + + +def disabled_function(name): + ''' + Returns a function that cannot be called + ''' + def disabled(*args, **kwargs): + ''' + A function disabled by the ``future`` module. This function is + no longer a builtin in Python 3. + ''' + raise NameError('obsolete Python 2 builtin {0} is disabled'.format(name)) + return disabled + + +if not utils.PY3: + for fname in OBSOLETE_BUILTINS: + locals()[fname] = disabled_function(fname) + __all__ = OBSOLETE_BUILTINS +else: + __all__ = [] diff --git a/lib/future/builtins/iterators.py b/lib/future/builtins/iterators.py new file mode 100644 index 00000000..dff651e0 --- /dev/null +++ b/lib/future/builtins/iterators.py @@ -0,0 +1,52 @@ +""" +This module is designed to be used as follows:: + + from future.builtins.iterators import * + +And then, for example:: + + for i in range(10**15): + pass + + for (a, b) in zip(range(10**15), range(-10**15, 0)): + pass + +Note that this is standard Python 3 code, plus some imports that do +nothing on Python 3. + +The iterators this brings in are:: + +- ``range`` +- ``filter`` +- ``map`` +- ``zip`` + +On Python 2, ``range`` is a pure-Python backport of Python 3's ``range`` +iterator with slicing support. The other iterators (``filter``, ``map``, +``zip``) are from the ``itertools`` module on Python 2. On Python 3 these +are available in the module namespace but not exported for * imports via +__all__ (zero no namespace pollution). + +Note that these are also available in the standard library +``future_builtins`` module on Python 2 -- but not Python 3, so using +the standard library version is not portable, nor anywhere near complete. +""" + +from __future__ import division, absolute_import, print_function + +import itertools +from future import utils + +if not utils.PY3: + filter = itertools.ifilter + map = itertools.imap + from future.types import newrange as range + zip = itertools.izip + __all__ = ['filter', 'map', 'range', 'zip'] +else: + import builtins + filter = builtins.filter + map = builtins.map + range = builtins.range + zip = builtins.zip + __all__ = [] diff --git a/lib/future/builtins/misc.py b/lib/future/builtins/misc.py new file mode 100644 index 00000000..f86ce5f3 --- /dev/null +++ b/lib/future/builtins/misc.py @@ -0,0 +1,135 @@ +""" +A module that brings in equivalents of various modified Python 3 builtins +into Py2. Has no effect on Py3. + +The builtin functions are: + +- ``ascii`` (from Py2's future_builtins module) +- ``hex`` (from Py2's future_builtins module) +- ``oct`` (from Py2's future_builtins module) +- ``chr`` (equivalent to ``unichr`` on Py2) +- ``input`` (equivalent to ``raw_input`` on Py2) +- ``next`` (calls ``__next__`` if it exists, else ``next`` method) +- ``open`` (equivalent to io.open on Py2) +- ``super`` (backport of Py3's magic zero-argument super() function +- ``round`` (new "Banker's Rounding" behaviour from Py3) +- ``max`` (new default option from Py3.4) +- ``min`` (new default option from Py3.4) + +``isinstance`` is also currently exported for backwards compatibility +with v0.8.2, although this has been deprecated since v0.9. + + +input() +------- +Like the new ``input()`` function from Python 3 (without eval()), except +that it returns bytes. Equivalent to Python 2's ``raw_input()``. + +Warning: By default, importing this module *removes* the old Python 2 +input() function entirely from ``__builtin__`` for safety. This is +because forgetting to import the new ``input`` from ``future`` might +otherwise lead to a security vulnerability (shell injection) on Python 2. + +To restore it, you can retrieve it yourself from +``__builtin__._old_input``. + +Fortunately, ``input()`` seems to be seldom used in the wild in Python +2... + +""" + +from future import utils + + +if utils.PY2: + from io import open + from future_builtins import ascii, oct, hex + from __builtin__ import unichr as chr, pow as _builtin_pow + import __builtin__ + + # Only for backward compatibility with future v0.8.2: + isinstance = __builtin__.isinstance + + # Warning: Python 2's input() is unsafe and MUST not be able to be used + # accidentally by someone who expects Python 3 semantics but forgets + # to import it on Python 2. Versions of ``future`` prior to 0.11 + # deleted it from __builtin__. Now we keep in __builtin__ but shadow + # the name like all others. Just be sure to import ``input``. + + input = raw_input + + from future.builtins.newnext import newnext as next + from future.builtins.newround import newround as round + from future.builtins.newsuper import newsuper as super + from future.builtins.new_min_max import newmax as max + from future.builtins.new_min_max import newmin as min + from future.types.newint import newint + + _SENTINEL = object() + + def pow(x, y, z=_SENTINEL): + """ + pow(x, y[, z]) -> number + + With two arguments, equivalent to x**y. With three arguments, + equivalent to (x**y) % z, but may be more efficient (e.g. for ints). + """ + # Handle newints + if isinstance(x, newint): + x = long(x) + if isinstance(y, newint): + y = long(y) + if isinstance(z, newint): + z = long(z) + + try: + if z == _SENTINEL: + return _builtin_pow(x, y) + else: + return _builtin_pow(x, y, z) + except ValueError: + if z == _SENTINEL: + return _builtin_pow(x+0j, y) + else: + return _builtin_pow(x+0j, y, z) + + + # ``future`` doesn't support Py3.0/3.1. If we ever did, we'd add this: + # callable = __builtin__.callable + + __all__ = ['ascii', 'chr', 'hex', 'input', 'isinstance', 'next', 'oct', + 'open', 'pow', 'round', 'super', 'max', 'min'] + +else: + import builtins + ascii = builtins.ascii + chr = builtins.chr + hex = builtins.hex + input = builtins.input + next = builtins.next + # Only for backward compatibility with future v0.8.2: + isinstance = builtins.isinstance + oct = builtins.oct + open = builtins.open + pow = builtins.pow + round = builtins.round + super = builtins.super + if utils.PY34_PLUS: + max = builtins.max + min = builtins.min + __all__ = [] + else: + from future.builtins.new_min_max import newmax as max + from future.builtins.new_min_max import newmin as min + __all__ = ['min', 'max'] + + # The callable() function was removed from Py3.0 and 3.1 and + # reintroduced into Py3.2+. ``future`` doesn't support Py3.0/3.1. If we ever + # did, we'd add this: + # try: + # callable = builtins.callable + # except AttributeError: + # # Definition from Pandas + # def callable(obj): + # return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + # __all__.append('callable') diff --git a/lib/future/builtins/new_min_max.py b/lib/future/builtins/new_min_max.py new file mode 100644 index 00000000..6f0c2a86 --- /dev/null +++ b/lib/future/builtins/new_min_max.py @@ -0,0 +1,59 @@ +import itertools + +from future import utils +if utils.PY2: + from __builtin__ import max as _builtin_max, min as _builtin_min +else: + from builtins import max as _builtin_max, min as _builtin_min + +_SENTINEL = object() + + +def newmin(*args, **kwargs): + return new_min_max(_builtin_min, *args, **kwargs) + + +def newmax(*args, **kwargs): + return new_min_max(_builtin_max, *args, **kwargs) + + +def new_min_max(_builtin_func, *args, **kwargs): + """ + To support the argument "default" introduced in python 3.4 for min and max + :param _builtin_func: builtin min or builtin max + :param args: + :param kwargs: + :return: returns the min or max based on the arguments passed + """ + + for key, _ in kwargs.items(): + if key not in set(['key', 'default']): + raise TypeError('Illegal argument %s', key) + + if len(args) == 0: + raise TypeError + + if len(args) != 1 and kwargs.get('default', _SENTINEL) is not _SENTINEL: + raise TypeError + + if len(args) == 1: + iterator = iter(args[0]) + try: + first = next(iterator) + except StopIteration: + if kwargs.get('default', _SENTINEL) is not _SENTINEL: + return kwargs.get('default') + else: + raise ValueError('{}() arg is an empty sequence'.format(_builtin_func.__name__)) + else: + iterator = itertools.chain([first], iterator) + if kwargs.get('key') is not None: + return _builtin_func(iterator, key=kwargs.get('key')) + else: + return _builtin_func(iterator) + + if len(args) > 1: + if kwargs.get('key') is not None: + return _builtin_func(args, key=kwargs.get('key')) + else: + return _builtin_func(args) diff --git a/lib/future/builtins/newnext.py b/lib/future/builtins/newnext.py new file mode 100644 index 00000000..097638ac --- /dev/null +++ b/lib/future/builtins/newnext.py @@ -0,0 +1,70 @@ +''' +This module provides a newnext() function in Python 2 that mimics the +behaviour of ``next()`` in Python 3, falling back to Python 2's behaviour for +compatibility if this fails. + +``newnext(iterator)`` calls the iterator's ``__next__()`` method if it exists. If this +doesn't exist, it falls back to calling a ``next()`` method. + +For example: + + >>> class Odds(object): + ... def __init__(self, start=1): + ... self.value = start - 2 + ... def __next__(self): # note the Py3 interface + ... self.value += 2 + ... return self.value + ... def __iter__(self): + ... return self + ... + >>> iterator = Odds() + >>> next(iterator) + 1 + >>> next(iterator) + 3 + +If you are defining your own custom iterator class as above, it is preferable +to explicitly decorate the class with the @implements_iterator decorator from +``future.utils`` as follows: + + >>> @implements_iterator + ... class Odds(object): + ... # etc + ... pass + +This next() function is primarily for consuming iterators defined in Python 3 +code elsewhere that we would like to run on Python 2 or 3. +''' + +_builtin_next = next + +_SENTINEL = object() + +def newnext(iterator, default=_SENTINEL): + """ + next(iterator[, default]) + + Return the next item from the iterator. If default is given and the iterator + is exhausted, it is returned instead of raising StopIteration. + """ + + # args = [] + # if default is not _SENTINEL: + # args.append(default) + try: + try: + return iterator.__next__() + except AttributeError: + try: + return iterator.next() + except AttributeError: + raise TypeError("'{0}' object is not an iterator".format( + iterator.__class__.__name__)) + except StopIteration as e: + if default is _SENTINEL: + raise e + else: + return default + + +__all__ = ['newnext'] diff --git a/lib/future/builtins/newround.py b/lib/future/builtins/newround.py new file mode 100644 index 00000000..394a2c63 --- /dev/null +++ b/lib/future/builtins/newround.py @@ -0,0 +1,102 @@ +""" +``python-future``: pure Python implementation of Python 3 round(). +""" + +from future.utils import PYPY, PY26, bind_method + +# Use the decimal module for simplicity of implementation (and +# hopefully correctness). +from decimal import Decimal, ROUND_HALF_EVEN + + +def newround(number, ndigits=None): + """ + See Python 3 documentation: uses Banker's Rounding. + + Delegates to the __round__ method if for some reason this exists. + + If not, rounds a number to a given precision in decimal digits (default + 0 digits). This returns an int when called with one argument, + otherwise the same type as the number. ndigits may be negative. + + See the test_round method in future/tests/test_builtins.py for + examples. + """ + return_int = False + if ndigits is None: + return_int = True + ndigits = 0 + if hasattr(number, '__round__'): + return number.__round__(ndigits) + + if ndigits < 0: + raise NotImplementedError('negative ndigits not supported yet') + exponent = Decimal('10') ** (-ndigits) + + if PYPY: + # Work around issue #24: round() breaks on PyPy with NumPy's types + if 'numpy' in repr(type(number)): + number = float(number) + + if isinstance(number, Decimal): + d = number + else: + if not PY26: + d = Decimal.from_float(number).quantize(exponent, + rounding=ROUND_HALF_EVEN) + else: + d = from_float_26(number).quantize(exponent, rounding=ROUND_HALF_EVEN) + + if return_int: + return int(d) + else: + return float(d) + + +### From Python 2.7's decimal.py. Only needed to support Py2.6: + +def from_float_26(f): + """Converts a float to a decimal number, exactly. + + Note that Decimal.from_float(0.1) is not the same as Decimal('0.1'). + Since 0.1 is not exactly representable in binary floating point, the + value is stored as the nearest representable value which is + 0x1.999999999999ap-4. The exact equivalent of the value in decimal + is 0.1000000000000000055511151231257827021181583404541015625. + + >>> Decimal.from_float(0.1) + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_float(float('nan')) + Decimal('NaN') + >>> Decimal.from_float(float('inf')) + Decimal('Infinity') + >>> Decimal.from_float(-float('inf')) + Decimal('-Infinity') + >>> Decimal.from_float(-0.0) + Decimal('-0') + + """ + import math as _math + from decimal import _dec_from_triple # only available on Py2.6 and Py2.7 (not 3.3) + + if isinstance(f, (int, long)): # handle integer inputs + return Decimal(f) + if _math.isinf(f) or _math.isnan(f): # raises TypeError if not a float + return Decimal(repr(f)) + if _math.copysign(1.0, f) == 1.0: + sign = 0 + else: + sign = 1 + n, d = abs(f).as_integer_ratio() + # int.bit_length() method doesn't exist on Py2.6: + def bit_length(d): + if d != 0: + return len(bin(abs(d))) - 2 + else: + return 0 + k = bit_length(d) - 1 + result = _dec_from_triple(sign, str(n*5**k), -k) + return result + + +__all__ = ['newround'] diff --git a/lib/future/builtins/newsuper.py b/lib/future/builtins/newsuper.py new file mode 100644 index 00000000..5d3402bd --- /dev/null +++ b/lib/future/builtins/newsuper.py @@ -0,0 +1,114 @@ +''' +This module provides a newsuper() function in Python 2 that mimics the +behaviour of super() in Python 3. It is designed to be used as follows: + + from __future__ import division, absolute_import, print_function + from future.builtins import super + +And then, for example: + + class VerboseList(list): + def append(self, item): + print('Adding an item') + super().append(item) # new simpler super() function + +Importing this module on Python 3 has no effect. + +This is based on (i.e. almost identical to) Ryan Kelly's magicsuper +module here: + + https://github.com/rfk/magicsuper.git + +Excerpts from Ryan's docstring: + + "Of course, you can still explicitly pass in the arguments if you want + to do something strange. Sometimes you really do want that, e.g. to + skip over some classes in the method resolution order. + + "How does it work? By inspecting the calling frame to determine the + function object being executed and the object on which it's being + called, and then walking the object's __mro__ chain to find out where + that function was defined. Yuck, but it seems to work..." +''' + +from __future__ import absolute_import +import sys +from types import FunctionType + +from future.utils import PY3, PY26 + + +_builtin_super = super + +_SENTINEL = object() + +def newsuper(typ=_SENTINEL, type_or_obj=_SENTINEL, framedepth=1): + '''Like builtin super(), but capable of magic. + + This acts just like the builtin super() function, but if called + without any arguments it attempts to infer them at runtime. + ''' + # Infer the correct call if used without arguments. + if typ is _SENTINEL: + # We'll need to do some frame hacking. + f = sys._getframe(framedepth) + + try: + # Get the function's first positional argument. + type_or_obj = f.f_locals[f.f_code.co_varnames[0]] + except (IndexError, KeyError,): + raise RuntimeError('super() used in a function with no args') + + try: + # Get the MRO so we can crawl it. + mro = type_or_obj.__mro__ + except (AttributeError, RuntimeError): # see issue #160 + try: + mro = type_or_obj.__class__.__mro__ + except AttributeError: + raise RuntimeError('super() used with a non-newstyle class') + + # A ``for...else`` block? Yes! It's odd, but useful. + # If unfamiliar with for...else, see: + # + # http://psung.blogspot.com/2007/12/for-else-in-python.html + for typ in mro: + # Find the class that owns the currently-executing method. + for meth in typ.__dict__.values(): + # Drill down through any wrappers to the underlying func. + # This handles e.g. classmethod() and staticmethod(). + try: + while not isinstance(meth,FunctionType): + if isinstance(meth, property): + # Calling __get__ on the property will invoke + # user code which might throw exceptions or have + # side effects + meth = meth.fget + else: + try: + meth = meth.__func__ + except AttributeError: + meth = meth.__get__(type_or_obj, typ) + except (AttributeError, TypeError): + continue + if meth.func_code is f.f_code: + break # Aha! Found you. + else: + continue # Not found! Move onto the next class in MRO. + break # Found! Break out of the search loop. + else: + raise RuntimeError('super() called outside a method') + + # Dispatch to builtin super(). + if type_or_obj is not _SENTINEL: + return _builtin_super(typ, type_or_obj) + return _builtin_super(typ) + + +def superm(*args, **kwds): + f = sys._getframe(1) + nm = f.f_code.co_name + return getattr(newsuper(framedepth=2),nm)(*args, **kwds) + + +__all__ = ['newsuper'] diff --git a/lib/future/moves/__init__.py b/lib/future/moves/__init__.py new file mode 100644 index 00000000..0cd60d3d --- /dev/null +++ b/lib/future/moves/__init__.py @@ -0,0 +1,8 @@ +# future.moves package +from __future__ import absolute_import +import sys +__future_module__ = True +from future.standard_library import import_top_level_modules + +if sys.version_info[0] >= 3: + import_top_level_modules() diff --git a/lib/future/moves/_dummy_thread.py b/lib/future/moves/_dummy_thread.py new file mode 100644 index 00000000..688d249b --- /dev/null +++ b/lib/future/moves/_dummy_thread.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from _dummy_thread import * +else: + __future_module__ = True + from dummy_thread import * diff --git a/lib/future/moves/_markupbase.py b/lib/future/moves/_markupbase.py new file mode 100644 index 00000000..f9fb4bbf --- /dev/null +++ b/lib/future/moves/_markupbase.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from _markupbase import * +else: + __future_module__ = True + from markupbase import * diff --git a/lib/future/moves/_thread.py b/lib/future/moves/_thread.py new file mode 100644 index 00000000..c68018bb --- /dev/null +++ b/lib/future/moves/_thread.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from _thread import * +else: + __future_module__ = True + from thread import * diff --git a/lib/future/moves/builtins.py b/lib/future/moves/builtins.py new file mode 100644 index 00000000..e4b6221d --- /dev/null +++ b/lib/future/moves/builtins.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from builtins import * +else: + __future_module__ = True + from __builtin__ import * + # Overwrite any old definitions with the equivalent future.builtins ones: + from future.builtins import * diff --git a/lib/future/moves/collections.py b/lib/future/moves/collections.py new file mode 100644 index 00000000..664ee6a3 --- /dev/null +++ b/lib/future/moves/collections.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import +import sys + +from future.utils import PY2, PY26 +__future_module__ = True + +from collections import * + +if PY2: + from UserDict import UserDict + from UserList import UserList + from UserString import UserString + +if PY26: + from future.backports.misc import OrderedDict, Counter + +if sys.version_info < (3, 3): + from future.backports.misc import ChainMap, _count_elements diff --git a/lib/future/moves/configparser.py b/lib/future/moves/configparser.py new file mode 100644 index 00000000..33d9cf95 --- /dev/null +++ b/lib/future/moves/configparser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from future.utils import PY2 + +if PY2: + from ConfigParser import * +else: + from configparser import * diff --git a/lib/future/moves/copyreg.py b/lib/future/moves/copyreg.py new file mode 100644 index 00000000..9d08cdc5 --- /dev/null +++ b/lib/future/moves/copyreg.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + import copyreg, sys + # A "*" import uses Python 3's copyreg.__all__ which does not include + # all public names in the API surface for copyreg, this avoids that + # problem by just making our module _be_ a reference to the actual module. + sys.modules['future.moves.copyreg'] = copyreg +else: + __future_module__ = True + from copy_reg import * diff --git a/lib/future/moves/dbm/__init__.py b/lib/future/moves/dbm/__init__.py new file mode 100644 index 00000000..626b406f --- /dev/null +++ b/lib/future/moves/dbm/__init__.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from dbm import * +else: + __future_module__ = True + from whichdb import * + from anydbm import * + +# Py3.3's dbm/__init__.py imports ndbm but doesn't expose it via __all__. +# In case some (badly written) code depends on dbm.ndbm after import dbm, +# we simulate this: +if PY3: + from dbm import ndbm +else: + try: + from future.moves.dbm import ndbm + except ImportError: + ndbm = None diff --git a/lib/future/moves/dbm/dumb.py b/lib/future/moves/dbm/dumb.py new file mode 100644 index 00000000..528383f6 --- /dev/null +++ b/lib/future/moves/dbm/dumb.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.dumb import * +else: + __future_module__ = True + from dumbdbm import * diff --git a/lib/future/moves/dbm/gnu.py b/lib/future/moves/dbm/gnu.py new file mode 100644 index 00000000..68ccf67b --- /dev/null +++ b/lib/future/moves/dbm/gnu.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.gnu import * +else: + __future_module__ = True + from gdbm import * diff --git a/lib/future/moves/dbm/ndbm.py b/lib/future/moves/dbm/ndbm.py new file mode 100644 index 00000000..8c6fff8a --- /dev/null +++ b/lib/future/moves/dbm/ndbm.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from dbm.ndbm import * +else: + __future_module__ = True + from dbm import * diff --git a/lib/future/moves/html/__init__.py b/lib/future/moves/html/__init__.py new file mode 100644 index 00000000..22ed6e7d --- /dev/null +++ b/lib/future/moves/html/__init__.py @@ -0,0 +1,31 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if PY3: + from html import * +else: + # cgi.escape isn't good enough for the single Py3.3 html test to pass. + # Define it inline here instead. From the Py3.4 stdlib. Note that the + # html.escape() function from the Py3.3 stdlib is not suitable for use on + # Py2.x. + """ + General functions for HTML manipulation. + """ + + def escape(s, quote=True): + """ + Replace special characters "&", "<" and ">" to HTML-safe sequences. + If the optional flag quote is true (the default), the quotation mark + characters, both double quote (") and single quote (') characters are also + translated. + """ + s = s.replace("&", "&") # Must be done first! + s = s.replace("<", "<") + s = s.replace(">", ">") + if quote: + s = s.replace('"', """) + s = s.replace('\'', "'") + return s + + __all__ = ['escape'] diff --git a/lib/future/moves/html/entities.py b/lib/future/moves/html/entities.py new file mode 100644 index 00000000..56a88609 --- /dev/null +++ b/lib/future/moves/html/entities.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from html.entities import * +else: + __future_module__ = True + from htmlentitydefs import * diff --git a/lib/future/moves/html/parser.py b/lib/future/moves/html/parser.py new file mode 100644 index 00000000..a6115b59 --- /dev/null +++ b/lib/future/moves/html/parser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if PY3: + from html.parser import * +else: + from HTMLParser import * diff --git a/lib/future/moves/http/__init__.py b/lib/future/moves/http/__init__.py new file mode 100644 index 00000000..917b3d71 --- /dev/null +++ b/lib/future/moves/http/__init__.py @@ -0,0 +1,4 @@ +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/lib/future/moves/http/client.py b/lib/future/moves/http/client.py new file mode 100644 index 00000000..55f9c9c1 --- /dev/null +++ b/lib/future/moves/http/client.py @@ -0,0 +1,8 @@ +from future.utils import PY3 + +if PY3: + from http.client import * +else: + from httplib import * + from httplib import HTTPMessage + __future_module__ = True diff --git a/lib/future/moves/http/cookiejar.py b/lib/future/moves/http/cookiejar.py new file mode 100644 index 00000000..ea00df77 --- /dev/null +++ b/lib/future/moves/http/cookiejar.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.cookiejar import * +else: + __future_module__ = True + from cookielib import * diff --git a/lib/future/moves/http/cookies.py b/lib/future/moves/http/cookies.py new file mode 100644 index 00000000..1b74fe2d --- /dev/null +++ b/lib/future/moves/http/cookies.py @@ -0,0 +1,9 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.cookies import * +else: + __future_module__ = True + from Cookie import * + from Cookie import Morsel # left out of __all__ on Py2.7! diff --git a/lib/future/moves/http/server.py b/lib/future/moves/http/server.py new file mode 100644 index 00000000..4e75cc1d --- /dev/null +++ b/lib/future/moves/http/server.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from http.server import * +else: + __future_module__ = True + from BaseHTTPServer import * + from CGIHTTPServer import * + from SimpleHTTPServer import * + try: + from CGIHTTPServer import _url_collapse_path # needed for a test + except ImportError: + try: + # Python 2.7.0 to 2.7.3 + from CGIHTTPServer import ( + _url_collapse_path_split as _url_collapse_path) + except ImportError: + # Doesn't exist on Python 2.6.x. Ignore it. + pass diff --git a/lib/future/moves/itertools.py b/lib/future/moves/itertools.py new file mode 100644 index 00000000..e5eb20d5 --- /dev/null +++ b/lib/future/moves/itertools.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from itertools import * +try: + zip_longest = izip_longest + filterfalse = ifilterfalse +except NameError: + pass diff --git a/lib/future/moves/pickle.py b/lib/future/moves/pickle.py new file mode 100644 index 00000000..c53d6939 --- /dev/null +++ b/lib/future/moves/pickle.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from pickle import * +else: + __future_module__ = True + try: + from cPickle import * + except ImportError: + from pickle import * diff --git a/lib/future/moves/queue.py b/lib/future/moves/queue.py new file mode 100644 index 00000000..1cb1437d --- /dev/null +++ b/lib/future/moves/queue.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from queue import * +else: + __future_module__ = True + from Queue import * diff --git a/lib/future/moves/reprlib.py b/lib/future/moves/reprlib.py new file mode 100644 index 00000000..a313a13a --- /dev/null +++ b/lib/future/moves/reprlib.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from reprlib import * +else: + __future_module__ = True + from repr import * diff --git a/lib/future/moves/socketserver.py b/lib/future/moves/socketserver.py new file mode 100644 index 00000000..062e0848 --- /dev/null +++ b/lib/future/moves/socketserver.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from socketserver import * +else: + __future_module__ = True + from SocketServer import * diff --git a/lib/future/moves/subprocess.py b/lib/future/moves/subprocess.py new file mode 100644 index 00000000..43ffd2ac --- /dev/null +++ b/lib/future/moves/subprocess.py @@ -0,0 +1,11 @@ +from __future__ import absolute_import +from future.utils import PY2, PY26 + +from subprocess import * + +if PY2: + __future_module__ = True + from commands import getoutput, getstatusoutput + +if PY26: + from future.backports.misc import check_output diff --git a/lib/future/moves/sys.py b/lib/future/moves/sys.py new file mode 100644 index 00000000..1293bcb0 --- /dev/null +++ b/lib/future/moves/sys.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import + +from future.utils import PY2 + +from sys import * + +if PY2: + from __builtin__ import intern diff --git a/lib/future/moves/test/__init__.py b/lib/future/moves/test/__init__.py new file mode 100644 index 00000000..5cf428b6 --- /dev/null +++ b/lib/future/moves/test/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/lib/future/moves/test/support.py b/lib/future/moves/test/support.py new file mode 100644 index 00000000..e9aa0f48 --- /dev/null +++ b/lib/future/moves/test/support.py @@ -0,0 +1,10 @@ +from __future__ import absolute_import +from future.standard_library import suspend_hooks +from future.utils import PY3 + +if PY3: + from test.support import * +else: + __future_module__ = True + with suspend_hooks(): + from test.test_support import * diff --git a/lib/future/moves/tkinter/__init__.py b/lib/future/moves/tkinter/__init__.py new file mode 100644 index 00000000..e4082966 --- /dev/null +++ b/lib/future/moves/tkinter/__init__.py @@ -0,0 +1,27 @@ +from __future__ import absolute_import +from future.utils import PY3 +__future_module__ = True + +if not PY3: + from Tkinter import * + from Tkinter import (_cnfmerge, _default_root, _flatten, + _support_default_root, _test, + _tkinter, _setit) + + try: # >= 2.7.4 + from Tkinter import (_join) + except ImportError: + pass + + try: # >= 2.7.4 + from Tkinter import (_stringify) + except ImportError: + pass + + try: # >= 2.7.9 + from Tkinter import (_splitdict) + except ImportError: + pass + +else: + from tkinter import * diff --git a/lib/future/moves/tkinter/colorchooser.py b/lib/future/moves/tkinter/colorchooser.py new file mode 100644 index 00000000..6dde6e8d --- /dev/null +++ b/lib/future/moves/tkinter/colorchooser.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.colorchooser import * +else: + try: + from tkColorChooser import * + except ImportError: + raise ImportError('The tkColorChooser module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/commondialog.py b/lib/future/moves/tkinter/commondialog.py new file mode 100644 index 00000000..eb7ae8d6 --- /dev/null +++ b/lib/future/moves/tkinter/commondialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.commondialog import * +else: + try: + from tkCommonDialog import * + except ImportError: + raise ImportError('The tkCommonDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/constants.py b/lib/future/moves/tkinter/constants.py new file mode 100644 index 00000000..ffe09815 --- /dev/null +++ b/lib/future/moves/tkinter/constants.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.constants import * +else: + try: + from Tkconstants import * + except ImportError: + raise ImportError('The Tkconstants module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/dialog.py b/lib/future/moves/tkinter/dialog.py new file mode 100644 index 00000000..113370ca --- /dev/null +++ b/lib/future/moves/tkinter/dialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dialog import * +else: + try: + from Dialog import * + except ImportError: + raise ImportError('The Dialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/dnd.py b/lib/future/moves/tkinter/dnd.py new file mode 100644 index 00000000..1ab43791 --- /dev/null +++ b/lib/future/moves/tkinter/dnd.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.dnd import * +else: + try: + from Tkdnd import * + except ImportError: + raise ImportError('The Tkdnd module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/filedialog.py b/lib/future/moves/tkinter/filedialog.py new file mode 100644 index 00000000..973923e2 --- /dev/null +++ b/lib/future/moves/tkinter/filedialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.filedialog import * +else: + try: + from FileDialog import * + except ImportError: + raise ImportError('The FileDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/font.py b/lib/future/moves/tkinter/font.py new file mode 100644 index 00000000..628f399a --- /dev/null +++ b/lib/future/moves/tkinter/font.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.font import * +else: + try: + from tkFont import * + except ImportError: + raise ImportError('The tkFont module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/messagebox.py b/lib/future/moves/tkinter/messagebox.py new file mode 100644 index 00000000..b43d8702 --- /dev/null +++ b/lib/future/moves/tkinter/messagebox.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.messagebox import * +else: + try: + from tkMessageBox import * + except ImportError: + raise ImportError('The tkMessageBox module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/scrolledtext.py b/lib/future/moves/tkinter/scrolledtext.py new file mode 100644 index 00000000..1c69db60 --- /dev/null +++ b/lib/future/moves/tkinter/scrolledtext.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.scrolledtext import * +else: + try: + from ScrolledText import * + except ImportError: + raise ImportError('The ScrolledText module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/simpledialog.py b/lib/future/moves/tkinter/simpledialog.py new file mode 100644 index 00000000..dba93fbf --- /dev/null +++ b/lib/future/moves/tkinter/simpledialog.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.simpledialog import * +else: + try: + from SimpleDialog import * + except ImportError: + raise ImportError('The SimpleDialog module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/tix.py b/lib/future/moves/tkinter/tix.py new file mode 100644 index 00000000..8d1718ad --- /dev/null +++ b/lib/future/moves/tkinter/tix.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.tix import * +else: + try: + from Tix import * + except ImportError: + raise ImportError('The Tix module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/tkinter/ttk.py b/lib/future/moves/tkinter/ttk.py new file mode 100644 index 00000000..081c1b49 --- /dev/null +++ b/lib/future/moves/tkinter/ttk.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import + +from future.utils import PY3 + +if PY3: + from tkinter.ttk import * +else: + try: + from ttk import * + except ImportError: + raise ImportError('The ttk module is missing. Does your Py2 ' + 'installation include tkinter?') diff --git a/lib/future/moves/urllib/__init__.py b/lib/future/moves/urllib/__init__.py new file mode 100644 index 00000000..5cf428b6 --- /dev/null +++ b/lib/future/moves/urllib/__init__.py @@ -0,0 +1,5 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if not PY3: + __future_module__ = True diff --git a/lib/future/moves/urllib/error.py b/lib/future/moves/urllib/error.py new file mode 100644 index 00000000..7d8ada73 --- /dev/null +++ b/lib/future/moves/urllib/error.py @@ -0,0 +1,16 @@ +from __future__ import absolute_import +from future.standard_library import suspend_hooks + +from future.utils import PY3 + +if PY3: + from urllib.error import * +else: + __future_module__ = True + + # We use this method to get at the original Py2 urllib before any renaming magic + # ContentTooShortError = sys.py2_modules['urllib'].ContentTooShortError + + with suspend_hooks(): + from urllib import ContentTooShortError + from urllib2 import URLError, HTTPError diff --git a/lib/future/moves/urllib/parse.py b/lib/future/moves/urllib/parse.py new file mode 100644 index 00000000..9074b816 --- /dev/null +++ b/lib/future/moves/urllib/parse.py @@ -0,0 +1,28 @@ +from __future__ import absolute_import +from future.standard_library import suspend_hooks + +from future.utils import PY3 + +if PY3: + from urllib.parse import * +else: + __future_module__ = True + from urlparse import (ParseResult, SplitResult, parse_qs, parse_qsl, + urldefrag, urljoin, urlparse, urlsplit, + urlunparse, urlunsplit) + + # we use this method to get at the original py2 urllib before any renaming + # quote = sys.py2_modules['urllib'].quote + # quote_plus = sys.py2_modules['urllib'].quote_plus + # unquote = sys.py2_modules['urllib'].unquote + # unquote_plus = sys.py2_modules['urllib'].unquote_plus + # urlencode = sys.py2_modules['urllib'].urlencode + # splitquery = sys.py2_modules['urllib'].splitquery + + with suspend_hooks(): + from urllib import (quote, + quote_plus, + unquote, + unquote_plus, + urlencode, + splitquery) diff --git a/lib/future/moves/urllib/request.py b/lib/future/moves/urllib/request.py new file mode 100644 index 00000000..972aa4ab --- /dev/null +++ b/lib/future/moves/urllib/request.py @@ -0,0 +1,94 @@ +from __future__ import absolute_import + +from future.standard_library import suspend_hooks +from future.utils import PY3 + +if PY3: + from urllib.request import * + # This aren't in __all__: + from urllib.request import (getproxies, + pathname2url, + proxy_bypass, + quote, + request_host, + thishost, + unquote, + url2pathname, + urlcleanup, + urljoin, + urlopen, + urlparse, + urlretrieve, + urlsplit, + urlunparse) + + from urllib.parse import (splitattr, + splithost, + splitpasswd, + splitport, + splitquery, + splittag, + splittype, + splituser, + splitvalue, + to_bytes, + unwrap) +else: + __future_module__ = True + with suspend_hooks(): + from urllib import * + from urllib2 import * + from urlparse import * + + # Rename: + from urllib import toBytes # missing from __all__ on Py2.6 + to_bytes = toBytes + + # from urllib import (pathname2url, + # url2pathname, + # getproxies, + # urlretrieve, + # urlcleanup, + # URLopener, + # FancyURLopener, + # proxy_bypass) + + # from urllib2 import ( + # AbstractBasicAuthHandler, + # AbstractDigestAuthHandler, + # BaseHandler, + # CacheFTPHandler, + # FileHandler, + # FTPHandler, + # HTTPBasicAuthHandler, + # HTTPCookieProcessor, + # HTTPDefaultErrorHandler, + # HTTPDigestAuthHandler, + # HTTPErrorProcessor, + # HTTPHandler, + # HTTPPasswordMgr, + # HTTPPasswordMgrWithDefaultRealm, + # HTTPRedirectHandler, + # HTTPSHandler, + # URLError, + # build_opener, + # install_opener, + # OpenerDirector, + # ProxyBasicAuthHandler, + # ProxyDigestAuthHandler, + # ProxyHandler, + # Request, + # UnknownHandler, + # urlopen, + # ) + + # from urlparse import ( + # urldefrag + # urljoin, + # urlparse, + # urlunparse, + # urlsplit, + # urlunsplit, + # parse_qs, + # parse_q" + # ) diff --git a/lib/future/moves/urllib/response.py b/lib/future/moves/urllib/response.py new file mode 100644 index 00000000..a287ae28 --- /dev/null +++ b/lib/future/moves/urllib/response.py @@ -0,0 +1,12 @@ +from future import standard_library +from future.utils import PY3 + +if PY3: + from urllib.response import * +else: + __future_module__ = True + with standard_library.suspend_hooks(): + from urllib import (addbase, + addclosehook, + addinfo, + addinfourl) diff --git a/lib/future/moves/urllib/robotparser.py b/lib/future/moves/urllib/robotparser.py new file mode 100644 index 00000000..0dc8f571 --- /dev/null +++ b/lib/future/moves/urllib/robotparser.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from urllib.robotparser import * +else: + __future_module__ = True + from robotparser import * diff --git a/lib/future/moves/winreg.py b/lib/future/moves/winreg.py new file mode 100644 index 00000000..c8b14756 --- /dev/null +++ b/lib/future/moves/winreg.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from winreg import * +else: + __future_module__ = True + from _winreg import * diff --git a/lib/future/moves/xmlrpc/__init__.py b/lib/future/moves/xmlrpc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/moves/xmlrpc/client.py b/lib/future/moves/xmlrpc/client.py new file mode 100644 index 00000000..4708cf89 --- /dev/null +++ b/lib/future/moves/xmlrpc/client.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from xmlrpc.client import * +else: + from xmlrpclib import * diff --git a/lib/future/moves/xmlrpc/server.py b/lib/future/moves/xmlrpc/server.py new file mode 100644 index 00000000..1a8af345 --- /dev/null +++ b/lib/future/moves/xmlrpc/server.py @@ -0,0 +1,7 @@ +from __future__ import absolute_import +from future.utils import PY3 + +if PY3: + from xmlrpc.server import * +else: + from xmlrpclib import * diff --git a/lib/future/standard_library/__init__.py b/lib/future/standard_library/__init__.py new file mode 100644 index 00000000..cff02f95 --- /dev/null +++ b/lib/future/standard_library/__init__.py @@ -0,0 +1,815 @@ +""" +Python 3 reorganized the standard library (PEP 3108). This module exposes +several standard library modules to Python 2 under their new Python 3 +names. + +It is designed to be used as follows:: + + from future import standard_library + standard_library.install_aliases() + +And then these normal Py3 imports work on both Py3 and Py2:: + + import builtins + import copyreg + import queue + import reprlib + import socketserver + import winreg # on Windows only + import test.support + import html, html.parser, html.entites + import http, http.client, http.server + import http.cookies, http.cookiejar + import urllib.parse, urllib.request, urllib.response, urllib.error, urllib.robotparser + import xmlrpc.client, xmlrpc.server + + import _thread + import _dummy_thread + import _markupbase + + from itertools import filterfalse, zip_longest + from sys import intern + from collections import UserDict, UserList, UserString + from collections import OrderedDict, Counter, ChainMap # even on Py2.6 + from subprocess import getoutput, getstatusoutput + from subprocess import check_output # even on Py2.6 + +(The renamed modules and functions are still available under their old +names on Python 2.) + +This is a cleaner alternative to this idiom (see +http://docs.pythonsprints.com/python3_porting/py-porting.html):: + + try: + import queue + except ImportError: + import Queue as queue + + +Limitations +----------- +We don't currently support these modules, but would like to:: + + import dbm + import dbm.dumb + import dbm.gnu + import collections.abc # on Py33 + import pickle # should (optionally) bring in cPickle on Python 2 + +""" + +from __future__ import absolute_import, division, print_function + +import sys +import logging +import imp +import contextlib +import types +import copy +import os + +# Make a dedicated logger; leave the root logger to be configured +# by the application. +flog = logging.getLogger('future_stdlib') +_formatter = logging.Formatter(logging.BASIC_FORMAT) +_handler = logging.StreamHandler() +_handler.setFormatter(_formatter) +flog.addHandler(_handler) +flog.setLevel(logging.WARN) + +from future.utils import PY2, PY3 + +# The modules that are defined under the same names on Py3 but with +# different contents in a significant way (e.g. submodules) are: +# pickle (fast one) +# dbm +# urllib +# test +# email + +REPLACED_MODULES = set(['test', 'urllib', 'pickle', 'dbm']) # add email and dbm when we support it + +# The following module names are not present in Python 2.x, so they cause no +# potential clashes between the old and new names: +# http +# html +# tkinter +# xmlrpc +# Keys: Py2 / real module names +# Values: Py3 / simulated module names +RENAMES = { + # 'cStringIO': 'io', # there's a new io module in Python 2.6 + # that provides StringIO and BytesIO + # 'StringIO': 'io', # ditto + # 'cPickle': 'pickle', + '__builtin__': 'builtins', + 'copy_reg': 'copyreg', + 'Queue': 'queue', + 'future.moves.socketserver': 'socketserver', + 'ConfigParser': 'configparser', + 'repr': 'reprlib', + # 'FileDialog': 'tkinter.filedialog', + # 'tkFileDialog': 'tkinter.filedialog', + # 'SimpleDialog': 'tkinter.simpledialog', + # 'tkSimpleDialog': 'tkinter.simpledialog', + # 'tkColorChooser': 'tkinter.colorchooser', + # 'tkCommonDialog': 'tkinter.commondialog', + # 'Dialog': 'tkinter.dialog', + # 'Tkdnd': 'tkinter.dnd', + # 'tkFont': 'tkinter.font', + # 'tkMessageBox': 'tkinter.messagebox', + # 'ScrolledText': 'tkinter.scrolledtext', + # 'Tkconstants': 'tkinter.constants', + # 'Tix': 'tkinter.tix', + # 'ttk': 'tkinter.ttk', + # 'Tkinter': 'tkinter', + '_winreg': 'winreg', + 'thread': '_thread', + 'dummy_thread': '_dummy_thread', + # 'anydbm': 'dbm', # causes infinite import loop + # 'whichdb': 'dbm', # causes infinite import loop + # anydbm and whichdb are handled by fix_imports2 + # 'dbhash': 'dbm.bsd', + # 'dumbdbm': 'dbm.dumb', + # 'dbm': 'dbm.ndbm', + # 'gdbm': 'dbm.gnu', + 'future.moves.xmlrpc': 'xmlrpc', + # 'future.backports.email': 'email', # for use by urllib + # 'DocXMLRPCServer': 'xmlrpc.server', + # 'SimpleXMLRPCServer': 'xmlrpc.server', + # 'httplib': 'http.client', + # 'htmlentitydefs' : 'html.entities', + # 'HTMLParser' : 'html.parser', + # 'Cookie': 'http.cookies', + # 'cookielib': 'http.cookiejar', + # 'BaseHTTPServer': 'http.server', + # 'SimpleHTTPServer': 'http.server', + # 'CGIHTTPServer': 'http.server', + # 'future.backports.test': 'test', # primarily for renaming test_support to support + # 'commands': 'subprocess', + # 'urlparse' : 'urllib.parse', + # 'robotparser' : 'urllib.robotparser', + # 'abc': 'collections.abc', # for Py33 + # 'future.utils.six.moves.html': 'html', + # 'future.utils.six.moves.http': 'http', + 'future.moves.html': 'html', + 'future.moves.http': 'http', + # 'future.backports.urllib': 'urllib', + # 'future.utils.six.moves.urllib': 'urllib', + 'future.moves._markupbase': '_markupbase', + } + + +# It is complicated and apparently brittle to mess around with the +# ``sys.modules`` cache in order to support "import urllib" meaning two +# different things (Py2.7 urllib and backported Py3.3-like urllib) in different +# contexts. So we require explicit imports for these modules. +assert len(set(RENAMES.values()) & set(REPLACED_MODULES)) == 0 + + +# Harmless renames that we can insert. +# These modules need names from elsewhere being added to them: +# subprocess: should provide getoutput and other fns from commands +# module but these fns are missing: getstatus, mk2arg, +# mkarg +# re: needs an ASCII constant that works compatibly with Py3 + +# etc: see lib2to3/fixes/fix_imports.py + +# (New module name, new object name, old module name, old object name) +MOVES = [('collections', 'UserList', 'UserList', 'UserList'), + ('collections', 'UserDict', 'UserDict', 'UserDict'), + ('collections', 'UserString','UserString', 'UserString'), + ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), + ('itertools', 'filterfalse','itertools', 'ifilterfalse'), + ('itertools', 'zip_longest','itertools', 'izip_longest'), + ('sys', 'intern','__builtin__', 'intern'), + # The re module has no ASCII flag in Py2, but this is the default. + # Set re.ASCII to a zero constant. stat.ST_MODE just happens to be one + # (and it exists on Py2.6+). + ('re', 'ASCII','stat', 'ST_MODE'), + ('base64', 'encodebytes','base64', 'encodestring'), + ('base64', 'decodebytes','base64', 'decodestring'), + ('subprocess', 'getoutput', 'commands', 'getoutput'), + ('subprocess', 'getstatusoutput', 'commands', 'getstatusoutput'), + ('subprocess', 'check_output', 'future.backports.misc', 'check_output'), + ('math', 'ceil', 'future.backports.misc', 'ceil'), + ('collections', 'OrderedDict', 'future.backports.misc', 'OrderedDict'), + ('collections', 'Counter', 'future.backports.misc', 'Counter'), + ('collections', 'ChainMap', 'future.backports.misc', 'ChainMap'), + ('itertools', 'count', 'future.backports.misc', 'count'), + ('reprlib', 'recursive_repr', 'future.backports.misc', 'recursive_repr'), + ('functools', 'cmp_to_key', 'future.backports.misc', 'cmp_to_key'), + +# This is no use, since "import urllib.request" etc. still fails: +# ('urllib', 'error', 'future.moves.urllib', 'error'), +# ('urllib', 'parse', 'future.moves.urllib', 'parse'), +# ('urllib', 'request', 'future.moves.urllib', 'request'), +# ('urllib', 'response', 'future.moves.urllib', 'response'), +# ('urllib', 'robotparser', 'future.moves.urllib', 'robotparser'), + ] + + +# A minimal example of an import hook: +# class WarnOnImport(object): +# def __init__(self, *args): +# self.module_names = args +# +# def find_module(self, fullname, path=None): +# if fullname in self.module_names: +# self.path = path +# return self +# return None +# +# def load_module(self, name): +# if name in sys.modules: +# return sys.modules[name] +# module_info = imp.find_module(name, self.path) +# module = imp.load_module(name, *module_info) +# sys.modules[name] = module +# flog.warning("Imported deprecated module %s", name) +# return module + + +class RenameImport(object): + """ + A class for import hooks mapping Py3 module names etc. to the Py2 equivalents. + """ + # Different RenameImport classes are created when importing this module from + # different source files. This causes isinstance(hook, RenameImport) checks + # to produce inconsistent results. We add this RENAMER attribute here so + # remove_hooks() and install_hooks() can find instances of these classes + # easily: + RENAMER = True + + def __init__(self, old_to_new): + ''' + Pass in a dictionary-like object mapping from old names to new + names. E.g. {'ConfigParser': 'configparser', 'cPickle': 'pickle'} + ''' + self.old_to_new = old_to_new + both = set(old_to_new.keys()) & set(old_to_new.values()) + assert (len(both) == 0 and + len(set(old_to_new.values())) == len(old_to_new.values())), \ + 'Ambiguity in renaming (handler not implemented)' + self.new_to_old = dict((new, old) for (old, new) in old_to_new.items()) + + def find_module(self, fullname, path=None): + # Handles hierarchical importing: package.module.module2 + new_base_names = set([s.split('.')[0] for s in self.new_to_old]) + # Before v0.12: Was: if fullname in set(self.old_to_new) | new_base_names: + if fullname in new_base_names: + return self + return None + + def load_module(self, name): + path = None + if name in sys.modules: + return sys.modules[name] + elif name in self.new_to_old: + # New name. Look up the corresponding old (Py2) name: + oldname = self.new_to_old[name] + module = self._find_and_load_module(oldname) + # module.__future_module__ = True + else: + module = self._find_and_load_module(name) + # In any case, make it available under the requested (Py3) name + sys.modules[name] = module + return module + + def _find_and_load_module(self, name, path=None): + """ + Finds and loads it. But if there's a . in the name, handles it + properly. + """ + bits = name.split('.') + while len(bits) > 1: + # Treat the first bit as a package + packagename = bits.pop(0) + package = self._find_and_load_module(packagename, path) + try: + path = package.__path__ + except AttributeError: + # This could be e.g. moves. + flog.debug('Package {0} has no __path__.'.format(package)) + if name in sys.modules: + return sys.modules[name] + flog.debug('What to do here?') + + name = bits[0] + module_info = imp.find_module(name, path) + return imp.load_module(name, *module_info) + + +class hooks(object): + """ + Acts as a context manager. Saves the state of sys.modules and restores it + after the 'with' block. + + Use like this: + + >>> from future import standard_library + >>> with standard_library.hooks(): + ... import http.client + >>> import requests + + For this to work, http.client will be scrubbed from sys.modules after the + 'with' block. That way the modules imported in the 'with' block will + continue to be accessible in the current namespace but not from any + imported modules (like requests). + """ + def __enter__(self): + # flog.debug('Entering hooks context manager') + self.old_sys_modules = copy.copy(sys.modules) + self.hooks_were_installed = detect_hooks() + # self.scrubbed = scrub_py2_sys_modules() + install_hooks() + return self + + def __exit__(self, *args): + # flog.debug('Exiting hooks context manager') + # restore_sys_modules(self.scrubbed) + if not self.hooks_were_installed: + remove_hooks() + # scrub_future_sys_modules() + +# Sanity check for is_py2_stdlib_module(): We aren't replacing any +# builtin modules names: +if PY2: + assert len(set(RENAMES.values()) & set(sys.builtin_module_names)) == 0 + + +def is_py2_stdlib_module(m): + """ + Tries to infer whether the module m is from the Python 2 standard library. + This may not be reliable on all systems. + """ + if PY3: + return False + if not 'stdlib_path' in is_py2_stdlib_module.__dict__: + stdlib_files = [contextlib.__file__, os.__file__, copy.__file__] + stdlib_paths = [os.path.split(f)[0] for f in stdlib_files] + if not len(set(stdlib_paths)) == 1: + # This seems to happen on travis-ci.org. Very strange. We'll try to + # ignore it. + flog.warn('Multiple locations found for the Python standard ' + 'library: %s' % stdlib_paths) + # Choose the first one arbitrarily + is_py2_stdlib_module.stdlib_path = stdlib_paths[0] + + if m.__name__ in sys.builtin_module_names: + return True + + if hasattr(m, '__file__'): + modpath = os.path.split(m.__file__) + if (modpath[0].startswith(is_py2_stdlib_module.stdlib_path) and + 'site-packages' not in modpath[0]): + return True + + return False + + +def scrub_py2_sys_modules(): + """ + Removes any Python 2 standard library modules from ``sys.modules`` that + would interfere with Py3-style imports using import hooks. Examples are + modules with the same names (like urllib or email). + + (Note that currently import hooks are disabled for modules like these + with ambiguous names anyway ...) + """ + if PY3: + return {} + scrubbed = {} + for modulename in REPLACED_MODULES & set(RENAMES.keys()): + if not modulename in sys.modules: + continue + + module = sys.modules[modulename] + + if is_py2_stdlib_module(module): + flog.debug('Deleting (Py2) {} from sys.modules'.format(modulename)) + scrubbed[modulename] = sys.modules[modulename] + del sys.modules[modulename] + return scrubbed + + +def scrub_future_sys_modules(): + """ + Deprecated. + """ + return {} + +class suspend_hooks(object): + """ + Acts as a context manager. Use like this: + + >>> from future import standard_library + >>> standard_library.install_hooks() + >>> import http.client + >>> # ... + >>> with standard_library.suspend_hooks(): + >>> import requests # incompatible with ``future``'s standard library hooks + + If the hooks were disabled before the context, they are not installed when + the context is left. + """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + remove_hooks() + # self.scrubbed = scrub_future_sys_modules() + return self + + def __exit__(self, *args): + if self.hooks_were_installed: + install_hooks() + # restore_sys_modules(self.scrubbed) + + +def restore_sys_modules(scrubbed): + """ + Add any previously scrubbed modules back to the sys.modules cache, + but only if it's safe to do so. + """ + clash = set(sys.modules) & set(scrubbed) + if len(clash) != 0: + # If several, choose one arbitrarily to raise an exception about + first = list(clash)[0] + raise ImportError('future module {} clashes with Py2 module' + .format(first)) + sys.modules.update(scrubbed) + + +def install_aliases(): + """ + Monkey-patches the standard library in Py2.6/7 to provide + aliases for better Py3 compatibility. + """ + if PY3: + return + # if hasattr(install_aliases, 'run_already'): + # return + for (newmodname, newobjname, oldmodname, oldobjname) in MOVES: + __import__(newmodname) + # We look up the module in sys.modules because __import__ just returns the + # top-level package: + newmod = sys.modules[newmodname] + # newmod.__future_module__ = True + + __import__(oldmodname) + oldmod = sys.modules[oldmodname] + + obj = getattr(oldmod, oldobjname) + setattr(newmod, newobjname, obj) + + # Hack for urllib so it appears to have the same structure on Py2 as on Py3 + import urllib + from future.backports.urllib import request + from future.backports.urllib import response + from future.backports.urllib import parse + from future.backports.urllib import error + from future.backports.urllib import robotparser + urllib.request = request + urllib.response = response + urllib.parse = parse + urllib.error = error + urllib.robotparser = robotparser + sys.modules['urllib.request'] = request + sys.modules['urllib.response'] = response + sys.modules['urllib.parse'] = parse + sys.modules['urllib.error'] = error + sys.modules['urllib.robotparser'] = robotparser + + # Patch the test module so it appears to have the same structure on Py2 as on Py3 + try: + import test + except ImportError: + pass + try: + from future.moves.test import support + except ImportError: + pass + else: + test.support = support + sys.modules['test.support'] = support + + # Patch the dbm module so it appears to have the same structure on Py2 as on Py3 + try: + import dbm + except ImportError: + pass + else: + from future.moves.dbm import dumb + dbm.dumb = dumb + sys.modules['dbm.dumb'] = dumb + try: + from future.moves.dbm import gnu + except ImportError: + pass + else: + dbm.gnu = gnu + sys.modules['dbm.gnu'] = gnu + try: + from future.moves.dbm import ndbm + except ImportError: + pass + else: + dbm.ndbm = ndbm + sys.modules['dbm.ndbm'] = ndbm + + # install_aliases.run_already = True + + +def install_hooks(): + """ + This function installs the future.standard_library import hook into + sys.meta_path. + """ + if PY3: + return + + install_aliases() + + flog.debug('sys.meta_path was: {0}'.format(sys.meta_path)) + flog.debug('Installing hooks ...') + + # Add it unless it's there already + newhook = RenameImport(RENAMES) + if not detect_hooks(): + sys.meta_path.append(newhook) + flog.debug('sys.meta_path is now: {0}'.format(sys.meta_path)) + + +def enable_hooks(): + """ + Deprecated. Use install_hooks() instead. This will be removed by + ``future`` v1.0. + """ + install_hooks() + + +def remove_hooks(scrub_sys_modules=False): + """ + This function removes the import hook from sys.meta_path. + """ + if PY3: + return + flog.debug('Uninstalling hooks ...') + # Loop backwards, so deleting items keeps the ordering: + for i, hook in list(enumerate(sys.meta_path))[::-1]: + if hasattr(hook, 'RENAMER'): + del sys.meta_path[i] + + # Explicit is better than implicit. In the future the interface should + # probably change so that scrubbing the import hooks requires a separate + # function call. Left as is for now for backward compatibility with + # v0.11.x. + if scrub_sys_modules: + scrub_future_sys_modules() + + +def disable_hooks(): + """ + Deprecated. Use remove_hooks() instead. This will be removed by + ``future`` v1.0. + """ + remove_hooks() + + +def detect_hooks(): + """ + Returns True if the import hooks are installed, False if not. + """ + flog.debug('Detecting hooks ...') + present = any([hasattr(hook, 'RENAMER') for hook in sys.meta_path]) + if present: + flog.debug('Detected.') + else: + flog.debug('Not detected.') + return present + + +# As of v0.12, this no longer happens implicitly: +# if not PY3: +# install_hooks() + + +if not hasattr(sys, 'py2_modules'): + sys.py2_modules = {} + +def cache_py2_modules(): + """ + Currently this function is unneeded, as we are not attempting to provide import hooks + for modules with ambiguous names: email, urllib, pickle. + """ + if len(sys.py2_modules) != 0: + return + assert not detect_hooks() + import urllib + sys.py2_modules['urllib'] = urllib + + import email + sys.py2_modules['email'] = email + + import pickle + sys.py2_modules['pickle'] = pickle + + # Not all Python installations have test module. (Anaconda doesn't, for example.) + # try: + # import test + # except ImportError: + # sys.py2_modules['test'] = None + # sys.py2_modules['test'] = test + + # import dbm + # sys.py2_modules['dbm'] = dbm + + +def import_(module_name, backport=False): + """ + Pass a (potentially dotted) module name of a Python 3 standard library + module. This function imports the module compatibly on Py2 and Py3 and + returns the top-level module. + + Example use: + >>> http = import_('http.client') + >>> http = import_('http.server') + >>> urllib = import_('urllib.request') + + Then: + >>> conn = http.client.HTTPConnection(...) + >>> response = urllib.request.urlopen('http://mywebsite.com') + >>> # etc. + + Use as follows: + >>> package_name = import_(module_name) + + On Py3, equivalent to this: + + >>> import module_name + + On Py2, equivalent to this if backport=False: + + >>> from future.moves import module_name + + or to this if backport=True: + + >>> from future.backports import module_name + + except that it also handles dotted module names such as ``http.client`` + The effect then is like this: + + >>> from future.backports import module + >>> from future.backports.module import submodule + >>> module.submodule = submodule + + Note that this would be a SyntaxError in Python: + + >>> from future.backports import http.client + + """ + # Python 2.6 doesn't have importlib in the stdlib, so it requires + # the backported ``importlib`` package from PyPI as a dependency to use + # this function: + import importlib + + if PY3: + return __import__(module_name) + else: + # client.blah = blah + # Then http.client = client + # etc. + if backport: + prefix = 'future.backports' + else: + prefix = 'future.moves' + parts = prefix.split('.') + module_name.split('.') + + modules = [] + for i, part in enumerate(parts): + sofar = '.'.join(parts[:i+1]) + modules.append(importlib.import_module(sofar)) + for i, part in reversed(list(enumerate(parts))): + if i == 0: + break + setattr(modules[i-1], part, modules[i]) + + # Return the next-most top-level module after future.backports / future.moves: + return modules[2] + + +def from_import(module_name, *symbol_names, **kwargs): + """ + Example use: + >>> HTTPConnection = from_import('http.client', 'HTTPConnection') + >>> HTTPServer = from_import('http.server', 'HTTPServer') + >>> urlopen, urlparse = from_import('urllib.request', 'urlopen', 'urlparse') + + Equivalent to this on Py3: + + >>> from module_name import symbol_names[0], symbol_names[1], ... + + and this on Py2: + + >>> from future.moves.module_name import symbol_names[0], ... + + or: + + >>> from future.backports.module_name import symbol_names[0], ... + + except that it also handles dotted module names such as ``http.client``. + """ + + if PY3: + return __import__(module_name) + else: + if 'backport' in kwargs and bool(kwargs['backport']): + prefix = 'future.backports' + else: + prefix = 'future.moves' + parts = prefix.split('.') + module_name.split('.') + module = importlib.import_module(prefix + '.' + module_name) + output = [getattr(module, name) for name in symbol_names] + if len(output) == 1: + return output[0] + else: + return output + + +class exclude_local_folder_imports(object): + """ + A context-manager that prevents standard library modules like configparser + from being imported from the local python-future source folder on Py3. + + (This was need prior to v0.16.0 because the presence of a configparser + folder would otherwise have prevented setuptools from running on Py3. Maybe + it's not needed any more?) + """ + def __init__(self, *args): + assert len(args) > 0 + self.module_names = args + # Disallow dotted module names like http.client: + if any(['.' in m for m in self.module_names]): + raise NotImplementedError('Dotted module names are not supported') + + def __enter__(self): + self.old_sys_path = copy.copy(sys.path) + self.old_sys_modules = copy.copy(sys.modules) + if sys.version_info[0] < 3: + return + # The presence of all these indicates we've found our source folder, + # because `builtins` won't have been installed in site-packages by setup.py: + FUTURE_SOURCE_SUBFOLDERS = ['future', 'past', 'libfuturize', 'libpasteurize', 'builtins'] + + # Look for the future source folder: + for folder in self.old_sys_path: + if all([os.path.exists(os.path.join(folder, subfolder)) + for subfolder in FUTURE_SOURCE_SUBFOLDERS]): + # Found it. Remove it. + sys.path.remove(folder) + + # Ensure we import the system module: + for m in self.module_names: + # Delete the module and any submodules from sys.modules: + # for key in list(sys.modules): + # if key == m or key.startswith(m + '.'): + # try: + # del sys.modules[key] + # except KeyError: + # pass + try: + module = __import__(m, level=0) + except ImportError: + # There's a problem importing the system module. E.g. the + # winreg module is not available except on Windows. + pass + + def __exit__(self, *args): + # Restore sys.path and sys.modules: + sys.path = self.old_sys_path + for m in set(self.old_sys_modules.keys()) - set(sys.modules.keys()): + sys.modules[m] = self.old_sys_modules[m] + +TOP_LEVEL_MODULES = ['builtins', + 'copyreg', + 'html', + 'http', + 'queue', + 'reprlib', + 'socketserver', + 'test', + 'tkinter', + 'winreg', + 'xmlrpc', + '_dummy_thread', + '_markupbase', + '_thread', + ] + +def import_top_level_modules(): + with exclude_local_folder_imports(*TOP_LEVEL_MODULES): + for m in TOP_LEVEL_MODULES: + try: + __import__(m) + except ImportError: # e.g. winreg + pass diff --git a/lib/future/tests/__init__.py b/lib/future/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lib/future/tests/base.py b/lib/future/tests/base.py new file mode 100644 index 00000000..4ef437ba --- /dev/null +++ b/lib/future/tests/base.py @@ -0,0 +1,539 @@ +from __future__ import print_function, absolute_import +import os +import tempfile +import unittest +import sys +import re +import warnings +import io +from textwrap import dedent + +from future.utils import bind_method, PY26, PY3, PY2, PY27 +from future.moves.subprocess import check_output, STDOUT, CalledProcessError + +if PY26: + import unittest2 as unittest + + +def reformat_code(code): + """ + Removes any leading \n and dedents. + """ + if code.startswith('\n'): + code = code[1:] + return dedent(code) + + +def order_future_lines(code): + """ + Returns the code block with any ``__future__`` import lines sorted, and + then any ``future`` import lines sorted, then any ``builtins`` import lines + sorted. + + This only sorts the lines within the expected blocks. + + See test_order_future_lines() for an example. + """ + + # We need .splitlines(keepends=True), which doesn't exist on Py2, + # so we use this instead: + lines = code.split('\n') + + uufuture_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from __future__ import ')] + + future_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from future') + or line.startswith('from past')] + + builtins_line_numbers = [i for i, line in enumerate(lines) + if line.startswith('from builtins')] + + assert code.lstrip() == code, ('internal usage error: ' + 'dedent the code before calling order_future_lines()') + + def mymax(numbers): + return max(numbers) if len(numbers) > 0 else 0 + + def mymin(numbers): + return min(numbers) if len(numbers) > 0 else float('inf') + + assert mymax(uufuture_line_numbers) <= mymin(future_line_numbers), \ + 'the __future__ and future imports are out of order' + + # assert mymax(future_line_numbers) <= mymin(builtins_line_numbers), \ + # 'the future and builtins imports are out of order' + + uul = sorted([lines[i] for i in uufuture_line_numbers]) + sorted_uufuture_lines = dict(zip(uufuture_line_numbers, uul)) + + fl = sorted([lines[i] for i in future_line_numbers]) + sorted_future_lines = dict(zip(future_line_numbers, fl)) + + bl = sorted([lines[i] for i in builtins_line_numbers]) + sorted_builtins_lines = dict(zip(builtins_line_numbers, bl)) + + # Replace the old unsorted "from __future__ import ..." lines with the + # new sorted ones: + new_lines = [] + for i in range(len(lines)): + if i in uufuture_line_numbers: + new_lines.append(sorted_uufuture_lines[i]) + elif i in future_line_numbers: + new_lines.append(sorted_future_lines[i]) + elif i in builtins_line_numbers: + new_lines.append(sorted_builtins_lines[i]) + else: + new_lines.append(lines[i]) + return '\n'.join(new_lines) + + +class VerboseCalledProcessError(CalledProcessError): + """ + Like CalledProcessError, but it displays more information (message and + script output) for diagnosing test failures etc. + """ + def __init__(self, msg, returncode, cmd, output=None): + self.msg = msg + self.returncode = returncode + self.cmd = cmd + self.output = output + + def __str__(self): + return ("Command '%s' failed with exit status %d\nMessage: %s\nOutput: %s" + % (self.cmd, self.returncode, self.msg, self.output)) + +class FuturizeError(VerboseCalledProcessError): + pass + +class PasteurizeError(VerboseCalledProcessError): + pass + + +class CodeHandler(unittest.TestCase): + """ + Handy mixin for test classes for writing / reading / futurizing / + running .py files in the test suite. + """ + def setUp(self): + """ + The outputs from the various futurize stages should have the + following headers: + """ + # After stage1: + # TODO: use this form after implementing a fixer to consolidate + # __future__ imports into a single line: + # self.headers1 = """ + # from __future__ import absolute_import, division, print_function + # """ + self.headers1 = reformat_code(""" + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + """) + + # After stage2 --all-imports: + # TODO: use this form after implementing a fixer to consolidate + # __future__ imports into a single line: + # self.headers2 = """ + # from __future__ import (absolute_import, division, + # print_function, unicode_literals) + # from future import standard_library + # from future.builtins import * + # """ + self.headers2 = reformat_code(""" + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + from future import standard_library + standard_library.install_aliases() + from builtins import * + """) + self.interpreters = [sys.executable] + self.tempdir = tempfile.mkdtemp() + os.path.sep + pypath = os.getenv('PYTHONPATH') + if pypath: + self.env = {'PYTHONPATH': os.getcwd() + os.pathsep + pypath} + else: + self.env = {'PYTHONPATH': os.getcwd()} + + def convert(self, code, stages=(1, 2), all_imports=False, from3=False, + reformat=True, run=True, conservative=False): + """ + Converts the code block using ``futurize`` and returns the + resulting code. + + Passing stages=[1] or stages=[2] passes the flag ``--stage1`` or + ``stage2`` to ``futurize``. Passing both stages runs ``futurize`` + with both stages by default. + + If from3 is False, runs ``futurize``, converting from Python 2 to + both 2 and 3. If from3 is True, runs ``pasteurize`` to convert + from Python 3 to both 2 and 3. + + Optionally reformats the code block first using the reformat() function. + + If run is True, runs the resulting code under all Python + interpreters in self.interpreters. + """ + if reformat: + code = reformat_code(code) + self._write_test_script(code) + self._futurize_test_script(stages=stages, all_imports=all_imports, + from3=from3, conservative=conservative) + output = self._read_test_script() + if run: + for interpreter in self.interpreters: + _ = self._run_test_script(interpreter=interpreter) + return output + + def compare(self, output, expected, ignore_imports=True): + """ + Compares whether the code blocks are equal. If not, raises an + exception so the test fails. Ignores any trailing whitespace like + blank lines. + + If ignore_imports is True, passes the code blocks into the + strip_future_imports method. + + If one code block is a unicode string and the other a + byte-string, it assumes the byte-string is encoded as utf-8. + """ + if ignore_imports: + output = self.strip_future_imports(output) + expected = self.strip_future_imports(expected) + if isinstance(output, bytes) and not isinstance(expected, bytes): + output = output.decode('utf-8') + if isinstance(expected, bytes) and not isinstance(output, bytes): + expected = expected.decode('utf-8') + self.assertEqual(order_future_lines(output.rstrip()), + expected.rstrip()) + + def strip_future_imports(self, code): + """ + Strips any of these import lines: + + from __future__ import + from future + from future. + from builtins + + or any line containing: + install_hooks() + or: + install_aliases() + + Limitation: doesn't handle imports split across multiple lines like + this: + + from __future__ import (absolute_import, division, print_function, + unicode_literals) + """ + output = [] + # We need .splitlines(keepends=True), which doesn't exist on Py2, + # so we use this instead: + for line in code.split('\n'): + if not (line.startswith('from __future__ import ') + or line.startswith('from future ') + or line.startswith('from builtins ') + or 'install_hooks()' in line + or 'install_aliases()' in line + # but don't match "from future_builtins" :) + or line.startswith('from future.')): + output.append(line) + return '\n'.join(output) + + def convert_check(self, before, expected, stages=(1, 2), all_imports=False, + ignore_imports=True, from3=False, run=True, + conservative=False): + """ + Convenience method that calls convert() and compare(). + + Reformats the code blocks automatically using the reformat_code() + function. + + If all_imports is passed, we add the appropriate import headers + for the stage(s) selected to the ``expected`` code-block, so they + needn't appear repeatedly in the test code. + + If ignore_imports is True, ignores the presence of any lines + beginning: + + from __future__ import ... + from future import ... + + for the purpose of the comparison. + """ + output = self.convert(before, stages=stages, all_imports=all_imports, + from3=from3, run=run, conservative=conservative) + if all_imports: + headers = self.headers2 if 2 in stages else self.headers1 + else: + headers = '' + + reformatted = reformat_code(expected) + if headers in reformatted: + headers = '' + + self.compare(output, headers + reformatted, + ignore_imports=ignore_imports) + + def unchanged(self, code, **kwargs): + """ + Convenience method to ensure the code is unchanged by the + futurize process. + """ + self.convert_check(code, code, **kwargs) + + def _write_test_script(self, code, filename='mytestscript.py'): + """ + Dedents the given code (a multiline string) and writes it out to + a file in a temporary folder like /tmp/tmpUDCn7x/mytestscript.py. + """ + if isinstance(code, bytes): + code = code.decode('utf-8') + # Be explicit about encoding the temp file as UTF-8 (issue #63): + with io.open(self.tempdir + filename, 'wt', encoding='utf-8') as f: + f.write(dedent(code)) + + def _read_test_script(self, filename='mytestscript.py'): + with io.open(self.tempdir + filename, 'rt', encoding='utf-8') as f: + newsource = f.read() + return newsource + + def _futurize_test_script(self, filename='mytestscript.py', stages=(1, 2), + all_imports=False, from3=False, + conservative=False): + params = [] + stages = list(stages) + if all_imports: + params.append('--all-imports') + if from3: + script = 'pasteurize.py' + else: + script = 'futurize.py' + if stages == [1]: + params.append('--stage1') + elif stages == [2]: + params.append('--stage2') + else: + assert stages == [1, 2] + if conservative: + params.append('--conservative') + # No extra params needed + + # Absolute file path: + fn = self.tempdir + filename + call_args = [sys.executable, script] + params + ['-w', fn] + try: + output = check_output(call_args, stderr=STDOUT, env=self.env) + except CalledProcessError as e: + with open(fn) as f: + msg = ( + 'Error running the command %s\n' + '%s\n' + 'Contents of file %s:\n' + '\n' + '%s') % ( + ' '.join(call_args), + 'env=%s' % self.env, + fn, + '----\n%s\n----' % f.read(), + ) + ErrorClass = (FuturizeError if 'futurize' in script else PasteurizeError) + + if not hasattr(e, 'output'): + # The attribute CalledProcessError.output doesn't exist on Py2.6 + e.output = None + raise ErrorClass(msg, e.returncode, e.cmd, output=e.output) + return output + + def _run_test_script(self, filename='mytestscript.py', + interpreter=sys.executable): + # Absolute file path: + fn = self.tempdir + filename + try: + output = check_output([interpreter, fn], + env=self.env, stderr=STDOUT) + except CalledProcessError as e: + with open(fn) as f: + msg = ( + 'Error running the command %s\n' + '%s\n' + 'Contents of file %s:\n' + '\n' + '%s') % ( + ' '.join([interpreter, fn]), + 'env=%s' % self.env, + fn, + '----\n%s\n----' % f.read(), + ) + if not hasattr(e, 'output'): + # The attribute CalledProcessError.output doesn't exist on Py2.6 + e.output = None + raise VerboseCalledProcessError(msg, e.returncode, e.cmd, output=e.output) + return output + + +# Decorator to skip some tests on Python 2.6 ... +skip26 = unittest.skipIf(PY26, "this test is known to fail on Py2.6") + + +def expectedFailurePY3(func): + if not PY3: + return func + return unittest.expectedFailure(func) + +def expectedFailurePY26(func): + if not PY26: + return func + return unittest.expectedFailure(func) + + +def expectedFailurePY27(func): + if not PY27: + return func + return unittest.expectedFailure(func) + + +def expectedFailurePY2(func): + if not PY2: + return func + return unittest.expectedFailure(func) + + +# Renamed in Py3.3: +if not hasattr(unittest.TestCase, 'assertRaisesRegex'): + unittest.TestCase.assertRaisesRegex = unittest.TestCase.assertRaisesRegexp + +# From Py3.3: +def assertRegex(self, text, expected_regex, msg=None): + """Fail the test unless the text matches the regular expression.""" + if isinstance(expected_regex, (str, unicode)): + assert expected_regex, "expected_regex must not be empty." + expected_regex = re.compile(expected_regex) + if not expected_regex.search(text): + msg = msg or "Regex didn't match" + msg = '%s: %r not found in %r' % (msg, expected_regex.pattern, text) + raise self.failureException(msg) + +if not hasattr(unittest.TestCase, 'assertRegex'): + bind_method(unittest.TestCase, 'assertRegex', assertRegex) + +class _AssertRaisesBaseContext(object): + + def __init__(self, expected, test_case, callable_obj=None, + expected_regex=None): + self.expected = expected + self.test_case = test_case + if callable_obj is not None: + try: + self.obj_name = callable_obj.__name__ + except AttributeError: + self.obj_name = str(callable_obj) + else: + self.obj_name = None + if isinstance(expected_regex, (bytes, str)): + expected_regex = re.compile(expected_regex) + self.expected_regex = expected_regex + self.msg = None + + def _raiseFailure(self, standardMsg): + msg = self.test_case._formatMessage(self.msg, standardMsg) + raise self.test_case.failureException(msg) + + def handle(self, name, callable_obj, args, kwargs): + """ + If callable_obj is None, assertRaises/Warns is being used as a + context manager, so check for a 'msg' kwarg and return self. + If callable_obj is not None, call it passing args and kwargs. + """ + if callable_obj is None: + self.msg = kwargs.pop('msg', None) + return self + with self: + callable_obj(*args, **kwargs) + +class _AssertWarnsContext(_AssertRaisesBaseContext): + """A context manager used to implement TestCase.assertWarns* methods.""" + + def __enter__(self): + # The __warningregistry__'s need to be in a pristine state for tests + # to work properly. + for v in sys.modules.values(): + if getattr(v, '__warningregistry__', None): + v.__warningregistry__ = {} + self.warnings_manager = warnings.catch_warnings(record=True) + self.warnings = self.warnings_manager.__enter__() + warnings.simplefilter("always", self.expected) + return self + + def __exit__(self, exc_type, exc_value, tb): + self.warnings_manager.__exit__(exc_type, exc_value, tb) + if exc_type is not None: + # let unexpected exceptions pass through + return + try: + exc_name = self.expected.__name__ + except AttributeError: + exc_name = str(self.expected) + first_matching = None + for m in self.warnings: + w = m.message + if not isinstance(w, self.expected): + continue + if first_matching is None: + first_matching = w + if (self.expected_regex is not None and + not self.expected_regex.search(str(w))): + continue + # store warning for later retrieval + self.warning = w + self.filename = m.filename + self.lineno = m.lineno + return + # Now we simply try to choose a helpful failure message + if first_matching is not None: + self._raiseFailure('"{}" does not match "{}"'.format( + self.expected_regex.pattern, str(first_matching))) + if self.obj_name: + self._raiseFailure("{} not triggered by {}".format(exc_name, + self.obj_name)) + else: + self._raiseFailure("{} not triggered".format(exc_name)) + + +def assertWarns(self, expected_warning, callable_obj=None, *args, **kwargs): + """Fail unless a warning of class warnClass is triggered + by callable_obj when invoked with arguments args and keyword + arguments kwargs. If a different type of warning is + triggered, it will not be handled: depending on the other + warning filtering rules in effect, it might be silenced, printed + out, or raised as an exception. + + If called with callable_obj omitted or None, will return a + context object used like this:: + + with self.assertWarns(SomeWarning): + do_something() + + An optional keyword argument 'msg' can be provided when assertWarns + is used as a context object. + + The context manager keeps a reference to the first matching + warning as the 'warning' attribute; similarly, the 'filename' + and 'lineno' attributes give you information about the line + of Python code from which the warning was triggered. + This allows you to inspect the warning after the assertion:: + + with self.assertWarns(SomeWarning) as cm: + do_something() + the_warning = cm.warning + self.assertEqual(the_warning.some_attribute, 147) + """ + context = _AssertWarnsContext(expected_warning, self, callable_obj) + return context.handle('assertWarns', callable_obj, args, kwargs) + +if not hasattr(unittest.TestCase, 'assertWarns'): + bind_method(unittest.TestCase, 'assertWarns', assertWarns) diff --git a/lib/future/types/__init__.py b/lib/future/types/__init__.py new file mode 100644 index 00000000..06250770 --- /dev/null +++ b/lib/future/types/__init__.py @@ -0,0 +1,257 @@ +""" +This module contains backports the data types that were significantly changed +in the transition from Python 2 to Python 3. + +- an implementation of Python 3's bytes object (pure Python subclass of + Python 2's builtin 8-bit str type) +- an implementation of Python 3's str object (pure Python subclass of + Python 2's builtin unicode type) +- a backport of the range iterator from Py3 with slicing support + +It is used as follows:: + + from __future__ import division, absolute_import, print_function + from builtins import bytes, dict, int, range, str + +to bring in the new semantics for these functions from Python 3. And +then, for example:: + + b = bytes(b'ABCD') + assert list(b) == [65, 66, 67, 68] + assert repr(b) == "b'ABCD'" + assert [65, 66] in b + + # These raise TypeErrors: + # b + u'EFGH' + # b.split(u'B') + # bytes(b',').join([u'Fred', u'Bill']) + + + s = str(u'ABCD') + + # These raise TypeErrors: + # s.join([b'Fred', b'Bill']) + # s.startswith(b'A') + # b'B' in s + # s.find(b'A') + # s.replace(u'A', b'a') + + # This raises an AttributeError: + # s.decode('utf-8') + + assert repr(s) == 'ABCD' # consistent repr with Py3 (no u prefix) + + + for i in range(10**11)[:10]: + pass + +and:: + + class VerboseList(list): + def append(self, item): + print('Adding an item') + super().append(item) # new simpler super() function + +For more information: +--------------------- + +- future.types.newbytes +- future.types.newdict +- future.types.newint +- future.types.newobject +- future.types.newrange +- future.types.newstr + + +Notes +===== + +range() +------- +``range`` is a custom class that backports the slicing behaviour from +Python 3 (based on the ``xrange`` module by Dan Crosta). See the +``newrange`` module docstring for more details. + + +super() +------- +``super()`` is based on Ryan Kelly's ``magicsuper`` module. See the +``newsuper`` module docstring for more details. + + +round() +------- +Python 3 modifies the behaviour of ``round()`` to use "Banker's Rounding". +See http://stackoverflow.com/a/10825998. See the ``newround`` module +docstring for more details. + +""" + +from __future__ import absolute_import, division, print_function + +import functools +from numbers import Integral + +from future import utils + + +# Some utility functions to enforce strict type-separation of unicode str and +# bytes: +def disallow_types(argnums, disallowed_types): + """ + A decorator that raises a TypeError if any of the given numbered + arguments is of the corresponding given type (e.g. bytes or unicode + string). + + For example: + + @disallow_types([0, 1], [unicode, bytes]) + def f(a, b): + pass + + raises a TypeError when f is called if a unicode object is passed as + `a` or a bytes object is passed as `b`. + + This also skips over keyword arguments, so + + @disallow_types([0, 1], [unicode, bytes]) + def g(a, b=None): + pass + + doesn't raise an exception if g is called with only one argument a, + e.g.: + + g(b'Byte string') + + Example use: + + >>> class newbytes(object): + ... @disallow_types([1], [unicode]) + ... def __add__(self, other): + ... pass + + >>> newbytes('1234') + u'1234' #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError: can't concat 'bytes' to (unicode) str + """ + + def decorator(function): + + @functools.wraps(function) + def wrapper(*args, **kwargs): + # These imports are just for this decorator, and are defined here + # to prevent circular imports: + from .newbytes import newbytes + from .newint import newint + from .newstr import newstr + + errmsg = "argument can't be {0}" + for (argnum, mytype) in zip(argnums, disallowed_types): + # Handle the case where the type is passed as a string like 'newbytes'. + if isinstance(mytype, str) or isinstance(mytype, bytes): + mytype = locals()[mytype] + + # Only restrict kw args only if they are passed: + if len(args) <= argnum: + break + + # Here we use type() rather than isinstance() because + # __instancecheck__ is being overridden. E.g. + # isinstance(b'abc', newbytes) is True on Py2. + if type(args[argnum]) == mytype: + raise TypeError(errmsg.format(mytype)) + + return function(*args, **kwargs) + return wrapper + return decorator + + +def no(mytype, argnums=(1,)): + """ + A shortcut for the disallow_types decorator that disallows only one type + (in any position in argnums). + + Example use: + + >>> class newstr(object): + ... @no('bytes') + ... def __add__(self, other): + ... pass + + >>> newstr(u'1234') + b'1234' #doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + TypeError: argument can't be bytes + + The object can also be passed directly, but passing the string helps + to prevent circular import problems. + """ + if isinstance(argnums, Integral): + argnums = (argnums,) + disallowed_types = [mytype] * len(argnums) + return disallow_types(argnums, disallowed_types) + + +def issubset(list1, list2): + """ + Examples: + + >>> issubset([], [65, 66, 67]) + True + >>> issubset([65], [65, 66, 67]) + True + >>> issubset([65, 66], [65, 66, 67]) + True + >>> issubset([65, 67], [65, 66, 67]) + False + """ + n = len(list1) + for startpos in range(len(list2) - n + 1): + if list2[startpos:startpos+n] == list1: + return True + return False + + +if utils.PY3: + import builtins + bytes = builtins.bytes + dict = builtins.dict + int = builtins.int + list = builtins.list + object = builtins.object + range = builtins.range + str = builtins.str + + # The identity mapping + newtypes = {bytes: bytes, + dict: dict, + int: int, + list: list, + object: object, + range: range, + str: str} + + __all__ = ['newtypes'] + +else: + + from .newbytes import newbytes + from .newdict import newdict + from .newint import newint + from .newlist import newlist + from .newrange import newrange + from .newobject import newobject + from .newstr import newstr + + newtypes = {bytes: newbytes, + dict: newdict, + int: newint, + long: newint, + list: newlist, + object: newobject, + range: newrange, + str: newbytes, + unicode: newstr} + + __all__ = ['newbytes', 'newdict', 'newint', 'newlist', 'newrange', 'newstr', 'newtypes'] diff --git a/lib/future/types/newbytes.py b/lib/future/types/newbytes.py new file mode 100644 index 00000000..c9d584a7 --- /dev/null +++ b/lib/future/types/newbytes.py @@ -0,0 +1,460 @@ +""" +Pure-Python implementation of a Python 3-like bytes object for Python 2. + +Why do this? Without it, the Python 2 bytes object is a very, very +different beast to the Python 3 bytes object. +""" + +from numbers import Integral +import string +import copy + +from future.utils import istext, isbytes, PY2, PY3, with_metaclass +from future.types import no, issubset +from future.types.newobject import newobject + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + + +_builtin_bytes = bytes + +if PY3: + # We'll probably never use newstr on Py3 anyway... + unicode = str + + +class BaseNewBytes(type): + def __instancecheck__(cls, instance): + if cls == newbytes: + return isinstance(instance, _builtin_bytes) + else: + return issubclass(instance.__class__, cls) + + +def _newchr(x): + if isinstance(x, str): # this happens on pypy + return x.encode('ascii') + else: + return chr(x) + + +class newbytes(with_metaclass(BaseNewBytes, _builtin_bytes)): + """ + A backport of the Python 3 bytes object to Py2 + """ + def __new__(cls, *args, **kwargs): + """ + From the Py3 bytes docstring: + + bytes(iterable_of_ints) -> bytes + bytes(string, encoding[, errors]) -> bytes + bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer + bytes(int) -> bytes object of size given by the parameter initialized with null bytes + bytes() -> empty bytes object + + Construct an immutable array of bytes from: + - an iterable yielding integers in range(256) + - a text string encoded using the specified encoding + - any object implementing the buffer API. + - an integer + """ + + encoding = None + errors = None + + if len(args) == 0: + return super(newbytes, cls).__new__(cls) + elif len(args) >= 2: + args = list(args) + if len(args) == 3: + errors = args.pop() + encoding=args.pop() + # Was: elif isinstance(args[0], newbytes): + # We use type() instead of the above because we're redefining + # this to be True for all unicode string subclasses. Warning: + # This may render newstr un-subclassable. + if type(args[0]) == newbytes: + # Special-case: for consistency with Py3.3, we return the same object + # (with the same id) if a newbytes object is passed into the + # newbytes constructor. + return args[0] + elif isinstance(args[0], _builtin_bytes): + value = args[0] + elif isinstance(args[0], unicode): + try: + if 'encoding' in kwargs: + assert encoding is None + encoding = kwargs['encoding'] + if 'errors' in kwargs: + assert errors is None + errors = kwargs['errors'] + except AssertionError: + raise TypeError('Argument given by name and position') + if encoding is None: + raise TypeError('unicode string argument without an encoding') + ### + # Was: value = args[0].encode(**kwargs) + # Python 2.6 string encode() method doesn't take kwargs: + # Use this instead: + newargs = [encoding] + if errors is not None: + newargs.append(errors) + value = args[0].encode(*newargs) + ### + elif hasattr(args[0], '__bytes__'): + value = args[0].__bytes__() + elif isinstance(args[0], Iterable): + if len(args[0]) == 0: + # This could be an empty list or tuple. Return b'' as on Py3. + value = b'' + else: + # Was: elif len(args[0])>0 and isinstance(args[0][0], Integral): + # # It's a list of integers + # But then we can't index into e.g. frozensets. Try to proceed + # anyway. + try: + value = bytearray([_newchr(x) for x in args[0]]) + except: + raise ValueError('bytes must be in range(0, 256)') + elif isinstance(args[0], Integral): + if args[0] < 0: + raise ValueError('negative count') + value = b'\x00' * args[0] + else: + value = args[0] + if type(value) == newbytes: + # Above we use type(...) rather than isinstance(...) because the + # newbytes metaclass overrides __instancecheck__. + # oldbytes(value) gives the wrong thing on Py2: the same + # result as str(value) on Py3, e.g. "b'abc'". (Issue #193). + # So we handle this case separately: + return copy.copy(value) + else: + return super(newbytes, cls).__new__(cls, value) + + def __repr__(self): + return 'b' + super(newbytes, self).__repr__() + + def __str__(self): + return 'b' + "'{0}'".format(super(newbytes, self).__str__()) + + def __getitem__(self, y): + value = super(newbytes, self).__getitem__(y) + if isinstance(y, Integral): + return ord(value) + else: + return newbytes(value) + + def __getslice__(self, *args): + return self.__getitem__(slice(*args)) + + def __contains__(self, key): + if isinstance(key, int): + newbyteskey = newbytes([key]) + # Don't use isinstance() here because we only want to catch + # newbytes, not Python 2 str: + elif type(key) == newbytes: + newbyteskey = key + else: + newbyteskey = newbytes(key) + return issubset(list(newbyteskey), list(self)) + + @no(unicode) + def __add__(self, other): + return newbytes(super(newbytes, self).__add__(other)) + + @no(unicode) + def __radd__(self, left): + return newbytes(left) + self + + @no(unicode) + def __mul__(self, other): + return newbytes(super(newbytes, self).__mul__(other)) + + @no(unicode) + def __rmul__(self, other): + return newbytes(super(newbytes, self).__rmul__(other)) + + def __mod__(self, vals): + if isinstance(vals, newbytes): + vals = _builtin_bytes.__str__(vals) + + elif isinstance(vals, tuple): + newvals = [] + for v in vals: + if isinstance(v, newbytes): + v = _builtin_bytes.__str__(v) + newvals.append(v) + vals = tuple(newvals) + + elif (hasattr(vals.__class__, '__getitem__') and + hasattr(vals.__class__, 'iteritems')): + for k, v in vals.iteritems(): + if isinstance(v, newbytes): + vals[k] = _builtin_bytes.__str__(v) + + return _builtin_bytes.__mod__(self, vals) + + def __imod__(self, other): + return self.__mod__(other) + + def join(self, iterable_of_bytes): + errmsg = 'sequence item {0}: expected bytes, {1} found' + if isbytes(iterable_of_bytes) or istext(iterable_of_bytes): + raise TypeError(errmsg.format(0, type(iterable_of_bytes))) + for i, item in enumerate(iterable_of_bytes): + if istext(item): + raise TypeError(errmsg.format(i, type(item))) + return newbytes(super(newbytes, self).join(iterable_of_bytes)) + + @classmethod + def fromhex(cls, string): + # Only on Py2: + return cls(string.replace(' ', '').decode('hex')) + + @no(unicode) + def find(self, sub, *args): + return super(newbytes, self).find(sub, *args) + + @no(unicode) + def rfind(self, sub, *args): + return super(newbytes, self).rfind(sub, *args) + + @no(unicode, (1, 2)) + def replace(self, old, new, *args): + return newbytes(super(newbytes, self).replace(old, new, *args)) + + def encode(self, *args): + raise AttributeError("encode method has been disabled in newbytes") + + def decode(self, encoding='utf-8', errors='strict'): + """ + Returns a newstr (i.e. unicode subclass) + + Decode B using the codec registered for encoding. Default encoding + is 'utf-8'. errors may be given to set a different error + handling scheme. Default is 'strict' meaning that encoding errors raise + a UnicodeDecodeError. Other possible values are 'ignore' and 'replace' + as well as any other name registered with codecs.register_error that is + able to handle UnicodeDecodeErrors. + """ + # Py2 str.encode() takes encoding and errors as optional parameter, + # not keyword arguments as in Python 3 str. + + from future.types.newstr import newstr + + if errors == 'surrogateescape': + from future.utils.surrogateescape import register_surrogateescape + register_surrogateescape() + + return newstr(super(newbytes, self).decode(encoding, errors)) + + # This is currently broken: + # # We implement surrogateescape error handling here in addition rather + # # than relying on the custom error handler from + # # future.utils.surrogateescape to be registered globally, even though + # # that is fine in the case of decoding. (But not encoding: see the + # # comments in newstr.encode()``.) + # + # if errors == 'surrogateescape': + # # Decode char by char + # mybytes = [] + # for code in self: + # # Code is an int + # if 0x80 <= code <= 0xFF: + # b = 0xDC00 + code + # elif code <= 0x7F: + # b = _unichr(c).decode(encoding=encoding) + # else: + # # # It may be a bad byte + # # FIXME: What to do in this case? See the Py3 docs / tests. + # # # Try swallowing it. + # # continue + # # print("RAISE!") + # raise NotASurrogateError + # mybytes.append(b) + # return newbytes(mybytes) + # return newbytes(super(newstr, self).decode(encoding, errors)) + + @no(unicode) + def startswith(self, prefix, *args): + return super(newbytes, self).startswith(prefix, *args) + + @no(unicode) + def endswith(self, prefix, *args): + return super(newbytes, self).endswith(prefix, *args) + + @no(unicode) + def split(self, sep=None, maxsplit=-1): + # Py2 str.split() takes maxsplit as an optional parameter, not as a + # keyword argument as in Python 3 bytes. + parts = super(newbytes, self).split(sep, maxsplit) + return [newbytes(part) for part in parts] + + def splitlines(self, keepends=False): + """ + B.splitlines([keepends]) -> list of lines + + Return a list of the lines in B, breaking at line boundaries. + Line breaks are not included in the resulting list unless keepends + is given and true. + """ + # Py2 str.splitlines() takes keepends as an optional parameter, + # not as a keyword argument as in Python 3 bytes. + parts = super(newbytes, self).splitlines(keepends) + return [newbytes(part) for part in parts] + + @no(unicode) + def rsplit(self, sep=None, maxsplit=-1): + # Py2 str.rsplit() takes maxsplit as an optional parameter, not as a + # keyword argument as in Python 3 bytes. + parts = super(newbytes, self).rsplit(sep, maxsplit) + return [newbytes(part) for part in parts] + + @no(unicode) + def partition(self, sep): + parts = super(newbytes, self).partition(sep) + return tuple(newbytes(part) for part in parts) + + @no(unicode) + def rpartition(self, sep): + parts = super(newbytes, self).rpartition(sep) + return tuple(newbytes(part) for part in parts) + + @no(unicode, (1,)) + def rindex(self, sub, *args): + ''' + S.rindex(sub [,start [,end]]) -> int + + Like S.rfind() but raise ValueError when the substring is not found. + ''' + pos = self.rfind(sub, *args) + if pos == -1: + raise ValueError('substring not found') + + @no(unicode) + def index(self, sub, *args): + ''' + Returns index of sub in bytes. + Raises ValueError if byte is not in bytes and TypeError if can't + be converted bytes or its length is not 1. + ''' + if isinstance(sub, int): + if len(args) == 0: + start, end = 0, len(self) + elif len(args) == 1: + start = args[0] + elif len(args) == 2: + start, end = args + else: + raise TypeError('takes at most 3 arguments') + return list(self)[start:end].index(sub) + if not isinstance(sub, bytes): + try: + sub = self.__class__(sub) + except (TypeError, ValueError): + raise TypeError("can't convert sub to bytes") + try: + return super(newbytes, self).index(sub, *args) + except ValueError: + raise ValueError('substring not found') + + def __eq__(self, other): + if isinstance(other, (_builtin_bytes, bytearray)): + return super(newbytes, self).__eq__(other) + else: + return False + + def __ne__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__ne__(other) + else: + return True + + unorderable_err = 'unorderable types: bytes() and {0}' + + def __lt__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__lt__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __le__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__le__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __gt__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__gt__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __ge__(self, other): + if isinstance(other, _builtin_bytes): + return super(newbytes, self).__ge__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __native__(self): + # We can't just feed a newbytes object into str(), because + # newbytes.__str__() returns e.g. "b'blah'", consistent with Py3 bytes. + return super(newbytes, self).__str__() + + def __getattribute__(self, name): + """ + A trick to cause the ``hasattr`` builtin-fn to return False for + the 'encode' method on Py2. + """ + if name in ['encode', u'encode']: + raise AttributeError("encode method has been disabled in newbytes") + return super(newbytes, self).__getattribute__(name) + + @no(unicode) + def rstrip(self, bytes_to_strip=None): + """ + Strip trailing bytes contained in the argument. + If the argument is omitted, strip trailing ASCII whitespace. + """ + return newbytes(super(newbytes, self).rstrip(bytes_to_strip)) + + @no(unicode) + def strip(self, bytes_to_strip=None): + """ + Strip leading and trailing bytes contained in the argument. + If the argument is omitted, strip trailing ASCII whitespace. + """ + return newbytes(super(newbytes, self).strip(bytes_to_strip)) + + def lower(self): + """ + b.lower() -> copy of b + + Return a copy of b with all ASCII characters converted to lowercase. + """ + return newbytes(super(newbytes, self).lower()) + + @no(unicode) + def upper(self): + """ + b.upper() -> copy of b + + Return a copy of b with all ASCII characters converted to uppercase. + """ + return newbytes(super(newbytes, self).upper()) + + @classmethod + @no(unicode) + def maketrans(cls, frm, to): + """ + B.maketrans(frm, to) -> translation table + + Return a translation table (a bytes object of length 256) suitable + for use in the bytes or bytearray translate method where each byte + in frm is mapped to the byte at the same position in to. + The bytes objects frm and to must be of the same length. + """ + return newbytes(string.maketrans(frm, to)) + + +__all__ = ['newbytes'] diff --git a/lib/future/types/newdict.py b/lib/future/types/newdict.py new file mode 100644 index 00000000..3f3a559d --- /dev/null +++ b/lib/future/types/newdict.py @@ -0,0 +1,111 @@ +""" +A dict subclass for Python 2 that behaves like Python 3's dict + +Example use: + +>>> from builtins import dict +>>> d1 = dict() # instead of {} for an empty dict +>>> d2 = dict(key1='value1', key2='value2') + +The keys, values and items methods now return iterators on Python 2.x +(with set-like behaviour on Python 2.7). + +>>> for d in (d1, d2): +... assert not isinstance(d.keys(), list) +... assert not isinstance(d.values(), list) +... assert not isinstance(d.items(), list) +""" + +import sys + +from future.utils import with_metaclass +from future.types.newobject import newobject + + +_builtin_dict = dict +ver = sys.version_info[:2] + + +class BaseNewDict(type): + def __instancecheck__(cls, instance): + if cls == newdict: + return isinstance(instance, _builtin_dict) + else: + return issubclass(instance.__class__, cls) + + +class newdict(with_metaclass(BaseNewDict, _builtin_dict)): + """ + A backport of the Python 3 dict object to Py2 + """ + def items(self): + """ + On Python 2.7+: + D.items() -> a set-like object providing a view on D's items + On Python 2.6: + D.items() -> an iterator over D's items + """ + if ver == (2, 7): + return self.viewitems() + elif ver == (2, 6): + return self.iteritems() + elif ver >= (3, 0): + return self.items() + + def keys(self): + """ + On Python 2.7+: + D.keys() -> a set-like object providing a view on D's keys + On Python 2.6: + D.keys() -> an iterator over D's keys + """ + if ver == (2, 7): + return self.viewkeys() + elif ver == (2, 6): + return self.iterkeys() + elif ver >= (3, 0): + return self.keys() + + def values(self): + """ + On Python 2.7+: + D.values() -> a set-like object providing a view on D's values + On Python 2.6: + D.values() -> an iterator over D's values + """ + if ver == (2, 7): + return self.viewvalues() + elif ver == (2, 6): + return self.itervalues() + elif ver >= (3, 0): + return self.values() + + def __new__(cls, *args, **kwargs): + """ + dict() -> new empty dictionary + dict(mapping) -> new dictionary initialized from a mapping object's + (key, value) pairs + dict(iterable) -> new dictionary initialized as if via: + d = {} + for k, v in iterable: + d[k] = v + dict(**kwargs) -> new dictionary initialized with the name=value pairs + in the keyword argument list. For example: dict(one=1, two=2) + """ + + if len(args) == 0: + return super(newdict, cls).__new__(cls) + elif type(args[0]) == newdict: + value = args[0] + else: + value = args[0] + return super(newdict, cls).__new__(cls, value) + + def __native__(self): + """ + Hook for the future.utils.native() function + """ + return dict(self) + + +__all__ = ['newdict'] diff --git a/lib/future/types/newint.py b/lib/future/types/newint.py new file mode 100644 index 00000000..748dba9d --- /dev/null +++ b/lib/future/types/newint.py @@ -0,0 +1,381 @@ +""" +Backport of Python 3's int, based on Py2's long. + +They are very similar. The most notable difference is: + +- representation: trailing L in Python 2 removed in Python 3 +""" +from __future__ import division + +import struct + +from future.types.newbytes import newbytes +from future.types.newobject import newobject +from future.utils import PY3, isint, istext, isbytes, with_metaclass, native + + +if PY3: + long = int + from collections.abc import Iterable +else: + from collections import Iterable + + +class BaseNewInt(type): + def __instancecheck__(cls, instance): + if cls == newint: + # Special case for Py2 short or long int + return isinstance(instance, (int, long)) + else: + return issubclass(instance.__class__, cls) + + +class newint(with_metaclass(BaseNewInt, long)): + """ + A backport of the Python 3 int object to Py2 + """ + def __new__(cls, x=0, base=10): + """ + From the Py3 int docstring: + + | int(x=0) -> integer + | int(x, base=10) -> integer + | + | Convert a number or string to an integer, or return 0 if no + | arguments are given. If x is a number, return x.__int__(). For + | floating point numbers, this truncates towards zero. + | + | If x is not a number or if base is given, then x must be a string, + | bytes, or bytearray instance representing an integer literal in the + | given base. The literal can be preceded by '+' or '-' and be + | surrounded by whitespace. The base defaults to 10. Valid bases are + | 0 and 2-36. Base 0 means to interpret the base from the string as an + | integer literal. + | >>> int('0b100', base=0) + | 4 + + """ + try: + val = x.__int__() + except AttributeError: + val = x + else: + if not isint(val): + raise TypeError('__int__ returned non-int ({0})'.format( + type(val))) + + if base != 10: + # Explicit base + if not (istext(val) or isbytes(val) or isinstance(val, bytearray)): + raise TypeError( + "int() can't convert non-string with explicit base") + try: + return super(newint, cls).__new__(cls, val, base) + except TypeError: + return super(newint, cls).__new__(cls, newbytes(val), base) + # After here, base is 10 + try: + return super(newint, cls).__new__(cls, val) + except TypeError: + # Py2 long doesn't handle bytearray input with an explicit base, so + # handle this here. + # Py3: int(bytearray(b'10'), 2) == 2 + # Py2: int(bytearray(b'10'), 2) == 2 raises TypeError + # Py2: long(bytearray(b'10'), 2) == 2 raises TypeError + try: + return super(newint, cls).__new__(cls, newbytes(val)) + except: + raise TypeError("newint argument must be a string or a number," + "not '{0}'".format(type(val))) + + def __repr__(self): + """ + Without the L suffix + """ + value = super(newint, self).__repr__() + assert value[-1] == 'L' + return value[:-1] + + def __add__(self, other): + value = super(newint, self).__add__(other) + if value is NotImplemented: + return long(self) + other + return newint(value) + + def __radd__(self, other): + value = super(newint, self).__radd__(other) + if value is NotImplemented: + return other + long(self) + return newint(value) + + def __sub__(self, other): + value = super(newint, self).__sub__(other) + if value is NotImplemented: + return long(self) - other + return newint(value) + + def __rsub__(self, other): + value = super(newint, self).__rsub__(other) + if value is NotImplemented: + return other - long(self) + return newint(value) + + def __mul__(self, other): + value = super(newint, self).__mul__(other) + if isint(value): + return newint(value) + elif value is NotImplemented: + return long(self) * other + return value + + def __rmul__(self, other): + value = super(newint, self).__rmul__(other) + if isint(value): + return newint(value) + elif value is NotImplemented: + return other * long(self) + return value + + def __div__(self, other): + # We override this rather than e.g. relying on object.__div__ or + # long.__div__ because we want to wrap the value in a newint() + # call if other is another int + value = long(self) / other + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __rdiv__(self, other): + value = other / long(self) + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __idiv__(self, other): + # long has no __idiv__ method. Use __itruediv__ and cast back to + # newint: + value = self.__itruediv__(other) + if isinstance(other, (int, long)): + return newint(value) + else: + return value + + def __truediv__(self, other): + value = super(newint, self).__truediv__(other) + if value is NotImplemented: + value = long(self) / other + return value + + def __rtruediv__(self, other): + return super(newint, self).__rtruediv__(other) + + def __itruediv__(self, other): + # long has no __itruediv__ method + mylong = long(self) + mylong /= other + return mylong + + def __floordiv__(self, other): + return newint(super(newint, self).__floordiv__(other)) + + def __rfloordiv__(self, other): + return newint(super(newint, self).__rfloordiv__(other)) + + def __ifloordiv__(self, other): + # long has no __ifloordiv__ method + mylong = long(self) + mylong //= other + return newint(mylong) + + def __mod__(self, other): + value = super(newint, self).__mod__(other) + if value is NotImplemented: + return long(self) % other + return newint(value) + + def __rmod__(self, other): + value = super(newint, self).__rmod__(other) + if value is NotImplemented: + return other % long(self) + return newint(value) + + def __divmod__(self, other): + value = super(newint, self).__divmod__(other) + if value is NotImplemented: + mylong = long(self) + return (mylong // other, mylong % other) + return (newint(value[0]), newint(value[1])) + + def __rdivmod__(self, other): + value = super(newint, self).__rdivmod__(other) + if value is NotImplemented: + mylong = long(self) + return (other // mylong, other % mylong) + return (newint(value[0]), newint(value[1])) + + def __pow__(self, other): + value = super(newint, self).__pow__(other) + if value is NotImplemented: + return long(self) ** other + return newint(value) + + def __rpow__(self, other): + value = super(newint, self).__rpow__(other) + if value is NotImplemented: + return other ** long(self) + return newint(value) + + def __lshift__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for <<: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__lshift__(other)) + + def __rshift__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for >>: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__rshift__(other)) + + def __and__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for &: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__and__(other)) + + def __or__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for |: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__or__(other)) + + def __xor__(self, other): + if not isint(other): + raise TypeError( + "unsupported operand type(s) for ^: '%s' and '%s'" % + (type(self).__name__, type(other).__name__)) + return newint(super(newint, self).__xor__(other)) + + def __neg__(self): + return newint(super(newint, self).__neg__()) + + def __pos__(self): + return newint(super(newint, self).__pos__()) + + def __abs__(self): + return newint(super(newint, self).__abs__()) + + def __invert__(self): + return newint(super(newint, self).__invert__()) + + def __int__(self): + return self + + def __nonzero__(self): + return self.__bool__() + + def __bool__(self): + """ + So subclasses can override this, Py3-style + """ + return super(newint, self).__nonzero__() + + def __native__(self): + return long(self) + + def to_bytes(self, length, byteorder='big', signed=False): + """ + Return an array of bytes representing an integer. + + The integer is represented using length bytes. An OverflowError is + raised if the integer is not representable with the given number of + bytes. + + The byteorder argument determines the byte order used to represent the + integer. If byteorder is 'big', the most significant byte is at the + beginning of the byte array. If byteorder is 'little', the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use `sys.byteorder' as the byte order value. + + The signed keyword-only argument determines whether two's complement is + used to represent the integer. If signed is False and a negative integer + is given, an OverflowError is raised. + """ + if length < 0: + raise ValueError("length argument must be non-negative") + if length == 0 and self == 0: + return newbytes() + if signed and self < 0: + bits = length * 8 + num = (2**bits) + self + if num <= 0: + raise OverflowError("int too smal to convert") + else: + if self < 0: + raise OverflowError("can't convert negative int to unsigned") + num = self + if byteorder not in ('little', 'big'): + raise ValueError("byteorder must be either 'little' or 'big'") + h = b'%x' % num + s = newbytes((b'0'*(len(h) % 2) + h).zfill(length*2).decode('hex')) + if signed: + high_set = s[0] & 0x80 + if self > 0 and high_set: + raise OverflowError("int too big to convert") + if self < 0 and not high_set: + raise OverflowError("int too small to convert") + if len(s) > length: + raise OverflowError("int too big to convert") + return s if byteorder == 'big' else s[::-1] + + @classmethod + def from_bytes(cls, mybytes, byteorder='big', signed=False): + """ + Return the integer represented by the given array of bytes. + + The mybytes argument must either support the buffer protocol or be an + iterable object producing bytes. Bytes and bytearray are examples of + built-in objects that support the buffer protocol. + + The byteorder argument determines the byte order used to represent the + integer. If byteorder is 'big', the most significant byte is at the + beginning of the byte array. If byteorder is 'little', the most + significant byte is at the end of the byte array. To request the native + byte order of the host system, use `sys.byteorder' as the byte order value. + + The signed keyword-only argument indicates whether two's complement is + used to represent the integer. + """ + if byteorder not in ('little', 'big'): + raise ValueError("byteorder must be either 'little' or 'big'") + if isinstance(mybytes, unicode): + raise TypeError("cannot convert unicode objects to bytes") + # mybytes can also be passed as a sequence of integers on Py3. + # Test for this: + elif isinstance(mybytes, Iterable): + mybytes = newbytes(mybytes) + b = mybytes if byteorder == 'big' else mybytes[::-1] + if len(b) == 0: + b = b'\x00' + # The encode() method has been disabled by newbytes, but Py2's + # str has it: + num = int(native(b).encode('hex'), 16) + if signed and (b[0] & 0x80): + num = num - (2 ** (len(b)*8)) + return cls(num) + + +# def _twos_comp(val, bits): +# """compute the 2's compliment of int value val""" +# if( (val&(1<<(bits-1))) != 0 ): +# val = val - (1<>> from builtins import list +>>> l1 = list() # instead of {} for an empty list +>>> l1.append('hello') +>>> l2 = l1.copy() + +""" + +import sys +import copy + +from future.utils import with_metaclass +from future.types.newobject import newobject + + +_builtin_list = list +ver = sys.version_info[:2] + + +class BaseNewList(type): + def __instancecheck__(cls, instance): + if cls == newlist: + return isinstance(instance, _builtin_list) + else: + return issubclass(instance.__class__, cls) + + +class newlist(with_metaclass(BaseNewList, _builtin_list)): + """ + A backport of the Python 3 list object to Py2 + """ + def copy(self): + """ + L.copy() -> list -- a shallow copy of L + """ + return copy.copy(self) + + def clear(self): + """L.clear() -> None -- remove all items from L""" + for i in range(len(self)): + self.pop() + + def __new__(cls, *args, **kwargs): + """ + list() -> new empty list + list(iterable) -> new list initialized from iterable's items + """ + + if len(args) == 0: + return super(newlist, cls).__new__(cls) + elif type(args[0]) == newlist: + value = args[0] + else: + value = args[0] + return super(newlist, cls).__new__(cls, value) + + def __add__(self, value): + return newlist(super(newlist, self).__add__(value)) + + def __radd__(self, left): + " left + self " + try: + return newlist(left) + self + except: + return NotImplemented + + def __getitem__(self, y): + """ + x.__getitem__(y) <==> x[y] + + Warning: a bug in Python 2.x prevents indexing via a slice from + returning a newlist object. + """ + if isinstance(y, slice): + return newlist(super(newlist, self).__getitem__(y)) + else: + return super(newlist, self).__getitem__(y) + + def __native__(self): + """ + Hook for the future.utils.native() function + """ + return list(self) + + def __nonzero__(self): + return len(self) > 0 + + +__all__ = ['newlist'] diff --git a/lib/future/types/newmemoryview.py b/lib/future/types/newmemoryview.py new file mode 100644 index 00000000..09f804dc --- /dev/null +++ b/lib/future/types/newmemoryview.py @@ -0,0 +1,29 @@ +""" +A pretty lame implementation of a memoryview object for Python 2.6. +""" +from numbers import Integral +import string + +from future.utils import istext, isbytes, PY2, with_metaclass +from future.types import no, issubset + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +# class BaseNewBytes(type): +# def __instancecheck__(cls, instance): +# return isinstance(instance, _builtin_bytes) + + +class newmemoryview(object): # with_metaclass(BaseNewBytes, _builtin_bytes)): + """ + A pretty lame backport of the Python 2.7 and Python 3.x + memoryviewview object to Py2.6. + """ + def __init__(self, obj): + return obj + + +__all__ = ['newmemoryview'] diff --git a/lib/future/types/newobject.py b/lib/future/types/newobject.py new file mode 100644 index 00000000..31b84fc1 --- /dev/null +++ b/lib/future/types/newobject.py @@ -0,0 +1,117 @@ +""" +An object subclass for Python 2 that gives new-style classes written in the +style of Python 3 (with ``__next__`` and unicode-returning ``__str__`` methods) +the appropriate Python 2-style ``next`` and ``__unicode__`` methods for compatible. + +Example use:: + + from builtins import object + + my_unicode_str = u'Unicode string: \u5b54\u5b50' + + class A(object): + def __str__(self): + return my_unicode_str + + a = A() + print(str(a)) + + # On Python 2, these relations hold: + assert unicode(a) == my_unicode_string + assert str(a) == my_unicode_string.encode('utf-8') + + +Another example:: + + from builtins import object + + class Upper(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __next__(self): # note the Py3 interface + return next(self._iter).upper() + def __iter__(self): + return self + + assert list(Upper('hello')) == list('HELLO') + +""" + + +class newobject(object): + """ + A magical object class that provides Python 2 compatibility methods:: + next + __unicode__ + __nonzero__ + + Subclasses of this class can merely define the Python 3 methods (__next__, + __str__, and __bool__). + """ + def next(self): + if hasattr(self, '__next__'): + return type(self).__next__(self) + raise TypeError('newobject is not an iterator') + + def __unicode__(self): + # All subclasses of the builtin object should have __str__ defined. + # Note that old-style classes do not have __str__ defined. + if hasattr(self, '__str__'): + s = type(self).__str__(self) + else: + s = str(self) + if isinstance(s, unicode): + return s + else: + return s.decode('utf-8') + + def __nonzero__(self): + if hasattr(self, '__bool__'): + return type(self).__bool__(self) + if hasattr(self, '__len__'): + return type(self).__len__(self) + # object has no __nonzero__ method + return True + + # Are these ever needed? + # def __div__(self): + # return self.__truediv__() + + # def __idiv__(self, other): + # return self.__itruediv__(other) + + def __long__(self): + if not hasattr(self, '__int__'): + return NotImplemented + return self.__int__() # not type(self).__int__(self) + + # def __new__(cls, *args, **kwargs): + # """ + # dict() -> new empty dictionary + # dict(mapping) -> new dictionary initialized from a mapping object's + # (key, value) pairs + # dict(iterable) -> new dictionary initialized as if via: + # d = {} + # for k, v in iterable: + # d[k] = v + # dict(**kwargs) -> new dictionary initialized with the name=value pairs + # in the keyword argument list. For example: dict(one=1, two=2) + # """ + + # if len(args) == 0: + # return super(newdict, cls).__new__(cls) + # elif type(args[0]) == newdict: + # return args[0] + # else: + # value = args[0] + # return super(newdict, cls).__new__(cls, value) + + def __native__(self): + """ + Hook for the future.utils.native() function + """ + return object(self) + + __slots__ = [] + +__all__ = ['newobject'] diff --git a/lib/future/types/newopen.py b/lib/future/types/newopen.py new file mode 100644 index 00000000..b75d45af --- /dev/null +++ b/lib/future/types/newopen.py @@ -0,0 +1,32 @@ +""" +A substitute for the Python 3 open() function. + +Note that io.open() is more complete but maybe slower. Even so, the +completeness may be a better default. TODO: compare these +""" + +_builtin_open = open + +class newopen(object): + """Wrapper providing key part of Python 3 open() interface. + + From IPython's py3compat.py module. License: BSD. + """ + def __init__(self, fname, mode="r", encoding="utf-8"): + self.f = _builtin_open(fname, mode) + self.enc = encoding + + def write(self, s): + return self.f.write(s.encode(self.enc)) + + def read(self, size=-1): + return self.f.read(size).decode(self.enc) + + def close(self): + return self.f.close() + + def __enter__(self): + return self + + def __exit__(self, etype, value, traceback): + self.f.close() diff --git a/lib/future/types/newrange.py b/lib/future/types/newrange.py new file mode 100644 index 00000000..eda01a5a --- /dev/null +++ b/lib/future/types/newrange.py @@ -0,0 +1,170 @@ +""" +Nearly identical to xrange.py, by Dan Crosta, from + + https://github.com/dcrosta/xrange.git + +This is included here in the ``future`` package rather than pointed to as +a dependency because there is no package for ``xrange`` on PyPI. It is +also tweaked to appear like a regular Python 3 ``range`` object rather +than a Python 2 xrange. + +From Dan Crosta's README: + + "A pure-Python implementation of Python 2.7's xrange built-in, with + some features backported from the Python 3.x range built-in (which + replaced xrange) in that version." + + Read more at + https://late.am/post/2012/06/18/what-the-heck-is-an-xrange +""" +from __future__ import absolute_import + +from future.utils import PY2 + +if PY2: + from collections import Sequence, Iterator +else: + from collections.abc import Sequence, Iterator +from itertools import islice + +from future.backports.misc import count # with step parameter on Py2.6 +# For backward compatibility with python-future versions < 0.14.4: +_count = count + + +class newrange(Sequence): + """ + Pure-Python backport of Python 3's range object. See `the CPython + documentation for details: + `_ + """ + + def __init__(self, *args): + if len(args) == 1: + start, stop, step = 0, args[0], 1 + elif len(args) == 2: + start, stop, step = args[0], args[1], 1 + elif len(args) == 3: + start, stop, step = args + else: + raise TypeError('range() requires 1-3 int arguments') + + try: + start, stop, step = int(start), int(stop), int(step) + except ValueError: + raise TypeError('an integer is required') + + if step == 0: + raise ValueError('range() arg 3 must not be zero') + elif step < 0: + stop = min(stop, start) + else: + stop = max(stop, start) + + self._start = start + self._stop = stop + self._step = step + self._len = (stop - start) // step + bool((stop - start) % step) + + @property + def start(self): + return self._start + + @property + def stop(self): + return self._stop + + @property + def step(self): + return self._step + + def __repr__(self): + if self._step == 1: + return 'range(%d, %d)' % (self._start, self._stop) + return 'range(%d, %d, %d)' % (self._start, self._stop, self._step) + + def __eq__(self, other): + return (isinstance(other, newrange) and + (self._len == 0 == other._len or + (self._start, self._step, self._len) == + (other._start, other._step, self._len))) + + def __len__(self): + return self._len + + def index(self, value): + """Return the 0-based position of integer `value` in + the sequence this range represents.""" + try: + diff = value - self._start + except TypeError: + raise ValueError('%r is not in range' % value) + quotient, remainder = divmod(diff, self._step) + if remainder == 0 and 0 <= quotient < self._len: + return abs(quotient) + raise ValueError('%r is not in range' % value) + + def count(self, value): + """Return the number of ocurrences of integer `value` + in the sequence this range represents.""" + # a value can occur exactly zero or one times + return int(value in self) + + def __contains__(self, value): + """Return ``True`` if the integer `value` occurs in + the sequence this range represents.""" + try: + self.index(value) + return True + except ValueError: + return False + + def __reversed__(self): + return iter(self[::-1]) + + def __getitem__(self, index): + """Return the element at position ``index`` in the sequence + this range represents, or raise :class:`IndexError` if the + position is out of range.""" + if isinstance(index, slice): + return self.__getitem_slice(index) + if index < 0: + # negative indexes access from the end + index = self._len + index + if index < 0 or index >= self._len: + raise IndexError('range object index out of range') + return self._start + index * self._step + + def __getitem_slice(self, slce): + """Return a range which represents the requested slce + of the sequence represented by this range. + """ + scaled_indices = (self._step * n for n in slce.indices(self._len)) + start_offset, stop_offset, new_step = scaled_indices + return newrange(self._start + start_offset, + self._start + stop_offset, + new_step) + + def __iter__(self): + """Return an iterator which enumerates the elements of the + sequence this range represents.""" + return range_iterator(self) + + +class range_iterator(Iterator): + """An iterator for a :class:`range`. + """ + def __init__(self, range_): + self._stepper = islice(count(range_.start, range_.step), len(range_)) + + def __iter__(self): + return self + + def __next__(self): + return next(self._stepper) + + def next(self): + return next(self._stepper) + + +__all__ = ['newrange'] diff --git a/lib/future/types/newstr.py b/lib/future/types/newstr.py new file mode 100644 index 00000000..8ca191f9 --- /dev/null +++ b/lib/future/types/newstr.py @@ -0,0 +1,426 @@ +""" +This module redefines ``str`` on Python 2.x to be a subclass of the Py2 +``unicode`` type that behaves like the Python 3.x ``str``. + +The main differences between ``newstr`` and Python 2.x's ``unicode`` type are +the stricter type-checking and absence of a `u''` prefix in the representation. + +It is designed to be used together with the ``unicode_literals`` import +as follows: + + >>> from __future__ import unicode_literals + >>> from builtins import str, isinstance + +On Python 3.x and normally on Python 2.x, these expressions hold + + >>> str('blah') is 'blah' + True + >>> isinstance('blah', str) + True + +However, on Python 2.x, with this import: + + >>> from __future__ import unicode_literals + +the same expressions are False: + + >>> str('blah') is 'blah' + False + >>> isinstance('blah', str) + False + +This module is designed to be imported together with ``unicode_literals`` on +Python 2 to bring the meaning of ``str`` back into alignment with unprefixed +string literals (i.e. ``unicode`` subclasses). + +Note that ``str()`` (and ``print()``) would then normally call the +``__unicode__`` method on objects in Python 2. To define string +representations of your objects portably across Py3 and Py2, use the +:func:`python_2_unicode_compatible` decorator in :mod:`future.utils`. + +""" + +from numbers import Number + +from future.utils import PY3, istext, with_metaclass, isnewbytes +from future.types import no, issubset +from future.types.newobject import newobject + + +if PY3: + # We'll probably never use newstr on Py3 anyway... + unicode = str + from collections.abc import Iterable +else: + from collections import Iterable + + +class BaseNewStr(type): + def __instancecheck__(cls, instance): + if cls == newstr: + return isinstance(instance, unicode) + else: + return issubclass(instance.__class__, cls) + + +class newstr(with_metaclass(BaseNewStr, unicode)): + """ + A backport of the Python 3 str object to Py2 + """ + no_convert_msg = "Can't convert '{0}' object to str implicitly" + + def __new__(cls, *args, **kwargs): + """ + From the Py3 str docstring: + + str(object='') -> str + str(bytes_or_buffer[, encoding[, errors]]) -> str + + Create a new string object from the given object. If encoding or + errors is specified, then the object must expose a data buffer + that will be decoded using the given encoding and error handler. + Otherwise, returns the result of object.__str__() (if defined) + or repr(object). + encoding defaults to sys.getdefaultencoding(). + errors defaults to 'strict'. + + """ + if len(args) == 0: + return super(newstr, cls).__new__(cls) + # Special case: If someone requests str(str(u'abc')), return the same + # object (same id) for consistency with Py3.3. This is not true for + # other objects like list or dict. + elif type(args[0]) == newstr and cls == newstr: + return args[0] + elif isinstance(args[0], unicode): + value = args[0] + elif isinstance(args[0], bytes): # i.e. Py2 bytes or newbytes + if 'encoding' in kwargs or len(args) > 1: + value = args[0].decode(*args[1:], **kwargs) + else: + value = args[0].__str__() + else: + value = args[0] + return super(newstr, cls).__new__(cls, value) + + def __repr__(self): + """ + Without the u prefix + """ + + value = super(newstr, self).__repr__() + # assert value[0] == u'u' + return value[1:] + + def __getitem__(self, y): + """ + Warning: Python <= 2.7.6 has a bug that causes this method never to be called + when y is a slice object. Therefore the type of newstr()[:2] is wrong + (unicode instead of newstr). + """ + return newstr(super(newstr, self).__getitem__(y)) + + def __contains__(self, key): + errmsg = "'in ' requires string as left operand, not {0}" + # Don't use isinstance() here because we only want to catch + # newstr, not Python 2 unicode: + if type(key) == newstr: + newkey = key + elif isinstance(key, unicode) or isinstance(key, bytes) and not isnewbytes(key): + newkey = newstr(key) + else: + raise TypeError(errmsg.format(type(key))) + return issubset(list(newkey), list(self)) + + @no('newbytes') + def __add__(self, other): + return newstr(super(newstr, self).__add__(other)) + + @no('newbytes') + def __radd__(self, left): + " left + self " + try: + return newstr(left) + self + except: + return NotImplemented + + def __mul__(self, other): + return newstr(super(newstr, self).__mul__(other)) + + def __rmul__(self, other): + return newstr(super(newstr, self).__rmul__(other)) + + def join(self, iterable): + errmsg = 'sequence item {0}: expected unicode string, found bytes' + for i, item in enumerate(iterable): + # Here we use type() rather than isinstance() because + # __instancecheck__ is being overridden. E.g. + # isinstance(b'abc', newbytes) is True on Py2. + if isnewbytes(item): + raise TypeError(errmsg.format(i)) + # Support use as a staticmethod: str.join('-', ['a', 'b']) + if type(self) == newstr: + return newstr(super(newstr, self).join(iterable)) + else: + return newstr(super(newstr, newstr(self)).join(iterable)) + + @no('newbytes') + def find(self, sub, *args): + return super(newstr, self).find(sub, *args) + + @no('newbytes') + def rfind(self, sub, *args): + return super(newstr, self).rfind(sub, *args) + + @no('newbytes', (1, 2)) + def replace(self, old, new, *args): + return newstr(super(newstr, self).replace(old, new, *args)) + + def decode(self, *args): + raise AttributeError("decode method has been disabled in newstr") + + def encode(self, encoding='utf-8', errors='strict'): + """ + Returns bytes + + Encode S using the codec registered for encoding. Default encoding + is 'utf-8'. errors may be given to set a different error + handling scheme. Default is 'strict' meaning that encoding errors raise + a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and + 'xmlcharrefreplace' as well as any other name registered with + codecs.register_error that can handle UnicodeEncodeErrors. + """ + from future.types.newbytes import newbytes + # Py2 unicode.encode() takes encoding and errors as optional parameter, + # not keyword arguments as in Python 3 str. + + # For the surrogateescape error handling mechanism, the + # codecs.register_error() function seems to be inadequate for an + # implementation of it when encoding. (Decoding seems fine, however.) + # For example, in the case of + # u'\udcc3'.encode('ascii', 'surrogateescape_handler') + # after registering the ``surrogateescape_handler`` function in + # future.utils.surrogateescape, both Python 2.x and 3.x raise an + # exception anyway after the function is called because the unicode + # string it has to return isn't encodable strictly as ASCII. + + if errors == 'surrogateescape': + if encoding == 'utf-16': + # Known to fail here. See test_encoding_works_normally() + raise NotImplementedError('FIXME: surrogateescape handling is ' + 'not yet implemented properly') + # Encode char by char, building up list of byte-strings + mybytes = [] + for c in self: + code = ord(c) + if 0xD800 <= code <= 0xDCFF: + mybytes.append(newbytes([code - 0xDC00])) + else: + mybytes.append(c.encode(encoding=encoding)) + return newbytes(b'').join(mybytes) + return newbytes(super(newstr, self).encode(encoding, errors)) + + @no('newbytes', 1) + def startswith(self, prefix, *args): + if isinstance(prefix, Iterable): + for thing in prefix: + if isnewbytes(thing): + raise TypeError(self.no_convert_msg.format(type(thing))) + return super(newstr, self).startswith(prefix, *args) + + @no('newbytes', 1) + def endswith(self, prefix, *args): + # Note we need the decorator above as well as the isnewbytes() + # check because prefix can be either a bytes object or e.g. a + # tuple of possible prefixes. (If it's a bytes object, each item + # in it is an int.) + if isinstance(prefix, Iterable): + for thing in prefix: + if isnewbytes(thing): + raise TypeError(self.no_convert_msg.format(type(thing))) + return super(newstr, self).endswith(prefix, *args) + + @no('newbytes', 1) + def split(self, sep=None, maxsplit=-1): + # Py2 unicode.split() takes maxsplit as an optional parameter, + # not as a keyword argument as in Python 3 str. + parts = super(newstr, self).split(sep, maxsplit) + return [newstr(part) for part in parts] + + @no('newbytes', 1) + def rsplit(self, sep=None, maxsplit=-1): + # Py2 unicode.rsplit() takes maxsplit as an optional parameter, + # not as a keyword argument as in Python 3 str. + parts = super(newstr, self).rsplit(sep, maxsplit) + return [newstr(part) for part in parts] + + @no('newbytes', 1) + def partition(self, sep): + parts = super(newstr, self).partition(sep) + return tuple(newstr(part) for part in parts) + + @no('newbytes', 1) + def rpartition(self, sep): + parts = super(newstr, self).rpartition(sep) + return tuple(newstr(part) for part in parts) + + @no('newbytes', 1) + def index(self, sub, *args): + """ + Like newstr.find() but raise ValueError when the substring is not + found. + """ + pos = self.find(sub, *args) + if pos == -1: + raise ValueError('substring not found') + return pos + + def splitlines(self, keepends=False): + """ + S.splitlines(keepends=False) -> list of strings + + Return a list of the lines in S, breaking at line boundaries. + Line breaks are not included in the resulting list unless keepends + is given and true. + """ + # Py2 unicode.splitlines() takes keepends as an optional parameter, + # not as a keyword argument as in Python 3 str. + parts = super(newstr, self).splitlines(keepends) + return [newstr(part) for part in parts] + + def __eq__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__eq__(other) + else: + return NotImplemented + + def __hash__(self): + if (isinstance(self, unicode) or + isinstance(self, bytes) and not isnewbytes(self)): + return super(newstr, self).__hash__() + else: + raise NotImplementedError() + + def __ne__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__ne__(other) + else: + return True + + unorderable_err = 'unorderable types: str() and {0}' + + def __lt__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__lt__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __le__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__le__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __gt__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__gt__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __ge__(self, other): + if (isinstance(other, unicode) or + isinstance(other, bytes) and not isnewbytes(other)): + return super(newstr, self).__ge__(other) + raise TypeError(self.unorderable_err.format(type(other))) + + def __getattribute__(self, name): + """ + A trick to cause the ``hasattr`` builtin-fn to return False for + the 'decode' method on Py2. + """ + if name in ['decode', u'decode']: + raise AttributeError("decode method has been disabled in newstr") + return super(newstr, self).__getattribute__(name) + + def __native__(self): + """ + A hook for the future.utils.native() function. + """ + return unicode(self) + + @staticmethod + def maketrans(x, y=None, z=None): + """ + Return a translation table usable for str.translate(). + + If there is only one argument, it must be a dictionary mapping Unicode + ordinals (integers) or characters to Unicode ordinals, strings or None. + Character keys will be then converted to ordinals. + If there are two arguments, they must be strings of equal length, and + in the resulting dictionary, each character in x will be mapped to the + character at the same position in y. If there is a third argument, it + must be a string, whose characters will be mapped to None in the result. + """ + + if y is None: + assert z is None + if not isinstance(x, dict): + raise TypeError('if you give only one argument to maketrans it must be a dict') + result = {} + for (key, value) in x.items(): + if len(key) > 1: + raise ValueError('keys in translate table must be strings or integers') + result[ord(key)] = value + else: + if not isinstance(x, unicode) and isinstance(y, unicode): + raise TypeError('x and y must be unicode strings') + if not len(x) == len(y): + raise ValueError('the first two maketrans arguments must have equal length') + result = {} + for (xi, yi) in zip(x, y): + if len(xi) > 1: + raise ValueError('keys in translate table must be strings or integers') + result[ord(xi)] = ord(yi) + + if z is not None: + for char in z: + result[ord(char)] = None + return result + + def translate(self, table): + """ + S.translate(table) -> str + + Return a copy of the string S, where all characters have been mapped + through the given translation table, which must be a mapping of + Unicode ordinals to Unicode ordinals, strings, or None. + Unmapped characters are left untouched. Characters mapped to None + are deleted. + """ + l = [] + for c in self: + if ord(c) in table: + val = table[ord(c)] + if val is None: + continue + elif isinstance(val, unicode): + l.append(val) + else: + l.append(chr(val)) + else: + l.append(c) + return ''.join(l) + + def isprintable(self): + raise NotImplementedError('fixme') + + def isidentifier(self): + raise NotImplementedError('fixme') + + def format_map(self): + raise NotImplementedError('fixme') + + +__all__ = ['newstr'] diff --git a/lib/future/utils/__init__.py b/lib/future/utils/__init__.py new file mode 100644 index 00000000..46bd96de --- /dev/null +++ b/lib/future/utils/__init__.py @@ -0,0 +1,767 @@ +""" +A selection of cross-compatible functions for Python 2 and 3. + +This module exports useful functions for 2/3 compatible code: + + * bind_method: binds functions to classes + * ``native_str_to_bytes`` and ``bytes_to_native_str`` + * ``native_str``: always equal to the native platform string object (because + this may be shadowed by imports from future.builtins) + * lists: lrange(), lmap(), lzip(), lfilter() + * iterable method compatibility: + - iteritems, iterkeys, itervalues + - viewitems, viewkeys, viewvalues + + These use the original method if available, otherwise they use items, + keys, values. + + * types: + + * text_type: unicode in Python 2, str in Python 3 + * string_types: basestring in Python 2, str in Python 3 + * binary_type: str in Python 2, bytes in Python 3 + * integer_types: (int, long) in Python 2, int in Python 3 + * class_types: (type, types.ClassType) in Python 2, type in Python 3 + + * bchr(c): + Take an integer and make a 1-character byte string + * bord(c) + Take the result of indexing on a byte string and make an integer + * tobytes(s) + Take a text string, a byte string, or a sequence of characters taken + from a byte string, and make a byte string. + + * raise_from() + * raise_with_traceback() + +This module also defines these decorators: + + * ``python_2_unicode_compatible`` + * ``with_metaclass`` + * ``implements_iterator`` + +Some of the functions in this module come from the following sources: + + * Jinja2 (BSD licensed: see + https://github.com/mitsuhiko/jinja2/blob/master/LICENSE) + * Pandas compatibility module pandas.compat + * six.py by Benjamin Peterson + * Django +""" + +import types +import sys +import numbers +import functools +import copy +import inspect + + +PY3 = sys.version_info[0] >= 3 +PY34_PLUS = sys.version_info[0:2] >= (3, 4) +PY35_PLUS = sys.version_info[0:2] >= (3, 5) +PY36_PLUS = sys.version_info[0:2] >= (3, 6) +PY2 = sys.version_info[0] == 2 +PY26 = sys.version_info[0:2] == (2, 6) +PY27 = sys.version_info[0:2] == (2, 7) +PYPY = hasattr(sys, 'pypy_translation_info') + + +def python_2_unicode_compatible(cls): + """ + A decorator that defines __unicode__ and __str__ methods under Python + 2. Under Python 3, this decorator is a no-op. + + To support Python 2 and 3 with a single code base, define a __str__ + method returning unicode text and apply this decorator to the class, like + this:: + + >>> from future.utils import python_2_unicode_compatible + + >>> @python_2_unicode_compatible + ... class MyClass(object): + ... def __str__(self): + ... return u'Unicode string: \u5b54\u5b50' + + >>> a = MyClass() + + Then, after this import: + + >>> from future.builtins import str + + the following is ``True`` on both Python 3 and 2:: + + >>> str(a) == a.encode('utf-8').decode('utf-8') + True + + and, on a Unicode-enabled terminal with the right fonts, these both print the + Chinese characters for Confucius:: + + >>> print(a) + >>> print(str(a)) + + The implementation comes from django.utils.encoding. + """ + if not PY3: + cls.__unicode__ = cls.__str__ + cls.__str__ = lambda self: self.__unicode__().encode('utf-8') + return cls + + +def with_metaclass(meta, *bases): + """ + Function from jinja2/_compat.py. License: BSD. + + Use it like this:: + + class BaseForm(object): + pass + + class FormType(type): + pass + + class Form(with_metaclass(FormType, BaseForm)): + pass + + This requires a bit of explanation: the basic idea is to make a + dummy metaclass for one level of class instantiation that replaces + itself with the actual metaclass. Because of internal type checks + we also need to make sure that we downgrade the custom metaclass + for one level to something closer to type (that's why __call__ and + __init__ comes back from type etc.). + + This has the advantage over six.with_metaclass of not introducing + dummy classes into the final MRO. + """ + class metaclass(meta): + __call__ = type.__call__ + __init__ = type.__init__ + def __new__(cls, name, this_bases, d): + if this_bases is None: + return type.__new__(cls, name, (), d) + return meta(name, bases, d) + return metaclass('temporary_class', None, {}) + + +# Definitions from pandas.compat and six.py follow: +if PY3: + def bchr(s): + return bytes([s]) + def bstr(s): + if isinstance(s, str): + return bytes(s, 'latin-1') + else: + return bytes(s) + def bord(s): + return s + + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + +else: + # Python 2 + def bchr(s): + return chr(s) + def bstr(s): + return str(s) + def bord(s): + return ord(s) + + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + +### + +if PY3: + def tobytes(s): + if isinstance(s, bytes): + return s + else: + if isinstance(s, str): + return s.encode('latin-1') + else: + return bytes(s) +else: + # Python 2 + def tobytes(s): + if isinstance(s, unicode): + return s.encode('latin-1') + else: + return ''.join(s) + +tobytes.__doc__ = """ + Encodes to latin-1 (where the first 256 chars are the same as + ASCII.) + """ + +if PY3: + def native_str_to_bytes(s, encoding='utf-8'): + return s.encode(encoding) + + def bytes_to_native_str(b, encoding='utf-8'): + return b.decode(encoding) + + def text_to_native_str(t, encoding=None): + return t +else: + # Python 2 + def native_str_to_bytes(s, encoding=None): + from future.types import newbytes # to avoid a circular import + return newbytes(s) + + def bytes_to_native_str(b, encoding=None): + return native(b) + + def text_to_native_str(t, encoding='ascii'): + """ + Use this to create a Py2 native string when "from __future__ import + unicode_literals" is in effect. + """ + return unicode(t).encode(encoding) + +native_str_to_bytes.__doc__ = """ + On Py3, returns an encoded string. + On Py2, returns a newbytes type, ignoring the ``encoding`` argument. + """ + +if PY3: + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + import __builtin__ + # Python 2-builtin ranges produce lists + lrange = __builtin__.range + lzip = __builtin__.zip + lmap = __builtin__.map + lfilter = __builtin__.filter + + +def isidentifier(s, dotted=False): + ''' + A function equivalent to the str.isidentifier method on Py3 + ''' + if dotted: + return all(isidentifier(a) for a in s.split('.')) + if PY3: + return s.isidentifier() + else: + import re + _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") + return bool(_name_re.match(s)) + + +def viewitems(obj, **kwargs): + """ + Function for iterating over dictionary items with the same set-like + behaviour on Py2.7 as on Py3. + + Passes kwargs to method.""" + func = getattr(obj, "viewitems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def viewkeys(obj, **kwargs): + """ + Function for iterating over dictionary keys with the same set-like + behaviour on Py2.7 as on Py3. + + Passes kwargs to method.""" + func = getattr(obj, "viewkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def viewvalues(obj, **kwargs): + """ + Function for iterating over dictionary values with the same set-like + behaviour on Py2.7 as on Py3. + + Passes kwargs to method.""" + func = getattr(obj, "viewvalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def iteritems(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewitems(). + """ + func = getattr(obj, "iteritems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def iterkeys(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewkeys(). + """ + func = getattr(obj, "iterkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def itervalues(obj, **kwargs): + """Use this only if compatibility with Python versions before 2.7 is + required. Otherwise, prefer viewvalues(). + """ + func = getattr(obj, "itervalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible. + + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + Returns + ------- + None + """ + # only python 2 has an issue with bound/unbound methods + if not PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) + + +def getexception(): + return sys.exc_info()[1] + + +def _get_caller_globals_and_locals(): + """ + Returns the globals and locals of the calling frame. + + Is there an alternative to frame hacking here? + """ + caller_frame = inspect.stack()[2] + myglobals = caller_frame[0].f_globals + mylocals = caller_frame[0].f_locals + return myglobals, mylocals + + +def _repr_strip(mystring): + """ + Returns the string without any initial or final quotes. + """ + r = repr(mystring) + if r.startswith("'") and r.endswith("'"): + return r[1:-1] + else: + return r + + +if PY3: + def raise_from(exc, cause): + """ + Equivalent to: + + raise EXCEPTION from CAUSE + + on Python 3. (See PEP 3134). + """ + myglobals, mylocals = _get_caller_globals_and_locals() + + # We pass the exception and cause along with other globals + # when we exec(): + myglobals = myglobals.copy() + myglobals['__python_future_raise_from_exc'] = exc + myglobals['__python_future_raise_from_cause'] = cause + execstr = "raise __python_future_raise_from_exc from __python_future_raise_from_cause" + exec(execstr, myglobals, mylocals) + + def raise_(tp, value=None, tb=None): + """ + A function that matches the Python 2.x ``raise`` statement. This + allows re-raising exceptions with the cls value and traceback on + Python 2 and 3. + """ + if isinstance(tp, BaseException): + # If the first object is an instance, the type of the exception + # is the class of the instance, the instance itself is the value, + # and the second object must be None. + if value is not None: + raise TypeError("instance exception may not have a separate value") + exc = tp + elif isinstance(tp, type) and not issubclass(tp, BaseException): + # If the first object is a class, it becomes the type of the + # exception. + raise TypeError("class must derive from BaseException, not %s" % tp.__name__) + else: + # The second object is used to determine the exception value: If it + # is an instance of the class, the instance becomes the exception + # value. If the second object is a tuple, it is used as the argument + # list for the class constructor; if it is None, an empty argument + # list is used, and any other object is treated as a single argument + # to the constructor. The instance so created by calling the + # constructor is used as the exception value. + if isinstance(value, tp): + exc = value + elif isinstance(value, tuple): + exc = tp(*value) + elif value is None: + exc = tp() + else: + exc = tp(value) + + if exc.__traceback__ is not tb: + raise exc.with_traceback(tb) + raise exc + + def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc.with_traceback(traceback) + +else: + def raise_from(exc, cause): + """ + Equivalent to: + + raise EXCEPTION from CAUSE + + on Python 3. (See PEP 3134). + """ + # Is either arg an exception class (e.g. IndexError) rather than + # instance (e.g. IndexError('my message here')? If so, pass the + # name of the class undisturbed through to "raise ... from ...". + if isinstance(exc, type) and issubclass(exc, Exception): + e = exc() + # exc = exc.__name__ + # execstr = "e = " + _repr_strip(exc) + "()" + # myglobals, mylocals = _get_caller_globals_and_locals() + # exec(execstr, myglobals, mylocals) + else: + e = exc + e.__suppress_context__ = False + if isinstance(cause, type) and issubclass(cause, Exception): + e.__cause__ = cause() + e.__cause__.__traceback__ = sys.exc_info()[2] + e.__suppress_context__ = True + elif cause is None: + e.__cause__ = None + e.__suppress_context__ = True + elif isinstance(cause, BaseException): + e.__cause__ = cause + object.__setattr__(e.__cause__, '__traceback__', sys.exc_info()[2]) + e.__suppress_context__ = True + else: + raise TypeError("exception causes must derive from BaseException") + e.__context__ = sys.exc_info()[1] + raise e + + exec(''' +def raise_(tp, value=None, tb=None): + raise tp, value, tb + +def raise_with_traceback(exc, traceback=Ellipsis): + if traceback == Ellipsis: + _, _, traceback = sys.exc_info() + raise exc, None, traceback +'''.strip()) + + +raise_with_traceback.__doc__ = ( +"""Raise exception with existing traceback. +If traceback is not passed, uses sys.exc_info() to get traceback.""" +) + + +# Deprecated alias for backward compatibility with ``future`` versions < 0.11: +reraise = raise_ + + +def implements_iterator(cls): + ''' + From jinja2/_compat.py. License: BSD. + + Use as a decorator like this:: + + @implements_iterator + class UppercasingIterator(object): + def __init__(self, iterable): + self._iter = iter(iterable) + def __iter__(self): + return self + def __next__(self): + return next(self._iter).upper() + + ''' + if PY3: + return cls + else: + cls.next = cls.__next__ + del cls.__next__ + return cls + +if PY3: + get_next = lambda x: x.next +else: + get_next = lambda x: x.__next__ + + +def encode_filename(filename): + if PY3: + return filename + else: + if isinstance(filename, unicode): + return filename.encode('utf-8') + return filename + + +def is_new_style(cls): + """ + Python 2.7 has both new-style and old-style classes. Old-style classes can + be pesky in some circumstances, such as when using inheritance. Use this + function to test for whether a class is new-style. (Python 3 only has + new-style classes.) + """ + return hasattr(cls, '__class__') and ('__dict__' in dir(cls) + or hasattr(cls, '__slots__')) + +# The native platform string and bytes types. Useful because ``str`` and +# ``bytes`` are redefined on Py2 by ``from future.builtins import *``. +native_str = str +native_bytes = bytes + + +def istext(obj): + """ + Deprecated. Use:: + >>> isinstance(obj, str) + after this import: + >>> from future.builtins import str + """ + return isinstance(obj, type(u'')) + + +def isbytes(obj): + """ + Deprecated. Use:: + >>> isinstance(obj, bytes) + after this import: + >>> from future.builtins import bytes + """ + return isinstance(obj, type(b'')) + + +def isnewbytes(obj): + """ + Equivalent to the result of ``type(obj) == type(newbytes)`` + in other words, it is REALLY a newbytes instance, not a Py2 native str + object? + + Note that this does not cover subclasses of newbytes, and it is not + equivalent to ininstance(obj, newbytes) + """ + return type(obj).__name__ == 'newbytes' + + +def isint(obj): + """ + Deprecated. Tests whether an object is a Py3 ``int`` or either a Py2 ``int`` or + ``long``. + + Instead of using this function, you can use: + + >>> from future.builtins import int + >>> isinstance(obj, int) + + The following idiom is equivalent: + + >>> from numbers import Integral + >>> isinstance(obj, Integral) + """ + + return isinstance(obj, numbers.Integral) + + +def native(obj): + """ + On Py3, this is a no-op: native(obj) -> obj + + On Py2, returns the corresponding native Py2 types that are + superclasses for backported objects from Py3: + + >>> from builtins import str, bytes, int + + >>> native(str(u'ABC')) + u'ABC' + >>> type(native(str(u'ABC'))) + unicode + + >>> native(bytes(b'ABC')) + b'ABC' + >>> type(native(bytes(b'ABC'))) + bytes + + >>> native(int(10**20)) + 100000000000000000000L + >>> type(native(int(10**20))) + long + + Existing native types on Py2 will be returned unchanged: + + >>> type(native(u'ABC')) + unicode + """ + if hasattr(obj, '__native__'): + return obj.__native__() + else: + return obj + + +# Implementation of exec_ is from ``six``: +if PY3: + import builtins + exec_ = getattr(builtins, "exec") +else: + def exec_(code, globs=None, locs=None): + """Execute code in a namespace.""" + if globs is None: + frame = sys._getframe(1) + globs = frame.f_globals + if locs is None: + locs = frame.f_locals + del frame + elif locs is None: + locs = globs + exec("""exec code in globs, locs""") + + +# Defined here for backward compatibility: +def old_div(a, b): + """ + DEPRECATED: import ``old_div`` from ``past.utils`` instead. + + Equivalent to ``a / b`` on Python 2 without ``from __future__ import + division``. + + TODO: generalize this to other objects (like arrays etc.) + """ + if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): + return a // b + else: + return a / b + + +def as_native_str(encoding='utf-8'): + ''' + A decorator to turn a function or method call that returns text, i.e. + unicode, into one that returns a native platform str. + + Use it as a decorator like this:: + + from __future__ import unicode_literals + + class MyClass(object): + @as_native_str(encoding='ascii') + def __repr__(self): + return next(self._iter).upper() + ''' + if PY3: + return lambda f: f + else: + def encoder(f): + @functools.wraps(f) + def wrapper(*args, **kwargs): + return f(*args, **kwargs).encode(encoding=encoding) + return wrapper + return encoder + +# listvalues and listitems definitions from Nick Coghlan's (withdrawn) +# PEP 496: +try: + dict.iteritems +except AttributeError: + # Python 3 + def listvalues(d): + return list(d.values()) + def listitems(d): + return list(d.items()) +else: + # Python 2 + def listvalues(d): + return d.values() + def listitems(d): + return d.items() + +if PY3: + def ensure_new_type(obj): + return obj +else: + def ensure_new_type(obj): + from future.types.newbytes import newbytes + from future.types.newstr import newstr + from future.types.newint import newint + from future.types.newdict import newdict + + native_type = type(native(obj)) + + # Upcast only if the type is already a native (non-future) type + if issubclass(native_type, type(obj)): + # Upcast + if native_type == str: # i.e. Py2 8-bit str + return newbytes(obj) + elif native_type == unicode: + return newstr(obj) + elif native_type == int: + return newint(obj) + elif native_type == long: + return newint(obj) + elif native_type == dict: + return newdict(obj) + else: + return obj + else: + # Already a new type + assert type(obj) in [newbytes, newstr] + return obj + + +__all__ = ['PY2', 'PY26', 'PY3', 'PYPY', + 'as_native_str', 'binary_type', 'bind_method', 'bord', 'bstr', + 'bytes_to_native_str', 'class_types', 'encode_filename', + 'ensure_new_type', 'exec_', 'get_next', 'getexception', + 'implements_iterator', 'integer_types', 'is_new_style', 'isbytes', + 'isidentifier', 'isint', 'isnewbytes', 'istext', 'iteritems', + 'iterkeys', 'itervalues', 'lfilter', 'listitems', 'listvalues', + 'lmap', 'lrange', 'lzip', 'native', 'native_bytes', 'native_str', + 'native_str_to_bytes', 'old_div', + 'python_2_unicode_compatible', 'raise_', + 'raise_with_traceback', 'reraise', 'string_types', + 'text_to_native_str', 'text_type', 'tobytes', 'viewitems', + 'viewkeys', 'viewvalues', 'with_metaclass' + ] diff --git a/lib/future/utils/surrogateescape.py b/lib/future/utils/surrogateescape.py new file mode 100644 index 00000000..0dcc9fa6 --- /dev/null +++ b/lib/future/utils/surrogateescape.py @@ -0,0 +1,198 @@ +""" +This is Victor Stinner's pure-Python implementation of PEP 383: the "surrogateescape" error +handler of Python 3. + +Source: misc/python/surrogateescape.py in https://bitbucket.org/haypo/misc +""" + +# This code is released under the Python license and the BSD 2-clause license + +import codecs +import sys + +from future import utils + + +FS_ERRORS = 'surrogateescape' + +# # -- Python 2/3 compatibility ------------------------------------- +# FS_ERRORS = 'my_surrogateescape' + +def u(text): + if utils.PY3: + return text + else: + return text.decode('unicode_escape') + +def b(data): + if utils.PY3: + return data.encode('latin1') + else: + return data + +if utils.PY3: + _unichr = chr + bytes_chr = lambda code: bytes((code,)) +else: + _unichr = unichr + bytes_chr = chr + +def surrogateescape_handler(exc): + """ + Pure Python implementation of the PEP 383: the "surrogateescape" error + handler of Python 3. Undecodable bytes will be replaced by a Unicode + character U+DCxx on decoding, and these are translated into the + original bytes on encoding. + """ + mystring = exc.object[exc.start:exc.end] + + try: + if isinstance(exc, UnicodeDecodeError): + # mystring is a byte-string in this case + decoded = replace_surrogate_decode(mystring) + elif isinstance(exc, UnicodeEncodeError): + # In the case of u'\udcc3'.encode('ascii', + # 'this_surrogateescape_handler'), both Python 2.x and 3.x raise an + # exception anyway after this function is called, even though I think + # it's doing what it should. It seems that the strict encoder is called + # to encode the unicode string that this function returns ... + decoded = replace_surrogate_encode(mystring) + else: + raise exc + except NotASurrogateError: + raise exc + return (decoded, exc.end) + + +class NotASurrogateError(Exception): + pass + + +def replace_surrogate_encode(mystring): + """ + Returns a (unicode) string, not the more logical bytes, because the codecs + register_error functionality expects this. + """ + decoded = [] + for ch in mystring: + # if utils.PY3: + # code = ch + # else: + code = ord(ch) + + # The following magic comes from Py3.3's Python/codecs.c file: + if not 0xD800 <= code <= 0xDCFF: + # Not a surrogate. Fail with the original exception. + raise NotASurrogateError + # mybytes = [0xe0 | (code >> 12), + # 0x80 | ((code >> 6) & 0x3f), + # 0x80 | (code & 0x3f)] + # Is this a good idea? + if 0xDC00 <= code <= 0xDC7F: + decoded.append(_unichr(code - 0xDC00)) + elif code <= 0xDCFF: + decoded.append(_unichr(code - 0xDC00)) + else: + raise NotASurrogateError + return str().join(decoded) + + +def replace_surrogate_decode(mybytes): + """ + Returns a (unicode) string + """ + decoded = [] + for ch in mybytes: + # We may be parsing newbytes (in which case ch is an int) or a native + # str on Py2 + if isinstance(ch, int): + code = ch + else: + code = ord(ch) + if 0x80 <= code <= 0xFF: + decoded.append(_unichr(0xDC00 + code)) + elif code <= 0x7F: + decoded.append(_unichr(code)) + else: + # # It may be a bad byte + # # Try swallowing it. + # continue + # print("RAISE!") + raise NotASurrogateError + return str().join(decoded) + + +def encodefilename(fn): + if FS_ENCODING == 'ascii': + # ASCII encoder of Python 2 expects that the error handler returns a + # Unicode string encodable to ASCII, whereas our surrogateescape error + # handler has to return bytes in 0x80-0xFF range. + encoded = [] + for index, ch in enumerate(fn): + code = ord(ch) + if code < 128: + ch = bytes_chr(code) + elif 0xDC80 <= code <= 0xDCFF: + ch = bytes_chr(code - 0xDC00) + else: + raise UnicodeEncodeError(FS_ENCODING, + fn, index, index+1, + 'ordinal not in range(128)') + encoded.append(ch) + return bytes().join(encoded) + elif FS_ENCODING == 'utf-8': + # UTF-8 encoder of Python 2 encodes surrogates, so U+DC80-U+DCFF + # doesn't go through our error handler + encoded = [] + for index, ch in enumerate(fn): + code = ord(ch) + if 0xD800 <= code <= 0xDFFF: + if 0xDC80 <= code <= 0xDCFF: + ch = bytes_chr(code - 0xDC00) + encoded.append(ch) + else: + raise UnicodeEncodeError( + FS_ENCODING, + fn, index, index+1, 'surrogates not allowed') + else: + ch_utf8 = ch.encode('utf-8') + encoded.append(ch_utf8) + return bytes().join(encoded) + else: + return fn.encode(FS_ENCODING, FS_ERRORS) + +def decodefilename(fn): + return fn.decode(FS_ENCODING, FS_ERRORS) + +FS_ENCODING = 'ascii'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') +# FS_ENCODING = 'cp932'; fn = b('[abc\x81\x00]'); encoded = u('[abc\udc81\x00]') +# FS_ENCODING = 'UTF-8'; fn = b('[abc\xff]'); encoded = u('[abc\udcff]') + + +# normalize the filesystem encoding name. +# For example, we expect "utf-8", not "UTF8". +FS_ENCODING = codecs.lookup(FS_ENCODING).name + + +def register_surrogateescape(): + """ + Registers the surrogateescape error handler on Python 2 (only) + """ + if utils.PY3: + return + try: + codecs.lookup_error(FS_ERRORS) + except LookupError: + codecs.register_error(FS_ERRORS, surrogateescape_handler) + + +if __name__ == '__main__': + pass + # # Tests: + # register_surrogateescape() + + # b = decodefilename(fn) + # assert b == encoded, "%r != %r" % (b, encoded) + # c = encodefilename(b) + # assert c == fn, '%r != %r' % (c, fn) + # # print("ok") diff --git a/lib/libfuturize/__init__.py b/lib/libfuturize/__init__.py new file mode 100644 index 00000000..4cb1cbcd --- /dev/null +++ b/lib/libfuturize/__init__.py @@ -0,0 +1 @@ +# empty to make this a package diff --git a/lib/libfuturize/fixer_util.py b/lib/libfuturize/fixer_util.py new file mode 100644 index 00000000..48e4689d --- /dev/null +++ b/lib/libfuturize/fixer_util.py @@ -0,0 +1,520 @@ +""" +Utility functions from 2to3, 3to2 and python-modernize (and some home-grown +ones). + +Licences: +2to3: PSF License v2 +3to2: Apache Software License (from 3to2/setup.py) +python-modernize licence: BSD (from python-modernize/LICENSE) +""" + +from lib2to3.fixer_util import (FromImport, Newline, is_import, + find_root, does_tree_import, Comma) +from lib2to3.pytree import Leaf, Node +from lib2to3.pygram import python_symbols as syms, python_grammar +from lib2to3.pygram import token +from lib2to3.fixer_util import (Node, Call, Name, syms, Comma, Number) +import re + + +def canonical_fix_name(fix, avail_fixes): + """ + Examples: + >>> canonical_fix_name('fix_wrap_text_literals') + 'libfuturize.fixes.fix_wrap_text_literals' + >>> canonical_fix_name('wrap_text_literals') + 'libfuturize.fixes.fix_wrap_text_literals' + >>> canonical_fix_name('wrap_te') + ValueError("unknown fixer name") + >>> canonical_fix_name('wrap') + ValueError("ambiguous fixer name") + """ + if ".fix_" in fix: + return fix + else: + if fix.startswith('fix_'): + fix = fix[4:] + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + raise ValueError("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found)) + elif len(found) == 0: + raise ValueError("Unknown fixer. Use --list-fixes or -l for a list.") + return found[0] + + + +## These functions are from 3to2 by Joe Amenta: + +def Star(prefix=None): + return Leaf(token.STAR, u'*', prefix=prefix) + +def DoubleStar(prefix=None): + return Leaf(token.DOUBLESTAR, u'**', prefix=prefix) + +def Minus(prefix=None): + return Leaf(token.MINUS, u'-', prefix=prefix) + +def commatize(leafs): + """ + Accepts/turns: (Name, Name, ..., Name, Name) + Returns/into: (Name, Comma, Name, Comma, ..., Name, Comma, Name) + """ + new_leafs = [] + for leaf in leafs: + new_leafs.append(leaf) + new_leafs.append(Comma()) + del new_leafs[-1] + return new_leafs + +def indentation(node): + """ + Returns the indentation for this node + Iff a node is in a suite, then it has indentation. + """ + while node.parent is not None and node.parent.type != syms.suite: + node = node.parent + if node.parent is None: + return u"" + # The first three children of a suite are NEWLINE, INDENT, (some other node) + # INDENT.value contains the indentation for this suite + # anything after (some other node) has the indentation as its prefix. + if node.type == token.INDENT: + return node.value + elif node.prev_sibling is not None and node.prev_sibling.type == token.INDENT: + return node.prev_sibling.value + elif node.prev_sibling is None: + return u"" + else: + return node.prefix + +def indentation_step(node): + """ + Dirty little trick to get the difference between each indentation level + Implemented by finding the shortest indentation string + (technically, the "least" of all of the indentation strings, but + tabs and spaces mixed won't get this far, so those are synonymous.) + """ + r = find_root(node) + # Collect all indentations into one set. + all_indents = set(i.value for i in r.pre_order() if i.type == token.INDENT) + if not all_indents: + # nothing is indented anywhere, so we get to pick what we want + return u" " # four spaces is a popular convention + else: + return min(all_indents) + +def suitify(parent): + """ + Turn the stuff after the first colon in parent's children + into a suite, if it wasn't already + """ + for node in parent.children: + if node.type == syms.suite: + # already in the prefered format, do nothing + return + + # One-liners have no suite node, we have to fake one up + for i, node in enumerate(parent.children): + if node.type == token.COLON: + break + else: + raise ValueError(u"No class suite and no ':'!") + # Move everything into a suite node + suite = Node(syms.suite, [Newline(), Leaf(token.INDENT, indentation(node) + indentation_step(node))]) + one_node = parent.children[i+1] + one_node.remove() + one_node.prefix = u'' + suite.append_child(one_node) + parent.append_child(suite) + +def NameImport(package, as_name=None, prefix=None): + """ + Accepts a package (Name node), name to import it as (string), and + optional prefix and returns a node: + import [as ] + """ + if prefix is None: + prefix = u"" + children = [Name(u"import", prefix=prefix), package] + if as_name is not None: + children.extend([Name(u"as", prefix=u" "), + Name(as_name, prefix=u" ")]) + return Node(syms.import_name, children) + +_compound_stmts = (syms.if_stmt, syms.while_stmt, syms.for_stmt, syms.try_stmt, syms.with_stmt) +_import_stmts = (syms.import_name, syms.import_from) + +def import_binding_scope(node): + """ + Generator yields all nodes for which a node (an import_stmt) has scope + The purpose of this is for a call to _find() on each of them + """ + # import_name / import_from are small_stmts + assert node.type in _import_stmts + test = node.next_sibling + # A small_stmt can only be followed by a SEMI or a NEWLINE. + while test.type == token.SEMI: + nxt = test.next_sibling + # A SEMI can only be followed by a small_stmt or a NEWLINE + if nxt.type == token.NEWLINE: + break + else: + yield nxt + # A small_stmt can only be followed by either a SEMI or a NEWLINE + test = nxt.next_sibling + # Covered all subsequent small_stmts after the import_stmt + # Now to cover all subsequent stmts after the parent simple_stmt + parent = node.parent + assert parent.type == syms.simple_stmt + test = parent.next_sibling + while test is not None: + # Yes, this will yield NEWLINE and DEDENT. Deal with it. + yield test + test = test.next_sibling + + context = parent.parent + # Recursively yield nodes following imports inside of a if/while/for/try/with statement + if context.type in _compound_stmts: + # import is in a one-liner + c = context + while c.next_sibling is not None: + yield c.next_sibling + c = c.next_sibling + context = context.parent + + # Can't chain one-liners on one line, so that takes care of that. + + p = context.parent + if p is None: + return + + # in a multi-line suite + + while p.type in _compound_stmts: + + if context.type == syms.suite: + yield context + + context = context.next_sibling + + if context is None: + context = p.parent + p = context.parent + if p is None: + break + +def ImportAsName(name, as_name, prefix=None): + new_name = Name(name) + new_as = Name(u"as", prefix=u" ") + new_as_name = Name(as_name, prefix=u" ") + new_node = Node(syms.import_as_name, [new_name, new_as, new_as_name]) + if prefix is not None: + new_node.prefix = prefix + return new_node + + +def is_docstring(node): + """ + Returns True if the node appears to be a docstring + """ + return (node.type == syms.simple_stmt and + len(node.children) > 0 and node.children[0].type == token.STRING) + + +def future_import(feature, node): + """ + This seems to work + """ + root = find_root(node) + + if does_tree_import(u"__future__", feature, node): + return + + # Look for a shebang or encoding line + shebang_encoding_idx = None + + for idx, node in enumerate(root.children): + # Is it a shebang or encoding line? + if is_shebang_comment(node) or is_encoding_comment(node): + shebang_encoding_idx = idx + if is_docstring(node): + # skip over docstring + continue + names = check_future_import(node) + if not names: + # not a future statement; need to insert before this + break + if feature in names: + # already imported + return + + import_ = FromImport(u'__future__', [Leaf(token.NAME, feature, prefix=" ")]) + if shebang_encoding_idx == 0 and idx == 0: + # If this __future__ import would go on the first line, + # detach the shebang / encoding prefix from the current first line. + # and attach it to our new __future__ import node. + import_.prefix = root.children[0].prefix + root.children[0].prefix = u'' + # End the __future__ import line with a newline and add a blank line + # afterwards: + children = [import_ , Newline()] + root.insert_child(idx, Node(syms.simple_stmt, children)) + + +def future_import2(feature, node): + """ + An alternative to future_import() which might not work ... + """ + root = find_root(node) + + if does_tree_import(u"__future__", feature, node): + return + + insert_pos = 0 + for idx, node in enumerate(root.children): + if node.type == syms.simple_stmt and node.children and \ + node.children[0].type == token.STRING: + insert_pos = idx + 1 + break + + for thing_after in root.children[insert_pos:]: + if thing_after.type == token.NEWLINE: + insert_pos += 1 + continue + + prefix = thing_after.prefix + thing_after.prefix = u"" + break + else: + prefix = u"" + + import_ = FromImport(u"__future__", [Leaf(token.NAME, feature, prefix=u" ")]) + + children = [import_, Newline()] + root.insert_child(insert_pos, Node(syms.simple_stmt, children, prefix=prefix)) + +def parse_args(arglist, scheme): + u""" + Parse a list of arguments into a dict + """ + arglist = [i for i in arglist if i.type != token.COMMA] + + ret_mapping = dict([(k, None) for k in scheme]) + + for i, arg in enumerate(arglist): + if arg.type == syms.argument and arg.children[1].type == token.EQUAL: + # argument < NAME '=' any > + slot = arg.children[0].value + ret_mapping[slot] = arg.children[2] + else: + slot = scheme[i] + ret_mapping[slot] = arg + + return ret_mapping + + +# def is_import_from(node): +# """Returns true if the node is a statement "from ... import ..." +# """ +# return node.type == syms.import_from + + +def is_import_stmt(node): + return (node.type == syms.simple_stmt and node.children and + is_import(node.children[0])) + + +def touch_import_top(package, name_to_import, node): + """Works like `does_tree_import` but adds an import statement at the + top if it was not imported (but below any __future__ imports) and below any + comments such as shebang lines). + + Based on lib2to3.fixer_util.touch_import() + + Calling this multiple times adds the imports in reverse order. + + Also adds "standard_library.install_aliases()" after "from future import + standard_library". This should probably be factored into another function. + """ + + root = find_root(node) + + if does_tree_import(package, name_to_import, root): + return + + # Ideally, we would look for whether futurize --all-imports has been run, + # as indicated by the presence of ``from builtins import (ascii, ..., + # zip)`` -- and, if it has, we wouldn't import the name again. + + # Look for __future__ imports and insert below them + found = False + for name in ['absolute_import', 'division', 'print_function', + 'unicode_literals']: + if does_tree_import('__future__', name, root): + found = True + break + if found: + # At least one __future__ import. We want to loop until we've seen them + # all. + start, end = None, None + for idx, node in enumerate(root.children): + if check_future_import(node): + start = idx + # Start looping + idx2 = start + while node: + node = node.next_sibling + idx2 += 1 + if not check_future_import(node): + end = idx2 + break + break + assert start is not None + assert end is not None + insert_pos = end + else: + # No __future__ imports. + # We look for a docstring and insert the new node below that. If no docstring + # exists, just insert the node at the top. + for idx, node in enumerate(root.children): + if node.type != syms.simple_stmt: + break + if not is_docstring(node): + # This is the usual case. + break + insert_pos = idx + + if package is None: + import_ = Node(syms.import_name, [ + Leaf(token.NAME, u"import"), + Leaf(token.NAME, name_to_import, prefix=u" ") + ]) + else: + import_ = FromImport(package, [Leaf(token.NAME, name_to_import, prefix=u" ")]) + if name_to_import == u'standard_library': + # Add: + # standard_library.install_aliases() + # after: + # from future import standard_library + install_hooks = Node(syms.simple_stmt, + [Node(syms.power, + [Leaf(token.NAME, u'standard_library'), + Node(syms.trailer, [Leaf(token.DOT, u'.'), + Leaf(token.NAME, u'install_aliases')]), + Node(syms.trailer, [Leaf(token.LPAR, u'('), + Leaf(token.RPAR, u')')]) + ]) + ] + ) + children_hooks = [install_hooks, Newline()] + else: + children_hooks = [] + + # FromImport(package, [Leaf(token.NAME, name_to_import, prefix=u" ")]) + + children_import = [import_, Newline()] + old_prefix = root.children[insert_pos].prefix + root.children[insert_pos].prefix = u'' + root.insert_child(insert_pos, Node(syms.simple_stmt, children_import, prefix=old_prefix)) + if len(children_hooks) > 0: + root.insert_child(insert_pos + 1, Node(syms.simple_stmt, children_hooks)) + + +## The following functions are from python-modernize by Armin Ronacher: +# (a little edited). + +def check_future_import(node): + """If this is a future import, return set of symbols that are imported, + else return None.""" + # node should be the import statement here + savenode = node + if not (node.type == syms.simple_stmt and node.children): + return set() + node = node.children[0] + # now node is the import_from node + if not (node.type == syms.import_from and + # node.type == token.NAME and # seems to break it + hasattr(node.children[1], 'value') and + node.children[1].value == u'__future__'): + return set() + if node.children[3].type == token.LPAR: + node = node.children[4] + else: + node = node.children[3] + # now node is the import_as_name[s] + # print(python_grammar.number2symbol[node.type]) # breaks sometimes + if node.type == syms.import_as_names: + result = set() + for n in node.children: + if n.type == token.NAME: + result.add(n.value) + elif n.type == syms.import_as_name: + n = n.children[0] + assert n.type == token.NAME + result.add(n.value) + return result + elif node.type == syms.import_as_name: + node = node.children[0] + assert node.type == token.NAME + return set([node.value]) + elif node.type == token.NAME: + return set([node.value]) + else: + # TODO: handle brackets like this: + # from __future__ import (absolute_import, division) + assert False, "strange import: %s" % savenode + + +SHEBANG_REGEX = r'^#!.*python' +ENCODING_REGEX = r"^#.*coding[:=]\s*([-\w.]+)" + + +def is_shebang_comment(node): + """ + Comments are prefixes for Leaf nodes. Returns whether the given node has a + prefix that looks like a shebang line or an encoding line: + + #!/usr/bin/env python + #!/usr/bin/python3 + """ + return bool(re.match(SHEBANG_REGEX, node.prefix)) + + +def is_encoding_comment(node): + """ + Comments are prefixes for Leaf nodes. Returns whether the given node has a + prefix that looks like an encoding line: + + # coding: utf-8 + # encoding: utf-8 + # -*- coding: -*- + # vim: set fileencoding= : + """ + return bool(re.match(ENCODING_REGEX, node.prefix)) + + +def wrap_in_fn_call(fn_name, args, prefix=None): + """ + Example: + >>> wrap_in_fn_call("oldstr", (arg,)) + oldstr(arg) + + >>> wrap_in_fn_call("olddiv", (arg1, arg2)) + olddiv(arg1, arg2) + + >>> wrap_in_fn_call("olddiv", [arg1, comma, arg2, comma, arg3]) + olddiv(arg1, arg2, arg3) + """ + assert len(args) > 0 + if len(args) == 2: + expr1, expr2 = args + newargs = [expr1, Comma(), expr2] + else: + newargs = args + return Call(Name(fn_name), newargs, prefix=prefix) diff --git a/lib/libfuturize/fixes/__init__.py b/lib/libfuturize/fixes/__init__.py new file mode 100644 index 00000000..0b562501 --- /dev/null +++ b/lib/libfuturize/fixes/__init__.py @@ -0,0 +1,97 @@ +import sys +from lib2to3 import refactor + +# The following fixers are "safe": they convert Python 2 code to more +# modern Python 2 code. They should be uncontroversial to apply to most +# projects that are happy to drop support for Py2.5 and below. Applying +# them first will reduce the size of the patch set for the real porting. +lib2to3_fix_names_stage1 = set([ + 'lib2to3.fixes.fix_apply', + 'lib2to3.fixes.fix_except', + 'lib2to3.fixes.fix_exec', + 'lib2to3.fixes.fix_exitfunc', + 'lib2to3.fixes.fix_funcattrs', + 'lib2to3.fixes.fix_has_key', + 'lib2to3.fixes.fix_idioms', + # 'lib2to3.fixes.fix_import', # makes any implicit relative imports explicit. (Use with ``from __future__ import absolute_import) + 'lib2to3.fixes.fix_intern', + 'lib2to3.fixes.fix_isinstance', + 'lib2to3.fixes.fix_methodattrs', + 'lib2to3.fixes.fix_ne', + # 'lib2to3.fixes.fix_next', # would replace ``next`` method names + # with ``__next__``. + 'lib2to3.fixes.fix_numliterals', # turns 1L into 1, 0755 into 0o755 + 'lib2to3.fixes.fix_paren', + # 'lib2to3.fixes.fix_print', # see the libfuturize fixer that also + # adds ``from __future__ import print_function`` + # 'lib2to3.fixes.fix_raise', # uses incompatible with_traceback() method on exceptions + 'lib2to3.fixes.fix_reduce', # reduce is available in functools on Py2.6/Py2.7 + 'lib2to3.fixes.fix_renames', # sys.maxint -> sys.maxsize + # 'lib2to3.fixes.fix_set_literal', # this is unnecessary and breaks Py2.6 support + 'lib2to3.fixes.fix_repr', + 'lib2to3.fixes.fix_standarderror', + 'lib2to3.fixes.fix_sys_exc', + 'lib2to3.fixes.fix_throw', + 'lib2to3.fixes.fix_tuple_params', + 'lib2to3.fixes.fix_types', + 'lib2to3.fixes.fix_ws_comma', # can perhaps decrease readability: see issue #58 + 'lib2to3.fixes.fix_xreadlines', +]) + +# The following fixers add a dependency on the ``future`` package on order to +# support Python 2: +lib2to3_fix_names_stage2 = set([ + # 'lib2to3.fixes.fix_buffer', # perhaps not safe. Test this. + # 'lib2to3.fixes.fix_callable', # not needed in Py3.2+ + 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. and move to stage2 + # 'lib2to3.fixes.fix_execfile', # some problems: see issue #37. + # We use a custom fixer instead (see below) + # 'lib2to3.fixes.fix_future', # we don't want to remove __future__ imports + 'lib2to3.fixes.fix_getcwdu', + # 'lib2to3.fixes.fix_imports', # called by libfuturize.fixes.fix_future_standard_library + # 'lib2to3.fixes.fix_imports2', # we don't handle this yet (dbm) + # 'lib2to3.fixes.fix_input', # Called conditionally by libfuturize.fixes.fix_input + 'lib2to3.fixes.fix_itertools', + 'lib2to3.fixes.fix_itertools_imports', + 'lib2to3.fixes.fix_filter', + 'lib2to3.fixes.fix_long', + 'lib2to3.fixes.fix_map', + # 'lib2to3.fixes.fix_metaclass', # causes SyntaxError in Py2! Use the one from ``six`` instead + 'lib2to3.fixes.fix_next', + 'lib2to3.fixes.fix_nonzero', # TODO: cause this to import ``object`` and/or add a decorator for mapping __bool__ to __nonzero__ + 'lib2to3.fixes.fix_operator', # we will need support for this by e.g. extending the Py2 operator module to provide those functions in Py3 + 'lib2to3.fixes.fix_raw_input', + # 'lib2to3.fixes.fix_unicode', # strips off the u'' prefix, which removes a potentially helpful source of information for disambiguating unicode/byte strings + # 'lib2to3.fixes.fix_urllib', # included in libfuturize.fix_future_standard_library_urllib + # 'lib2to3.fixes.fix_xrange', # custom one because of a bug with Py3.3's lib2to3 + 'lib2to3.fixes.fix_zip', +]) + +libfuturize_fix_names_stage1 = set([ + 'libfuturize.fixes.fix_absolute_import', + 'libfuturize.fixes.fix_next_call', # obj.next() -> next(obj). Unlike + # lib2to3.fixes.fix_next, doesn't change + # the ``next`` method to ``__next__``. + 'libfuturize.fixes.fix_print_with_import', + 'libfuturize.fixes.fix_raise', + # 'libfuturize.fixes.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing +]) + +libfuturize_fix_names_stage2 = set([ + 'libfuturize.fixes.fix_basestring', + # 'libfuturize.fixes.fix_add__future__imports_except_unicode_literals', # just in case + 'libfuturize.fixes.fix_cmp', + 'libfuturize.fixes.fix_division_safe', + 'libfuturize.fixes.fix_execfile', + 'libfuturize.fixes.fix_future_builtins', + 'libfuturize.fixes.fix_future_standard_library', + 'libfuturize.fixes.fix_future_standard_library_urllib', + 'libfuturize.fixes.fix_input', + 'libfuturize.fixes.fix_metaclass', + 'libpasteurize.fixes.fix_newstyle', + 'libfuturize.fixes.fix_object', + # 'libfuturize.fixes.fix_order___future__imports', # TODO: consolidate to a single line to simplify testing + 'libfuturize.fixes.fix_unicode_keep_u', + # 'libfuturize.fixes.fix_unicode_literals_import', + 'libfuturize.fixes.fix_xrange_with_import', # custom one because of a bug with Py3.3's lib2to3 +]) diff --git a/lib/libfuturize/fixes/fix_UserDict.py b/lib/libfuturize/fixes/fix_UserDict.py new file mode 100644 index 00000000..cb0cfacc --- /dev/null +++ b/lib/libfuturize/fixes/fix_UserDict.py @@ -0,0 +1,102 @@ +"""Fix UserDict. + +Incomplete! + +TODO: base this on fix_urllib perhaps? +""" + + +# Local imports +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, attr_chain +from lib2to3.fixes.fix_imports import alternates, build_pattern, FixImports + +MAPPING = {'UserDict': 'collections', +} + +# def alternates(members): +# return "(" + "|".join(map(repr, members)) + ")" +# +# +# def build_pattern(mapping=MAPPING): +# mod_list = ' | '.join(["module_name='%s'" % key for key in mapping]) +# bare_names = alternates(mapping.keys()) +# +# yield """name_import=import_name< 'import' ((%s) | +# multiple_imports=dotted_as_names< any* (%s) any* >) > +# """ % (mod_list, mod_list) +# yield """import_from< 'from' (%s) 'import' ['('] +# ( any | import_as_name< any 'as' any > | +# import_as_names< any* >) [')'] > +# """ % mod_list +# yield """import_name< 'import' (dotted_as_name< (%s) 'as' any > | +# multiple_imports=dotted_as_names< +# any* dotted_as_name< (%s) 'as' any > any* >) > +# """ % (mod_list, mod_list) +# +# # Find usages of module members in code e.g. thread.foo(bar) +# yield "power< bare_with_attr=(%s) trailer<'.' any > any* >" % bare_names + + +# class FixUserDict(fixer_base.BaseFix): +class FixUserdict(FixImports): + + BM_compatible = True + keep_line_order = True + # This is overridden in fix_imports2. + mapping = MAPPING + + # We want to run this fixer late, so fix_import doesn't try to make stdlib + # renames into relative imports. + run_order = 6 + + def build_pattern(self): + return "|".join(build_pattern(self.mapping)) + + def compile_pattern(self): + # We override this, so MAPPING can be pragmatically altered and the + # changes will be reflected in PATTERN. + self.PATTERN = self.build_pattern() + super(FixImports, self).compile_pattern() + + # Don't match the node if it's within another match. + def match(self, node): + match = super(FixImports, self).match + results = match(node) + if results: + # Module usage could be in the trailer of an attribute lookup, so we + # might have nested matches when "bare_with_attr" is present. + if "bare_with_attr" not in results and \ + any(match(obj) for obj in attr_chain(node, "parent")): + return False + return results + return False + + def start_tree(self, tree, filename): + super(FixImports, self).start_tree(tree, filename) + self.replace = {} + + def transform(self, node, results): + import_mod = results.get("module_name") + if import_mod: + mod_name = import_mod.value + new_name = unicode(self.mapping[mod_name]) + import_mod.replace(Name(new_name, prefix=import_mod.prefix)) + if "name_import" in results: + # If it's not a "from x import x, y" or "import x as y" import, + # marked its usage to be replaced. + self.replace[mod_name] = new_name + if "multiple_imports" in results: + # This is a nasty hack to fix multiple imports on a line (e.g., + # "import StringIO, urlparse"). The problem is that I can't + # figure out an easy way to make a pattern recognize the keys of + # MAPPING randomly sprinkled in an import statement. + results = self.match(node) + if results: + self.transform(node, results) + else: + # Replace usage of the module. + bare_name = results["bare_with_attr"][0] + new_name = self.replace.get(bare_name.value) + if new_name: + bare_name.replace(Name(new_name, prefix=bare_name.prefix)) diff --git a/lib/libfuturize/fixes/fix_absolute_import.py b/lib/libfuturize/fixes/fix_absolute_import.py new file mode 100644 index 00000000..eab9c527 --- /dev/null +++ b/lib/libfuturize/fixes/fix_absolute_import.py @@ -0,0 +1,91 @@ +""" +Fixer for import statements, with a __future__ import line. + +Based on lib2to3/fixes/fix_import.py, but extended slightly so it also +supports Cython modules. + +If spam is being imported from the local directory, this import: + from spam import eggs +becomes: + from __future__ import absolute_import + from .spam import eggs + +and this import: + import spam +becomes: + from __future__ import absolute_import + from . import spam +""" + +from os.path import dirname, join, exists, sep +from lib2to3.fixes.fix_import import FixImport +from lib2to3.fixer_util import FromImport, syms +from lib2to3.fixes.fix_import import traverse_imports + +from libfuturize.fixer_util import future_import + + +class FixAbsoluteImport(FixImport): + run_order = 9 + + def transform(self, node, results): + """ + Copied from FixImport.transform(), but with this line added in + any modules that had implicit relative imports changed: + + from __future__ import absolute_import" + """ + if self.skip: + return + imp = results['imp'] + + if node.type == syms.import_from: + # Some imps are top-level (eg: 'import ham') + # some are first level (eg: 'import ham.eggs') + # some are third level (eg: 'import ham.eggs as spam') + # Hence, the loop + while not hasattr(imp, 'value'): + imp = imp.children[0] + if self.probably_a_local_import(imp.value): + imp.value = u"." + imp.value + imp.changed() + future_import(u"absolute_import", node) + else: + have_local = False + have_absolute = False + for mod_name in traverse_imports(imp): + if self.probably_a_local_import(mod_name): + have_local = True + else: + have_absolute = True + if have_absolute: + if have_local: + # We won't handle both sibling and absolute imports in the + # same statement at the moment. + self.warning(node, "absolute and local imports together") + return + + new = FromImport(u".", [imp]) + new.prefix = node.prefix + future_import(u"absolute_import", node) + return new + + def probably_a_local_import(self, imp_name): + """ + Like the corresponding method in the base class, but this also + supports Cython modules. + """ + if imp_name.startswith(u"."): + # Relative imports are certainly not local imports. + return False + imp_name = imp_name.split(u".", 1)[0] + base_path = dirname(self.filename) + base_path = join(base_path, imp_name) + # If there is no __init__.py next to the file its not in a package + # so can't be a relative import. + if not exists(join(dirname(base_path), "__init__.py")): + return False + for ext in [".py", sep, ".pyc", ".so", ".sl", ".pyd", ".pyx"]: + if exists(base_path + ext): + return True + return False diff --git a/lib/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py b/lib/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py new file mode 100644 index 00000000..37d7feec --- /dev/null +++ b/lib/libfuturize/fixes/fix_add__future__imports_except_unicode_literals.py @@ -0,0 +1,26 @@ +""" +Fixer for adding: + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + +This is "stage 1": hopefully uncontroversial changes. + +Stage 2 adds ``unicode_literals``. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import future_import + +class FixAddFutureImportsExceptUnicodeLiterals(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + + run_order = 9 + + def transform(self, node, results): + # Reverse order: + future_import(u"absolute_import", node) + future_import(u"division", node) + future_import(u"print_function", node) diff --git a/lib/libfuturize/fixes/fix_basestring.py b/lib/libfuturize/fixes/fix_basestring.py new file mode 100644 index 00000000..5676d08f --- /dev/null +++ b/lib/libfuturize/fixes/fix_basestring.py @@ -0,0 +1,17 @@ +""" +Fixer that adds ``from past.builtins import basestring`` if there is a +reference to ``basestring`` +""" + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixBasestring(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = "'basestring'" + + def transform(self, node, results): + touch_import_top(u'past.builtins', 'basestring', node) diff --git a/lib/libfuturize/fixes/fix_bytes.py b/lib/libfuturize/fixes/fix_bytes.py new file mode 100644 index 00000000..42021223 --- /dev/null +++ b/lib/libfuturize/fixes/fix_bytes.py @@ -0,0 +1,24 @@ +"""Optional fixer that changes all unprefixed string literals "..." to b"...". + +br'abcd' is a SyntaxError on Python 2 but valid on Python 3. +ur'abcd' is a SyntaxError on Python 3 but valid on Python 2. + +""" +from __future__ import unicode_literals + +import re +from lib2to3.pgen2 import token +from lib2to3 import fixer_base + +_literal_re = re.compile(r"[^bBuUrR]?[\'\"]") + +class FixBytes(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "STRING" + + def transform(self, node, results): + if node.type == token.STRING: + if _literal_re.match(node.value): + new = node.clone() + new.value = u'b' + new.value + return new diff --git a/lib/libfuturize/fixes/fix_cmp.py b/lib/libfuturize/fixes/fix_cmp.py new file mode 100644 index 00000000..762eb4b4 --- /dev/null +++ b/lib/libfuturize/fixes/fix_cmp.py @@ -0,0 +1,33 @@ +# coding: utf-8 +""" +Fixer for the cmp() function on Py2, which was removed in Py3. + +Adds this import line:: + + from past.builtins import cmp + +if cmp() is called in the code. +""" + +from __future__ import unicode_literals +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +expression = "name='cmp'" + + +class FixCmp(fixer_base.BaseFix): + BM_compatible = True + run_order = 9 + + PATTERN = """ + power< + ({0}) trailer< '(' args=[any] ')' > + rest=any* > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'past.builtins', name.value, node) diff --git a/lib/libfuturize/fixes/fix_division.py b/lib/libfuturize/fixes/fix_division.py new file mode 100644 index 00000000..6975a52b --- /dev/null +++ b/lib/libfuturize/fixes/fix_division.py @@ -0,0 +1,12 @@ +""" +UNFINISHED +For the ``future`` package. + +Adds this import line: + + from __future__ import division + +at the top so the code runs identically on Py3 and Py2.6/2.7 +""" + +from libpasteurize.fixes.fix_division import FixDivision diff --git a/lib/libfuturize/fixes/fix_division_safe.py b/lib/libfuturize/fixes/fix_division_safe.py new file mode 100644 index 00000000..3d5909cc --- /dev/null +++ b/lib/libfuturize/fixes/fix_division_safe.py @@ -0,0 +1,104 @@ +""" +For the ``future`` package. + +Adds this import line: + + from __future__ import division + +at the top and changes any old-style divisions to be calls to +past.utils.old_div so the code runs as before on Py2.6/2.7 and has the same +behaviour on Py3. + +If "from __future__ import division" is already in effect, this fixer does +nothing. +""" + +import re +from lib2to3.fixer_util import Leaf, Node, Comma +from lib2to3 import fixer_base +from libfuturize.fixer_util import (token, future_import, touch_import_top, + wrap_in_fn_call) + + +def match_division(node): + u""" + __future__.division redefines the meaning of a single slash for division, + so we match that and only that. + """ + slash = token.SLASH + return node.type == slash and not node.next_sibling.type == slash and \ + not node.prev_sibling.type == slash + +const_re = re.compile('^[0-9]*[.][0-9]*$') + +def is_floaty(node): + return _is_floaty(node.prev_sibling) or _is_floaty(node.next_sibling) + + +def _is_floaty(expr): + if isinstance(expr, list): + expr = expr[0] + + if isinstance(expr, Leaf): + # If it's a leaf, let's see if it's a numeric constant containing a '.' + return const_re.match(expr.value) + elif isinstance(expr, Node): + # If the expression is a node, let's see if it's a direct cast to float + if isinstance(expr.children[0], Leaf): + return expr.children[0].value == u'float' + return False + + +class FixDivisionSafe(fixer_base.BaseFix): + # BM_compatible = True + run_order = 4 # this seems to be ignored? + + _accept_type = token.SLASH + + PATTERN = """ + term<(not('/') any)+ '/' ((not('/') any))> + """ + + def start_tree(self, tree, name): + """ + Skip this fixer if "__future__.division" is already imported. + """ + super(FixDivisionSafe, self).start_tree(tree, name) + self.skip = "division" in tree.future_features + + def match(self, node): + u""" + Since the tree needs to be fixed once and only once if and only if it + matches, we can start discarding matches after the first. + """ + if node.type == self.syms.term: + matched = False + skip = False + children = [] + for child in node.children: + if skip: + skip = False + continue + if match_division(child) and not is_floaty(child): + matched = True + + # Strip any leading space for the first number: + children[0].prefix = u'' + + children = [wrap_in_fn_call("old_div", + children + [Comma(), child.next_sibling.clone()], + prefix=node.prefix)] + skip = True + else: + children.append(child.clone()) + if matched: + return Node(node.type, children, fixers_applied=node.fixers_applied) + + return False + + def transform(self, node, results): + if self.skip: + return + future_import(u"division", node) + touch_import_top(u'past.utils', u'old_div', node) + return results diff --git a/lib/libfuturize/fixes/fix_execfile.py b/lib/libfuturize/fixes/fix_execfile.py new file mode 100644 index 00000000..cfe9d8d0 --- /dev/null +++ b/lib/libfuturize/fixes/fix_execfile.py @@ -0,0 +1,37 @@ +# coding: utf-8 +""" +Fixer for the execfile() function on Py2, which was removed in Py3. + +The Lib/lib2to3/fixes/fix_execfile.py module has some problems: see +python-future issue #37. This fixer merely imports execfile() from +past.builtins and leaves the code alone. + +Adds this import line:: + + from past.builtins import execfile + +for the function execfile() that was removed from Py3. +""" + +from __future__ import unicode_literals +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +expression = "name='execfile'" + + +class FixExecfile(fixer_base.BaseFix): + BM_compatible = True + run_order = 9 + + PATTERN = """ + power< + ({0}) trailer< '(' args=[any] ')' > + rest=any* > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'past.builtins', name.value, node) diff --git a/lib/libfuturize/fixes/fix_future_builtins.py b/lib/libfuturize/fixes/fix_future_builtins.py new file mode 100644 index 00000000..eea6c6a1 --- /dev/null +++ b/lib/libfuturize/fixes/fix_future_builtins.py @@ -0,0 +1,59 @@ +""" +For the ``future`` package. + +Adds this import line:: + + from builtins import XYZ + +for each of the functions XYZ that is used in the module. + +Adds these imports after any other imports (in an initial block of them). +""" + +from __future__ import unicode_literals + +from lib2to3 import fixer_base +from lib2to3.pygram import python_symbols as syms +from lib2to3.fixer_util import Name, Call, in_special_context + +from libfuturize.fixer_util import touch_import_top + +# All builtins are: +# from future.builtins.iterators import (filter, map, zip) +# from future.builtins.misc import (ascii, chr, hex, input, isinstance, oct, open, round, super) +# from future.types import (bytes, dict, int, range, str) +# We don't need isinstance any more. + +replaced_builtin_fns = '''filter map zip + ascii chr hex input next oct + bytes range str raw_input'''.split() + # This includes raw_input as a workaround for the + # lib2to3 fixer for raw_input on Py3 (only), allowing + # the correct import to be included. (Py3 seems to run + # the fixers the wrong way around, perhaps ignoring the + # run_order class attribute below ...) + +expression = '|'.join(["name='{0}'".format(name) for name in replaced_builtin_fns]) + + +class FixFutureBuiltins(fixer_base.BaseFix): + BM_compatible = True + run_order = 7 + + # Currently we only match uses as a function. This doesn't match e.g.: + # if isinstance(s, str): + # ... + PATTERN = """ + power< + ({0}) trailer< '(' [arglist=any] ')' > + rest=any* > + | + power< + 'map' trailer< '(' [arglist=any] ')' > + > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'builtins', name.value, node) + # name.replace(Name(u"input", prefix=name.prefix)) diff --git a/lib/libfuturize/fixes/fix_future_standard_library.py b/lib/libfuturize/fixes/fix_future_standard_library.py new file mode 100644 index 00000000..a1c3f3d4 --- /dev/null +++ b/lib/libfuturize/fixes/fix_future_standard_library.py @@ -0,0 +1,24 @@ +""" +For the ``future`` package. + +Changes any imports needed to reflect the standard library reorganization. Also +Also adds these import lines: + + from future import standard_library + standard_library.install_aliases() + +after any __future__ imports but before any other imports. +""" + +from lib2to3.fixes.fix_imports import FixImports +from libfuturize.fixer_util import touch_import_top + + +class FixFutureStandardLibrary(FixImports): + run_order = 8 + + def transform(self, node, results): + result = super(FixFutureStandardLibrary, self).transform(node, results) + # TODO: add a blank line between any __future__ imports and this? + touch_import_top(u'future', u'standard_library', node) + return result diff --git a/lib/libfuturize/fixes/fix_future_standard_library_urllib.py b/lib/libfuturize/fixes/fix_future_standard_library_urllib.py new file mode 100644 index 00000000..cf673884 --- /dev/null +++ b/lib/libfuturize/fixes/fix_future_standard_library_urllib.py @@ -0,0 +1,28 @@ +""" +For the ``future`` package. + +A special fixer that ensures that these lines have been added:: + + from future import standard_library + standard_library.install_hooks() + +even if the only module imported was ``urllib``, in which case the regular fixer +wouldn't have added these lines. + +""" + +from lib2to3.fixes.fix_urllib import FixUrllib +from libfuturize.fixer_util import touch_import_top, find_root + + +class FixFutureStandardLibraryUrllib(FixUrllib): # not a subclass of FixImports + run_order = 8 + + def transform(self, node, results): + # transform_member() in lib2to3/fixes/fix_urllib.py breaks node so find_root(node) + # no longer works after the super() call below. So we find the root first: + root = find_root(node) + result = super(FixFutureStandardLibraryUrllib, self).transform(node, results) + # TODO: add a blank line between any __future__ imports and this? + touch_import_top(u'future', u'standard_library', root) + return result diff --git a/lib/libfuturize/fixes/fix_input.py b/lib/libfuturize/fixes/fix_input.py new file mode 100644 index 00000000..8a43882e --- /dev/null +++ b/lib/libfuturize/fixes/fix_input.py @@ -0,0 +1,32 @@ +""" +Fixer for input. + +Does a check for `from builtins import input` before running the lib2to3 fixer. +The fixer will not run when the input is already present. + + +this: + a = input() +becomes: + from builtins import input + a = eval(input()) + +and this: + from builtins import input + a = input() +becomes (no change): + from builtins import input + a = input() +""" + +import lib2to3.fixes.fix_input +from lib2to3.fixer_util import does_tree_import + + +class FixInput(lib2to3.fixes.fix_input.FixInput): + def transform(self, node, results): + + if does_tree_import('builtins', 'input', node): + return + + return super(FixInput, self).transform(node, results) diff --git a/lib/libfuturize/fixes/fix_metaclass.py b/lib/libfuturize/fixes/fix_metaclass.py new file mode 100644 index 00000000..2ac41c97 --- /dev/null +++ b/lib/libfuturize/fixes/fix_metaclass.py @@ -0,0 +1,262 @@ +# coding: utf-8 +"""Fixer for __metaclass__ = X -> (future.utils.with_metaclass(X)) methods. + + The various forms of classef (inherits nothing, inherits once, inherints + many) don't parse the same in the CST so we look at ALL classes for + a __metaclass__ and if we find one normalize the inherits to all be + an arglist. + + For one-liner classes ('class X: pass') there is no indent/dedent so + we normalize those into having a suite. + + Moving the __metaclass__ into the classdef can also cause the class + body to be empty so there is some special casing for that as well. + + This fixer also tries very hard to keep original indenting and spacing + in all those corner cases. +""" +# This is a derived work of Lib/lib2to3/fixes/fix_metaclass.py under the +# copyright of the Python Software Foundation, licensed under the Python +# Software Foundation License 2. +# +# Copyright notice: +# +# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +# 2011, 2012, 2013 Python Software Foundation. All rights reserved. +# +# Full license text: http://docs.python.org/3.4/license.html + +# Author: Jack Diederich, Daniel Neuhäuser + +# Local imports +from lib2to3 import fixer_base +from lib2to3.pygram import token +from lib2to3.fixer_util import Name, syms, Node, Leaf, touch_import, Call, \ + String, Comma, parenthesize + + +def has_metaclass(parent): + """ we have to check the cls_node without changing it. + There are two possiblities: + 1) clsdef => suite => simple_stmt => expr_stmt => Leaf('__meta') + 2) clsdef => simple_stmt => expr_stmt => Leaf('__meta') + """ + for node in parent.children: + if node.type == syms.suite: + return has_metaclass(node) + elif node.type == syms.simple_stmt and node.children: + expr_node = node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + left_side = expr_node.children[0] + if isinstance(left_side, Leaf) and \ + left_side.value == '__metaclass__': + return True + return False + + +def fixup_parse_tree(cls_node): + """ one-line classes don't get a suite in the parse tree so we add + one to normalize the tree + """ + for node in cls_node.children: + if node.type == syms.suite: + # already in the preferred format, do nothing + return + + # !%@#! oneliners have no suite node, we have to fake one up + for i, node in enumerate(cls_node.children): + if node.type == token.COLON: + break + else: + raise ValueError("No class suite and no ':'!") + + # move everything into a suite node + suite = Node(syms.suite, []) + while cls_node.children[i+1:]: + move_node = cls_node.children[i+1] + suite.append_child(move_node.clone()) + move_node.remove() + cls_node.append_child(suite) + node = suite + + +def fixup_simple_stmt(parent, i, stmt_node): + """ if there is a semi-colon all the parts count as part of the same + simple_stmt. We just want the __metaclass__ part so we move + everything efter the semi-colon into its own simple_stmt node + """ + for semi_ind, node in enumerate(stmt_node.children): + if node.type == token.SEMI: # *sigh* + break + else: + return + + node.remove() # kill the semicolon + new_expr = Node(syms.expr_stmt, []) + new_stmt = Node(syms.simple_stmt, [new_expr]) + while stmt_node.children[semi_ind:]: + move_node = stmt_node.children[semi_ind] + new_expr.append_child(move_node.clone()) + move_node.remove() + parent.insert_child(i, new_stmt) + new_leaf1 = new_stmt.children[0].children[0] + old_leaf1 = stmt_node.children[0].children[0] + new_leaf1.prefix = old_leaf1.prefix + + +def remove_trailing_newline(node): + if node.children and node.children[-1].type == token.NEWLINE: + node.children[-1].remove() + + +def find_metas(cls_node): + # find the suite node (Mmm, sweet nodes) + for node in cls_node.children: + if node.type == syms.suite: + break + else: + raise ValueError("No class suite!") + + # look for simple_stmt[ expr_stmt[ Leaf('__metaclass__') ] ] + for i, simple_node in list(enumerate(node.children)): + if simple_node.type == syms.simple_stmt and simple_node.children: + expr_node = simple_node.children[0] + if expr_node.type == syms.expr_stmt and expr_node.children: + # Check if the expr_node is a simple assignment. + left_node = expr_node.children[0] + if isinstance(left_node, Leaf) and \ + left_node.value == u'__metaclass__': + # We found a assignment to __metaclass__. + fixup_simple_stmt(node, i, simple_node) + remove_trailing_newline(simple_node) + yield (node, i, simple_node) + + +def fixup_indent(suite): + """ If an INDENT is followed by a thing with a prefix then nuke the prefix + Otherwise we get in trouble when removing __metaclass__ at suite start + """ + kids = suite.children[::-1] + # find the first indent + while kids: + node = kids.pop() + if node.type == token.INDENT: + break + + # find the first Leaf + while kids: + node = kids.pop() + if isinstance(node, Leaf) and node.type != token.DEDENT: + if node.prefix: + node.prefix = u'' + return + else: + kids.extend(node.children[::-1]) + + +class FixMetaclass(fixer_base.BaseFix): + BM_compatible = True + + PATTERN = """ + classdef + """ + + def transform(self, node, results): + if not has_metaclass(node): + return + + fixup_parse_tree(node) + + # find metaclasses, keep the last one + last_metaclass = None + for suite, i, stmt in find_metas(node): + last_metaclass = stmt + stmt.remove() + + text_type = node.children[0].type # always Leaf(nnn, 'class') + + # figure out what kind of classdef we have + if len(node.children) == 7: + # Node(classdef, ['class', 'name', '(', arglist, ')', ':', suite]) + # 0 1 2 3 4 5 6 + if node.children[3].type == syms.arglist: + arglist = node.children[3] + # Node(classdef, ['class', 'name', '(', 'Parent', ')', ':', suite]) + else: + parent = node.children[3].clone() + arglist = Node(syms.arglist, [parent]) + node.set_child(3, arglist) + elif len(node.children) == 6: + # Node(classdef, ['class', 'name', '(', ')', ':', suite]) + # 0 1 2 3 4 5 + arglist = Node(syms.arglist, []) + node.insert_child(3, arglist) + elif len(node.children) == 4: + # Node(classdef, ['class', 'name', ':', suite]) + # 0 1 2 3 + arglist = Node(syms.arglist, []) + node.insert_child(2, Leaf(token.RPAR, u')')) + node.insert_child(2, arglist) + node.insert_child(2, Leaf(token.LPAR, u'(')) + else: + raise ValueError("Unexpected class definition") + + # now stick the metaclass in the arglist + meta_txt = last_metaclass.children[0].children[0] + meta_txt.value = 'metaclass' + orig_meta_prefix = meta_txt.prefix + + # Was: touch_import(None, u'future.utils', node) + touch_import(u'future.utils', u'with_metaclass', node) + + metaclass = last_metaclass.children[0].children[2].clone() + metaclass.prefix = u'' + + arguments = [metaclass] + + if arglist.children: + if len(arglist.children) == 1: + base = arglist.children[0].clone() + base.prefix = u' ' + else: + # Unfortunately six.with_metaclass() only allows one base + # class, so we have to dynamically generate a base class if + # there is more than one. + bases = parenthesize(arglist.clone()) + bases.prefix = u' ' + base = Call(Name('type'), [ + String("'NewBase'"), + Comma(), + bases, + Comma(), + Node( + syms.atom, + [Leaf(token.LBRACE, u'{'), Leaf(token.RBRACE, u'}')], + prefix=u' ' + ) + ], prefix=u' ') + arguments.extend([Comma(), base]) + + arglist.replace(Call( + Name(u'with_metaclass', prefix=arglist.prefix), + arguments + )) + + fixup_indent(suite) + + # check for empty suite + if not suite.children: + # one-liner that was just __metaclass_ + suite.remove() + pass_leaf = Leaf(text_type, u'pass') + pass_leaf.prefix = orig_meta_prefix + node.append_child(pass_leaf) + node.append_child(Leaf(token.NEWLINE, u'\n')) + + elif len(suite.children) > 1 and \ + (suite.children[-2].type == token.INDENT and + suite.children[-1].type == token.DEDENT): + # there was only one line in the class body and it was __metaclass__ + pass_leaf = Leaf(text_type, u'pass') + suite.insert_child(-1, pass_leaf) + suite.insert_child(-1, Leaf(token.NEWLINE, u'\n')) diff --git a/lib/libfuturize/fixes/fix_next_call.py b/lib/libfuturize/fixes/fix_next_call.py new file mode 100644 index 00000000..282f1852 --- /dev/null +++ b/lib/libfuturize/fixes/fix_next_call.py @@ -0,0 +1,104 @@ +""" +Based on fix_next.py by Collin Winter. + +Replaces it.next() -> next(it), per PEP 3114. + +Unlike fix_next.py, this fixer doesn't replace the name of a next method with __next__, +which would break Python 2 compatibility without further help from fixers in +stage 2. +""" + +# Local imports +from lib2to3.pgen2 import token +from lib2to3.pygram import python_symbols as syms +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, Call, find_binding + +bind_warning = "Calls to builtin next() possibly shadowed by global binding" + + +class FixNextCall(fixer_base.BaseFix): + BM_compatible = True + PATTERN = """ + power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > > + | + power< head=any+ trailer< '.' attr='next' > not trailer< '(' ')' > > + | + global=global_stmt< 'global' any* 'next' any* > + """ + + order = "pre" # Pre-order tree traversal + + def start_tree(self, tree, filename): + super(FixNextCall, self).start_tree(tree, filename) + + n = find_binding('next', tree) + if n: + self.warning(n, bind_warning) + self.shadowed_next = True + else: + self.shadowed_next = False + + def transform(self, node, results): + assert results + + base = results.get("base") + attr = results.get("attr") + name = results.get("name") + + if base: + if self.shadowed_next: + # Omit this: + # attr.replace(Name("__next__", prefix=attr.prefix)) + pass + else: + base = [n.clone() for n in base] + base[0].prefix = "" + node.replace(Call(Name("next", prefix=node.prefix), base)) + elif name: + # Omit this: + # n = Name("__next__", prefix=name.prefix) + # name.replace(n) + pass + elif attr: + # We don't do this transformation if we're assigning to "x.next". + # Unfortunately, it doesn't seem possible to do this in PATTERN, + # so it's being done here. + if is_assign_target(node): + head = results["head"] + if "".join([str(n) for n in head]).strip() == '__builtin__': + self.warning(node, bind_warning) + return + # Omit this: + # attr.replace(Name("__next__")) + elif "global" in results: + self.warning(node, bind_warning) + self.shadowed_next = True + + +### The following functions help test if node is part of an assignment +### target. + +def is_assign_target(node): + assign = find_assign(node) + if assign is None: + return False + + for child in assign.children: + if child.type == token.EQUAL: + return False + elif is_subtree(child, node): + return True + return False + +def find_assign(node): + if node.type == syms.expr_stmt: + return node + if node.type == syms.simple_stmt or node.parent is None: + return None + return find_assign(node.parent) + +def is_subtree(root, node): + if root == node: + return True + return any(is_subtree(c, node) for c in root.children) diff --git a/lib/libfuturize/fixes/fix_object.py b/lib/libfuturize/fixes/fix_object.py new file mode 100644 index 00000000..accf2c52 --- /dev/null +++ b/lib/libfuturize/fixes/fix_object.py @@ -0,0 +1,17 @@ +""" +Fixer that adds ``from builtins import object`` if there is a line +like this: + class Foo(object): +""" + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixObject(fixer_base.BaseFix): + + PATTERN = u"classdef< 'class' NAME '(' name='object' ')' colon=':' any >" + + def transform(self, node, results): + touch_import_top(u'builtins', 'object', node) diff --git a/lib/libfuturize/fixes/fix_oldstr_wrap.py b/lib/libfuturize/fixes/fix_oldstr_wrap.py new file mode 100644 index 00000000..ad58771d --- /dev/null +++ b/lib/libfuturize/fixes/fix_oldstr_wrap.py @@ -0,0 +1,39 @@ +""" +For the ``future`` package. + +Adds this import line: + + from past.builtins import str as oldstr + +at the top and wraps any unadorned string literals 'abc' or explicit byte-string +literals b'abc' in oldstr() calls so the code has the same behaviour on Py3 as +on Py2.6/2.7. +""" + +from __future__ import unicode_literals +import re +from lib2to3 import fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import syms +from libfuturize.fixer_util import (future_import, touch_import_top, + wrap_in_fn_call) + + +_literal_re = re.compile(r"[^uUrR]?[\'\"]") + + +class FixOldstrWrap(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "STRING" + + def transform(self, node, results): + if node.type == token.STRING: + touch_import_top(u'past.types', u'oldstr', node) + if _literal_re.match(node.value): + new = node.clone() + # Strip any leading space or comments: + # TODO: check: do we really want to do this? + new.prefix = u'' + new.value = u'b' + new.value + wrapped = wrap_in_fn_call("oldstr", [new], prefix=node.prefix) + return wrapped diff --git a/lib/libfuturize/fixes/fix_order___future__imports.py b/lib/libfuturize/fixes/fix_order___future__imports.py new file mode 100644 index 00000000..00d7ef60 --- /dev/null +++ b/lib/libfuturize/fixes/fix_order___future__imports.py @@ -0,0 +1,36 @@ +""" +UNFINISHED + +Fixer for turning multiple lines like these: + + from __future__ import division + from __future__ import absolute_import + from __future__ import print_function + +into a single line like this: + + from __future__ import (absolute_import, division, print_function) + +This helps with testing of ``futurize``. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import future_import + +class FixOrderFutureImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + + run_order = 10 + + # def match(self, node): + # """ + # Match only once per file + # """ + # if hasattr(node, 'type') and node.type == syms.file_input: + # return True + # return False + + def transform(self, node, results): + # TODO # write me + pass diff --git a/lib/libfuturize/fixes/fix_print.py b/lib/libfuturize/fixes/fix_print.py new file mode 100644 index 00000000..247b91b8 --- /dev/null +++ b/lib/libfuturize/fixes/fix_print.py @@ -0,0 +1,94 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Fixer for print. + +Change: + "print" into "print()" + "print ..." into "print(...)" + "print(...)" not changed + "print ... ," into "print(..., end=' ')" + "print >>x, ..." into "print(..., file=x)" + +No changes are applied if print_function is imported from __future__ + +""" + +# Local imports +from lib2to3 import patcomp, pytree, fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Name, Call, Comma, String +# from libmodernize import add_future + +parend_expr = patcomp.compile_pattern( + """atom< '(' [arith_expr|atom|power|term|STRING|NAME] ')' >""" + ) + + +class FixPrint(fixer_base.BaseFix): + + BM_compatible = True + + PATTERN = """ + simple_stmt< any* bare='print' any* > | print_stmt + """ + + def transform(self, node, results): + assert results + + bare_print = results.get("bare") + + if bare_print: + # Special-case print all by itself. + bare_print.replace(Call(Name(u"print"), [], + prefix=bare_print.prefix)) + # The "from __future__ import print_function"" declaration is added + # by the fix_print_with_import fixer, so we skip it here. + # add_future(node, u'print_function') + return + assert node.children[0] == Name(u"print") + args = node.children[1:] + if len(args) == 1 and parend_expr.match(args[0]): + # We don't want to keep sticking parens around an + # already-parenthesised expression. + return + + sep = end = file = None + if args and args[-1] == Comma(): + args = args[:-1] + end = " " + if args and args[0] == pytree.Leaf(token.RIGHTSHIFT, u">>"): + assert len(args) >= 2 + file = args[1].clone() + args = args[3:] # Strip a possible comma after the file expression + # Now synthesize a print(args, sep=..., end=..., file=...) node. + l_args = [arg.clone() for arg in args] + if l_args: + l_args[0].prefix = u"" + if sep is not None or end is not None or file is not None: + if sep is not None: + self.add_kwarg(l_args, u"sep", String(repr(sep))) + if end is not None: + self.add_kwarg(l_args, u"end", String(repr(end))) + if file is not None: + self.add_kwarg(l_args, u"file", file) + n_stmt = Call(Name(u"print"), l_args) + n_stmt.prefix = node.prefix + + # Note that there are corner cases where adding this future-import is + # incorrect, for example when the file also has a 'print ()' statement + # that was intended to print "()". + # add_future(node, u'print_function') + return n_stmt + + def add_kwarg(self, l_nodes, s_kwd, n_expr): + # XXX All this prefix-setting may lose comments (though rarely) + n_expr.prefix = u"" + n_argument = pytree.Node(self.syms.argument, + (Name(s_kwd), + pytree.Leaf(token.EQUAL, u"="), + n_expr)) + if l_nodes: + l_nodes.append(Comma()) + n_argument.prefix = u" " + l_nodes.append(n_argument) diff --git a/lib/libfuturize/fixes/fix_print_with_import.py b/lib/libfuturize/fixes/fix_print_with_import.py new file mode 100644 index 00000000..34490461 --- /dev/null +++ b/lib/libfuturize/fixes/fix_print_with_import.py @@ -0,0 +1,22 @@ +""" +For the ``future`` package. + +Turns any print statements into functions and adds this import line: + + from __future__ import print_function + +at the top to retain compatibility with Python 2.6+. +""" + +from libfuturize.fixes.fix_print import FixPrint +from libfuturize.fixer_util import future_import + +class FixPrintWithImport(FixPrint): + run_order = 7 + def transform(self, node, results): + # Add the __future__ import first. (Otherwise any shebang or encoding + # comment line attached as a prefix to the print statement will be + # copied twice and appear twice.) + future_import(u'print_function', node) + n_stmt = super(FixPrintWithImport, self).transform(node, results) + return n_stmt diff --git a/lib/libfuturize/fixes/fix_raise.py b/lib/libfuturize/fixes/fix_raise.py new file mode 100644 index 00000000..f7518416 --- /dev/null +++ b/lib/libfuturize/fixes/fix_raise.py @@ -0,0 +1,107 @@ +"""Fixer for 'raise E, V' + +From Armin Ronacher's ``python-modernize``. + +raise -> raise +raise E -> raise E +raise E, 5 -> raise E(5) +raise E, 5, T -> raise E(5).with_traceback(T) +raise E, None, T -> raise E.with_traceback(T) + +raise (((E, E'), E''), E'''), 5 -> raise E(5) +raise "foo", V, T -> warns about string exceptions + +raise E, (V1, V2) -> raise E(V1, V2) +raise E, (V1, V2), T -> raise E(V1, V2).with_traceback(T) + + +CAVEATS: +1) "raise E, V, T" cannot be translated safely in general. If V + is not a tuple or a (number, string, None) literal, then: + + raise E, V, T -> from future.utils import raise_ + raise_(E, V, T) +""" +# Author: Collin Winter, Armin Ronacher, Mark Huang + +# Local imports +from lib2to3 import pytree, fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Name, Call, is_tuple, Comma, Attr, ArgList + +from libfuturize.fixer_util import touch_import_top + + +class FixRaise(fixer_base.BaseFix): + + BM_compatible = True + PATTERN = """ + raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] > + """ + + def transform(self, node, results): + syms = self.syms + + exc = results["exc"].clone() + if exc.type == token.STRING: + msg = "Python 3 does not support string exceptions" + self.cannot_convert(node, msg) + return + + # Python 2 supports + # raise ((((E1, E2), E3), E4), E5), V + # as a synonym for + # raise E1, V + # Since Python 3 will not support this, we recurse down any tuple + # literals, always taking the first element. + if is_tuple(exc): + while is_tuple(exc): + # exc.children[1:-1] is the unparenthesized tuple + # exc.children[1].children[0] is the first element of the tuple + exc = exc.children[1].children[0].clone() + exc.prefix = u" " + + if "tb" in results: + tb = results["tb"].clone() + else: + tb = None + + if "val" in results: + val = results["val"].clone() + if is_tuple(val): + # Assume that exc is a subclass of Exception and call exc(*val). + args = [c.clone() for c in val.children[1:-1]] + exc = Call(exc, args) + elif val.type in (token.NUMBER, token.STRING): + # Handle numeric and string literals specially, e.g. + # "raise Exception, 5" -> "raise Exception(5)". + val.prefix = u"" + exc = Call(exc, [val]) + elif val.type == token.NAME and val.value == u"None": + # Handle None specially, e.g. + # "raise Exception, None" -> "raise Exception". + pass + else: + # val is some other expression. If val evaluates to an instance + # of exc, it should just be raised. If val evaluates to None, + # a default instance of exc should be raised (as above). If val + # evaluates to a tuple, exc(*val) should be called (as + # above). Otherwise, exc(val) should be called. We can only + # tell what to do at runtime, so defer to future.utils.raise_(), + # which handles all of these cases. + touch_import_top(u"future.utils", u"raise_", node) + exc.prefix = u"" + args = [exc, Comma(), val] + if tb is not None: + args += [Comma(), tb] + return Call(Name(u"raise_"), args) + + if tb is not None: + tb.prefix = "" + exc_list = Attr(exc, Name('with_traceback')) + [ArgList([tb])] + else: + exc_list = [exc] + + return pytree.Node(syms.raise_stmt, + [Name(u"raise")] + exc_list, + prefix=node.prefix) diff --git a/lib/libfuturize/fixes/fix_remove_old__future__imports.py b/lib/libfuturize/fixes/fix_remove_old__future__imports.py new file mode 100644 index 00000000..9336f75f --- /dev/null +++ b/lib/libfuturize/fixes/fix_remove_old__future__imports.py @@ -0,0 +1,26 @@ +""" +Fixer for removing any of these lines: + + from __future__ import with_statement + from __future__ import nested_scopes + from __future__ import generators + +The reason is that __future__ imports like these are required to be the first +line of code (after docstrings) on Python 2.6+, which can get in the way. + +These imports are always enabled in Python 2.6+, which is the minimum sane +version to target for Py2/3 compatibility. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import remove_future_import + +class FixRemoveOldFutureImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 1 + + def transform(self, node, results): + remove_future_import(u"with_statement", node) + remove_future_import(u"nested_scopes", node) + remove_future_import(u"generators", node) diff --git a/lib/libfuturize/fixes/fix_unicode_keep_u.py b/lib/libfuturize/fixes/fix_unicode_keep_u.py new file mode 100644 index 00000000..2e9a4e47 --- /dev/null +++ b/lib/libfuturize/fixes/fix_unicode_keep_u.py @@ -0,0 +1,24 @@ +"""Fixer that changes unicode to str and unichr to chr, but -- unlike the +lib2to3 fix_unicode.py fixer, does not change u"..." into "...". + +The reason is that Py3.3+ supports the u"..." string prefix, and, if +present, the prefix may provide useful information for disambiguating +between byte strings and unicode strings, which is often the hardest part +of the porting task. + +""" + +from lib2to3.pgen2 import token +from lib2to3 import fixer_base + +_mapping = {u"unichr" : u"chr", u"unicode" : u"str"} + +class FixUnicodeKeepU(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "'unicode' | 'unichr'" + + def transform(self, node, results): + if node.type == token.NAME: + new = node.clone() + new.value = _mapping[node.value] + return new diff --git a/lib/libfuturize/fixes/fix_unicode_literals_import.py b/lib/libfuturize/fixes/fix_unicode_literals_import.py new file mode 100644 index 00000000..51c50620 --- /dev/null +++ b/lib/libfuturize/fixes/fix_unicode_literals_import.py @@ -0,0 +1,18 @@ +""" +Adds this import: + + from __future__ import unicode_literals + +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import future_import + +class FixUnicodeLiteralsImport(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + + run_order = 9 + + def transform(self, node, results): + future_import(u"unicode_literals", node) diff --git a/lib/libfuturize/fixes/fix_xrange_with_import.py b/lib/libfuturize/fixes/fix_xrange_with_import.py new file mode 100644 index 00000000..c910f816 --- /dev/null +++ b/lib/libfuturize/fixes/fix_xrange_with_import.py @@ -0,0 +1,20 @@ +""" +For the ``future`` package. + +Turns any xrange calls into range calls and adds this import line: + + from builtins import range + +at the top. +""" + +from lib2to3.fixes.fix_xrange import FixXrange + +from libfuturize.fixer_util import touch_import_top + + +class FixXrangeWithImport(FixXrange): + def transform(self, node, results): + result = super(FixXrangeWithImport, self).transform(node, results) + touch_import_top('builtins', 'range', node) + return result diff --git a/lib/libfuturize/main.py b/lib/libfuturize/main.py new file mode 100644 index 00000000..634c2f25 --- /dev/null +++ b/lib/libfuturize/main.py @@ -0,0 +1,322 @@ +""" +futurize: automatic conversion to clean 2/3 code using ``python-future`` +====================================================================== + +Like Armin Ronacher's modernize.py, ``futurize`` attempts to produce clean +standard Python 3 code that runs on both Py2 and Py3. + +One pass +-------- + +Use it like this on Python 2 code: + + $ futurize --verbose mypython2script.py + +This will attempt to port the code to standard Py3 code that also +provides Py2 compatibility with the help of the right imports from +``future``. + +To write changes to the files, use the -w flag. + +Two stages +---------- + +The ``futurize`` script can also be called in two separate stages. First: + + $ futurize --stage1 mypython2script.py + +This produces more modern Python 2 code that is not yet compatible with Python +3. The tests should still run and the diff should be uncontroversial to apply to +most Python projects that are willing to drop support for Python 2.5 and lower. + +After this, the recommended approach is to explicitly mark all strings that must +be byte-strings with a b'' prefix and all text (unicode) strings with a u'' +prefix, and then invoke the second stage of Python 2 to 2/3 conversion with:: + + $ futurize --stage2 mypython2script.py + +Stage 2 adds a dependency on ``future``. It converts most remaining Python +2-specific code to Python 3 code and adds appropriate imports from ``future`` +to restore Py2 support. + +The command above leaves all unadorned string literals as native strings +(byte-strings on Py2, unicode strings on Py3). If instead you would like all +unadorned string literals to be promoted to unicode, you can also pass this +flag: + + $ futurize --stage2 --unicode-literals mypython2script.py + +This adds the declaration ``from __future__ import unicode_literals`` to the +top of each file, which implicitly declares all unadorned string literals to be +unicode strings (``unicode`` on Py2). + +All imports +----------- + +The --all-imports option forces adding all ``__future__`` imports, +``builtins`` imports, and standard library aliases, even if they don't +seem necessary for the current state of each module. (This can simplify +testing, and can reduce the need to think about Py2 compatibility when editing +the code further.) + +""" + +from __future__ import (absolute_import, print_function, unicode_literals) +import future.utils +from future import __version__ + +import sys +import logging +import optparse +import os + +from lib2to3.main import warn, StdoutRefactoringTool +from lib2to3 import refactor + +from libfuturize.fixes import (lib2to3_fix_names_stage1, + lib2to3_fix_names_stage2, + libfuturize_fix_names_stage1, + libfuturize_fix_names_stage2) + +fixer_pkg = 'libfuturize.fixes' + + +def main(args=None): + """Main program. + + Args: + fixer_pkg: the name of a package where the fixers are located. + args: optional; a list of command line arguments. If omitted, + sys.argv[1:] is used. + + Returns a suggested exit status (0, 1, 2). + """ + + # Set up option parser + parser = optparse.OptionParser(usage="futurize [options] file|dir ...") + parser.add_option("-V", "--version", action="store_true", + help="Report the version number of futurize") + parser.add_option("-a", "--all-imports", action="store_true", + help="Add all __future__ and future imports to each module") + parser.add_option("-1", "--stage1", action="store_true", + help="Modernize Python 2 code only; no compatibility with Python 3 (or dependency on ``future``)") + parser.add_option("-2", "--stage2", action="store_true", + help="Take modernized (stage1) code and add a dependency on ``future`` to provide Py3 compatibility.") + parser.add_option("-0", "--both-stages", action="store_true", + help="Apply both stages 1 and 2") + parser.add_option("-u", "--unicode-literals", action="store_true", + help="Add ``from __future__ import unicode_literals`` to implicitly convert all unadorned string literals '' into unicode strings") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all.\nEither use '-f division -f metaclass' etc. or use the fully-qualified module name: '-f lib2to3.fixes.fix_types -f libfuturize.fixes.fix_unicode_keep_u'") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a fixer from being run.") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + parser.add_option("-p", "--print-function", action="store_true", + help="Modify the grammar so that print() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files.") + parser.add_option("-o", "--output-dir", action="store", type="str", + default="", help="Put output files in this directory " + "instead of overwriting the input files. Requires -n. " + "For Python >= 2.7 only.") + parser.add_option("-W", "--write-unchanged-files", action="store_true", + help="Also write files even if no changes were required" + " (useful with --output-dir); implies -w.") + parser.add_option("--add-suffix", action="store", type="str", default="", + help="Append this string to all output filenames." + " Requires -n if non-empty. For Python >= 2.7 only." + "ex: --add-suffix='3' will generate .py3 files.") + + # Parse command line arguments + flags = {} + refactor_stdin = False + options, args = parser.parse_args(args) + + if options.write_unchanged_files: + flags["write_unchanged_files"] = True + if not options.write: + warn("--write-unchanged-files/-W implies -w.") + options.write = True + # If we allowed these, the original files would be renamed to backup names + # but not replaced. + if options.output_dir and not options.nobackups: + parser.error("Can't use --output-dir/-o without -n.") + if options.add_suffix and not options.nobackups: + parser.error("Can't use --add-suffix without -n.") + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + # Is this ever necessary? + if options.print_function: + flags["print_function"] = True + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + logger = logging.getLogger('libfuturize.main') + + if options.stage1 or options.stage2: + assert options.both_stages is None + options.both_stages = False + else: + options.both_stages = True + + avail_fixes = set() + + if options.stage1 or options.both_stages: + avail_fixes.update(lib2to3_fix_names_stage1) + avail_fixes.update(libfuturize_fix_names_stage1) + if options.stage2 or options.both_stages: + avail_fixes.update(lib2to3_fix_names_stage2) + avail_fixes.update(libfuturize_fix_names_stage2) + + if options.unicode_literals: + avail_fixes.add('libfuturize.fixes.fix_unicode_literals_import') + + if options.version: + print(__version__) + return 0 + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + # for fixname in sorted(refactor.get_all_fix_names(fixer_pkg)): + for fixname in sorted(avail_fixes): + print(fixname) + if not args: + return 0 + if not args: + print("At least one file or directory argument required.", + file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + + unwanted_fixes = set() + for fix in options.nofix: + if ".fix_" in fix: + unwanted_fixes.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + unwanted_fixes.add(found[0]) + + extra_fixes = set() + if options.all_imports: + if options.stage1: + prefix = 'libfuturize.fixes.' + extra_fixes.add(prefix + + 'fix_add__future__imports_except_unicode_literals') + else: + # In case the user hasn't run stage1 for some reason: + prefix = 'libpasteurize.fixes.' + extra_fixes.add(prefix + 'fix_add_all__future__imports') + extra_fixes.add(prefix + 'fix_add_future_standard_library_import') + extra_fixes.add(prefix + 'fix_add_all_future_builtins') + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == 'all': + all_present = True + else: + if ".fix_" in fix: + explicit.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + explicit.add(found[0]) + if len(explicit & unwanted_fixes) > 0: + print("Conflicting usage: the following fixers have been " + "simultaneously requested and disallowed:\n" + + "\n".join(" " + myf for myf in (explicit & unwanted_fixes)), + file=sys.stderr) + return 2 + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + fixer_names = (requested | extra_fixes) - unwanted_fixes + + input_base_dir = os.path.commonprefix(args) + if (input_base_dir and not input_base_dir.endswith(os.sep) + and not os.path.isdir(input_base_dir)): + # One or more similar names were passed, their directory is the base. + # os.path.commonprefix() is ignorant of path elements, this corrects + # for that weird API. + input_base_dir = os.path.dirname(input_base_dir) + if options.output_dir: + input_base_dir = input_base_dir.rstrip(os.sep) + logger.info('Output in %r will mirror the input directory %r layout.', + options.output_dir, input_base_dir) + + # Initialize the refactoring tool + if future.utils.PY26: + extra_kwargs = {} + else: + extra_kwargs = { + 'append_suffix': options.add_suffix, + 'output_dir': options.output_dir, + 'input_base_dir': input_base_dir, + } + + rt = StdoutRefactoringTool( + sorted(fixer_names), flags, sorted(explicit), + options.nobackups, not options.no_diffs, + **extra_kwargs) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, None, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't " \ + "supported on this platform.", file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/lib/libpasteurize/__init__.py b/lib/libpasteurize/__init__.py new file mode 100644 index 00000000..4cb1cbcd --- /dev/null +++ b/lib/libpasteurize/__init__.py @@ -0,0 +1 @@ +# empty to make this a package diff --git a/lib/libpasteurize/fixes/__init__.py b/lib/libpasteurize/fixes/__init__.py new file mode 100644 index 00000000..905aec47 --- /dev/null +++ b/lib/libpasteurize/fixes/__init__.py @@ -0,0 +1,54 @@ +import sys +from lib2to3 import refactor + +# The original set of these fixes comes from lib3to2 (https://bitbucket.org/amentajo/lib3to2): +fix_names = set([ + 'libpasteurize.fixes.fix_add_all__future__imports', # from __future__ import absolute_import etc. on separate lines + 'libpasteurize.fixes.fix_add_future_standard_library_import', # we force adding this import for now, even if it doesn't seem necessary to the fix_future_standard_library fixer, for ease of testing + # 'libfuturize.fixes.fix_order___future__imports', # consolidates to a single line to simplify testing -- UNFINISHED + 'libpasteurize.fixes.fix_future_builtins', # adds "from future.builtins import *" + 'libfuturize.fixes.fix_future_standard_library', # adds "from future import standard_library" + + 'libpasteurize.fixes.fix_annotations', + # 'libpasteurize.fixes.fix_bitlength', # ints have this in Py2.7 + # 'libpasteurize.fixes.fix_bool', # need a decorator or Mixin + # 'libpasteurize.fixes.fix_bytes', # leave bytes as bytes + # 'libpasteurize.fixes.fix_classdecorator', # available in + # Py2.6+ + # 'libpasteurize.fixes.fix_collections', hmmm ... + # 'libpasteurize.fixes.fix_dctsetcomp', # avail in Py27 + 'libpasteurize.fixes.fix_division', # yes + # 'libpasteurize.fixes.fix_except', # avail in Py2.6+ + # 'libpasteurize.fixes.fix_features', # ? + 'libpasteurize.fixes.fix_fullargspec', + # 'libpasteurize.fixes.fix_funcattrs', + 'libpasteurize.fixes.fix_getcwd', + 'libpasteurize.fixes.fix_imports', # adds "from future import standard_library" + 'libpasteurize.fixes.fix_imports2', + # 'libpasteurize.fixes.fix_input', + # 'libpasteurize.fixes.fix_int', + # 'libpasteurize.fixes.fix_intern', + # 'libpasteurize.fixes.fix_itertools', + 'libpasteurize.fixes.fix_kwargs', # yes, we want this + # 'libpasteurize.fixes.fix_memoryview', + # 'libpasteurize.fixes.fix_metaclass', # write a custom handler for + # this + # 'libpasteurize.fixes.fix_methodattrs', # __func__ and __self__ seem to be defined on Py2.7 already + 'libpasteurize.fixes.fix_newstyle', # yes, we want this: explicit inheritance from object. Without new-style classes in Py2, super() will break etc. + # 'libpasteurize.fixes.fix_next', # use a decorator for this + # 'libpasteurize.fixes.fix_numliterals', # prob not + # 'libpasteurize.fixes.fix_open', # huh? + # 'libpasteurize.fixes.fix_print', # no way + 'libpasteurize.fixes.fix_printfunction', # adds __future__ import print_function + # 'libpasteurize.fixes.fix_raise_', # TODO: get this working! + + # 'libpasteurize.fixes.fix_range', # nope + # 'libpasteurize.fixes.fix_reduce', + # 'libpasteurize.fixes.fix_setliteral', + # 'libpasteurize.fixes.fix_str', + # 'libpasteurize.fixes.fix_super', # maybe, if our magic super() isn't robust enough + 'libpasteurize.fixes.fix_throw', # yes, if Py3 supports it + # 'libpasteurize.fixes.fix_unittest', + 'libpasteurize.fixes.fix_unpacking', # yes, this is useful + # 'libpasteurize.fixes.fix_with' # way out of date + ]) diff --git a/lib/libpasteurize/fixes/feature_base.py b/lib/libpasteurize/fixes/feature_base.py new file mode 100644 index 00000000..c36d9a95 --- /dev/null +++ b/lib/libpasteurize/fixes/feature_base.py @@ -0,0 +1,57 @@ +u""" +Base classes for features that are backwards-incompatible. + +Usage: +features = Features() +features.add(Feature("py3k_feature", "power< 'py3k' any* >", "2.7")) +PATTERN = features.PATTERN +""" + +pattern_unformatted = u"%s=%s" # name=pattern, for dict lookups +message_unformatted = u""" +%s is only supported in Python %s and above.""" + +class Feature(object): + u""" + A feature has a name, a pattern, and a minimum version of Python 2.x + required to use the feature (or 3.x if there is no backwards-compatible + version of 2.x) + """ + def __init__(self, name, PATTERN, version): + self.name = name + self._pattern = PATTERN + self.version = version + + def message_text(self): + u""" + Format the above text with the name and minimum version required. + """ + return message_unformatted % (self.name, self.version) + +class Features(set): + u""" + A set of features that generates a pattern for the features it contains. + This set will act like a mapping in that we map names to patterns. + """ + mapping = {} + + def update_mapping(self): + u""" + Called every time we care about the mapping of names to features. + """ + self.mapping = dict([(f.name, f) for f in iter(self)]) + + @property + def PATTERN(self): + u""" + Uses the mapping of names to features to return a PATTERN suitable + for using the lib2to3 patcomp. + """ + self.update_mapping() + return u" |\n".join([pattern_unformatted % (f.name, f._pattern) for f in iter(self)]) + + def __getitem__(self, key): + u""" + Implement a simple mapping to get patterns from names. + """ + return self.mapping[key] diff --git a/lib/libpasteurize/fixes/fix_add_all__future__imports.py b/lib/libpasteurize/fixes/fix_add_all__future__imports.py new file mode 100644 index 00000000..a151f9f1 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_add_all__future__imports.py @@ -0,0 +1,24 @@ +""" +Fixer for adding: + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + +This is done when converting from Py3 to both Py3/Py2. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import future_import + +class FixAddAllFutureImports(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 1 + + def transform(self, node, results): + future_import(u"absolute_import", node) + future_import(u"division", node) + future_import(u"print_function", node) + future_import(u"unicode_literals", node) diff --git a/lib/libpasteurize/fixes/fix_add_all_future_builtins.py b/lib/libpasteurize/fixes/fix_add_all_future_builtins.py new file mode 100644 index 00000000..22911bad --- /dev/null +++ b/lib/libpasteurize/fixes/fix_add_all_future_builtins.py @@ -0,0 +1,37 @@ +""" +For the ``future`` package. + +Adds this import line:: + + from builtins import (ascii, bytes, chr, dict, filter, hex, input, + int, list, map, next, object, oct, open, pow, + range, round, str, super, zip) + +to a module, irrespective of whether each definition is used. + +Adds these imports after any other imports (in an initial block of them). +""" + +from __future__ import unicode_literals + +from lib2to3 import fixer_base + +from libfuturize.fixer_util import touch_import_top + + +class FixAddAllFutureBuiltins(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 1 + + def transform(self, node, results): + # import_str = """(ascii, bytes, chr, dict, filter, hex, input, + # int, list, map, next, object, oct, open, pow, + # range, round, str, super, zip)""" + touch_import_top(u'builtins', '*', node) + + # builtins = """ascii bytes chr dict filter hex input + # int list map next object oct open pow + # range round str super zip""" + # for builtin in sorted(builtins.split(), reverse=True): + # touch_import_top(u'builtins', builtin, node) diff --git a/lib/libpasteurize/fixes/fix_add_future_standard_library_import.py b/lib/libpasteurize/fixes/fix_add_future_standard_library_import.py new file mode 100644 index 00000000..0778406a --- /dev/null +++ b/lib/libpasteurize/fixes/fix_add_future_standard_library_import.py @@ -0,0 +1,23 @@ +""" +For the ``future`` package. + +Adds this import line: + + from future import standard_library + +after any __future__ imports but before any other imports. Doesn't actually +change the imports to Py3 style. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import touch_import_top + +class FixAddFutureStandardLibraryImport(fixer_base.BaseFix): + BM_compatible = True + PATTERN = "file_input" + run_order = 8 + + def transform(self, node, results): + # TODO: add a blank line between any __future__ imports and this? + touch_import_top(u'future', u'standard_library', node) + # TODO: also add standard_library.install_hooks() diff --git a/lib/libpasteurize/fixes/fix_annotations.py b/lib/libpasteurize/fixes/fix_annotations.py new file mode 100644 index 00000000..884b6741 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_annotations.py @@ -0,0 +1,48 @@ +u""" +Fixer to remove function annotations +""" + +from lib2to3 import fixer_base +from lib2to3.pgen2 import token +from lib2to3.fixer_util import syms + +warning_text = u"Removing function annotations completely." + +def param_without_annotations(node): + return node.children[0] + +class FixAnnotations(fixer_base.BaseFix): + + warned = False + + def warn_once(self, node, reason): + if not self.warned: + self.warned = True + self.warning(node, reason=reason) + + PATTERN = u""" + funcdef< 'def' any parameters< '(' [params=any] ')' > ['->' ret=any] ':' any* > + """ + + def transform(self, node, results): + u""" + This just strips annotations from the funcdef completely. + """ + params = results.get(u"params") + ret = results.get(u"ret") + if ret is not None: + assert ret.prev_sibling.type == token.RARROW, u"Invalid return annotation" + self.warn_once(node, reason=warning_text) + ret.prev_sibling.remove() + ret.remove() + if params is None: return + if params.type == syms.typedargslist: + # more than one param in a typedargslist + for param in params.children: + if param.type == syms.tname: + self.warn_once(node, reason=warning_text) + param.replace(param_without_annotations(param)) + elif params.type == syms.tname: + # one param + self.warn_once(node, reason=warning_text) + params.replace(param_without_annotations(params)) diff --git a/lib/libpasteurize/fixes/fix_division.py b/lib/libpasteurize/fixes/fix_division.py new file mode 100644 index 00000000..6a048710 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_division.py @@ -0,0 +1,28 @@ +u""" +Fixer for division: from __future__ import division if needed +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import token, future_import + +def match_division(node): + u""" + __future__.division redefines the meaning of a single slash for division, + so we match that and only that. + """ + slash = token.SLASH + return node.type == slash and not node.next_sibling.type == slash and \ + not node.prev_sibling.type == slash + +class FixDivision(fixer_base.BaseFix): + run_order = 4 # this seems to be ignored? + + def match(self, node): + u""" + Since the tree needs to be fixed once and only once if and only if it + matches, then we can start discarding matches after we make the first. + """ + return match_division(node) + + def transform(self, node, results): + future_import(u"division", node) diff --git a/lib/libpasteurize/fixes/fix_features.py b/lib/libpasteurize/fixes/fix_features.py new file mode 100644 index 00000000..52630f98 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_features.py @@ -0,0 +1,86 @@ +u""" +Warn about features that are not present in Python 2.5, giving a message that +points to the earliest version of Python 2.x (or 3.x, if none) that supports it +""" + +from .feature_base import Feature, Features +from lib2to3 import fixer_base + +FEATURES = [ + #(FeatureName, + # FeaturePattern, + # FeatureMinVersion, + #), + (u"memoryview", + u"power < 'memoryview' trailer < '(' any* ')' > any* >", + u"2.7", + ), + (u"numbers", + u"""import_from< 'from' 'numbers' 'import' any* > | + import_name< 'import' ('numbers' dotted_as_names< any* 'numbers' any* >) >""", + u"2.6", + ), + (u"abc", + u"""import_name< 'import' ('abc' dotted_as_names< any* 'abc' any* >) > | + import_from< 'from' 'abc' 'import' any* >""", + u"2.6", + ), + (u"io", + u"""import_name< 'import' ('io' dotted_as_names< any* 'io' any* >) > | + import_from< 'from' 'io' 'import' any* >""", + u"2.6", + ), + (u"bin", + u"power< 'bin' trailer< '(' any* ')' > any* >", + u"2.6", + ), + (u"formatting", + u"power< any trailer< '.' 'format' > trailer< '(' any* ')' > >", + u"2.6", + ), + (u"nonlocal", + u"global_stmt< 'nonlocal' any* >", + u"3.0", + ), + (u"with_traceback", + u"trailer< '.' 'with_traceback' >", + u"3.0", + ), +] + +class FixFeatures(fixer_base.BaseFix): + + run_order = 9 # Wait until all other fixers have run to check for these + + # To avoid spamming, we only want to warn for each feature once. + features_warned = set() + + # Build features from the list above + features = Features([Feature(name, pattern, version) for \ + name, pattern, version in FEATURES]) + + PATTERN = features.PATTERN + + def match(self, node): + to_ret = super(FixFeatures, self).match(node) + # We want the mapping only to tell us the node's specific information. + try: + del to_ret[u'node'] + except Exception: + # We want it to delete the 'node' from the results + # if it's there, so we don't care if it fails for normal reasons. + pass + return to_ret + + def transform(self, node, results): + for feature_name in results: + if feature_name in self.features_warned: + continue + else: + curr_feature = self.features[feature_name] + if curr_feature.version >= u"3": + fail = self.cannot_convert + else: + fail = self.warning + fail(node, reason=curr_feature.message_text()) + self.features_warned.add(feature_name) diff --git a/lib/libpasteurize/fixes/fix_fullargspec.py b/lib/libpasteurize/fixes/fix_fullargspec.py new file mode 100644 index 00000000..4bd37e15 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_fullargspec.py @@ -0,0 +1,16 @@ +u""" +Fixer for getfullargspec -> getargspec +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name + +warn_msg = u"some of the values returned by getfullargspec are not valid in Python 2 and have no equivalent." + +class FixFullargspec(fixer_base.BaseFix): + + PATTERN = u"'getfullargspec'" + + def transform(self, node, results): + self.warning(node, warn_msg) + return Name(u"getargspec", prefix=node.prefix) diff --git a/lib/libpasteurize/fixes/fix_future_builtins.py b/lib/libpasteurize/fixes/fix_future_builtins.py new file mode 100644 index 00000000..68496799 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_future_builtins.py @@ -0,0 +1,46 @@ +""" +Adds this import line: + + from builtins import XYZ + +for each of the functions XYZ that is used in the module. +""" + +from __future__ import unicode_literals + +from lib2to3 import fixer_base +from lib2to3.pygram import python_symbols as syms +from lib2to3.fixer_util import Name, Call, in_special_context + +from libfuturize.fixer_util import touch_import_top + +# All builtins are: +# from future.builtins.iterators import (filter, map, zip) +# from future.builtins.misc import (ascii, chr, hex, input, isinstance, oct, open, round, super) +# from future.types import (bytes, dict, int, range, str) +# We don't need isinstance any more. + +replaced_builtins = '''filter map zip + ascii chr hex input next oct open round super + bytes dict int range str'''.split() + +expression = '|'.join(["name='{0}'".format(name) for name in replaced_builtins]) + + +class FixFutureBuiltins(fixer_base.BaseFix): + BM_compatible = True + run_order = 9 + + # Currently we only match uses as a function. This doesn't match e.g.: + # if isinstance(s, str): + # ... + PATTERN = """ + power< + ({0}) trailer< '(' args=[any] ')' > + rest=any* > + """.format(expression) + + def transform(self, node, results): + name = results["name"] + touch_import_top(u'builtins', name.value, node) + # name.replace(Name(u"input", prefix=name.prefix)) diff --git a/lib/libpasteurize/fixes/fix_getcwd.py b/lib/libpasteurize/fixes/fix_getcwd.py new file mode 100644 index 00000000..9b7f002b --- /dev/null +++ b/lib/libpasteurize/fixes/fix_getcwd.py @@ -0,0 +1,26 @@ +u""" +Fixer for os.getcwd() -> os.getcwdu(). +Also warns about "from os import getcwd", suggesting the above form. +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name + +class FixGetcwd(fixer_base.BaseFix): + + PATTERN = u""" + power< 'os' trailer< dot='.' name='getcwd' > any* > + | + import_from< 'from' 'os' 'import' bad='getcwd' > + """ + + def transform(self, node, results): + if u"name" in results: + name = results[u"name"] + name.replace(Name(u"getcwdu", prefix=name.prefix)) + elif u"bad" in results: + # Can't convert to getcwdu and then expect to catch every use. + self.cannot_convert(node, u"import os, use os.getcwd() instead.") + return + else: + raise ValueError(u"For some reason, the pattern matcher failed.") diff --git a/lib/libpasteurize/fixes/fix_imports.py b/lib/libpasteurize/fixes/fix_imports.py new file mode 100644 index 00000000..2d6718f1 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_imports.py @@ -0,0 +1,112 @@ +u""" +Fixer for standard library imports renamed in Python 3 +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, is_probably_builtin, Newline, does_tree_import +from lib2to3.pygram import python_symbols as syms +from lib2to3.pgen2 import token +from lib2to3.pytree import Node, Leaf + +from libfuturize.fixer_util import touch_import_top +# from ..fixer_util import NameImport + +# used in simple_mapping_to_pattern() +MAPPING = {u"reprlib": u"repr", + u"winreg": u"_winreg", + u"configparser": u"ConfigParser", + u"copyreg": u"copy_reg", + u"queue": u"Queue", + u"socketserver": u"SocketServer", + u"_markupbase": u"markupbase", + u"test.support": u"test.test_support", + u"dbm.bsd": u"dbhash", + u"dbm.ndbm": u"dbm", + u"dbm.dumb": u"dumbdbm", + u"dbm.gnu": u"gdbm", + u"html.parser": u"HTMLParser", + u"html.entities": u"htmlentitydefs", + u"http.client": u"httplib", + u"http.cookies": u"Cookie", + u"http.cookiejar": u"cookielib", +# "tkinter": "Tkinter", + u"tkinter.dialog": u"Dialog", + u"tkinter._fix": u"FixTk", + u"tkinter.scrolledtext": u"ScrolledText", + u"tkinter.tix": u"Tix", + u"tkinter.constants": u"Tkconstants", + u"tkinter.dnd": u"Tkdnd", + u"tkinter.__init__": u"Tkinter", + u"tkinter.colorchooser": u"tkColorChooser", + u"tkinter.commondialog": u"tkCommonDialog", + u"tkinter.font": u"tkFont", + u"tkinter.ttk": u"ttk", + u"tkinter.messagebox": u"tkMessageBox", + u"tkinter.turtle": u"turtle", + u"urllib.robotparser": u"robotparser", + u"xmlrpc.client": u"xmlrpclib", + u"builtins": u"__builtin__", +} + +# generic strings to help build patterns +# these variables mean (with http.client.HTTPConnection as an example): +# name = http +# attr = client +# used = HTTPConnection +# fmt_name is a formatted subpattern (simple_name_match or dotted_name_match) + +# helps match 'queue', as in 'from queue import ...' +simple_name_match = u"name='%s'" +# helps match 'client', to be used if client has been imported from http +subname_match = u"attr='%s'" +# helps match 'http.client', as in 'import urllib.request' +dotted_name_match = u"dotted_name=dotted_name< %s '.' %s >" +# helps match 'queue', as in 'queue.Queue(...)' +power_onename_match = u"%s" +# helps match 'http.client', as in 'http.client.HTTPConnection(...)' +power_twoname_match = u"power< %s trailer< '.' %s > any* >" +# helps match 'client.HTTPConnection', if 'client' has been imported from http +power_subname_match = u"power< %s any* >" +# helps match 'from http.client import HTTPConnection' +from_import_match = u"from_import=import_from< 'from' %s 'import' imported=any >" +# helps match 'from http import client' +from_import_submod_match = u"from_import_submod=import_from< 'from' %s 'import' (%s | import_as_name< %s 'as' renamed=any > | import_as_names< any* (%s | import_as_name< %s 'as' renamed=any >) any* > ) >" +# helps match 'import urllib.request' +name_import_match = u"name_import=import_name< 'import' %s > | name_import=import_name< 'import' dotted_as_name< %s 'as' renamed=any > >" +# helps match 'import http.client, winreg' +multiple_name_import_match = u"name_import=import_name< 'import' dotted_as_names< names=any* > >" + +def all_patterns(name): + u""" + Accepts a string and returns a pattern of possible patterns involving that name + Called by simple_mapping_to_pattern for each name in the mapping it receives. + """ + + # i_ denotes an import-like node + # u_ denotes a node that appears to be a usage of the name + if u'.' in name: + name, attr = name.split(u'.', 1) + simple_name = simple_name_match % (name) + simple_attr = subname_match % (attr) + dotted_name = dotted_name_match % (simple_name, simple_attr) + i_from = from_import_match % (dotted_name) + i_from_submod = from_import_submod_match % (simple_name, simple_attr, simple_attr, simple_attr, simple_attr) + i_name = name_import_match % (dotted_name, dotted_name) + u_name = power_twoname_match % (simple_name, simple_attr) + u_subname = power_subname_match % (simple_attr) + return u' | \n'.join((i_name, i_from, i_from_submod, u_name, u_subname)) + else: + simple_name = simple_name_match % (name) + i_name = name_import_match % (simple_name, simple_name) + i_from = from_import_match % (simple_name) + u_name = power_onename_match % (simple_name) + return u' | \n'.join((i_name, i_from, u_name)) + + +class FixImports(fixer_base.BaseFix): + + PATTERN = u' | \n'.join([all_patterns(name) for name in MAPPING]) + PATTERN = u' | \n'.join((PATTERN, multiple_name_import_match)) + + def transform(self, node, results): + touch_import_top(u'future', u'standard_library', node) diff --git a/lib/libpasteurize/fixes/fix_imports2.py b/lib/libpasteurize/fixes/fix_imports2.py new file mode 100644 index 00000000..70444e9e --- /dev/null +++ b/lib/libpasteurize/fixes/fix_imports2.py @@ -0,0 +1,174 @@ +u""" +Fixer for complicated imports +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, String, FromImport, Newline, Comma +from libfuturize.fixer_util import touch_import_top + + +TK_BASE_NAMES = (u'ACTIVE', u'ALL', u'ANCHOR', u'ARC',u'BASELINE', u'BEVEL', u'BOTH', + u'BOTTOM', u'BROWSE', u'BUTT', u'CASCADE', u'CENTER', u'CHAR', + u'CHECKBUTTON', u'CHORD', u'COMMAND', u'CURRENT', u'DISABLED', + u'DOTBOX', u'E', u'END', u'EW', u'EXCEPTION', u'EXTENDED', u'FALSE', + u'FIRST', u'FLAT', u'GROOVE', u'HIDDEN', u'HORIZONTAL', u'INSERT', + u'INSIDE', u'LAST', u'LEFT', u'MITER', u'MOVETO', u'MULTIPLE', u'N', + u'NE', u'NO', u'NONE', u'NORMAL', u'NS', u'NSEW', u'NUMERIC', u'NW', + u'OFF', u'ON', u'OUTSIDE', u'PAGES', u'PIESLICE', u'PROJECTING', + u'RADIOBUTTON', u'RAISED', u'READABLE', u'RIDGE', u'RIGHT', + u'ROUND', u'S', u'SCROLL', u'SE', u'SEL', u'SEL_FIRST', u'SEL_LAST', + u'SEPARATOR', u'SINGLE', u'SOLID', u'SUNKEN', u'SW', u'StringTypes', + u'TOP', u'TRUE', u'TclVersion', u'TkVersion', u'UNDERLINE', + u'UNITS', u'VERTICAL', u'W', u'WORD', u'WRITABLE', u'X', u'Y', u'YES', + u'wantobjects') + +PY2MODULES = { + u'urllib2' : ( + u'AbstractBasicAuthHandler', u'AbstractDigestAuthHandler', + u'AbstractHTTPHandler', u'BaseHandler', u'CacheFTPHandler', + u'FTPHandler', u'FileHandler', u'HTTPBasicAuthHandler', + u'HTTPCookieProcessor', u'HTTPDefaultErrorHandler', + u'HTTPDigestAuthHandler', u'HTTPError', u'HTTPErrorProcessor', + u'HTTPHandler', u'HTTPPasswordMgr', + u'HTTPPasswordMgrWithDefaultRealm', u'HTTPRedirectHandler', + u'HTTPSHandler', u'OpenerDirector', u'ProxyBasicAuthHandler', + u'ProxyDigestAuthHandler', u'ProxyHandler', u'Request', + u'StringIO', u'URLError', u'UnknownHandler', u'addinfourl', + u'build_opener', u'install_opener', u'parse_http_list', + u'parse_keqv_list', u'randombytes', u'request_host', u'urlopen'), + u'urllib' : ( + u'ContentTooShortError', u'FancyURLopener',u'URLopener', + u'basejoin', u'ftperrors', u'getproxies', + u'getproxies_environment', u'localhost', u'pathname2url', + u'quote', u'quote_plus', u'splitattr', u'splithost', + u'splitnport', u'splitpasswd', u'splitport', u'splitquery', + u'splittag', u'splittype', u'splituser', u'splitvalue', + u'thishost', u'unquote', u'unquote_plus', u'unwrap', + u'url2pathname', u'urlcleanup', u'urlencode', u'urlopen', + u'urlretrieve',), + u'urlparse' : ( + u'parse_qs', u'parse_qsl', u'urldefrag', u'urljoin', + u'urlparse', u'urlsplit', u'urlunparse', u'urlunsplit'), + u'dbm' : ( + u'ndbm', u'gnu', u'dumb'), + u'anydbm' : ( + u'error', u'open'), + u'whichdb' : ( + u'whichdb',), + u'BaseHTTPServer' : ( + u'BaseHTTPRequestHandler', u'HTTPServer'), + u'CGIHTTPServer' : ( + u'CGIHTTPRequestHandler',), + u'SimpleHTTPServer' : ( + u'SimpleHTTPRequestHandler',), + u'FileDialog' : TK_BASE_NAMES + ( + u'FileDialog', u'LoadFileDialog', u'SaveFileDialog', + u'dialogstates', u'test'), + u'tkFileDialog' : ( + u'Directory', u'Open', u'SaveAs', u'_Dialog', u'askdirectory', + u'askopenfile', u'askopenfilename', u'askopenfilenames', + u'askopenfiles', u'asksaveasfile', u'asksaveasfilename'), + u'SimpleDialog' : TK_BASE_NAMES + ( + u'SimpleDialog',), + u'tkSimpleDialog' : TK_BASE_NAMES + ( + u'askfloat', u'askinteger', u'askstring', u'Dialog'), + u'SimpleXMLRPCServer' : ( + u'CGIXMLRPCRequestHandler', u'SimpleXMLRPCDispatcher', + u'SimpleXMLRPCRequestHandler', u'SimpleXMLRPCServer', + u'list_public_methods', u'remove_duplicates', + u'resolve_dotted_attribute'), + u'DocXMLRPCServer' : ( + u'DocCGIXMLRPCRequestHandler', u'DocXMLRPCRequestHandler', + u'DocXMLRPCServer', u'ServerHTMLDoc',u'XMLRPCDocGenerator'), + } + +MAPPING = { u'urllib.request' : + (u'urllib2', u'urllib'), + u'urllib.error' : + (u'urllib2', u'urllib'), + u'urllib.parse' : + (u'urllib2', u'urllib', u'urlparse'), + u'dbm.__init__' : + (u'anydbm', u'whichdb'), + u'http.server' : + (u'CGIHTTPServer', u'SimpleHTTPServer', u'BaseHTTPServer'), + u'tkinter.filedialog' : + (u'tkFileDialog', u'FileDialog'), + u'tkinter.simpledialog' : + (u'tkSimpleDialog', u'SimpleDialog'), + u'xmlrpc.server' : + (u'DocXMLRPCServer', u'SimpleXMLRPCServer'), + } + +# helps match 'http', as in 'from http.server import ...' +simple_name = u"name='%s'" +# helps match 'server', as in 'from http.server import ...' +simple_attr = u"attr='%s'" +# helps match 'HTTPServer', as in 'from http.server import HTTPServer' +simple_using = u"using='%s'" +# helps match 'urllib.request', as in 'import urllib.request' +dotted_name = u"dotted_name=dotted_name< %s '.' %s >" +# helps match 'http.server', as in 'http.server.HTTPServer(...)' +power_twoname = u"pow=power< %s trailer< '.' %s > trailer< '.' using=any > any* >" +# helps match 'dbm.whichdb', as in 'dbm.whichdb(...)' +power_onename = u"pow=power< %s trailer< '.' using=any > any* >" +# helps match 'from http.server import HTTPServer' +# also helps match 'from http.server import HTTPServer, SimpleHTTPRequestHandler' +# also helps match 'from http.server import *' +from_import = u"from_import=import_from< 'from' %s 'import' (import_as_name< using=any 'as' renamed=any> | in_list=import_as_names< using=any* > | using='*' | using=NAME) >" +# helps match 'import urllib.request' +name_import = u"name_import=import_name< 'import' (%s | in_list=dotted_as_names< imp_list=any* >) >" + +############# +# WON'T FIX # +############# + +# helps match 'import urllib.request as name' +name_import_rename = u"name_import_rename=dotted_as_name< %s 'as' renamed=any >" +# helps match 'from http import server' +from_import_rename = u"from_import_rename=import_from< 'from' %s 'import' (%s | import_as_name< %s 'as' renamed=any > | in_list=import_as_names< any* (%s | import_as_name< %s 'as' renamed=any >) any* >) >" + + +def all_modules_subpattern(): + u""" + Builds a pattern for all toplevel names + (urllib, http, etc) + """ + names_dot_attrs = [mod.split(u".") for mod in MAPPING] + ret = u"( " + u" | ".join([dotted_name % (simple_name % (mod[0]), + simple_attr % (mod[1])) for mod in names_dot_attrs]) + ret += u" | " + ret += u" | ".join([simple_name % (mod[0]) for mod in names_dot_attrs if mod[1] == u"__init__"]) + u" )" + return ret + + +def build_import_pattern(mapping1, mapping2): + u""" + mapping1: A dict mapping py3k modules to all possible py2k replacements + mapping2: A dict mapping py2k modules to the things they do + This builds a HUGE pattern to match all ways that things can be imported + """ + # py3k: urllib.request, py2k: ('urllib2', 'urllib') + yield from_import % (all_modules_subpattern()) + for py3k, py2k in mapping1.items(): + name, attr = py3k.split(u'.') + s_name = simple_name % (name) + s_attr = simple_attr % (attr) + d_name = dotted_name % (s_name, s_attr) + yield name_import % (d_name) + yield power_twoname % (s_name, s_attr) + if attr == u'__init__': + yield name_import % (s_name) + yield power_onename % (s_name) + yield name_import_rename % (d_name) + yield from_import_rename % (s_name, s_attr, s_attr, s_attr, s_attr) + + +class FixImports2(fixer_base.BaseFix): + + run_order = 4 + + PATTERN = u" | \n".join(build_import_pattern(MAPPING, PY2MODULES)) + + def transform(self, node, results): + touch_import_top(u'future', u'standard_library', node) diff --git a/lib/libpasteurize/fixes/fix_kwargs.py b/lib/libpasteurize/fixes/fix_kwargs.py new file mode 100644 index 00000000..290f991e --- /dev/null +++ b/lib/libpasteurize/fixes/fix_kwargs.py @@ -0,0 +1,147 @@ +u""" +Fixer for Python 3 function parameter syntax +This fixer is rather sensitive to incorrect py3k syntax. +""" + +# Note: "relevant" parameters are parameters following the first STAR in the list. + +from lib2to3 import fixer_base +from lib2to3.fixer_util import token, String, Newline, Comma, Name +from libfuturize.fixer_util import indentation, suitify, DoubleStar + +_assign_template = u"%(name)s = %(kwargs)s['%(name)s']; del %(kwargs)s['%(name)s']" +_if_template = u"if '%(name)s' in %(kwargs)s: %(assign)s" +_else_template = u"else: %(name)s = %(default)s" +_kwargs_default_name = u"_3to2kwargs" + +def gen_params(raw_params): + u""" + Generator that yields tuples of (name, default_value) for each parameter in the list + If no default is given, then it is default_value is None (not Leaf(token.NAME, 'None')) + """ + assert raw_params[0].type == token.STAR and len(raw_params) > 2 + curr_idx = 2 # the first place a keyword-only parameter name can be is index 2 + max_idx = len(raw_params) + while curr_idx < max_idx: + curr_item = raw_params[curr_idx] + prev_item = curr_item.prev_sibling + if curr_item.type != token.NAME: + curr_idx += 1 + continue + if prev_item is not None and prev_item.type == token.DOUBLESTAR: + break + name = curr_item.value + nxt = curr_item.next_sibling + if nxt is not None and nxt.type == token.EQUAL: + default_value = nxt.next_sibling + curr_idx += 2 + else: + default_value = None + yield (name, default_value) + curr_idx += 1 + +def remove_params(raw_params, kwargs_default=_kwargs_default_name): + u""" + Removes all keyword-only args from the params list and a bare star, if any. + Does not add the kwargs dict if needed. + Returns True if more action is needed, False if not + (more action is needed if no kwargs dict exists) + """ + assert raw_params[0].type == token.STAR + if raw_params[1].type == token.COMMA: + raw_params[0].remove() + raw_params[1].remove() + kw_params = raw_params[2:] + else: + kw_params = raw_params[3:] + for param in kw_params: + if param.type != token.DOUBLESTAR: + param.remove() + else: + return False + else: + return True + +def needs_fixing(raw_params, kwargs_default=_kwargs_default_name): + u""" + Returns string with the name of the kwargs dict if the params after the first star need fixing + Otherwise returns empty string + """ + found_kwargs = False + needs_fix = False + + for t in raw_params[2:]: + if t.type == token.COMMA: + # Commas are irrelevant at this stage. + continue + elif t.type == token.NAME and not found_kwargs: + # Keyword-only argument: definitely need to fix. + needs_fix = True + elif t.type == token.NAME and found_kwargs: + # Return 'foobar' of **foobar, if needed. + return t.value if needs_fix else u'' + elif t.type == token.DOUBLESTAR: + # Found either '*' from **foobar. + found_kwargs = True + else: + # Never found **foobar. Return a synthetic name, if needed. + return kwargs_default if needs_fix else u'' + +class FixKwargs(fixer_base.BaseFix): + + run_order = 7 # Run after function annotations are removed + + PATTERN = u"funcdef< 'def' NAME parameters< '(' arglist=typedargslist< params=any* > ')' > ':' suite=any >" + + def transform(self, node, results): + params_rawlist = results[u"params"] + for i, item in enumerate(params_rawlist): + if item.type == token.STAR: + params_rawlist = params_rawlist[i:] + break + else: + return + # params is guaranteed to be a list starting with *. + # if fixing is needed, there will be at least 3 items in this list: + # [STAR, COMMA, NAME] is the minimum that we need to worry about. + new_kwargs = needs_fixing(params_rawlist) + # new_kwargs is the name of the kwargs dictionary. + if not new_kwargs: + return + suitify(node) + + # At this point, params_rawlist is guaranteed to be a list + # beginning with a star that includes at least one keyword-only param + # e.g., [STAR, NAME, COMMA, NAME, COMMA, DOUBLESTAR, NAME] or + # [STAR, COMMA, NAME], or [STAR, COMMA, NAME, COMMA, DOUBLESTAR, NAME] + + # Anatomy of a funcdef: ['def', 'name', parameters, ':', suite] + # Anatomy of that suite: [NEWLINE, INDENT, first_stmt, all_other_stmts] + # We need to insert our new stuff before the first_stmt and change the + # first_stmt's prefix. + + suite = node.children[4] + first_stmt = suite.children[2] + ident = indentation(first_stmt) + + for name, default_value in gen_params(params_rawlist): + if default_value is None: + suite.insert_child(2, Newline()) + suite.insert_child(2, String(_assign_template %{u'name':name, u'kwargs':new_kwargs}, prefix=ident)) + else: + suite.insert_child(2, Newline()) + suite.insert_child(2, String(_else_template %{u'name':name, u'default':default_value}, prefix=ident)) + suite.insert_child(2, Newline()) + suite.insert_child(2, String(_if_template %{u'assign':_assign_template %{u'name':name, u'kwargs':new_kwargs}, u'name':name, u'kwargs':new_kwargs}, prefix=ident)) + first_stmt.prefix = ident + suite.children[2].prefix = u"" + + # Now, we need to fix up the list of params. + + must_add_kwargs = remove_params(params_rawlist) + if must_add_kwargs: + arglist = results[u'arglist'] + if len(arglist.children) > 0 and arglist.children[-1].type != token.COMMA: + arglist.append_child(Comma()) + arglist.append_child(DoubleStar(prefix=u" ")) + arglist.append_child(Name(new_kwargs)) diff --git a/lib/libpasteurize/fixes/fix_memoryview.py b/lib/libpasteurize/fixes/fix_memoryview.py new file mode 100644 index 00000000..a20f6f3f --- /dev/null +++ b/lib/libpasteurize/fixes/fix_memoryview.py @@ -0,0 +1,21 @@ +u""" +Fixer for memoryview(s) -> buffer(s). +Explicit because some memoryview methods are invalid on buffer objects. +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name + + +class FixMemoryview(fixer_base.BaseFix): + + explicit = True # User must specify that they want this. + + PATTERN = u""" + power< name='memoryview' trailer< '(' [any] ')' > + rest=any* > + """ + + def transform(self, node, results): + name = results[u"name"] + name.replace(Name(u"buffer", prefix=name.prefix)) diff --git a/lib/libpasteurize/fixes/fix_metaclass.py b/lib/libpasteurize/fixes/fix_metaclass.py new file mode 100644 index 00000000..52dd1d14 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_metaclass.py @@ -0,0 +1,78 @@ +u""" +Fixer for (metaclass=X) -> __metaclass__ = X +Some semantics (see PEP 3115) may be altered in the translation.""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, syms, Node, Leaf, Newline, find_root +from lib2to3.pygram import token +from libfuturize.fixer_util import indentation, suitify +# from ..fixer_util import Name, syms, Node, Leaf, Newline, find_root, indentation, suitify + +def has_metaclass(parent): + results = None + for node in parent.children: + kids = node.children + if node.type == syms.argument: + if kids[0] == Leaf(token.NAME, u"metaclass") and \ + kids[1] == Leaf(token.EQUAL, u"=") and \ + kids[2]: + #Hack to avoid "class X(=):" with this case. + results = [node] + kids + break + elif node.type == syms.arglist: + # Argument list... loop through it looking for: + # Node(*, [*, Leaf(token.NAME, u"metaclass"), Leaf(token.EQUAL, u"="), Leaf(*, *)] + for child in node.children: + if results: break + if child.type == token.COMMA: + #Store the last comma, which precedes the metaclass + comma = child + elif type(child) == Node: + meta = equal = name = None + for arg in child.children: + if arg == Leaf(token.NAME, u"metaclass"): + #We have the (metaclass) part + meta = arg + elif meta and arg == Leaf(token.EQUAL, u"="): + #We have the (metaclass=) part + equal = arg + elif meta and equal: + #Here we go, we have (metaclass=X) + name = arg + results = (comma, meta, equal, name) + break + return results + + +class FixMetaclass(fixer_base.BaseFix): + + PATTERN = u""" + classdef + """ + + def transform(self, node, results): + meta_results = has_metaclass(node) + if not meta_results: return + for meta in meta_results: + meta.remove() + target = Leaf(token.NAME, u"__metaclass__") + equal = Leaf(token.EQUAL, u"=", prefix=u" ") + # meta is the last item in what was returned by has_metaclass(): name + name = meta + name.prefix = u" " + stmt_node = Node(syms.atom, [target, equal, name]) + + suitify(node) + for item in node.children: + if item.type == syms.suite: + for stmt in item.children: + if stmt.type == token.INDENT: + # Insert, in reverse order, the statement, a newline, + # and an indent right after the first indented line + loc = item.children.index(stmt) + 1 + # Keep consistent indentation form + ident = Leaf(token.INDENT, stmt.value) + item.insert_child(loc, ident) + item.insert_child(loc, Newline()) + item.insert_child(loc, stmt_node) + break diff --git a/lib/libpasteurize/fixes/fix_newstyle.py b/lib/libpasteurize/fixes/fix_newstyle.py new file mode 100644 index 00000000..cc6b3adc --- /dev/null +++ b/lib/libpasteurize/fixes/fix_newstyle.py @@ -0,0 +1,33 @@ +u""" +Fixer for "class Foo: ..." -> "class Foo(object): ..." +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import LParen, RParen, Name + +from libfuturize.fixer_util import touch_import_top + + +def insert_object(node, idx): + node.insert_child(idx, RParen()) + node.insert_child(idx, Name(u"object")) + node.insert_child(idx, LParen()) + +class FixNewstyle(fixer_base.BaseFix): + + # Match: + # class Blah: + # and: + # class Blah(): + + PATTERN = u"classdef< 'class' NAME ['(' ')'] colon=':' any >" + + def transform(self, node, results): + colon = results[u"colon"] + idx = node.children.index(colon) + if (node.children[idx-2].value == '(' and + node.children[idx-1].value == ')'): + del node.children[idx-2:idx] + idx -= 2 + insert_object(node, idx) + touch_import_top(u'builtins', 'object', node) diff --git a/lib/libpasteurize/fixes/fix_next.py b/lib/libpasteurize/fixes/fix_next.py new file mode 100644 index 00000000..9ecb6c04 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_next.py @@ -0,0 +1,43 @@ +u""" +Fixer for: +it.__next__() -> it.next(). +next(it) -> it.next(). +""" + +from lib2to3.pgen2 import token +from lib2to3.pygram import python_symbols as syms +from lib2to3 import fixer_base +from lib2to3.fixer_util import Name, Call, find_binding, Attr + +bind_warning = u"Calls to builtin next() possibly shadowed by global binding" + + +class FixNext(fixer_base.BaseFix): + + PATTERN = u""" + power< base=any+ trailer< '.' attr='__next__' > any* > + | + power< head='next' trailer< '(' arg=any ')' > any* > + | + classdef< 'class' base=any+ ':' + suite< any* + funcdef< 'def' + attr='__next__' + parameters< '(' NAME ')' > any+ > + any* > > + """ + + def transform(self, node, results): + assert results + + base = results.get(u"base") + attr = results.get(u"attr") + head = results.get(u"head") + arg_ = results.get(u"arg") + if arg_: + arg = arg_.clone() + head.replace(Attr(Name(unicode(arg),prefix=head.prefix), + Name(u"next"))) + arg_.remove() + elif base: + attr.replace(Name(u"next", prefix=attr.prefix)) diff --git a/lib/libpasteurize/fixes/fix_printfunction.py b/lib/libpasteurize/fixes/fix_printfunction.py new file mode 100644 index 00000000..a2a6e084 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_printfunction.py @@ -0,0 +1,17 @@ +u""" +Fixer for print: from __future__ import print_function. +""" + +from lib2to3 import fixer_base +from libfuturize.fixer_util import future_import + +class FixPrintfunction(fixer_base.BaseFix): + + # explicit = True + + PATTERN = u""" + power< 'print' trailer < '(' any* ')' > any* > + """ + + def transform(self, node, results): + future_import(u"print_function", node) diff --git a/lib/libpasteurize/fixes/fix_raise.py b/lib/libpasteurize/fixes/fix_raise.py new file mode 100644 index 00000000..9c9c192f --- /dev/null +++ b/lib/libpasteurize/fixes/fix_raise.py @@ -0,0 +1,25 @@ +u"""Fixer for 'raise E(V).with_traceback(T)' -> 'raise E, V, T'""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Comma, Node, Leaf, token, syms + +class FixRaise(fixer_base.BaseFix): + + PATTERN = u""" + raise_stmt< 'raise' (power< name=any [trailer< '(' val=any* ')' >] + [trailer< '.' 'with_traceback' > trailer< '(' trc=any ')' >] > | any) ['from' chain=any] >""" + + def transform(self, node, results): + name, val, trc = (results.get(u"name"), results.get(u"val"), results.get(u"trc")) + chain = results.get(u"chain") + if chain is not None: + self.warning(node, u"explicit exception chaining is not supported in Python 2") + chain.prev_sibling.remove() + chain.remove() + if trc is not None: + val = val[0] if val else Leaf(token.NAME, u"None") + val.prefix = trc.prefix = u" " + kids = [Leaf(token.NAME, u"raise"), name.clone(), Comma(), + val.clone(), Comma(), trc.clone()] + raise_stmt = Node(syms.raise_stmt, kids) + node.replace(raise_stmt) diff --git a/lib/libpasteurize/fixes/fix_raise_.py b/lib/libpasteurize/fixes/fix_raise_.py new file mode 100644 index 00000000..0f020c45 --- /dev/null +++ b/lib/libpasteurize/fixes/fix_raise_.py @@ -0,0 +1,35 @@ +u"""Fixer for + raise E(V).with_traceback(T) + to: + from future.utils import raise_ + ... + raise_(E, V, T) + +TODO: FIXME!! + +""" + +from lib2to3 import fixer_base +from lib2to3.fixer_util import Comma, Node, Leaf, token, syms + +class FixRaise(fixer_base.BaseFix): + + PATTERN = u""" + raise_stmt< 'raise' (power< name=any [trailer< '(' val=any* ')' >] + [trailer< '.' 'with_traceback' > trailer< '(' trc=any ')' >] > | any) ['from' chain=any] >""" + + def transform(self, node, results): + FIXME + name, val, trc = (results.get(u"name"), results.get(u"val"), results.get(u"trc")) + chain = results.get(u"chain") + if chain is not None: + self.warning(node, u"explicit exception chaining is not supported in Python 2") + chain.prev_sibling.remove() + chain.remove() + if trc is not None: + val = val[0] if val else Leaf(token.NAME, u"None") + val.prefix = trc.prefix = u" " + kids = [Leaf(token.NAME, u"raise"), name.clone(), Comma(), + val.clone(), Comma(), trc.clone()] + raise_stmt = Node(syms.raise_stmt, kids) + node.replace(raise_stmt) diff --git a/lib/libpasteurize/fixes/fix_throw.py b/lib/libpasteurize/fixes/fix_throw.py new file mode 100644 index 00000000..c0feed1e --- /dev/null +++ b/lib/libpasteurize/fixes/fix_throw.py @@ -0,0 +1,23 @@ +u"""Fixer for 'g.throw(E(V).with_traceback(T))' -> 'g.throw(E, V, T)'""" + +from lib2to3 import fixer_base +from lib2to3.pytree import Node, Leaf +from lib2to3.pgen2 import token +from lib2to3.fixer_util import Comma + +class FixThrow(fixer_base.BaseFix): + + PATTERN = u""" + power< any trailer< '.' 'throw' > + trailer< '(' args=power< exc=any trailer< '(' val=any* ')' > + trailer< '.' 'with_traceback' > trailer< '(' trc=any ')' > > ')' > > + """ + + def transform(self, node, results): + syms = self.syms + exc, val, trc = (results[u"exc"], results[u"val"], results[u"trc"]) + val = val[0] if val else Leaf(token.NAME, u"None") + val.prefix = trc.prefix = u" " + kids = [exc.clone(), Comma(), val.clone(), Comma(), trc.clone()] + args = results[u"args"] + args.children = kids diff --git a/lib/libpasteurize/fixes/fix_unpacking.py b/lib/libpasteurize/fixes/fix_unpacking.py new file mode 100644 index 00000000..c2d3207a --- /dev/null +++ b/lib/libpasteurize/fixes/fix_unpacking.py @@ -0,0 +1,120 @@ +u""" +Fixer for: +(a,)* *b (,c)* [,] = s +for (a,)* *b (,c)* [,] in d: ... +""" + +from lib2to3 import fixer_base +from itertools import count +from lib2to3.fixer_util import (Assign, Comma, Call, Newline, Name, + Number, token, syms, Node, Leaf) +from libfuturize.fixer_util import indentation, suitify, commatize +# from libfuturize.fixer_util import Assign, Comma, Call, Newline, Name, Number, indentation, suitify, commatize, token, syms, Node, Leaf + +def assignment_source(num_pre, num_post, LISTNAME, ITERNAME): + u""" + Accepts num_pre and num_post, which are counts of values + before and after the starg (not including the starg) + Returns a source fit for Assign() from fixer_util + """ + children = [] + pre = unicode(num_pre) + post = unicode(num_post) + # This code builds the assignment source from lib2to3 tree primitives. + # It's not very readable, but it seems like the most correct way to do it. + if num_pre > 0: + pre_part = Node(syms.power, [Name(LISTNAME), Node(syms.trailer, [Leaf(token.LSQB, u"["), Node(syms.subscript, [Leaf(token.COLON, u":"), Number(pre)]), Leaf(token.RSQB, u"]")])]) + children.append(pre_part) + children.append(Leaf(token.PLUS, u"+", prefix=u" ")) + main_part = Node(syms.power, [Leaf(token.LSQB, u"[", prefix=u" "), Name(LISTNAME), Node(syms.trailer, [Leaf(token.LSQB, u"["), Node(syms.subscript, [Number(pre) if num_pre > 0 else Leaf(1, u""), Leaf(token.COLON, u":"), Node(syms.factor, [Leaf(token.MINUS, u"-"), Number(post)]) if num_post > 0 else Leaf(1, u"")]), Leaf(token.RSQB, u"]"), Leaf(token.RSQB, u"]")])]) + children.append(main_part) + if num_post > 0: + children.append(Leaf(token.PLUS, u"+", prefix=u" ")) + post_part = Node(syms.power, [Name(LISTNAME, prefix=u" "), Node(syms.trailer, [Leaf(token.LSQB, u"["), Node(syms.subscript, [Node(syms.factor, [Leaf(token.MINUS, u"-"), Number(post)]), Leaf(token.COLON, u":")]), Leaf(token.RSQB, u"]")])]) + children.append(post_part) + source = Node(syms.arith_expr, children) + return source + +class FixUnpacking(fixer_base.BaseFix): + + PATTERN = u""" + expl=expr_stmt< testlist_star_expr< + pre=(any ',')* + star_expr< '*' name=NAME > + post=(',' any)* [','] > '=' source=any > | + impl=for_stmt< 'for' lst=exprlist< + pre=(any ',')* + star_expr< '*' name=NAME > + post=(',' any)* [','] > 'in' it=any ':' suite=any>""" + + def fix_explicit_context(self, node, results): + pre, name, post, source = (results.get(n) for n in (u"pre", u"name", u"post", u"source")) + pre = [n.clone() for n in pre if n.type == token.NAME] + name.prefix = u" " + post = [n.clone() for n in post if n.type == token.NAME] + target = [n.clone() for n in commatize(pre + [name.clone()] + post)] + # to make the special-case fix for "*z, = ..." correct with the least + # amount of modification, make the left-side into a guaranteed tuple + target.append(Comma()) + source.prefix = u"" + setup_line = Assign(Name(self.LISTNAME), Call(Name(u"list"), [source.clone()])) + power_line = Assign(target, assignment_source(len(pre), len(post), self.LISTNAME, self.ITERNAME)) + return setup_line, power_line + + def fix_implicit_context(self, node, results): + u""" + Only example of the implicit context is + a for loop, so only fix that. + """ + pre, name, post, it = (results.get(n) for n in (u"pre", u"name", u"post", u"it")) + pre = [n.clone() for n in pre if n.type == token.NAME] + name.prefix = u" " + post = [n.clone() for n in post if n.type == token.NAME] + target = [n.clone() for n in commatize(pre + [name.clone()] + post)] + # to make the special-case fix for "*z, = ..." correct with the least + # amount of modification, make the left-side into a guaranteed tuple + target.append(Comma()) + source = it.clone() + source.prefix = u"" + setup_line = Assign(Name(self.LISTNAME), Call(Name(u"list"), [Name(self.ITERNAME)])) + power_line = Assign(target, assignment_source(len(pre), len(post), self.LISTNAME, self.ITERNAME)) + return setup_line, power_line + + def transform(self, node, results): + u""" + a,b,c,d,e,f,*g,h,i = range(100) changes to + _3to2list = list(range(100)) + a,b,c,d,e,f,g,h,i, = _3to2list[:6] + [_3to2list[6:-2]] + _3to2list[-2:] + + and + + for a,b,*c,d,e in iter_of_iters: do_stuff changes to + for _3to2iter in iter_of_iters: + _3to2list = list(_3to2iter) + a,b,c,d,e, = _3to2list[:2] + [_3to2list[2:-2]] + _3to2list[-2:] + do_stuff + """ + self.LISTNAME = self.new_name(u"_3to2list") + self.ITERNAME = self.new_name(u"_3to2iter") + expl, impl = results.get(u"expl"), results.get(u"impl") + if expl is not None: + setup_line, power_line = self.fix_explicit_context(node, results) + setup_line.prefix = expl.prefix + power_line.prefix = indentation(expl.parent) + setup_line.append_child(Newline()) + parent = node.parent + i = node.remove() + parent.insert_child(i, power_line) + parent.insert_child(i, setup_line) + elif impl is not None: + setup_line, power_line = self.fix_implicit_context(node, results) + suitify(node) + suite = [k for k in node.children if k.type == syms.suite][0] + setup_line.prefix = u"" + power_line.prefix = suite.children[1].value + suite.children[2].prefix = indentation(suite.children[2]) + suite.insert_child(2, Newline()) + suite.insert_child(2, power_line) + suite.insert_child(2, Newline()) + suite.insert_child(2, setup_line) + results.get(u"lst").replace(Name(self.ITERNAME, prefix=u" ")) diff --git a/lib/libpasteurize/main.py b/lib/libpasteurize/main.py new file mode 100644 index 00000000..4179174b --- /dev/null +++ b/lib/libpasteurize/main.py @@ -0,0 +1,204 @@ +""" +pasteurize: automatic conversion of Python 3 code to clean 2/3 code +=================================================================== + +``pasteurize`` attempts to convert existing Python 3 code into source-compatible +Python 2 and 3 code. + +Use it like this on Python 3 code: + + $ pasteurize --verbose mypython3script.py + +This removes any Py3-only syntax (e.g. new metaclasses) and adds these +import lines: + + from __future__ import absolute_import + from __future__ import division + from __future__ import print_function + from __future__ import unicode_literals + from future import standard_library + standard_library.install_hooks() + from builtins import * + +To write changes to the files, use the -w flag. + +It also adds any other wrappers needed for Py2/3 compatibility. + +Note that separate stages are not available (or needed) when converting from +Python 3 with ``pasteurize`` as they are when converting from Python 2 with +``futurize``. + +The --all-imports option forces adding all ``__future__`` imports, +``builtins`` imports, and standard library aliases, even if they don't +seem necessary for the current state of each module. (This can simplify +testing, and can reduce the need to think about Py2 compatibility when editing +the code further.) + +""" + +from __future__ import (absolute_import, print_function, unicode_literals) + +import sys +import logging +import optparse +from lib2to3.main import main, warn, StdoutRefactoringTool +from lib2to3 import refactor + +from future import __version__ +from libpasteurize.fixes import fix_names + + +def main(args=None): + """Main program. + + Returns a suggested exit status (0, 1, 2). + """ + # Set up option parser + parser = optparse.OptionParser(usage="pasteurize [options] file|dir ...") + parser.add_option("-V", "--version", action="store_true", + help="Report the version number of pasteurize") + parser.add_option("-a", "--all-imports", action="store_true", + help="Adds all __future__ and future imports to each module") + parser.add_option("-f", "--fix", action="append", default=[], + help="Each FIX specifies a transformation; default: all") + parser.add_option("-j", "--processes", action="store", default=1, + type="int", help="Run 2to3 concurrently") + parser.add_option("-x", "--nofix", action="append", default=[], + help="Prevent a fixer from being run.") + parser.add_option("-l", "--list-fixes", action="store_true", + help="List available transformations") + # parser.add_option("-p", "--print-function", action="store_true", + # help="Modify the grammar so that print() is a function") + parser.add_option("-v", "--verbose", action="store_true", + help="More verbose logging") + parser.add_option("--no-diffs", action="store_true", + help="Don't show diffs of the refactoring") + parser.add_option("-w", "--write", action="store_true", + help="Write back modified files") + parser.add_option("-n", "--nobackups", action="store_true", default=False, + help="Don't write backups for modified files.") + + # Parse command line arguments + refactor_stdin = False + flags = {} + options, args = parser.parse_args(args) + fixer_pkg = 'libpasteurize.fixes' + avail_fixes = fix_names + flags["print_function"] = True + + if not options.write and options.no_diffs: + warn("not writing files and not printing diffs; that's not very useful") + if not options.write and options.nobackups: + parser.error("Can't use -n without -w") + if options.version: + print(__version__) + return 0 + if options.list_fixes: + print("Available transformations for the -f/--fix option:") + for fixname in sorted(avail_fixes): + print(fixname) + if not args: + return 0 + if not args: + print("At least one file or directory argument required.", + file=sys.stderr) + print("Use --help to show usage.", file=sys.stderr) + return 2 + if "-" in args: + refactor_stdin = True + if options.write: + print("Can't write to stdin.", file=sys.stderr) + return 2 + + # Set up logging handler + level = logging.DEBUG if options.verbose else logging.INFO + logging.basicConfig(format='%(name)s: %(message)s', level=level) + + unwanted_fixes = set() + for fix in options.nofix: + if ".fix_" in fix: + unwanted_fixes.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libfuturize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + unwanted_fixes.add(found[0]) + + extra_fixes = set() + if options.all_imports: + prefix = 'libpasteurize.fixes.' + extra_fixes.add(prefix + 'fix_add_all__future__imports') + extra_fixes.add(prefix + 'fix_add_future_standard_library_import') + extra_fixes.add(prefix + 'fix_add_all_future_builtins') + + explicit = set() + if options.fix: + all_present = False + for fix in options.fix: + if fix == 'all': + all_present = True + else: + if ".fix_" in fix: + explicit.add(fix) + else: + # Infer the full module name for the fixer. + # First ensure that no names clash (e.g. + # lib2to3.fixes.fix_blah and libpasteurize.fixes.fix_blah): + found = [f for f in avail_fixes + if f.endswith('fix_{0}'.format(fix))] + if len(found) > 1: + print("Ambiguous fixer name. Choose a fully qualified " + "module name instead from these:\n" + + "\n".join(" " + myf for myf in found), + file=sys.stderr) + return 2 + elif len(found) == 0: + print("Unknown fixer. Use --list-fixes or -l for a list.", + file=sys.stderr) + return 2 + explicit.add(found[0]) + if len(explicit & unwanted_fixes) > 0: + print("Conflicting usage: the following fixers have been " + "simultaneously requested and disallowed:\n" + + "\n".join(" " + myf for myf in (explicit & unwanted_fixes)), + file=sys.stderr) + return 2 + requested = avail_fixes.union(explicit) if all_present else explicit + else: + requested = avail_fixes.union(explicit) + + fixer_names = requested | extra_fixes - unwanted_fixes + + # Initialize the refactoring tool + rt = StdoutRefactoringTool(sorted(fixer_names), flags, set(), + options.nobackups, not options.no_diffs) + + # Refactor all files and directories passed as arguments + if not rt.errors: + if refactor_stdin: + rt.refactor_stdin() + else: + try: + rt.refactor(args, options.write, None, + options.processes) + except refactor.MultiprocessingUnsupported: + assert options.processes > 1 + print("Sorry, -j isn't " \ + "supported on this platform.", file=sys.stderr) + return 1 + rt.summarize() + + # Return error status (0 if rt.errors is zero) + return int(bool(rt.errors)) diff --git a/lib/past/__init__.py b/lib/past/__init__.py new file mode 100644 index 00000000..14713039 --- /dev/null +++ b/lib/past/__init__.py @@ -0,0 +1,90 @@ +# coding=utf-8 +""" +past: compatibility with Python 2 from Python 3 +=============================================== + +``past`` is a package to aid with Python 2/3 compatibility. Whereas ``future`` +contains backports of Python 3 constructs to Python 2, ``past`` provides +implementations of some Python 2 constructs in Python 3 and tools to import and +run Python 2 code in Python 3. It is intended to be used sparingly, as a way of +running old Python 2 code from Python 3 until the code is ported properly. + +Potential uses for libraries: + +- as a step in porting a Python 2 codebase to Python 3 (e.g. with the ``futurize`` script) +- to provide Python 3 support for previously Python 2-only libraries with the + same APIs as on Python 2 -- particularly with regard to 8-bit strings (the + ``past.builtins.str`` type). +- to aid in providing minimal-effort Python 3 support for applications using + libraries that do not yet wish to upgrade their code properly to Python 3, or + wish to upgrade it gradually to Python 3 style. + + +Here are some code examples that run identically on Python 3 and 2:: + + >>> from past.builtins import str as oldstr + + >>> philosopher = oldstr(u'\u5b54\u5b50'.encode('utf-8')) + >>> # This now behaves like a Py2 byte-string on both Py2 and Py3. + >>> # For example, indexing returns a Python 2-like string object, not + >>> # an integer: + >>> philosopher[0] + '\xe5' + >>> type(philosopher[0]) + + + >>> # List-producing versions of range, reduce, map, filter + >>> from past.builtins import range, reduce + >>> range(10) + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + >>> reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) + 15 + + >>> # Other functions removed in Python 3 are resurrected ... + >>> from past.builtins import execfile + >>> execfile('myfile.py') + + >>> from past.builtins import raw_input + >>> name = raw_input('What is your name? ') + What is your name? [cursor] + + >>> from past.builtins import reload + >>> reload(mymodule) # equivalent to imp.reload(mymodule) in Python 3 + + >>> from past.builtins import xrange + >>> for i in xrange(10): + ... pass + + +It also provides import hooks so you can import and use Python 2 modules like +this:: + + $ python3 + + >>> from past.translation import autotranslate + >>> authotranslate('mypy2module') + >>> import mypy2module + +until the authors of the Python 2 modules have upgraded their code. Then, for +example:: + + >>> mypy2module.func_taking_py2_string(oldstr(b'abcd')) + + +Credits +------- + +:Author: Ed Schofield, Jordan M. Adler, et al +:Sponsor: Python Charmers Pty Ltd, Australia: http://pythoncharmers.com + + +Licensing +--------- +Copyright 2013-2019 Python Charmers Pty Ltd, Australia. +The software is distributed under an MIT licence. See LICENSE.txt. +""" + +from future import __version__, __copyright__, __license__ + +__title__ = 'past' +__author__ = 'Ed Schofield' diff --git a/lib/past/builtins/__init__.py b/lib/past/builtins/__init__.py new file mode 100644 index 00000000..1b19e373 --- /dev/null +++ b/lib/past/builtins/__init__.py @@ -0,0 +1,72 @@ +""" +A resurrection of some old functions from Python 2 for use in Python 3. These +should be used sparingly, to help with porting efforts, since code using them +is no longer standard Python 3 code. + +This module provides the following: + +1. Implementations of these builtin functions which have no equivalent on Py3: + +- apply +- chr +- cmp +- execfile + +2. Aliases: + +- intern <- sys.intern +- raw_input <- input +- reduce <- functools.reduce +- reload <- imp.reload +- unichr <- chr +- unicode <- str +- xrange <- range + +3. List-producing versions of the corresponding Python 3 iterator-producing functions: + +- filter +- map +- range +- zip + +4. Forward-ported Py2 types: + +- basestring +- dict +- str +- long +- unicode + +""" + +from future.utils import PY3 +from past.builtins.noniterators import (filter, map, range, reduce, zip) +# from past.builtins.misc import (ascii, hex, input, oct, open) +if PY3: + from past.types import (basestring, + olddict as dict, + oldstr as str, + long, + unicode) +else: + from __builtin__ import (basestring, dict, str, long, unicode) + +from past.builtins.misc import (apply, chr, cmp, execfile, intern, oct, + raw_input, reload, unichr, unicode, xrange) +from past import utils + + +if utils.PY3: + # We only import names that shadow the builtins on Py3. No other namespace + # pollution on Py3. + + # Only shadow builtins on Py3; no new names + __all__ = ['filter', 'map', 'range', 'reduce', 'zip', + 'basestring', 'dict', 'str', 'long', 'unicode', + 'apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', + 'reload', 'unichr', 'xrange' + ] + +else: + # No namespace pollution on Py2 + __all__ = [] diff --git a/lib/past/builtins/misc.py b/lib/past/builtins/misc.py new file mode 100644 index 00000000..ba50aa9e --- /dev/null +++ b/lib/past/builtins/misc.py @@ -0,0 +1,94 @@ +from __future__ import unicode_literals + +import inspect + +from future.utils import PY2, PY3, exec_ + +if PY2: + from collections import Mapping +else: + from collections.abc import Mapping + +if PY3: + import builtins + from collections.abc import Mapping + + def apply(f, *args, **kw): + return f(*args, **kw) + + from past.builtins import str as oldstr + + def chr(i): + """ + Return a byte-string of one character with ordinal i; 0 <= i <= 256 + """ + return oldstr(bytes((i,))) + + def cmp(x, y): + """ + cmp(x, y) -> integer + + Return negative if xy. + """ + return (x > y) - (x < y) + + from sys import intern + + def oct(number): + """oct(number) -> string + + Return the octal representation of an integer + """ + return '0' + builtins.oct(number)[2:] + + raw_input = input + from imp import reload + unicode = str + unichr = chr + xrange = range +else: + import __builtin__ + from collections import Mapping + apply = __builtin__.apply + chr = __builtin__.chr + cmp = __builtin__.cmp + execfile = __builtin__.execfile + intern = __builtin__.intern + oct = __builtin__.oct + raw_input = __builtin__.raw_input + reload = __builtin__.reload + unicode = __builtin__.unicode + unichr = __builtin__.unichr + xrange = __builtin__.xrange + + +if PY3: + def execfile(filename, myglobals=None, mylocals=None): + """ + Read and execute a Python script from a file in the given namespaces. + The globals and locals are dictionaries, defaulting to the current + globals and locals. If only globals is given, locals defaults to it. + """ + if myglobals is None: + # There seems to be no alternative to frame hacking here. + caller_frame = inspect.stack()[1] + myglobals = caller_frame[0].f_globals + mylocals = caller_frame[0].f_locals + elif mylocals is None: + # Only if myglobals is given do we set mylocals to it. + mylocals = myglobals + if not isinstance(myglobals, Mapping): + raise TypeError('globals must be a mapping') + if not isinstance(mylocals, Mapping): + raise TypeError('locals must be a mapping') + with open(filename, "rb") as fin: + source = fin.read() + code = compile(source, filename, "exec") + exec_(code, myglobals, mylocals) + + +if PY3: + __all__ = ['apply', 'chr', 'cmp', 'execfile', 'intern', 'raw_input', + 'reload', 'unichr', 'unicode', 'xrange'] +else: + __all__ = [] diff --git a/lib/past/builtins/noniterators.py b/lib/past/builtins/noniterators.py new file mode 100644 index 00000000..183ffffd --- /dev/null +++ b/lib/past/builtins/noniterators.py @@ -0,0 +1,272 @@ +""" +This module is designed to be used as follows:: + + from past.builtins.noniterators import filter, map, range, reduce, zip + +And then, for example:: + + assert isinstance(range(5), list) + +The list-producing functions this brings in are:: + +- ``filter`` +- ``map`` +- ``range`` +- ``reduce`` +- ``zip`` + +""" + +from __future__ import division, absolute_import, print_function + +from itertools import chain, starmap +import itertools # since zip_longest doesn't exist on Py2 +from past.types import basestring +from past.utils import PY3 + + +def flatmap(f, items): + return chain.from_iterable(map(f, items)) + + +if PY3: + import builtins + + # list-producing versions of the major Python iterating functions + def oldfilter(*args): + """ + filter(function or None, sequence) -> list, tuple, or string + + Return those items of sequence for which function(item) is true. + If function is None, return the items that are true. If sequence + is a tuple or string, return the same type, else return a list. + """ + mytype = type(args[1]) + if isinstance(args[1], basestring): + return mytype().join(builtins.filter(*args)) + elif isinstance(args[1], (tuple, list)): + return mytype(builtins.filter(*args)) + else: + # Fall back to list. Is this the right thing to do? + return list(builtins.filter(*args)) + + # This is surprisingly difficult to get right. For example, the + # solutions here fail with the test cases in the docstring below: + # http://stackoverflow.com/questions/8072755/ + def oldmap(func, *iterables): + """ + map(function, sequence[, sequence, ...]) -> list + + Return a list of the results of applying the function to the + items of the argument sequence(s). If more than one sequence is + given, the function is called with an argument list consisting of + the corresponding item of each sequence, substituting None for + missing values when not all sequences have the same length. If + the function is None, return a list of the items of the sequence + (or a list of tuples if more than one sequence). + + Test cases: + >>> oldmap(None, 'hello world') + ['h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'] + + >>> oldmap(None, range(4)) + [0, 1, 2, 3] + + More test cases are in test_past.test_builtins. + """ + zipped = itertools.zip_longest(*iterables) + l = list(zipped) + if len(l) == 0: + return [] + if func is None: + result = l + else: + result = list(starmap(func, l)) + + # Inspect to see whether it's a simple sequence of tuples + try: + if max([len(item) for item in result]) == 1: + return list(chain.from_iterable(result)) + # return list(flatmap(func, result)) + except TypeError as e: + # Simple objects like ints have no len() + pass + return result + + ############################ + ### For reference, the source code for Py2.7 map function: + # static PyObject * + # builtin_map(PyObject *self, PyObject *args) + # { + # typedef struct { + # PyObject *it; /* the iterator object */ + # int saw_StopIteration; /* bool: did the iterator end? */ + # } sequence; + # + # PyObject *func, *result; + # sequence *seqs = NULL, *sqp; + # Py_ssize_t n, len; + # register int i, j; + # + # n = PyTuple_Size(args); + # if (n < 2) { + # PyErr_SetString(PyExc_TypeError, + # "map() requires at least two args"); + # return NULL; + # } + # + # func = PyTuple_GetItem(args, 0); + # n--; + # + # if (func == Py_None) { + # if (PyErr_WarnPy3k("map(None, ...) not supported in 3.x; " + # "use list(...)", 1) < 0) + # return NULL; + # if (n == 1) { + # /* map(None, S) is the same as list(S). */ + # return PySequence_List(PyTuple_GetItem(args, 1)); + # } + # } + # + # /* Get space for sequence descriptors. Must NULL out the iterator + # * pointers so that jumping to Fail_2 later doesn't see trash. + # */ + # if ((seqs = PyMem_NEW(sequence, n)) == NULL) { + # PyErr_NoMemory(); + # return NULL; + # } + # for (i = 0; i < n; ++i) { + # seqs[i].it = (PyObject*)NULL; + # seqs[i].saw_StopIteration = 0; + # } + # + # /* Do a first pass to obtain iterators for the arguments, and set len + # * to the largest of their lengths. + # */ + # len = 0; + # for (i = 0, sqp = seqs; i < n; ++i, ++sqp) { + # PyObject *curseq; + # Py_ssize_t curlen; + # + # /* Get iterator. */ + # curseq = PyTuple_GetItem(args, i+1); + # sqp->it = PyObject_GetIter(curseq); + # if (sqp->it == NULL) { + # static char errmsg[] = + # "argument %d to map() must support iteration"; + # char errbuf[sizeof(errmsg) + 25]; + # PyOS_snprintf(errbuf, sizeof(errbuf), errmsg, i+2); + # PyErr_SetString(PyExc_TypeError, errbuf); + # goto Fail_2; + # } + # + # /* Update len. */ + # curlen = _PyObject_LengthHint(curseq, 8); + # if (curlen > len) + # len = curlen; + # } + # + # /* Get space for the result list. */ + # if ((result = (PyObject *) PyList_New(len)) == NULL) + # goto Fail_2; + # + # /* Iterate over the sequences until all have stopped. */ + # for (i = 0; ; ++i) { + # PyObject *alist, *item=NULL, *value; + # int numactive = 0; + # + # if (func == Py_None && n == 1) + # alist = NULL; + # else if ((alist = PyTuple_New(n)) == NULL) + # goto Fail_1; + # + # for (j = 0, sqp = seqs; j < n; ++j, ++sqp) { + # if (sqp->saw_StopIteration) { + # Py_INCREF(Py_None); + # item = Py_None; + # } + # else { + # item = PyIter_Next(sqp->it); + # if (item) + # ++numactive; + # else { + # if (PyErr_Occurred()) { + # Py_XDECREF(alist); + # goto Fail_1; + # } + # Py_INCREF(Py_None); + # item = Py_None; + # sqp->saw_StopIteration = 1; + # } + # } + # if (alist) + # PyTuple_SET_ITEM(alist, j, item); + # else + # break; + # } + # + # if (!alist) + # alist = item; + # + # if (numactive == 0) { + # Py_DECREF(alist); + # break; + # } + # + # if (func == Py_None) + # value = alist; + # else { + # value = PyEval_CallObject(func, alist); + # Py_DECREF(alist); + # if (value == NULL) + # goto Fail_1; + # } + # if (i >= len) { + # int status = PyList_Append(result, value); + # Py_DECREF(value); + # if (status < 0) + # goto Fail_1; + # } + # else if (PyList_SetItem(result, i, value) < 0) + # goto Fail_1; + # } + # + # if (i < len && PyList_SetSlice(result, i, len, NULL) < 0) + # goto Fail_1; + # + # goto Succeed; + # + # Fail_1: + # Py_DECREF(result); + # Fail_2: + # result = NULL; + # Succeed: + # assert(seqs); + # for (i = 0; i < n; ++i) + # Py_XDECREF(seqs[i].it); + # PyMem_DEL(seqs); + # return result; + # } + + def oldrange(*args, **kwargs): + return list(builtins.range(*args, **kwargs)) + + def oldzip(*args, **kwargs): + return list(builtins.zip(*args, **kwargs)) + + filter = oldfilter + map = oldmap + range = oldrange + from functools import reduce + zip = oldzip + __all__ = ['filter', 'map', 'range', 'reduce', 'zip'] + +else: + import __builtin__ + # Python 2-builtin ranges produce lists + filter = __builtin__.filter + map = __builtin__.map + range = __builtin__.range + reduce = __builtin__.reduce + zip = __builtin__.zip + __all__ = [] diff --git a/lib/past/translation/__init__.py b/lib/past/translation/__init__.py new file mode 100644 index 00000000..7c678866 --- /dev/null +++ b/lib/past/translation/__init__.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +""" +past.translation +================== + +The ``past.translation`` package provides an import hook for Python 3 which +transparently runs ``futurize`` fixers over Python 2 code on import to convert +print statements into functions, etc. + +It is intended to assist users in migrating to Python 3.x even if some +dependencies still only support Python 2.x. + +Usage +----- + +Once your Py2 package is installed in the usual module search path, the import +hook is invoked as follows: + + >>> from past.translation import autotranslate + >>> autotranslate('mypackagename') + +Or: + + >>> autotranslate(['mypackage1', 'mypackage2']) + +You can unregister the hook using:: + + >>> from past.translation import remove_hooks + >>> remove_hooks() + +Author: Ed Schofield. +Inspired by and based on ``uprefix`` by Vinay M. Sajip. +""" + +import imp +import logging +import marshal +import os +import sys +import copy +from lib2to3.pgen2.parse import ParseError +from lib2to3.refactor import RefactoringTool + +from libfuturize import fixes + + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +myfixes = (list(fixes.libfuturize_fix_names_stage1) + + list(fixes.lib2to3_fix_names_stage1) + + list(fixes.libfuturize_fix_names_stage2) + + list(fixes.lib2to3_fix_names_stage2)) + + +# We detect whether the code is Py2 or Py3 by applying certain lib2to3 fixers +# to it. If the diff is empty, it's Python 3 code. + +py2_detect_fixers = [ +# From stage 1: + 'lib2to3.fixes.fix_apply', + # 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. and move to stage2 + 'lib2to3.fixes.fix_except', + 'lib2to3.fixes.fix_execfile', + 'lib2to3.fixes.fix_exitfunc', + 'lib2to3.fixes.fix_funcattrs', + 'lib2to3.fixes.fix_filter', + 'lib2to3.fixes.fix_has_key', + 'lib2to3.fixes.fix_idioms', + 'lib2to3.fixes.fix_import', # makes any implicit relative imports explicit. (Use with ``from __future__ import absolute_import) + 'lib2to3.fixes.fix_intern', + 'lib2to3.fixes.fix_isinstance', + 'lib2to3.fixes.fix_methodattrs', + 'lib2to3.fixes.fix_ne', + 'lib2to3.fixes.fix_numliterals', # turns 1L into 1, 0755 into 0o755 + 'lib2to3.fixes.fix_paren', + 'lib2to3.fixes.fix_print', + 'lib2to3.fixes.fix_raise', # uses incompatible with_traceback() method on exceptions + 'lib2to3.fixes.fix_renames', + 'lib2to3.fixes.fix_reduce', + # 'lib2to3.fixes.fix_set_literal', # this is unnecessary and breaks Py2.6 support + 'lib2to3.fixes.fix_repr', + 'lib2to3.fixes.fix_standarderror', + 'lib2to3.fixes.fix_sys_exc', + 'lib2to3.fixes.fix_throw', + 'lib2to3.fixes.fix_tuple_params', + 'lib2to3.fixes.fix_types', + 'lib2to3.fixes.fix_ws_comma', + 'lib2to3.fixes.fix_xreadlines', + +# From stage 2: + 'lib2to3.fixes.fix_basestring', + # 'lib2to3.fixes.fix_buffer', # perhaps not safe. Test this. + # 'lib2to3.fixes.fix_callable', # not needed in Py3.2+ + # 'lib2to3.fixes.fix_dict', # TODO: add support for utils.viewitems() etc. + 'lib2to3.fixes.fix_exec', + # 'lib2to3.fixes.fix_future', # we don't want to remove __future__ imports + 'lib2to3.fixes.fix_getcwdu', + # 'lib2to3.fixes.fix_imports', # called by libfuturize.fixes.fix_future_standard_library + # 'lib2to3.fixes.fix_imports2', # we don't handle this yet (dbm) + # 'lib2to3.fixes.fix_input', + # 'lib2to3.fixes.fix_itertools', + # 'lib2to3.fixes.fix_itertools_imports', + 'lib2to3.fixes.fix_long', + # 'lib2to3.fixes.fix_map', + # 'lib2to3.fixes.fix_metaclass', # causes SyntaxError in Py2! Use the one from ``six`` instead + 'lib2to3.fixes.fix_next', + 'lib2to3.fixes.fix_nonzero', # TODO: add a decorator for mapping __bool__ to __nonzero__ + # 'lib2to3.fixes.fix_operator', # we will need support for this by e.g. extending the Py2 operator module to provide those functions in Py3 + 'lib2to3.fixes.fix_raw_input', + # 'lib2to3.fixes.fix_unicode', # strips off the u'' prefix, which removes a potentially helpful source of information for disambiguating unicode/byte strings + # 'lib2to3.fixes.fix_urllib', + 'lib2to3.fixes.fix_xrange', + # 'lib2to3.fixes.fix_zip', +] + + +class RTs: + """ + A namespace for the refactoring tools. This avoids creating these at + the module level, which slows down the module import. (See issue #117). + + There are two possible grammars: with or without the print statement. + Hence we have two possible refactoring tool implementations. + """ + _rt = None + _rtp = None + _rt_py2_detect = None + _rtp_py2_detect = None + + @staticmethod + def setup(): + """ + Call this before using the refactoring tools to create them on demand + if needed. + """ + if None in [RTs._rt, RTs._rtp]: + RTs._rt = RefactoringTool(myfixes) + RTs._rtp = RefactoringTool(myfixes, {'print_function': True}) + + + @staticmethod + def setup_detect_python2(): + """ + Call this before using the refactoring tools to create them on demand + if needed. + """ + if None in [RTs._rt_py2_detect, RTs._rtp_py2_detect]: + RTs._rt_py2_detect = RefactoringTool(py2_detect_fixers) + RTs._rtp_py2_detect = RefactoringTool(py2_detect_fixers, + {'print_function': True}) + + +# We need to find a prefix for the standard library, as we don't want to +# process any files there (they will already be Python 3). +# +# The following method is used by Sanjay Vinip in uprefix. This fails for +# ``conda`` environments: +# # In a non-pythonv virtualenv, sys.real_prefix points to the installed Python. +# # In a pythonv venv, sys.base_prefix points to the installed Python. +# # Outside a virtual environment, sys.prefix points to the installed Python. + +# if hasattr(sys, 'real_prefix'): +# _syslibprefix = sys.real_prefix +# else: +# _syslibprefix = getattr(sys, 'base_prefix', sys.prefix) + +# Instead, we use the portion of the path common to both the stdlib modules +# ``math`` and ``urllib``. + +def splitall(path): + """ + Split a path into all components. From Python Cookbook. + """ + allparts = [] + while True: + parts = os.path.split(path) + if parts[0] == path: # sentinel for absolute paths + allparts.insert(0, parts[0]) + break + elif parts[1] == path: # sentinel for relative paths + allparts.insert(0, parts[1]) + break + else: + path = parts[0] + allparts.insert(0, parts[1]) + return allparts + + +def common_substring(s1, s2): + """ + Returns the longest common substring to the two strings, starting from the + left. + """ + chunks = [] + path1 = splitall(s1) + path2 = splitall(s2) + for (dir1, dir2) in zip(path1, path2): + if dir1 != dir2: + break + chunks.append(dir1) + return os.path.join(*chunks) + +# _stdlibprefix = common_substring(math.__file__, urllib.__file__) + + +def detect_python2(source, pathname): + """ + Returns a bool indicating whether we think the code is Py2 + """ + RTs.setup_detect_python2() + try: + tree = RTs._rt_py2_detect.refactor_string(source, pathname) + except ParseError as e: + if e.msg != 'bad input' or e.value != '=': + raise + tree = RTs._rtp.refactor_string(source, pathname) + + if source != str(tree)[:-1]: # remove added newline + # The above fixers made changes, so we conclude it's Python 2 code + logger.debug('Detected Python 2 code: {0}'.format(pathname)) + return True + else: + logger.debug('Detected Python 3 code: {0}'.format(pathname)) + return False + + +class Py2Fixer(object): + """ + An import hook class that uses lib2to3 for source-to-source translation of + Py2 code to Py3. + """ + + # See the comments on :class:future.standard_library.RenameImport. + # We add this attribute here so remove_hooks() and install_hooks() can + # unambiguously detect whether the import hook is installed: + PY2FIXER = True + + def __init__(self): + self.found = None + self.base_exclude_paths = ['future', 'past'] + self.exclude_paths = copy.copy(self.base_exclude_paths) + self.include_paths = [] + + def include(self, paths): + """ + Pass in a sequence of module names such as 'plotrique.plotting' that, + if present at the leftmost side of the full package name, would + specify the module to be transformed from Py2 to Py3. + """ + self.include_paths += paths + + def exclude(self, paths): + """ + Pass in a sequence of strings such as 'mymodule' that, if + present at the leftmost side of the full package name, would cause + the module not to undergo any source transformation. + """ + self.exclude_paths += paths + + def find_module(self, fullname, path=None): + logger.debug('Running find_module: {0}...'.format(fullname)) + if '.' in fullname: + parent, child = fullname.rsplit('.', 1) + if path is None: + loader = self.find_module(parent, path) + mod = loader.load_module(parent) + path = mod.__path__ + fullname = child + + # Perhaps we should try using the new importlib functionality in Python + # 3.3: something like this? + # thing = importlib.machinery.PathFinder.find_module(fullname, path) + try: + self.found = imp.find_module(fullname, path) + except Exception as e: + logger.debug('Py2Fixer could not find {0}') + logger.debug('Exception was: {0})'.format(fullname, e)) + return None + self.kind = self.found[-1][-1] + if self.kind == imp.PKG_DIRECTORY: + self.pathname = os.path.join(self.found[1], '__init__.py') + elif self.kind == imp.PY_SOURCE: + self.pathname = self.found[1] + return self + + def transform(self, source): + # This implementation uses lib2to3, + # you can override and use something else + # if that's better for you + + # lib2to3 likes a newline at the end + RTs.setup() + source += '\n' + try: + tree = RTs._rt.refactor_string(source, self.pathname) + except ParseError as e: + if e.msg != 'bad input' or e.value != '=': + raise + tree = RTs._rtp.refactor_string(source, self.pathname) + # could optimise a bit for only doing str(tree) if + # getattr(tree, 'was_changed', False) returns True + return str(tree)[:-1] # remove added newline + + def load_module(self, fullname): + logger.debug('Running load_module for {0}...'.format(fullname)) + if fullname in sys.modules: + mod = sys.modules[fullname] + else: + if self.kind in (imp.PY_COMPILED, imp.C_EXTENSION, imp.C_BUILTIN, + imp.PY_FROZEN): + convert = False + # elif (self.pathname.startswith(_stdlibprefix) + # and 'site-packages' not in self.pathname): + # # We assume it's a stdlib package in this case. Is this too brittle? + # # Please file a bug report at https://github.com/PythonCharmers/python-future + # # if so. + # convert = False + # in theory, other paths could be configured to be excluded here too + elif any([fullname.startswith(path) for path in self.exclude_paths]): + convert = False + elif any([fullname.startswith(path) for path in self.include_paths]): + convert = True + else: + convert = False + if not convert: + logger.debug('Excluded {0} from translation'.format(fullname)) + mod = imp.load_module(fullname, *self.found) + else: + logger.debug('Autoconverting {0} ...'.format(fullname)) + mod = imp.new_module(fullname) + sys.modules[fullname] = mod + + # required by PEP 302 + mod.__file__ = self.pathname + mod.__name__ = fullname + mod.__loader__ = self + + # This: + # mod.__package__ = '.'.join(fullname.split('.')[:-1]) + # seems to result in "SystemError: Parent module '' not loaded, + # cannot perform relative import" for a package's __init__.py + # file. We use the approach below. Another option to try is the + # minimal load_module pattern from the PEP 302 text instead. + + # Is the test in the next line more or less robust than the + # following one? Presumably less ... + # ispkg = self.pathname.endswith('__init__.py') + + if self.kind == imp.PKG_DIRECTORY: + mod.__path__ = [ os.path.dirname(self.pathname) ] + mod.__package__ = fullname + else: + #else, regular module + mod.__path__ = [] + mod.__package__ = fullname.rpartition('.')[0] + + try: + cachename = imp.cache_from_source(self.pathname) + if not os.path.exists(cachename): + update_cache = True + else: + sourcetime = os.stat(self.pathname).st_mtime + cachetime = os.stat(cachename).st_mtime + update_cache = cachetime < sourcetime + # # Force update_cache to work around a problem with it being treated as Py3 code??? + # update_cache = True + if not update_cache: + with open(cachename, 'rb') as f: + data = f.read() + try: + code = marshal.loads(data) + except Exception: + # pyc could be corrupt. Regenerate it + update_cache = True + if update_cache: + if self.found[0]: + source = self.found[0].read() + elif self.kind == imp.PKG_DIRECTORY: + with open(self.pathname) as f: + source = f.read() + + if detect_python2(source, self.pathname): + source = self.transform(source) + + code = compile(source, self.pathname, 'exec') + + dirname = os.path.dirname(cachename) + try: + if not os.path.exists(dirname): + os.makedirs(dirname) + with open(cachename, 'wb') as f: + data = marshal.dumps(code) + f.write(data) + except Exception: # could be write-protected + pass + exec(code, mod.__dict__) + except Exception as e: + # must remove module from sys.modules + del sys.modules[fullname] + raise # keep it simple + + if self.found[0]: + self.found[0].close() + return mod + +_hook = Py2Fixer() + + +def install_hooks(include_paths=(), exclude_paths=()): + if isinstance(include_paths, str): + include_paths = (include_paths,) + if isinstance(exclude_paths, str): + exclude_paths = (exclude_paths,) + assert len(include_paths) + len(exclude_paths) > 0, 'Pass at least one argument' + _hook.include(include_paths) + _hook.exclude(exclude_paths) + # _hook.debug = debug + enable = sys.version_info[0] >= 3 # enabled for all 3.x+ + if enable and _hook not in sys.meta_path: + sys.meta_path.insert(0, _hook) # insert at beginning. This could be made a parameter + + # We could return the hook when there are ways of configuring it + #return _hook + + +def remove_hooks(): + if _hook in sys.meta_path: + sys.meta_path.remove(_hook) + + +def detect_hooks(): + """ + Returns True if the import hooks are installed, False if not. + """ + return _hook in sys.meta_path + # present = any([hasattr(hook, 'PY2FIXER') for hook in sys.meta_path]) + # return present + + +class hooks(object): + """ + Acts as a context manager. Use like this: + + >>> from past import translation + >>> with translation.hooks(): + ... import mypy2module + >>> import requests # py2/3 compatible anyway + >>> # etc. + """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + install_hooks() + return self + + def __exit__(self, *args): + if not self.hooks_were_installed: + remove_hooks() + + +class suspend_hooks(object): + """ + Acts as a context manager. Use like this: + + >>> from past import translation + >>> translation.install_hooks() + >>> import http.client + >>> # ... + >>> with translation.suspend_hooks(): + >>> import requests # or others that support Py2/3 + + If the hooks were disabled before the context, they are not installed when + the context is left. + """ + def __enter__(self): + self.hooks_were_installed = detect_hooks() + remove_hooks() + return self + def __exit__(self, *args): + if self.hooks_were_installed: + install_hooks() + + +# alias +autotranslate = install_hooks diff --git a/lib/past/types/__init__.py b/lib/past/types/__init__.py new file mode 100644 index 00000000..91dd270f --- /dev/null +++ b/lib/past/types/__init__.py @@ -0,0 +1,29 @@ +""" +Forward-ports of types from Python 2 for use with Python 3: + +- ``basestring``: equivalent to ``(str, bytes)`` in ``isinstance`` checks +- ``dict``: with list-producing .keys() etc. methods +- ``str``: bytes-like, but iterating over them doesn't product integers +- ``long``: alias of Py3 int with ``L`` suffix in the ``repr`` +- ``unicode``: alias of Py3 str with ``u`` prefix in the ``repr`` + +""" + +from past import utils + +if utils.PY2: + import __builtin__ + basestring = __builtin__.basestring + dict = __builtin__.dict + str = __builtin__.str + long = __builtin__.long + unicode = __builtin__.unicode + __all__ = [] +else: + from .basestring import basestring + from .olddict import olddict + from .oldstr import oldstr + long = int + unicode = str + # from .unicode import unicode + __all__ = ['basestring', 'olddict', 'oldstr', 'long', 'unicode'] diff --git a/lib/past/types/basestring.py b/lib/past/types/basestring.py new file mode 100644 index 00000000..1cab22f6 --- /dev/null +++ b/lib/past/types/basestring.py @@ -0,0 +1,39 @@ +""" +An implementation of the basestring type for Python 3 + +Example use: + +>>> s = b'abc' +>>> assert isinstance(s, basestring) +>>> from past.types import str as oldstr +>>> s2 = oldstr(b'abc') +>>> assert isinstance(s2, basestring) + +""" + +import sys + +from past.utils import with_metaclass, PY2 + +if PY2: + str = unicode + +ver = sys.version_info[:2] + + +class BaseBaseString(type): + def __instancecheck__(cls, instance): + return isinstance(instance, (bytes, str)) + + def __subclasshook__(cls, thing): + # TODO: What should go here? + raise NotImplemented + + +class basestring(with_metaclass(BaseBaseString)): + """ + A minimal backport of the Python 2 basestring type to Py3 + """ + + +__all__ = ['basestring'] diff --git a/lib/past/types/olddict.py b/lib/past/types/olddict.py new file mode 100644 index 00000000..f4f92a26 --- /dev/null +++ b/lib/past/types/olddict.py @@ -0,0 +1,96 @@ +""" +A dict subclass for Python 3 that behaves like Python 2's dict + +Example use: + +>>> from past.builtins import dict +>>> d1 = dict() # instead of {} for an empty dict +>>> d2 = dict(key1='value1', key2='value2') + +The keys, values and items methods now return lists on Python 3.x and there are +methods for iterkeys, itervalues, iteritems, and viewkeys etc. + +>>> for d in (d1, d2): +... assert isinstance(d.keys(), list) +... assert isinstance(d.values(), list) +... assert isinstance(d.items(), list) +""" + +import sys + +from past.utils import with_metaclass + + +_builtin_dict = dict +ver = sys.version_info[:2] + + +class BaseOldDict(type): + def __instancecheck__(cls, instance): + return isinstance(instance, _builtin_dict) + + +class olddict(with_metaclass(BaseOldDict, _builtin_dict)): + """ + A backport of the Python 3 dict object to Py2 + """ + iterkeys = _builtin_dict.keys + viewkeys = _builtin_dict.keys + + def keys(self): + return list(super(olddict, self).keys()) + + itervalues = _builtin_dict.values + viewvalues = _builtin_dict.values + + def values(self): + return list(super(olddict, self).values()) + + iteritems = _builtin_dict.items + viewitems = _builtin_dict.items + + def items(self): + return list(super(olddict, self).items()) + + def has_key(self, k): + """ + D.has_key(k) -> True if D has a key k, else False + """ + return k in self + + # def __new__(cls, *args, **kwargs): + # """ + # dict() -> new empty dictionary + # dict(mapping) -> new dictionary initialized from a mapping object's + # (key, value) pairs + # dict(iterable) -> new dictionary initialized as if via: + # d = {} + # for k, v in iterable: + # d[k] = v + # dict(**kwargs) -> new dictionary initialized with the name=value pairs + # in the keyword argument list. For example: dict(one=1, two=2) + + # """ + # + # if len(args) == 0: + # return super(olddict, cls).__new__(cls) + # # Was: elif isinstance(args[0], newbytes): + # # We use type() instead of the above because we're redefining + # # this to be True for all unicode string subclasses. Warning: + # # This may render newstr un-subclassable. + # elif type(args[0]) == olddict: + # return args[0] + # # elif isinstance(args[0], _builtin_dict): + # # value = args[0] + # else: + # value = args[0] + # return super(olddict, cls).__new__(cls, value) + + def __native__(self): + """ + Hook for the past.utils.native() function + """ + return super(oldbytes, self) + + +__all__ = ['olddict'] diff --git a/lib/past/types/oldstr.py b/lib/past/types/oldstr.py new file mode 100644 index 00000000..a477d884 --- /dev/null +++ b/lib/past/types/oldstr.py @@ -0,0 +1,135 @@ +""" +Pure-Python implementation of a Python 2-like str object for Python 3. +""" + +from numbers import Integral + +from past.utils import PY2, with_metaclass + +if PY2: + from collections import Iterable +else: + from collections.abc import Iterable + +_builtin_bytes = bytes + + +class BaseOldStr(type): + def __instancecheck__(cls, instance): + return isinstance(instance, _builtin_bytes) + + +def unescape(s): + """ + Interprets strings with escape sequences + + Example: + >>> s = unescape(r'abc\\def') # i.e. 'abc\\\\def' + >>> print(s) + 'abc\def' + >>> s2 = unescape('abc\\ndef') + >>> len(s2) + 8 + >>> print(s2) + abc + def + """ + return s.encode().decode('unicode_escape') + + +class oldstr(with_metaclass(BaseOldStr, _builtin_bytes)): + """ + A forward port of the Python 2 8-bit string object to Py3 + """ + # Python 2 strings have no __iter__ method: + @property + def __iter__(self): + raise AttributeError + + def __dir__(self): + return [thing for thing in dir(_builtin_bytes) if thing != '__iter__'] + + # def __new__(cls, *args, **kwargs): + # """ + # From the Py3 bytes docstring: + + # bytes(iterable_of_ints) -> bytes + # bytes(string, encoding[, errors]) -> bytes + # bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer + # bytes(int) -> bytes object of size given by the parameter initialized with null bytes + # bytes() -> empty bytes object + # + # Construct an immutable array of bytes from: + # - an iterable yielding integers in range(256) + # - a text string encoded using the specified encoding + # - any object implementing the buffer API. + # - an integer + # """ + # + # if len(args) == 0: + # return super(newbytes, cls).__new__(cls) + # # Was: elif isinstance(args[0], newbytes): + # # We use type() instead of the above because we're redefining + # # this to be True for all unicode string subclasses. Warning: + # # This may render newstr un-subclassable. + # elif type(args[0]) == newbytes: + # return args[0] + # elif isinstance(args[0], _builtin_bytes): + # value = args[0] + # elif isinstance(args[0], unicode): + # if 'encoding' not in kwargs: + # raise TypeError('unicode string argument without an encoding') + # ### + # # Was: value = args[0].encode(**kwargs) + # # Python 2.6 string encode() method doesn't take kwargs: + # # Use this instead: + # newargs = [kwargs['encoding']] + # if 'errors' in kwargs: + # newargs.append(kwargs['errors']) + # value = args[0].encode(*newargs) + # ### + # elif isinstance(args[0], Iterable): + # if len(args[0]) == 0: + # # What is this? + # raise ValueError('unknown argument type') + # elif len(args[0]) > 0 and isinstance(args[0][0], Integral): + # # It's a list of integers + # value = b''.join([chr(x) for x in args[0]]) + # else: + # raise ValueError('item cannot be interpreted as an integer') + # elif isinstance(args[0], Integral): + # if args[0] < 0: + # raise ValueError('negative count') + # value = b'\x00' * args[0] + # else: + # value = args[0] + # return super(newbytes, cls).__new__(cls, value) + + def __repr__(self): + s = super(oldstr, self).__repr__() # e.g. b'abc' on Py3, b'abc' on Py3 + return s[1:] + + def __str__(self): + s = super(oldstr, self).__str__() # e.g. "b'abc'" or "b'abc\\ndef' + # TODO: fix this: + assert s[:2] == "b'" and s[-1] == "'" + return unescape(s[2:-1]) # e.g. 'abc' or 'abc\ndef' + + def __getitem__(self, y): + if isinstance(y, Integral): + return super(oldstr, self).__getitem__(slice(y, y+1)) + else: + return super(oldstr, self).__getitem__(y) + + def __getslice__(self, *args): + return self.__getitem__(slice(*args)) + + def __contains__(self, key): + if isinstance(key, int): + return False + + def __native__(self): + return bytes(self) + + +__all__ = ['oldstr'] diff --git a/lib/past/utils/__init__.py b/lib/past/utils/__init__.py new file mode 100644 index 00000000..f6b2642d --- /dev/null +++ b/lib/past/utils/__init__.py @@ -0,0 +1,97 @@ +""" +Various non-built-in utility functions and definitions for Py2 +compatibility in Py3. + +For example: + + >>> # The old_div() function behaves like Python 2's / operator + >>> # without "from __future__ import division" + >>> from past.utils import old_div + >>> old_div(3, 2) # like 3/2 in Py2 + 0 + >>> old_div(3, 2.0) # like 3/2.0 in Py2 + 1.5 +""" + +import sys +import numbers + +PY3 = sys.version_info[0] >= 3 +PY2 = sys.version_info[0] == 2 +PYPY = hasattr(sys, 'pypy_translation_info') + + +def with_metaclass(meta, *bases): + """ + Function from jinja2/_compat.py. License: BSD. + + Use it like this:: + + class BaseForm(object): + pass + + class FormType(type): + pass + + class Form(with_metaclass(FormType, BaseForm)): + pass + + This requires a bit of explanation: the basic idea is to make a + dummy metaclass for one level of class instantiation that replaces + itself with the actual metaclass. Because of internal type checks + we also need to make sure that we downgrade the custom metaclass + for one level to something closer to type (that's why __call__ and + __init__ comes back from type etc.). + + This has the advantage over six.with_metaclass of not introducing + dummy classes into the final MRO. + """ + class metaclass(meta): + __call__ = type.__call__ + __init__ = type.__init__ + def __new__(cls, name, this_bases, d): + if this_bases is None: + return type.__new__(cls, name, (), d) + return meta(name, bases, d) + return metaclass('temporary_class', None, {}) + + +def native(obj): + """ + On Py2, this is a no-op: native(obj) -> obj + + On Py3, returns the corresponding native Py3 types that are + superclasses for forward-ported objects from Py2: + + >>> from past.builtins import str, dict + + >>> native(str(b'ABC')) # Output on Py3 follows. On Py2, output is 'ABC' + b'ABC' + >>> type(native(str(b'ABC'))) + bytes + + Existing native types on Py3 will be returned unchanged: + + >>> type(native(b'ABC')) + bytes + """ + if hasattr(obj, '__native__'): + return obj.__native__() + else: + return obj + + +# An alias for future.utils.old_div(): +def old_div(a, b): + """ + Equivalent to ``a / b`` on Python 2 without ``from __future__ import + division``. + + TODO: generalize this to other objects (like arrays etc.) + """ + if isinstance(a, numbers.Integral) and isinstance(b, numbers.Integral): + return a // b + else: + return a / b + +__all__ = ['PY3', 'PY2', 'PYPY', 'with_metaclass', 'native', 'old_div']