Bump cherrypy from 18.8.0 to 18.9.0 (#2266)

* Bump cherrypy from 18.8.0 to 18.9.0 Bumps [cherrypy](https://github.com/cherrypy/cherrypy) from 18.8.0 to 18.9.0. - [Changelog](https://github.com/cherrypy/cherrypy/blob/main/CHANGES.rst) - [Commits](https://github.com/cherrypy/cherrypy/compare/v18.8.0...v18.9.0) --- updated-dependencies: - dependency-name: cherrypy dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update cherrypy==18.9.0 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
2025-08-20 21:33:18 -07:00 · 2024-03-24 15:25:44 -07:00 · 2024-03-24 15:25:44 -07:00 · faef9a94c4
commit faef9a94c4
parent cfefa928be
673 changed files with 159850 additions and 11583 deletions
--- a/lib/win32comext/ifilter/init.py
+++ b/lib/win32comext/ifilter/init.py
@ -0,0 +1 @@
+# empty file to designate as a package.
--- a/lib/win32comext/ifilter/demo/filterDemo.py
+++ b/lib/win32comext/ifilter/demo/filterDemo.py
@ -0,0 +1,300 @@
+import pythoncom
+import pywintypes
+from win32com import storagecon
+from win32com.ifilter import ifilter
+from win32com.ifilter.ifiltercon import *
+
+
+class FileParser:
+    # Property IDs for the Storage Property Set
+    PIDS_BODY = 0x00000013
+
+    # property IDs for HTML Storage Property Set
+    PIDH_DESCRIPTION = "DESCRIPTION"
+    PIDH_HREF = "A.HREF"
+    PIDH_IMGSRC = "IMG.SRC"
+
+    # conversion map to convert ifilter properties to more user friendly names
+    propertyToName = {
+        PSGUID_STORAGE: {PIDS_BODY: "body"},
+        PSGUID_SUMMARYINFORMATION: {
+            PIDSI_TITLE: "title",
+            PIDSI_SUBJECT: "description",
+            PIDSI_AUTHOR: "author",
+            PIDSI_KEYWORDS: "keywords",
+            PIDSI_COMMENTS: "comments",
+        },
+        PSGUID_HTMLINFORMATION: {PIDH_DESCRIPTION: "description"},
+        PSGUID_HTML2_INFORMATION: {PIDH_HREF: "href", PIDH_IMGSRC: "img"},
+    }
+
+    def __init__(self, verbose=False):
+        self.f = None
+        self.stg = None
+        self.verbose = verbose
+
+    def Close(self):
+        self.f = None
+        self.stg = None
+
+    def Parse(self, fileName, maxErrors=10):
+        properties = {}
+
+        try:
+            self._bind_to_filter(fileName)
+            try:
+                flags = self.f.Init(
+                    IFILTER_INIT_APPLY_INDEX_ATTRIBUTES
+                    | IFILTER_INIT_APPLY_OTHER_ATTRIBUTES
+                )
+                if flags == IFILTER_FLAGS_OLE_PROPERTIES and self.stg is not None:
+                    self._trace("filter requires to get properities via ole")
+                    self._get_properties(properties)
+
+                errCnt = 0
+                while True:
+                    try:
+                        # each chunk returns a tuple with the following:-
+                        # idChunk       = The chunk identifier. each chunk has a unique identifier
+                        # breakType     = The type of break that separates the previous chunk from the current chunk. Values are:-
+                        #                 CHUNK_NO_BREAK=0,CHUNK_EOW=1,CHUNK_EOS= 2,CHUNK_EOP= 3,CHUNK_EOC= 4
+                        # flags         = Flags indicate whether this chunk contains a text-type or a value-type property
+                        #                 locale = The language and sublanguage associated with a chunk of text
+                        # attr          = A tuple containing the property to be applied to the chunk. Tuple is (propertyset GUID, property ID)
+                        #                 Property ID can be a number or string
+                        # idChunkSource = The ID of the source of a chunk. The value of the idChunkSource member depends on the nature of the chunk
+                        # startSource   = The offset from which the source text for a derived chunk starts in the source chunk
+                        # lenSource     = The length in characters of the source text from which the current chunk was derived.
+                        #                 A zero value signifies character-by-character correspondence between the source text and the derived text.
+
+                        (
+                            idChunk,
+                            breakType,
+                            flags,
+                            locale,
+                            attr,
+                            idChunkSource,
+                            startSource,
+                            lenSource,
+                        ) = self.f.GetChunk()
+                        self._trace(
+                            "Chunk details:",
+                            idChunk,
+                            breakType,
+                            flags,
+                            locale,
+                            attr,
+                            idChunkSource,
+                            startSource,
+                            lenSource,
+                        )
+
+                        # attempt to map each property to a more user friendly name. If we don't know what it is just return
+                        # the set guid and property id. (note: the id can be a number or a string.
+                        propSet = self.propertyToName.get(attr[0])
+                        if propSet:
+                            propName = propSet.get(attr[1], "%s:%s" % attr)
+                        else:
+                            propName = "%s:%s" % attr
+
+                    except pythoncom.com_error as e:
+                        if e[0] == FILTER_E_END_OF_CHUNKS:
+                            # we have read all the chunks
+                            break
+                        elif e[0] in [
+                            FILTER_E_EMBEDDING_UNAVAILABLE,
+                            FILTER_E_LINK_UNAVAILABLE,
+                        ]:
+                            # the next chunk can't be read. Also keep track of the number of times we
+                            # fail as some filters (ie. the Msoft office ones can get stuck here)
+                            errCnt += 1
+                            if errCnt > maxErrors:
+                                raise
+                            else:
+                                continue
+                        elif e[0] == FILTER_E_ACCESS:
+                            self._trace("Access denied")
+                            raise
+                        elif e[0] == FILTER_E_PASSWORD:
+                            self._trace("Password required")
+                            raise
+                        else:
+                            # any other type of error really can't be recovered from
+                            raise
+
+                    # reset consecutive errors (some filters may get stuck in a lopp if embedding or link failures occurs
+                    errCnt = 0
+
+                    if flags == CHUNK_TEXT:
+                        # its a text segment - get all available text for this chunk.
+                        body_chunks = properties.setdefault(propName, [])
+                        self._get_text(body_chunks)
+                    elif flags == CHUNK_VALUE:
+                        # its a data segment - get the value
+                        properties[propName] = self.f.GetValue()
+                    else:
+                        self._trace("Unknown flag returned by GetChunk:", flags)
+            finally:
+                self.Close()
+
+        except pythoncom.com_error as e:
+            self._trace("ERROR processing file", e)
+            raise
+
+        return properties
+
+    def _bind_to_filter(self, fileName):
+        """
+        See if the file is a structured storage file or a normal file
+        and then return an ifilter interface by calling the appropriate bind/load function
+        """
+        if pythoncom.StgIsStorageFile(fileName):
+            self.stg = pythoncom.StgOpenStorage(
+                fileName, None, storagecon.STGM_READ | storagecon.STGM_SHARE_DENY_WRITE
+            )
+            try:
+                self.f = ifilter.BindIFilterFromStorage(self.stg)
+            except pythoncom.com_error as e:
+                if (
+                    e[0] == -2147467262
+                ):  # 0x80004002: # no interface, try the load interface (this happens for some MSoft files)
+                    self.f = ifilter.LoadIFilter(fileName)
+                else:
+                    raise
+        else:
+            self.f = ifilter.LoadIFilter(fileName)
+            self.stg = None
+
+    def _get_text(self, body_chunks):
+        """
+        Gets all the text for a particular chunk. We need to keep calling get text till all the
+        segments for this chunk are retrieved
+        """
+        while True:
+            try:
+                body_chunks.append(self.f.GetText())
+            except pythoncom.com_error as e:
+                if e[0] in [
+                    FILTER_E_NO_MORE_TEXT,
+                    FILTER_E_NO_MORE_TEXT,
+                    FILTER_E_NO_TEXT,
+                ]:
+                    break
+                else:
+                    raise  # not one of the values we were expecting
+
+    def _get_properties(self, properties):
+        """
+        Use OLE property sets to get base properties
+        """
+        try:
+            pss = self.stg.QueryInterface(pythoncom.IID_IPropertySetStorage)
+        except pythoncom.com_error as e:
+            self._trace("No Property information could be retrieved", e)
+            return
+
+        ps = pss.Open(PSGUID_SUMMARYINFORMATION)
+
+        props = (
+            PIDSI_TITLE,
+            PIDSI_SUBJECT,
+            PIDSI_AUTHOR,
+            PIDSI_KEYWORDS,
+            PIDSI_COMMENTS,
+        )
+
+        title, subject, author, keywords, comments = ps.ReadMultiple(props)
+        if title is not None:
+            properties["title"] = title
+        if subject is not None:
+            properties["description"] = subject
+        if author is not None:
+            properties["author"] = author
+        if keywords is not None:
+            properties["keywords"] = keywords
+        if comments is not None:
+            properties["comments"] = comments
+
+    def _trace(self, *args):
+        if self.verbose:
+            ret = " ".join([str(arg) for arg in args])
+            try:
+                print(ret)
+            except IOError:
+                pass
+
+
+def _usage():
+    import os
+
+    print("Usage: %s filename [verbose [dumpbody]]" % (os.path.basename(sys.argv[0]),))
+    print()
+    print("Where:-")
+    print("filename = name of the file to extract text & properties from")
+    print("verbose = 1=debug output, 0=no debug output (default=0)")
+    print("dumpbody = 1=print text content, 0=don't print content (default=1)")
+    print()
+    print("e.g. to dump a word file called spam.doc go:- filterDemo.py spam.doc")
+    print()
+    print("by default .htm, .txt, .doc, .dot, .xls, .xlt, .ppt are supported")
+    print("you can filter .pdf's by downloading adobes ifilter component. ")
+    print(
+        "(currently found at http://download.adobe.com/pub/adobe/acrobat/win/all/ifilter50.exe)."
+    )
+    print("ifilters for other filetypes are also available.")
+    print()
+    print(
+        "This extension is only supported on win2000 & winXP - because thats the only"
+    )
+    print("place the ifilter stuff is supported. For more info on the API check out ")
+    print("MSDN under ifilters")
+
+
+if __name__ == "__main__":
+    import operator
+    import sys
+
+    fName = ""
+    verbose = False
+    bDumpBody = True
+
+    if len(sys.argv) < 2:
+        _usage()
+        sys.exit(1)
+
+    try:
+        fName = sys.argv[1]
+        verbose = sys.argv[2] != "0"
+        bDumpBody = sys.argv[3] != "0"
+    except:
+        pass
+
+    p = FileParser(verbose)
+    propMap = p.Parse(fName)
+
+    if bDumpBody:
+        print("Body")
+        ch = " ".join(propMap.get("body", []))
+        try:
+            print(ch)
+        except UnicodeError:
+            print(ch.encode("iso8859-1", "ignore"))
+
+    print("Properties")
+    for propName, propValue in propMap.items():
+        print(propName, ":", end=" ")
+        if propName == "body":
+            print(
+                "<%s length: %d>"
+                % (
+                    propName,
+                    reduce(operator.add, [len(p) for p in propValue]),
+                )
+            )
+        elif type(propValue) == type([]):
+            print()
+            for pv in propValue:
+                print(pv)
+        else:
+            print(propValue)
+        print()
--- a/lib/win32comext/ifilter/ifilter.pyd
+++ b/lib/win32comext/ifilter/ifilter.pyd
--- a/lib/win32comext/ifilter/ifiltercon.py
+++ b/lib/win32comext/ifilter/ifiltercon.py
@ -0,0 +1,110 @@
+# manual stuff
+from pywintypes import IID
+
+# Property set GUIDs, wrapped as IID objects. filterDemo.py uses them as the
+# first-level keys when mapping a chunk's (propertyset GUID, property ID)
+# attribute to a friendly name.
+PSGUID_STORAGE = IID("{B725F130-47EF-101A-A5F1-02608C9EEBAC}")
+PSGUID_SUMMARYINFORMATION = IID("{F29F85E0-4FF9-1068-AB91-08002B27B3D9}")
+PSGUID_HTMLINFORMATION = IID("{D1B5D3F0-C0B3-11CF-9A92-00A0C908DBF1}")
+PSGUID_HTML2_INFORMATION = IID("{C82BF597-B831-11D0-B733-00AA00A1EBD2}")
+
+# IFILTER_INIT_* bit flags (each value is a distinct power of two). They are
+# OR-ed together and passed to the filter's Init() call, as filterDemo.py does
+# with APPLY_INDEX_ATTRIBUTES | APPLY_OTHER_ATTRIBUTES.
+IFILTER_INIT_CANON_PARAGRAPHS = 1
+IFILTER_INIT_HARD_LINE_BREAKS = 2
+IFILTER_INIT_CANON_HYPHENS = 4
+IFILTER_INIT_CANON_SPACES = 8
+IFILTER_INIT_APPLY_INDEX_ATTRIBUTES = 16
+IFILTER_INIT_APPLY_CRAWL_ATTRIBUTES = 256
+IFILTER_INIT_APPLY_OTHER_ATTRIBUTES = 32
+IFILTER_INIT_INDEXING_ONLY = 64
+IFILTER_INIT_SEARCH_LINKS = 128
+IFILTER_INIT_FILTER_OWNED_VALUE_OK = 512
+
+# Value compared against the result of Init(); filterDemo.py reads the OLE
+# summary properties itself when Init() returns it.
+IFILTER_FLAGS_OLE_PROPERTIES = 1
+
+# Chunk flags returned by GetChunk(): whether the chunk holds a text-type
+# (read with GetText) or a value-type (read with GetValue) property.
+CHUNK_TEXT = 0x1
+CHUNK_VALUE = 0x2
+# Break types returned by GetChunk(): the kind of break separating the
+# previous chunk from the current one.
+# NOTE(review): EOW/EOS/EOP/EOC presumably stand for end of word / sentence /
+# paragraph / chapter - confirm against the IFilter documentation.
+CHUNK_NO_BREAK = 0
+CHUNK_EOW = 1
+CHUNK_EOS = 2
+CHUNK_EOP = 3
+CHUNK_EOC = 4
+
+# Status codes used with the filter interface. The negative FILTER_E_* values
+# are the error codes that filterDemo.py compares against the code carried by
+# pythoncom.com_error (e.g. FILTER_E_END_OF_CHUNKS ends its GetChunk loop and
+# FILTER_E_NO_MORE_TEXT / FILTER_E_NO_TEXT end its GetText loop).
+# NOTE(review): the positive FILTER_W_* / FILTER_S_* values look like warning /
+# success codes judging by their names - confirm against the SDK header.
+NOT_AN_ERROR = 0x00080000
+FILTER_E_END_OF_CHUNKS = -2147215616
+FILTER_E_NO_MORE_TEXT = -2147215615
+FILTER_E_NO_MORE_VALUES = -2147215614
+FILTER_E_ACCESS = -2147215613
+FILTER_W_MONIKER_CLIPPED = 0x00041704
+FILTER_E_NO_TEXT = -2147215611
+FILTER_E_NO_VALUES = -2147215610
+FILTER_E_EMBEDDING_UNAVAILABLE = -2147215609
+FILTER_E_LINK_UNAVAILABLE = -2147215608
+FILTER_S_LAST_TEXT = 0x00041709
+FILTER_S_LAST_VALUES = 0x0004170A
+FILTER_E_PASSWORD = -2147215605
+FILTER_E_UNKNOWNFORMAT = -2147215604
+
+# Generated by h2py from PropIdl.h
+# Property set / property storage constants. Do not edit the values by hand;
+# they mirror the C header named above.
+
+# PROPSETFLAG_*: property set flag values
+PROPSETFLAG_DEFAULT = 0
+PROPSETFLAG_NONSIMPLE = 1
+PROPSETFLAG_ANSI = 2
+PROPSETFLAG_UNBUFFERED = 4
+PROPSETFLAG_CASE_SENSITIVE = 8
+PROPSET_BEHAVIOR_CASE_SENSITIVE = 1
+# PID_*: reserved / special property IDs
+PID_DICTIONARY = 0
+PID_CODEPAGE = 0x1
+PID_FIRST_USABLE = 0x2
+PID_FIRST_NAME_DEFAULT = 0xFFF
+PID_LOCALE = -2147483648
+PID_MODIFY_TIME = -2147483647
+PID_SECURITY = -2147483646
+PID_BEHAVIOR = -2147483645
+PID_ILLEGAL = -1
+PID_MIN_READONLY = -2147483648
+PID_MAX_READONLY = -1073741825
+PIDDI_THUMBNAIL = 0x00000002
+# PIDSI_*: property IDs in the summary information property set
+# (filterDemo.py reads PIDSI_TITLE .. PIDSI_COMMENTS from the property set
+# opened with PSGUID_SUMMARYINFORMATION)
+PIDSI_TITLE = 0x00000002
+PIDSI_SUBJECT = 0x00000003
+PIDSI_AUTHOR = 0x00000004
+PIDSI_KEYWORDS = 0x00000005
+PIDSI_COMMENTS = 0x00000006
+PIDSI_TEMPLATE = 0x00000007
+PIDSI_LASTAUTHOR = 0x00000008
+PIDSI_REVNUMBER = 0x00000009
+PIDSI_EDITTIME = 0x0000000A
+PIDSI_LASTPRINTED = 0x0000000B
+PIDSI_CREATE_DTM = 0x0000000C
+PIDSI_LASTSAVE_DTM = 0x0000000D
+PIDSI_PAGECOUNT = 0x0000000E
+PIDSI_WORDCOUNT = 0x0000000F
+PIDSI_CHARCOUNT = 0x00000010
+PIDSI_THUMBNAIL = 0x00000011
+PIDSI_APPNAME = 0x00000012
+PIDSI_DOC_SECURITY = 0x00000013
+# PIDDSI_*: NOTE(review): presumably property IDs of the document summary
+# information property set - confirm against PropIdl.h
+PIDDSI_CATEGORY = 0x00000002
+PIDDSI_PRESFORMAT = 0x00000003
+PIDDSI_BYTECOUNT = 0x00000004
+PIDDSI_LINECOUNT = 0x00000005
+PIDDSI_PARCOUNT = 0x00000006
+PIDDSI_SLIDECOUNT = 0x00000007
+PIDDSI_NOTECOUNT = 0x00000008
+PIDDSI_HIDDENCOUNT = 0x00000009
+PIDDSI_MMCLIPCOUNT = 0x0000000A
+PIDDSI_SCALE = 0x0000000B
+PIDDSI_HEADINGPAIR = 0x0000000C
+PIDDSI_DOCPARTS = 0x0000000D
+PIDDSI_MANAGER = 0x0000000E
+PIDDSI_COMPANY = 0x0000000F
+PIDDSI_LINKSDIRTY = 0x00000010
+# PIDMSI_*: NOTE(review): presumably property IDs of a media summary
+# information property set - confirm against PropIdl.h
+PIDMSI_EDITOR = 0x00000002
+PIDMSI_SUPPLIER = 0x00000003
+PIDMSI_SOURCE = 0x00000004
+PIDMSI_SEQUENCE_NO = 0x00000005
+PIDMSI_PROJECT = 0x00000006
+PIDMSI_STATUS = 0x00000007
+PIDMSI_OWNER = 0x00000008
+PIDMSI_RATING = 0x00000009
+PIDMSI_PRODUCTION = 0x0000000A
+PIDMSI_COPYRIGHT = 0x0000000B
+# PRSPEC_*: NOTE(review): presumably the kinds of property specifier (by name
+# string or by property ID) - confirm against PropIdl.h
+PRSPEC_INVALID = -1
+PRSPEC_LPWSTR = 0
+PRSPEC_PROPID = 1
+CCH_MAX_PROPSTG_NAME = 31