splitting large files

commit ee7d471f20 (parent 30cc36c8e4)
iperov · 2021-07-30 13:27:11 +04:00

16 changed files with 805308 additions and 7 deletions


@@ -2,6 +2,8 @@ import time
from enum import IntEnum

import numpy as np
from modelhub import onnx as onnx_models
+from modelhub import cv as cv_models
from xlib import cv as lib_cv
from xlib import os as lib_os
from xlib.facemeta import FaceULandmarks
@@ -87,7 +89,7 @@ class FaceMarkerWorker(BackendWorker):
        marker_state = state.get_marker_state()

        if state.marker_type == MarkerType.OPENCV_LBF:
-           self.opencv_lbf = lib_cv.FaceMarkerLBF()
+           self.opencv_lbf = cv_models.FaceMarkerLBF()
        elif state.marker_type == MarkerType.GOOGLE_FACEMESH:
            self.google_facemesh = onnx_models.FaceMesh(state.google_facemesh_state.device)
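This hunk is the consumer side of the move: the face-marker backend now takes FaceMarkerLBF from the new modelhub.cv package rather than from xlib.cv (the matching export changes appear in modelhub/cv/__init__.py and xlib/cv/__init__.py below).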

main.py (12 changed lines)

@@ -18,7 +18,6 @@ def main():
    subparsers = parser.add_subparsers()

    run_parser = subparsers.add_parser( "run", help="Run the application.")
    run_subparsers = run_parser.add_subparsers()

    def run_DeepFaceLive(args):
@@ -31,6 +30,17 @@ def main():
    p.add_argument('--userdata-dir', default=None, action=fixPathAction, help="Workspace directory.")
    p.set_defaults(func=run_DeepFaceLive)

+   misc_parser = subparsers.add_parser("misc")
+   misc_subparsers = misc_parser.add_subparsers()

+   def run_split_large_files(args):
+       from misc.split_large_files import split_large_files
+       split_large_files()

+   p = misc_subparsers.add_parser('split_large_files')
+   p.set_defaults(func=run_split_large_files)

    def bad_args(arguments):
        parser.print_help()
        exit(0)
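Given these parser definitions, the splitter should be invocable from the repository root like so (a usage sketch; the subcommand names come straight from the add_parser calls above):

    python main.py misc split_large_files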

misc/split_large_files.py (new file, 16 lines)

@@ -0,0 +1,16 @@
from pathlib import Path

from xlib.file import SplittedFile

def split_large_files():
    repo_root = Path(__file__).parent.parent
    files_list = [ (repo_root / 'modelhub' / 'onnx' / 'S3FD' / 'S3FD.onnx', 48*1024*1024),
                   (repo_root / 'modelhub' / 'torch' / 'S3FD' / 'S3FD.pth', 48*1024*1024),
                   (repo_root / 'modelhub' / 'cv' / 'FaceMarkerLBF' / 'lbfmodel.yaml', 34*1024*1024),
                 ]

    for filepath, part_size in files_list:
        print(f'Splitting {filepath}...')
        SplittedFile.split(filepath, part_size=part_size, delete_original=False)

    print('Done')
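Each listed file is split in place into numbered sibling parts, and the original is kept (delete_original=False). Assuming S3FD.onnx is larger than one 48 MiB part, the directory would end up roughly as:

    modelhub/onnx/S3FD/S3FD.onnx
    modelhub/onnx/S3FD/S3FD.onnx.part0
    modelhub/onnx/S3FD/S3FD.onnx.part1

The part sizes are presumably chosen to keep every piece below the roughly 50 MB per-file limits common on Git hosting services.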


@@ -3,12 +3,15 @@ from pathlib import Path
import cv2
import numpy as np
from xlib.image import ImageProcessor
+from xlib.file import SplittedFile

class FaceMarkerLBF:
    def __init__(self):
+       path = Path(__file__).parent / 'lbfmodel.yaml'
+       SplittedFile.merge(path, delete_parts=False)

        marker = self.marker = cv2.face.createFacemarkLBF()
-       marker.loadModel(str(Path(__file__).parent / 'lbfmodel.yaml'))
+       marker.loadModel(str(path))

    def extract(self, img : np.ndarray):
        """

File diff suppressed because it is too large.

File diff suppressed because it is too large.

modelhub/cv/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
from .FaceMarkerLBF.FaceMarkerLBF import FaceMarkerLBF

Binary file not shown.

Binary file not shown.


@@ -7,6 +7,7 @@ from xlib.image import ImageProcessor
from xlib.onnxruntime import (InferenceSession_with_device, ORTDeviceInfo,
                              get_available_devices_info)
+from xlib.file import SplittedFile

class S3FD:
@@ -19,6 +20,8 @@ class S3FD:
            raise Exception(f'device_info {device_info} is not in available devices for S3FD')

+       path = Path(__file__).parent / 'S3FD.onnx'
+       SplittedFile.merge(path, delete_parts=False)

        self._sess = sess = InferenceSession_with_device(str(path), device_info)
        self._input_name = sess.get_inputs()[0].name

Binary file not shown.

Binary file not shown.


@@ -6,6 +6,7 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
from xlib import math as lib_math
+from xlib.file import SplittedFile
from xlib.image import ImageProcessor
from xlib.torch import TorchDeviceInfo, get_cpu_device
@@ -15,9 +16,12 @@ class S3FD:
        if device_info is None:
            device_info = get_cpu_device()
        self.device_info = device_info

+       path = Path(__file__).parent / 'S3FD.pth'
+       SplittedFile.merge(path, delete_parts=False)

        net = self.net = S3FDNet()
-       net.load_state_dict( torch.load(str(Path(__file__).parent / 's3fd.pth')) )
+       net.load_state_dict( torch.load(str(path)) )
        net.eval()

        if not device_info.is_cpu():
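Besides routing the load through SplittedFile.merge, this hunk also normalizes the weight filename from the lowercase 's3fd.pth' to 'S3FD.pth', matching the name used in misc/split_large_files.py above.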


@@ -1,2 +1 @@
from .cv import imread, imwrite
-from .FaceMarkerLBF.FaceMarkerLBF import FaceMarkerLBF
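Together with the new modelhub/cv/__init__.py above, this removal completes the relocation of FaceMarkerLBF from xlib.cv to modelhub.cv; xlib.cv now exports only the imread/imwrite helpers.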

xlib/file/SplittedFile.py (new file, 76 lines)

@@ -0,0 +1,76 @@
import itertools
from pathlib import Path
from typing import List

class SplittedFile:
    @staticmethod
    def split(filepath : Path, part_size : int, delete_original = False):
        """
        splits a file into parts

        raises:
            Exception
            FileNotFoundError
        """
        if part_size == 0:
            raise Exception('part_size == 0')

        if filepath.exists():
            filesize = filepath.stat().st_size

            n_parts = filesize // part_size
            if filesize - part_size*n_parts != 0:
                n_parts += 1
            if n_parts > 100:
                raise Exception('n_parts > 100')

            b = filepath.read_bytes()
            for n in range(n_parts):
                part_filepath = filepath.parent / (filepath.name + f'.part{n}')
                part_filepath.write_bytes(b[n*part_size:(n+1)*part_size])

            if delete_original:
                filepath.unlink()
        else:
            raise FileNotFoundError()

    @staticmethod
    def merge(filepath : Path, delete_parts = False):
        """
        if filepath does not exist, merges the parts of the file if they exist

        example:
            filename.ext.part0
            filename.ext.part1
            ...
            are merged to filename.ext
        """
        if filepath.exists():
            return

        parts : List[Path] = []
        for n in itertools.count(start=0):
            part_filepath = filepath.parent / (filepath.name + f'.part{n}')
            if part_filepath.exists():
                parts.append(part_filepath)
            else:
                break

        if len(parts) != 0:
            bytes_parts = []
            for part_filepath in parts:
                bytes_parts.append( part_filepath.read_bytes() )

            b = b''.join(bytes_parts)
            filepath.write_bytes(b)

            if delete_parts:
                for part_filepath in parts:
                    part_filepath.unlink()
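A minimal round-trip sketch of the new helper (the weights.bin filename is hypothetical; only the split/merge API shown above is used):

    from pathlib import Path
    from xlib.file import SplittedFile

    path = Path('weights.bin')                       # hypothetical example file
    path.write_bytes(b'\x00' * (10 * 1024 * 1024))   # 10 MiB of zeros

    # writes weights.bin.part0 .. weights.bin.part4 (2 MiB each), keeps the original
    SplittedFile.split(path, part_size=2*1024*1024, delete_original=False)

    path.unlink()                                    # simulate a fresh checkout: only parts remain
    SplittedFile.merge(path, delete_parts=False)     # reassembles weights.bin from its parts
    assert path.stat().st_size == 10 * 1024 * 1024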

xlib/file/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
from .SplittedFile import SplittedFile